chore(ci): separate cpu and gpu aws builds

This is done so that the results of their runs can be awaited without
triggering the workflow that waits on them (via the workflow_run event) twice.
This commit is contained in:
David Testé
2023-01-11 19:15:41 +01:00
committed by David Testé
parent f2dd9879b4
commit 3ede5642d8
3 changed files with 177 additions and 20 deletions

View File

@@ -1,5 +1,5 @@
# Build on an AWS instance.
name: AwsEc2Build
# Build on an AWS instance (CPU).
name: AwsEc2BuildCpu
on:
workflow_dispatch:
@@ -24,14 +24,7 @@ on:
type: string
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
MAKE_RULE_TESTS: run-tests run-end-to-end-dataflow-tests
DOCKER_IMAGE_TEST: ghcr.io/zama-ai/concrete-compiler
DOCKER_GPU_OPTION:
CUDA_SUPPORT: OFF
CUDA_PATH: /usr/local/cuda-11.7
GCC_VERSION: 11
jobs:
BuildAndTest:
@@ -63,13 +56,6 @@ jobs:
#echo "SSH_AUTH_SOCK=$SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"
- name: Set up GPU support
if: ${{ startsWith(inputs.instance_type, 'p3.') }}
run: |
echo "CUDA_SUPPORT=ON" >> "${GITHUB_ENV}"
echo "MAKE_RULE_TESTS=run-end-to-end-tests-gpu" >> "${GITHUB_ENV}"
echo "DOCKER_GPU_OPTION=--gpus all" >> "${GITHUB_ENV}"
# Free 4Gb of workspace
- name: Freeing space
run: |
@@ -121,7 +107,7 @@ jobs:
rm -rf /build/*
pip install pytest
sed "s/pytest/python -m pytest/g" -i Makefile
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build CUDA_SUPPORT=${{ env.CUDA_SUPPORT }} CUDA_PATH=${{ env.CUDA_PATH }} all build-end-to-end-dataflow-tests
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build all build-end-to-end-dataflow-tests
echo "Debug: ccache statistics (after the build):"
ccache -s
@@ -155,7 +141,7 @@ jobs:
cd /compiler
pip install pytest
mkdir -p /tmp/concrete_compiler/gpu_tests/
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build ${{ env.MAKE_RULE_TESTS }}
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build run-tests run-end-to-end-dataflow-tests
chmod -R ugo+rwx /tmp/KeySetCache
- name: Prune KeySetCache

171
.github/workflows/aws_build_gpu.yml vendored Normal file
View File

@@ -0,0 +1,171 @@
# Build on an AWS instance (GPU).
# Manually dispatched workflow whose single job builds and tests the compiler
# with CUDA support on the runner named by the `runner_name` input.
name: AwsEc2BuildGpu
on:
workflow_dispatch:
# Every input is declared as a string. All of them except `runner_name` are
# printed by the "Instance configuration used" step.
inputs:
instance_id:
description: 'Instance ID'
type: string
# Also part of the job's concurrency group.
instance_image_id:
description: 'Instance AMI ID'
type: string
# Also part of the job's concurrency group.
instance_type:
description: 'Instance product type'
type: string
# Used as the job's `runs-on` value.
runner_name:
description: 'Action runner name'
type: string
request_id:
description: 'Slab request ID'
type: string
matrix_item:
description: 'Build matrix item'
type: string
env:
# Container image used by the "Build compiler" and "Test compiler" steps.
DOCKER_IMAGE_TEST: ghcr.io/zama-ai/concrete-compiler
# Passed to make as CUDA_PATH in the "Build compiler" step.
CUDA_PATH: /usr/local/cuda-11.7
# NOTE(review): not referenced through `env.` by any step in this workflow;
# presumably read from the process environment by a tool - confirm it is needed.
GCC_VERSION: 11
jobs:
# Single job: build the compiler with CUDA enabled, run the GPU end-to-end
# tests, then maintain the KeySetCache directory and its artifact.
BuildAndTest:
name: Build and test compiler in EC2 with CUDA support
# Runs are grouped by git ref, AMI ID and instance type; starting a new run
# cancels the one already in progress for the same group.
concurrency:
group: ${{ github.ref }}_${{ github.event.inputs.instance_image_id }}_${{ github.event.inputs.instance_type }}
cancel-in-progress: true
# The runner is selected by the dispatch input rather than a fixed label.
runs-on: ${{ github.event.inputs.runner_name }}
if: ${{ !cancelled() }}
steps:
# Print the dispatch inputs to the job log.
- name: Instance configuration used
run: |
echo "IDs: ${{ inputs.instance_id }}"
echo "AMI: ${{ inputs.instance_image_id }}"
echo "Type: ${{ inputs.instance_type }}"
echo "Request ID: ${{ inputs.request_id }}"
echo "Matrix item: ${{ inputs.matrix_item }}"
# An SSH private key is required as some dependencies are from private repos
- name: Set up SSH agent
uses: webfactory/ssh-agent@v0.5.2
with:
ssh-private-key: ${{ secrets.CONCRETE_COMPILER_CI_SSH_PRIVATE }}
- name: Set up env
# The "Install rust" step requires the root user to have a HOME directory, which is not set.
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
#echo "SSH_AUTH_SOCK=$SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"
echo "SSH_AUTH_SOCK_DIR=$(dirname $SSH_AUTH_SOCK)" >> "${GITHUB_ENV}"
# Free 4Gb of workspace by removing Docker images that this job does not use.
# A failed removal only prints a message; it does not fail the step.
- name: Freeing space
run: |
df -h
for image in ubuntu:{16,18}.04 \
node:{12,14,16}{-alpine,} \
buildpack-deps:{stretch,buster,bullseye} \
debian:{9,10,11} alpine:3.{12,13,14} \
moby/buildkit:latest docker:20.10
do
docker image rm $image || echo Please clean remove it from this step
done
df -h
# Check out the repository and its submodules (recursively) using the GH_TOKEN secret.
- name: Fetch repository
uses: actions/checkout@v3
with:
submodules: recursive
token: ${{ secrets.GH_TOKEN }}
# Install the stable Rust toolchain.
- name: Install rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
# This directory is mounted as /build in the build and test containers below.
- name: Create build dir
run: mkdir build
# Build inside the DOCKER_IMAGE_TEST container with GPUs exposed (--gpus all).
# The llvm-project, compiler and build directories of the workspace and the
# SSH agent socket are mounted into the container. The script empties /build,
# rewrites "pytest" to "python -m pytest" in the Makefile, then runs make with
# CUDA_SUPPORT=ON and prints ccache statistics.
- name: Build compiler
uses: addnab/docker-run-action@v3
id: build-compiler
with:
registry: ghcr.io
image: ${{ env.DOCKER_IMAGE_TEST }}
username: ${{ secrets.GHCR_LOGIN }}
password: ${{ secrets.GHCR_PASSWORD }}
options: >-
-v ${{ github.workspace }}/llvm-project:/llvm-project
-v ${{ github.workspace }}/compiler:/compiler
-v ${{ github.workspace }}/build:/build
-v ${{ env.SSH_AUTH_SOCK }}:/ssh.socket
-e SSH_AUTH_SOCK=/ssh.socket
--gpus all
shell: bash
run: |
set -e
cd /compiler
rm -rf /build/*
pip install pytest
sed "s/pytest/python -m pytest/g" -i Makefile
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build CUDA_SUPPORT=ON CUDA_PATH=${{ env.CUDA_PATH }} all build-end-to-end-dataflow-tests
echo "Debug: ccache statistics (after the build):"
ccache -s
# Skipped when the head ref contains 'newkeysetcache'. A failure here does not
# fail the job (continue-on-error).
- name: Download KeySetCache
if: ${{ !contains(github.head_ref, 'newkeysetcache') }}
continue-on-error: true
run: |
cd compiler
GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} make keysetcache_ci_populated
# Reference timestamp compared against by the "Prune KeySetCache" step.
- name: Mark KeySetCache
run: |
touch keysetcache.timestamp
# Run the GPU end-to-end tests (make rule run-end-to-end-tests-gpu) in the same
# image, with the workspace KeySetCache directory mounted at /tmp/KeySetCache.
# The final chmod makes the cache readable, writable and executable by all users.
- name: Test compiler
uses: addnab/docker-run-action@v3
with:
registry: ghcr.io
image: ${{ env.DOCKER_IMAGE_TEST }}
username: ${{ secrets.GHCR_LOGIN }}
password: ${{ secrets.GHCR_PASSWORD }}
options: >-
-v ${{ github.workspace }}/llvm-project:/llvm-project
-v ${{ github.workspace }}/compiler:/compiler
-v ${{ github.workspace }}/KeySetCache:/tmp/KeySetCache
-v ${{ github.workspace }}/build:/build
--gpus all
shell: bash
run: |
set -e
cd /compiler
pip install pytest
mkdir -p /tmp/concrete_compiler/gpu_tests/
make DATAFLOW_EXECUTION_ENABLED=ON CCACHE=ON Python3_EXECUTABLE=$PYTHON_EXEC BUILD_DIR=/build run-end-to-end-tests-gpu
chmod -R ugo+rwx /tmp/KeySetCache
# Delete cache directories that are not newer than keysetcache.timestamp and
# report the cache size before and after.
- name: Prune KeySetCache
run: |
echo "Previous cache size is"
du -sh KeySetCache
echo "Cleaning"
find KeySetCache/* -maxdepth 1 -mindepth 1 -not -newer keysetcache.timestamp -type d -exec rm -vr {} \;
echo "New cache size is"
du -sh KeySetCache
# Only on the main branch: upload the cache as artifact KeySetCacheV2, kept for 90 days.
- name: Upload KeySetCache
if: ${{ github.ref == 'refs/heads/main' }}
uses: actions/upload-artifact@v3
with:
name: KeySetCacheV2
path: KeySetCache
retention-days: 90
# Only on the main branch: clean up KeySetCacheV2 artifacts.
# NOTE(review): max-count: 1 presumably keeps just the most recent artifact - confirm against the action's documentation.
- name: Cleanup Old KeySetCache
uses: Remagpie/gha-remove-artifact@v1
if: ${{ github.ref == 'refs/heads/main' }}
with:
only-name: KeySetCacheV2
max-count: 1

View File

@@ -21,13 +21,13 @@ security_group= ["sg-0f8b52622a2669491", ]
# Trigger CPU build
[command.cpu-build]
workflow = "aws_build.yml"
workflow = "aws_build_cpu.yml"
profile = "m6i-old"
check_run_name = "AWS CPU build (Slab)"
# Trigger GPU build
[command.gpu-build]
workflow = "aws_build.yml"
workflow = "aws_build_gpu.yml"
profile = "gpu"
check_run_name = "AWS GPU build (Slab)"