mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-09 22:57:59 -05:00
chore(ci): transfer all GPU CI to hyperstack
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||||
name: TFHE Cuda Backend - Base tests on H100
|
name: TFHE Cuda Backend - Fast tests on H100
|
||||||
|
|
||||||
env:
|
env:
|
||||||
CARGO_TERM_COLOR: always
|
CARGO_TERM_COLOR: always
|
||||||
@@ -49,7 +49,7 @@ jobs:
|
|||||||
- tfhe/src/c_api/**
|
- tfhe/src/c_api/**
|
||||||
- 'tfhe/docs/**.md'
|
- 'tfhe/docs/**.md'
|
||||||
- Makefile
|
- Makefile
|
||||||
- '.github/workflows/hyperstack**'
|
- '.github/workflows/gpu_fast_h100_tests.yml'
|
||||||
- scripts/**
|
- scripts/**
|
||||||
- ci/**
|
- ci/**
|
||||||
|
|
||||||
@@ -109,6 +109,8 @@ jobs:
|
|||||||
|
|
||||||
- name: Checkout tfhe-rs
|
- name: Checkout tfhe-rs
|
||||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||||
|
with:
|
||||||
|
persist-credentials: 'false'
|
||||||
|
|
||||||
- name: Set up home
|
- name: Set up home
|
||||||
run: |
|
run: |
|
||||||
@@ -170,7 +172,7 @@ jobs:
|
|||||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||||
env:
|
env:
|
||||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||||
SLACK_MESSAGE: "Base H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||||
|
|
||||||
teardown-instance:
|
teardown-instance:
|
||||||
name: Teardown instance (cuda-h100-tests)
|
name: Teardown instance (cuda-h100-tests)
|
||||||
@@ -47,6 +47,10 @@ jobs:
|
|||||||
- tfhe/src/high_level_api/**
|
- tfhe/src/high_level_api/**
|
||||||
- tfhe/src/c_api/**
|
- tfhe/src/c_api/**
|
||||||
- 'tfhe/docs/**.md'
|
- 'tfhe/docs/**.md'
|
||||||
|
- '.github/workflows/gpu_fast_tests.yml'
|
||||||
|
- Makefile
|
||||||
|
- scripts/**
|
||||||
|
- ci/**
|
||||||
|
|
||||||
setup-instance:
|
setup-instance:
|
||||||
name: Setup instance (cuda-tests)
|
name: Setup instance (cuda-tests)
|
||||||
@@ -65,7 +69,7 @@ jobs:
|
|||||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||||
job-secret: ${{ secrets.JOB_SECRET }}
|
job-secret: ${{ secrets.JOB_SECRET }}
|
||||||
backend: aws
|
backend: hyperstack
|
||||||
profile: gpu-test
|
profile: gpu-test
|
||||||
|
|
||||||
cuda-tests-linux:
|
cuda-tests-linux:
|
||||||
@@ -84,11 +88,23 @@ jobs:
|
|||||||
include:
|
include:
|
||||||
- os: ubuntu-22.04
|
- os: ubuntu-22.04
|
||||||
cuda: "12.2"
|
cuda: "12.2"
|
||||||
gcc: 9
|
gcc: 11
|
||||||
env:
|
env:
|
||||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||||
|
CMAKE_VERSION: 3.29.6
|
||||||
steps:
|
steps:
|
||||||
|
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||||
|
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||||
|
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||||
|
cd cmake-${{ env.CMAKE_VERSION }}
|
||||||
|
./bootstrap
|
||||||
|
make -j"$(nproc)"
|
||||||
|
sudo make install
|
||||||
|
|
||||||
- name: Checkout tfhe-rs
|
- name: Checkout tfhe-rs
|
||||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||||
with:
|
with:
|
||||||
@@ -122,6 +138,10 @@ jobs:
|
|||||||
echo "HOME=/home/ubuntu";
|
echo "HOME=/home/ubuntu";
|
||||||
} >> "${GITHUB_ENV}"
|
} >> "${GITHUB_ENV}"
|
||||||
|
|
||||||
|
- name: Check device is detected
|
||||||
|
if: ${{ !cancelled() }}
|
||||||
|
run: nvidia-smi
|
||||||
|
|
||||||
- name: Run core crypto and internal CUDA backend tests
|
- name: Run core crypto and internal CUDA backend tests
|
||||||
run: |
|
run: |
|
||||||
make test_core_crypto_gpu
|
make test_core_crypto_gpu
|
||||||
@@ -139,13 +159,18 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
make test_high_level_api_gpu
|
make test_high_level_api_gpu
|
||||||
|
|
||||||
- name: Slack Notification
|
slack-notify:
|
||||||
if: ${{ always() }}
|
name: Slack Notification
|
||||||
continue-on-error: true
|
needs: [ setup-instance, cuda-tests-linux ]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' }}
|
||||||
|
continue-on-error: true
|
||||||
|
steps:
|
||||||
|
- name: Send message
|
||||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||||
env:
|
env:
|
||||||
SLACK_COLOR: ${{ job.status }}
|
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||||
SLACK_MESSAGE: "CUDA AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||||
|
|
||||||
teardown-instance:
|
teardown-instance:
|
||||||
name: Teardown instance (cuda-tests)
|
name: Teardown instance (cuda-tests)
|
||||||
@@ -49,7 +49,7 @@ jobs:
|
|||||||
- tfhe/src/c_api/**
|
- tfhe/src/c_api/**
|
||||||
- 'tfhe/docs/**.md'
|
- 'tfhe/docs/**.md'
|
||||||
- Makefile
|
- Makefile
|
||||||
- '.github/workflows/aws_tfhe_multi_gpu**'
|
- '.github/workflows/**_multi_gpu_tests.yml'
|
||||||
- scripts/**
|
- scripts/**
|
||||||
- ci/**
|
- ci/**
|
||||||
|
|
||||||
@@ -71,7 +71,7 @@ jobs:
|
|||||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||||
job-secret: ${{ secrets.JOB_SECRET }}
|
job-secret: ${{ secrets.JOB_SECRET }}
|
||||||
backend: aws
|
backend: hyperstack
|
||||||
profile: multi-gpu-test
|
profile: multi-gpu-test
|
||||||
|
|
||||||
cuda-tests-linux:
|
cuda-tests-linux:
|
||||||
@@ -90,13 +90,27 @@ jobs:
|
|||||||
include:
|
include:
|
||||||
- os: ubuntu-22.04
|
- os: ubuntu-22.04
|
||||||
cuda: "12.2"
|
cuda: "12.2"
|
||||||
gcc: 9
|
gcc: 11
|
||||||
env:
|
env:
|
||||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||||
|
CMAKE_VERSION: 3.29.6
|
||||||
steps:
|
steps:
|
||||||
|
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||||
|
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||||
|
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||||
|
cd cmake-${{ env.CMAKE_VERSION }}
|
||||||
|
./bootstrap
|
||||||
|
make -j"$(nproc)"
|
||||||
|
sudo make install
|
||||||
|
|
||||||
- name: Checkout tfhe-rs
|
- name: Checkout tfhe-rs
|
||||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||||
|
with:
|
||||||
|
persist-credentials: 'false'
|
||||||
|
|
||||||
- name: Set up home
|
- name: Set up home
|
||||||
run: |
|
run: |
|
||||||
@@ -126,30 +140,39 @@ jobs:
|
|||||||
echo "HOME=/home/ubuntu";
|
echo "HOME=/home/ubuntu";
|
||||||
} >> "${GITHUB_ENV}"
|
} >> "${GITHUB_ENV}"
|
||||||
|
|
||||||
|
- name: Check device is detected
|
||||||
|
if: ${{ !cancelled() }}
|
||||||
|
run: nvidia-smi
|
||||||
|
|
||||||
# No need to test core_crypto and classic PBS in integer since it's already tested on single GPU.
|
# No need to test core_crypto and classic PBS in integer since it's already tested on single GPU.
|
||||||
- name: Run multi-bit CUDA integer tests
|
- name: Run multi-bit CUDA integer tests
|
||||||
run: |
|
run: |
|
||||||
make test_integer_multi_bit_gpu_ci
|
BIG_TESTS_INSTANCE=TRUE make test_integer_multi_bit_gpu_ci
|
||||||
|
|
||||||
- name: Run user docs tests
|
- name: Run user docs tests
|
||||||
run: |
|
run: |
|
||||||
make test_user_doc_gpu
|
BIG_TESTS_INSTANCE=TRUE make test_user_doc_gpu
|
||||||
|
|
||||||
- name: Test C API
|
- name: Test C API
|
||||||
run: |
|
run: |
|
||||||
make test_c_api_gpu
|
BIG_TESTS_INSTANCE=TRUE make test_c_api_gpu
|
||||||
|
|
||||||
- name: Run High Level API Tests
|
- name: Run High Level API Tests
|
||||||
run: |
|
run: |
|
||||||
make test_high_level_api_gpu
|
BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu
|
||||||
|
|
||||||
- name: Slack Notification
|
slack-notify:
|
||||||
if: ${{ always() }}
|
name: Slack Notification
|
||||||
continue-on-error: true
|
needs: [ setup-instance, cuda-tests-linux ]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' }}
|
||||||
|
continue-on-error: true
|
||||||
|
steps:
|
||||||
|
- name: Send message
|
||||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||||
env:
|
env:
|
||||||
SLACK_COLOR: ${{ job.status }}
|
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||||
SLACK_MESSAGE: "CUDA AWS multi-GPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||||
|
|
||||||
teardown-instance:
|
teardown-instance:
|
||||||
name: Teardown instance (cuda-tests-multi-gpu)
|
name: Teardown instance (cuda-tests-multi-gpu)
|
||||||
@@ -49,7 +49,7 @@ jobs:
|
|||||||
- tfhe/src/c_api/**
|
- tfhe/src/c_api/**
|
||||||
- 'tfhe/docs/**.md'
|
- 'tfhe/docs/**.md'
|
||||||
- Makefile
|
- Makefile
|
||||||
- '.github/workflows/hyperstack**'
|
- '.github/workflows/gpu_signed_integer_h100_tests.yml'
|
||||||
- scripts/**
|
- scripts/**
|
||||||
- ci/**
|
- ci/**
|
||||||
|
|
||||||
@@ -56,6 +56,10 @@ jobs:
|
|||||||
- tfhe/src/high_level_api/**
|
- tfhe/src/high_level_api/**
|
||||||
- tfhe/src/c_api/**
|
- tfhe/src/c_api/**
|
||||||
- 'tfhe/docs/**.md'
|
- 'tfhe/docs/**.md'
|
||||||
|
- '.github/workflows/gpu_signed_integer_tests.yml'
|
||||||
|
- Makefile
|
||||||
|
- scripts/**
|
||||||
|
- ci/**
|
||||||
|
|
||||||
setup-instance:
|
setup-instance:
|
||||||
name: Setup instance (cuda-signed-integer-tests)
|
name: Setup instance (cuda-signed-integer-tests)
|
||||||
@@ -75,7 +79,7 @@ jobs:
|
|||||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||||
job-secret: ${{ secrets.JOB_SECRET }}
|
job-secret: ${{ secrets.JOB_SECRET }}
|
||||||
backend: aws
|
backend: hyperstack
|
||||||
profile: gpu-test
|
profile: gpu-test
|
||||||
|
|
||||||
cuda-signed-integer-tests:
|
cuda-signed-integer-tests:
|
||||||
@@ -94,13 +98,27 @@ jobs:
|
|||||||
include:
|
include:
|
||||||
- os: ubuntu-22.04
|
- os: ubuntu-22.04
|
||||||
cuda: "12.2"
|
cuda: "12.2"
|
||||||
gcc: 9
|
gcc: 11
|
||||||
env:
|
env:
|
||||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||||
|
CMAKE_VERSION: 3.29.6
|
||||||
steps:
|
steps:
|
||||||
|
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||||
|
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||||
|
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||||
|
cd cmake-${{ env.CMAKE_VERSION }}
|
||||||
|
./bootstrap
|
||||||
|
make -j"$(nproc)"
|
||||||
|
sudo make install
|
||||||
|
|
||||||
- name: Checkout tfhe-rs
|
- name: Checkout tfhe-rs
|
||||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||||
|
with:
|
||||||
|
persist-credentials: 'false'
|
||||||
|
|
||||||
- name: Set up home
|
- name: Set up home
|
||||||
run: |
|
run: |
|
||||||
@@ -138,17 +156,26 @@ jobs:
|
|||||||
echo "NIGHTLY_TESTS=TRUE";
|
echo "NIGHTLY_TESTS=TRUE";
|
||||||
} >> "${GITHUB_ENV}"
|
} >> "${GITHUB_ENV}"
|
||||||
|
|
||||||
|
- name: Check device is detected
|
||||||
|
if: ${{ !cancelled() }}
|
||||||
|
run: nvidia-smi
|
||||||
|
|
||||||
- name: Run signed integer multi-bit tests
|
- name: Run signed integer multi-bit tests
|
||||||
run: |
|
run: |
|
||||||
make test_signed_integer_multi_bit_gpu_ci
|
make test_signed_integer_multi_bit_gpu_ci
|
||||||
|
|
||||||
- name: Slack Notification
|
slack-notify:
|
||||||
if: ${{ always() }}
|
name: Slack Notification
|
||||||
continue-on-error: true
|
needs: [ setup-instance, cuda-signed-integer-tests ]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: ${{ always() && needs.cuda-signed-integer-tests.result != 'skipped' }}
|
||||||
|
continue-on-error: true
|
||||||
|
steps:
|
||||||
|
- name: Send message
|
||||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||||
env:
|
env:
|
||||||
SLACK_COLOR: ${{ job.status }}
|
SLACK_COLOR: ${{ needs.cuda-signed-integer-tests.result }}
|
||||||
SLACK_MESSAGE: "CUDA AWS signed integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||||
|
|
||||||
teardown-instance:
|
teardown-instance:
|
||||||
name: Teardown instance (cuda-tests)
|
name: Teardown instance (cuda-tests)
|
||||||
@@ -49,7 +49,7 @@ jobs:
|
|||||||
- tfhe/src/c_api/**
|
- tfhe/src/c_api/**
|
||||||
- 'tfhe/docs/**.md'
|
- 'tfhe/docs/**.md'
|
||||||
- Makefile
|
- Makefile
|
||||||
- '.github/workflows/hyperstack**'
|
- '.github/workflows/gpu_unsigned_integer_tests.yml'
|
||||||
- scripts/**
|
- scripts/**
|
||||||
- ci/**
|
- ci/**
|
||||||
|
|
||||||
@@ -55,6 +55,10 @@ jobs:
|
|||||||
- tfhe/src/high_level_api/**
|
- tfhe/src/high_level_api/**
|
||||||
- tfhe/src/c_api/**
|
- tfhe/src/c_api/**
|
||||||
- 'tfhe/docs/**.md'
|
- 'tfhe/docs/**.md'
|
||||||
|
- '.github/workflows/gpu_unsigned_integer_tests.yml'
|
||||||
|
- Makefile
|
||||||
|
- scripts/**
|
||||||
|
- ci/**
|
||||||
|
|
||||||
setup-instance:
|
setup-instance:
|
||||||
name: Setup instance (cuda-unsigned-integer-tests)
|
name: Setup instance (cuda-unsigned-integer-tests)
|
||||||
@@ -74,7 +78,7 @@ jobs:
|
|||||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||||
job-secret: ${{ secrets.JOB_SECRET }}
|
job-secret: ${{ secrets.JOB_SECRET }}
|
||||||
backend: aws
|
backend: hyperstack
|
||||||
profile: gpu-test
|
profile: gpu-test
|
||||||
|
|
||||||
cuda-unsigned-integer-tests:
|
cuda-unsigned-integer-tests:
|
||||||
@@ -93,11 +97,23 @@ jobs:
|
|||||||
include:
|
include:
|
||||||
- os: ubuntu-22.04
|
- os: ubuntu-22.04
|
||||||
cuda: "12.2"
|
cuda: "12.2"
|
||||||
gcc: 9
|
gcc: 11
|
||||||
env:
|
env:
|
||||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||||
|
CMAKE_VERSION: 3.29.6
|
||||||
steps:
|
steps:
|
||||||
|
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||||
|
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||||
|
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||||
|
cd cmake-${{ env.CMAKE_VERSION }}
|
||||||
|
./bootstrap
|
||||||
|
make -j"$(nproc)"
|
||||||
|
sudo make install
|
||||||
|
|
||||||
- name: Checkout tfhe-rs
|
- name: Checkout tfhe-rs
|
||||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||||
|
|
||||||
@@ -137,17 +153,26 @@ jobs:
|
|||||||
echo "NIGHTLY_TESTS=TRUE";
|
echo "NIGHTLY_TESTS=TRUE";
|
||||||
} >> "${GITHUB_ENV}"
|
} >> "${GITHUB_ENV}"
|
||||||
|
|
||||||
|
- name: Check device is detected
|
||||||
|
if: ${{ !cancelled() }}
|
||||||
|
run: nvidia-smi
|
||||||
|
|
||||||
- name: Run unsigned integer multi-bit tests
|
- name: Run unsigned integer multi-bit tests
|
||||||
run: |
|
run: |
|
||||||
make test_unsigned_integer_multi_bit_gpu_ci
|
make test_unsigned_integer_multi_bit_gpu_ci
|
||||||
|
|
||||||
- name: Slack Notification
|
slack-notify:
|
||||||
if: ${{ always() }}
|
name: Slack Notification
|
||||||
continue-on-error: true
|
needs: [ setup-instance, cuda-unsigned-integer-tests ]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: ${{ always() && needs.cuda-unsigned-integer-tests.result != 'skipped' }}
|
||||||
|
continue-on-error: true
|
||||||
|
steps:
|
||||||
|
- name: Send message
|
||||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||||
env:
|
env:
|
||||||
SLACK_COLOR: ${{ job.status }}
|
SLACK_COLOR: ${{ needs.cuda-unsigned-integer-tests.result }}
|
||||||
SLACK_MESSAGE: "CUDA AWS unsigned integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||||
|
|
||||||
teardown-instance:
|
teardown-instance:
|
||||||
name: Teardown instance (cuda-tests)
|
name: Teardown instance (cuda-tests)
|
||||||
22
ci/slab.toml
22
ci/slab.toml
@@ -30,13 +30,10 @@ region = "us-east-1"
|
|||||||
image_id = "ami-06b3d61f41bf8350a"
|
image_id = "ami-06b3d61f41bf8350a"
|
||||||
instance_type = "m6i.4xlarge"
|
instance_type = "m6i.4xlarge"
|
||||||
|
|
||||||
[backend.aws.gpu-test]
|
[backend.hyperstack.gpu-test]
|
||||||
region = "us-east-1"
|
environment_name = "canada"
|
||||||
image_id = "ami-06b3d61f41bf8350a"
|
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||||
instance_type = "p3.2xlarge"
|
flavor_name = "n3-RTX-A6000x1"
|
||||||
# One spawn attempt every 30 seconds for 1 hour
|
|
||||||
spawn_retry_attempts = 120
|
|
||||||
spawn_retry_duration = 60
|
|
||||||
|
|
||||||
[backend.hyperstack.single-h100]
|
[backend.hyperstack.single-h100]
|
||||||
environment_name = "canada"
|
environment_name = "canada"
|
||||||
@@ -58,13 +55,10 @@ environment_name = "canada"
|
|||||||
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||||
flavor_name = "n3-A100x8-NVLink"
|
flavor_name = "n3-A100x8-NVLink"
|
||||||
|
|
||||||
[backend.aws.multi-gpu-test]
|
[backend.hyperstack.multi-gpu-test]
|
||||||
region = "us-east-1"
|
environment_name = "canada"
|
||||||
image_id = "ami-06b3d61f41bf8350a"
|
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||||
instance_type = "p3.8xlarge"
|
flavor_name = "n3-A100x4"
|
||||||
# One spawn attempt every 30 seconds for 1 hour
|
|
||||||
spawn_retry_attempts = 120
|
|
||||||
spawn_retry_duration = 60
|
|
||||||
|
|
||||||
[command.signed_integer_full_bench]
|
[command.signed_integer_full_bench]
|
||||||
workflow = "signed_integer_full_benchmark.yml"
|
workflow = "signed_integer_full_benchmark.yml"
|
||||||
|
|||||||
@@ -130,8 +130,8 @@ fi
|
|||||||
# Override test-threads number to avoid Out-of-memory issues on GPU instances
|
# Override test-threads number to avoid Out-of-memory issues on GPU instances
|
||||||
if [[ "${backend}" == "gpu" ]]; then
|
if [[ "${backend}" == "gpu" ]]; then
|
||||||
if [[ "${BIG_TESTS_INSTANCE}" == TRUE ]]; then
|
if [[ "${BIG_TESTS_INSTANCE}" == TRUE ]]; then
|
||||||
test_threads=5
|
test_threads=8
|
||||||
doctest_threads=5
|
doctest_threads=8
|
||||||
else
|
else
|
||||||
test_threads=3
|
test_threads=3
|
||||||
doctest_threads=3
|
doctest_threads=3
|
||||||
|
|||||||
Reference in New Issue
Block a user