chore(ci): split integer tests for gpu

This is done to parallelize integer tests on GPU backend and thus
reduce iteration duration.
This commit is contained in:
David Testé
2024-06-04 11:45:22 +02:00
committed by David Testé
parent 879699c072
commit 05a0327874
5 changed files with 312 additions and 8 deletions

View File

@@ -17,13 +17,16 @@ on:
workflow_dispatch:
pull_request:
types: [ labeled ]
# FIXME: remove pull_request event and schedule it as nightly
schedule:
# Nightly tests @ 1AM after each work day
- cron: "0 1 * * MON-FRI"
jobs:
cuda-tests-linux:
name: CUDA tests (RTX 4090)
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, '4090_test') }}
if: github.event_name == 'workflow_dispatch' ||
contains(github.event.label.name, '4090_test') ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
concurrency:
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: true

View File

@@ -1,5 +1,5 @@
# Compile and test tfhe-cuda-backend on an AWS instance
name: TFHE Cuda Backend - Full tests
name: TFHE Cuda Backend - Fast tests
env:
CARGO_TERM_COLOR: always
@@ -155,9 +155,10 @@ jobs:
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Run core crypto, integer and internal CUDA backend tests
- name: Run core crypto and internal CUDA backend tests
run: |
make test_gpu
make test_core_crypto_gpu
make test_cuda_backend
- name: Run user docs tests
run: |

View File

@@ -0,0 +1,126 @@
# Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
name: TFHE Cuda Backend - Unsigned integer tests
env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
jobs:
setup-instance:
name: Setup instance (cuda-unsigned-integer-tests)
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: gpu-test
cuda-unsigned-integer-tests:
name: CUDA unsigned integer tests
needs: setup-instance
concurrency:
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 9
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@d8352f6b1d2e870bc5716e7a6d9b65c4cc244a1a
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Run unsigned integer tests
run: |
make test_unsigned_integer_gpu_ci
- name: Run unsigned integer multi-bit tests
run: |
make test_unsigned_integer_multi_bit_gpu_ci
- name: Slack Notification
if: ${{ always() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CUDA AWS unsigned integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
needs: [ setup-instance, cuda-unsigned-integer-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -0,0 +1,126 @@
# Compile and test tfhe-cuda-backend signed integer on an AWS instance
name: TFHE Cuda Backend - Signed integer tests
env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
jobs:
setup-instance:
name: Setup instance (cuda-signed-integer-tests)
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: gpu-test
cuda-signed-integer-tests:
name: CUDA signed integer tests
needs: setup-instance
concurrency:
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 9
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@d8352f6b1d2e870bc5716e7a6d9b65c4cc244a1a
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Run signed integer tests
run: |
make test_signed_integer_gpu_ci
- name: Run signed integer multi-bit tests
run: |
make test_signed_integer_multi_bit_gpu_ci
- name: Slack Notification
if: ${{ always() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CUDA AWS signed integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
needs: [ setup-instance, cuda-signed-integer-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -465,6 +465,54 @@ test_integer_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
.PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
--tfhe-package "$(TFHE_SPEC)"
.PHONY: test_unsigned_integer_gpu_ci # Run the tests for unsigned integer ci on gpu backend
test_unsigned_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
.PHONY: test_signed_integer_gpu_ci # Run the tests for signed integer ci on gpu backend
test_signed_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
--signed-only --tfhe-package "$(TFHE_SPEC)"
.PHONY: test_integer_multi_bit_gpu_ci # Run the tests for integer ci on gpu backend running only multibit tests
test_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
--tfhe-package "$(TFHE_SPEC)"
.PHONY: test_unsigned_integer_multi_bit_gpu_ci # Run the tests for unsigned integer ci on gpu backend running only multibit tests
test_unsigned_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
.PHONY: test_signed_integer_multi_bit_gpu_ci # Run the tests for signed integer ci on gpu backend running only multibit tests
test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
--signed-only --tfhe-package "$(TFHE_SPEC)"
.PHONY: test_boolean # Run the tests of the boolean module
test_boolean: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
@@ -555,7 +603,7 @@ test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)"
.PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for nsigned integer ci running only multibit tests
.PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for unsigned integer ci running only multibit tests
test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
@@ -563,7 +611,7 @@ test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nex
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
.PHONY: test_signed_integer_multi_bit_ci # Run the tests for nsigned integer ci running only multibit tests
.PHONY: test_signed_integer_multi_bit_ci # Run the tests for signed integer ci running only multibit tests
test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \