Mirror of https://github.com/zama-ai/tfhe-rs.git, synced 2026-01-10 07:08:03 -05:00
chore(gpu): define higher values for the sm size based on compute capability
.github/workflows/hyperstack_tfhe_gpu_tests.yml (vendored, 10 changed lines)
@@ -144,20 +144,20 @@ jobs:
 
       - name: Run core crypto and internal CUDA backend tests
         run: |
-          make test_core_crypto_gpu
-          make test_cuda_backend
+          BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
+          BIG_TESTS_INSTANCE=TRUE make test_cuda_backend
 
       - name: Run user docs tests
         run: |
-          make test_user_doc_gpu
+          BIG_TESTS_INSTANCE=TRUE make test_user_doc_gpu
 
       - name: Test C API
         run: |
-          make test_c_api_gpu
+          BIG_TESTS_INSTANCE=TRUE make test_c_api_gpu
 
       - name: Run High Level API Tests
         run: |
-          make test_high_level_api_gpu
+          BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu
 
   slack-notify:
     name: Slack Notification
@@ -144,11 +144,11 @@ jobs:
 
       - name: Run signed integer tests
         run: |
-          make test_signed_integer_gpu_ci
+          BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
 
       - name: Run signed integer multi-bit tests
         run: |
-          make test_signed_integer_multi_bit_gpu_ci
+          BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
 
   slack-notify:
     name: Slack Notification
@@ -144,11 +144,11 @@ jobs:
 
       - name: Run unsigned integer tests
         run: |
-          make test_unsigned_integer_gpu_ci
+          BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
 
       - name: Run unsigned integer multi-bit tests
         run: |
-          make test_unsigned_integer_multi_bit_gpu_ci
+          BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
 
   slack-notify:
     name: Slack Notification
@@ -247,5 +247,14 @@ int cuda_get_max_shared_memory(uint32_t gpu_index) {
   cudaDeviceGetAttribute(&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock,
                          gpu_index);
   check_cuda_error(cudaGetLastError());
+#if CUDA_ARCH == 900
+  max_shared_memory = 226000;
+#elif CUDA_ARCH == 890
+  max_shared_memory = 127000;
+#elif CUDA_ARCH == 800
+  max_shared_memory = 163000;
+#elif CUDA_ARCH == 700
+  max_shared_memory = 95000;
+#endif
   return max_shared_memory;
 }
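Background on the hardcoded values: the attribute queried above, cudaDevAttrMaxSharedMemoryPerBlock, reports the default per-block limit (48 KB on these architectures), while the per-arch constants introduced here track the much larger opt-in ceilings (roughly 227 KB on sm_90, 163 KB on sm_80, 96 KB on sm_70). A kernel can only use that extra headroom after raising its cudaFuncAttributeMaxDynamicSharedMemorySize. A minimal sketch of the opt-in mechanism; the kernel name is hypothetical, not from this patch:

```cuda
#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical kernel, used only to illustrate the opt-in mechanism.
__global__ void smem_hungry_kernel(float *out) {
  extern __shared__ float smem[];  // dynamic shared memory
  smem[threadIdx.x] = static_cast<float>(threadIdx.x);
  __syncthreads();
  out[threadIdx.x] = smem[threadIdx.x];
}

int main() {
  // Default per-block limit (what cuda_get_max_shared_memory queries)
  // vs. the architecture's opt-in ceiling.
  int def = 0, optin = 0;
  cudaDeviceGetAttribute(&def, cudaDevAttrMaxSharedMemoryPerBlock, 0);
  cudaDeviceGetAttribute(&optin, cudaDevAttrMaxSharedMemoryPerBlockOptin, 0);
  printf("default: %d bytes, opt-in ceiling: %d bytes\n", def, optin);

  // Without this call, launching with more than `def` bytes of dynamic
  // shared memory fails at launch time.
  cudaFuncSetAttribute(smem_hungry_kernel,
                       cudaFuncAttributeMaxDynamicSharedMemorySize, optin);

  float *out = nullptr;
  cudaMalloc(&out, 256 * sizeof(float));
  smem_hungry_kernel<<<1, 256, optin>>>(out);  // > 48 KB is now legal
  cudaDeviceSynchronize();
  cudaFree(out);
  return 0;
}
```

This is presumably why cuda_get_max_shared_memory can safely return values above 48 KB: the kernels that consume them are expected to have opted in.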
@@ -234,7 +234,12 @@ __host__ void host_integer_sum_ciphertexts_vec_kb(
   int32_t h_smart_copy_in[r * num_blocks];
   int32_t h_smart_copy_out[r * num_blocks];
 
-  auto max_shared_memory = cuda_get_max_shared_memory(gpu_indexes[0]);
+  /// Here it is important to query the default max shared memory on device 0
+  /// instead of cuda_get_max_shared_memory,
+  /// to avoid bugs with tree_add_chunks trying to use too much shared memory
+  int max_shared_memory = 0;
+  check_cuda_error(cudaDeviceGetAttribute(
+      &max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock, 0));
 
   // create lut object for message and carry
   // we allocate luts_message_carry in the host function (instead of scratch)
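The rationale in the /// comment is the flip side of the same mechanism: tree_add_chunks does not raise its shared memory attribute, so it remains bound by the default per-block limit, and gating it on the inflated per-arch values would let it request more shared memory than a launch can grant. A minimal sketch of the conservative guard this hunk implements, with an illustrative helper name (not the tfhe-rs API):

```cuda
#include <cuda_runtime.h>

// Illustrative helper: a kernel that never calls cudaFuncSetAttribute stays
// bound by the default per-block limit, so its shared-memory budget must be
// checked against cudaDevAttrMaxSharedMemoryPerBlock rather than the
// per-arch constants returned by cuda_get_max_shared_memory.
bool fits_default_smem_limit(size_t required_bytes, int device_id) {
  int default_limit = 0;
  cudaDeviceGetAttribute(&default_limit, cudaDevAttrMaxSharedMemoryPerBlock,
                         device_id);
  return required_bytes <= static_cast<size_t>(default_limit);
}
```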
@@ -129,8 +129,13 @@ fi
 
 # Override test-threads number to avoid Out-of-memory issues on GPU instances
 if [[ "${backend}" == "gpu" ]]; then
-    test_threads=5
-    doctest_threads=5
+    if [[ "${BIG_TESTS_INSTANCE}" == TRUE ]]; then
+        test_threads=5
+        doctest_threads=5
+    else
+        test_threads=3
+        doctest_threads=3
+    fi
 fi
 
 filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
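The comment in the script states the motivation: each concurrent test process holds its own device allocations, so the safe degree of parallelism is a function of available GPU memory, and the 5-vs-3 split encodes that per instance type. A rough illustration of the underlying arithmetic; the ~2 GiB per-process figure is an assumption, not a number from the CI:

```cuda
#include <cstdio>
#include <cuda_runtime.h>

int main() {
  // Free vs. total device memory on GPU 0.
  size_t free_bytes = 0, total_bytes = 0;
  cudaMemGetInfo(&free_bytes, &total_bytes);

  // Assumed per-test-process footprint; illustrative only.
  const size_t per_test = 2ULL << 30;  // ~2 GiB
  size_t max_parallel = free_bytes / per_test;
  printf("free: %zu MiB -> at most %zu concurrent tests at ~2 GiB each\n",
         free_bytes >> 20, max_parallel);
  return 0;
}
```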