mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-09 22:57:59 -05:00
chore(gpu): add valgrind and fix leaks
This commit is contained in:
committed by
Andrei Stoian
parent
677da3855e
commit
c06b513182
16
.github/workflows/gpu_code_validation_tests.yml
vendored
16
.github/workflows/gpu_code_validation_tests.yml
vendored
@@ -1,5 +1,5 @@
|
|||||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||||
name: Cuda - Code Validation
|
name: Cuda - CPU Memory Checks
|
||||||
|
|
||||||
env:
|
env:
|
||||||
CARGO_TERM_COLOR: always
|
CARGO_TERM_COLOR: always
|
||||||
@@ -22,8 +22,9 @@ env:
|
|||||||
on:
|
on:
|
||||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
pull_request:
|
schedule:
|
||||||
types: [ labeled ]
|
# every 3 months
|
||||||
|
- cron: "0 0 1 */3 *"
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
@@ -57,7 +58,7 @@ jobs:
|
|||||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
cuda-tests-linux:
|
cuda-tests-linux:
|
||||||
name: CUDA Code Validation tests
|
name: CUDA Memory Checks tests
|
||||||
needs: [ setup-instance ]
|
needs: [ setup-instance ]
|
||||||
if: github.event_name != 'pull_request' ||
|
if: github.event_name != 'pull_request' ||
|
||||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||||
@@ -65,6 +66,7 @@ jobs:
|
|||||||
group: ${{ github.workflow_ref }}
|
group: ${{ github.workflow_ref }}
|
||||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||||
|
timeout-minutes: 5760
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
# explicit include-based build matrix, of known valid options
|
# explicit include-based build matrix, of known valid options
|
||||||
@@ -89,7 +91,9 @@ jobs:
|
|||||||
|
|
||||||
- name: Find tools
|
- name: Find tools
|
||||||
run: |
|
run: |
|
||||||
|
sudo apt update && sudo apt install -y valgrind
|
||||||
find /usr -executable -name "compute-sanitizer"
|
find /usr -executable -name "compute-sanitizer"
|
||||||
|
which valgrind
|
||||||
|
|
||||||
- name: Install latest stable
|
- name: Install latest stable
|
||||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||||
@@ -98,7 +102,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Run memory sanitizer
|
- name: Run memory sanitizer
|
||||||
run: |
|
run: |
|
||||||
make test_high_level_api_gpu_debug
|
make test_high_level_api_gpu_valgrind
|
||||||
|
|
||||||
slack-notify:
|
slack-notify:
|
||||||
name: Slack Notification
|
name: Slack Notification
|
||||||
@@ -120,7 +124,7 @@ jobs:
|
|||||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||||
env:
|
env:
|
||||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||||
SLACK_MESSAGE: "GPU code validation tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
SLACK_MESSAGE: "GPU Memory Checks tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||||
|
|
||||||
teardown-instance:
|
teardown-instance:
|
||||||
name: Teardown instance (cuda-tests)
|
name: Teardown instance (cuda-tests)
|
||||||
|
|||||||
2
.github/workflows/gpu_fast_tests.yml
vendored
2
.github/workflows/gpu_fast_tests.yml
vendored
@@ -127,9 +127,11 @@ jobs:
|
|||||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||||
with:
|
with:
|
||||||
toolchain: stable
|
toolchain: stable
|
||||||
|
|
||||||
- name: Enable nvidia multi-process service
|
- name: Enable nvidia multi-process service
|
||||||
run: |
|
run: |
|
||||||
nvidia-cuda-mps-control -d
|
nvidia-cuda-mps-control -d
|
||||||
|
|
||||||
- name: Run core crypto and internal CUDA backend tests
|
- name: Run core crypto and internal CUDA backend tests
|
||||||
run: |
|
run: |
|
||||||
make test_core_crypto_gpu
|
make test_core_crypto_gpu
|
||||||
|
|||||||
149
.github/workflows/gpu_memory_sanitizer.yml
vendored
Normal file
149
.github/workflows/gpu_memory_sanitizer.yml
vendored
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
# Run GPU memory checks for tfhe-cuda-backend on a Hyperstack GPU instance (or a GitHub GPU runner for forks)
|
||||||
|
name: Cuda - GPU Memory Checks
|
||||||
|
|
||||||
|
env:
|
||||||
|
CARGO_TERM_COLOR: always
|
||||||
|
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||||
|
RUSTFLAGS: "-C target-cpu=native"
|
||||||
|
RUST_BACKTRACE: "full"
|
||||||
|
RUST_MIN_STACK: "8388608"
|
||||||
|
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||||
|
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||||
|
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||||
|
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||||
|
SLACKIFY_MARKDOWN: true
|
||||||
|
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||||
|
PULL_REQUEST_MD_LINK: ""
|
||||||
|
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||||
|
# Secrets will be available only to zama-ai organization members
|
||||||
|
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||||
|
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||||
|
|
||||||
|
on:
|
||||||
|
# Runs when a pull request is labeled; workflow_dispatch also allows running this workflow manually from the Actions tab.
|
||||||
|
pull_request:
|
||||||
|
types: [ labeled ]
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
setup-instance:
|
||||||
|
name: Setup instance (cuda-tests)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: github.event_name != 'pull_request' ||
|
||||||
|
(github.event.action == 'labeled' && github.event.label.name == 'approved')
|
||||||
|
outputs:
|
||||||
|
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||||
|
steps:
|
||||||
|
- name: Start remote instance
|
||||||
|
id: start-remote-instance
|
||||||
|
if: env.SECRETS_AVAILABLE == 'true'
|
||||||
|
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||||
|
with:
|
||||||
|
mode: start
|
||||||
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||||
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||||
|
job-secret: ${{ secrets.JOB_SECRET }}
|
||||||
|
backend: hyperstack
|
||||||
|
profile: gpu-test
|
||||||
|
|
||||||
|
# This runner is used specifically for pull requests from forked repositories, where secrets are not available
|
||||||
|
- name: Start GitHub instance
|
||||||
|
id: start-github-instance
|
||||||
|
if: env.SECRETS_AVAILABLE == 'false'
|
||||||
|
run: |
|
||||||
|
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
cuda-tests-linux:
|
||||||
|
name: CUDA Memory Checks tests
|
||||||
|
needs: [ setup-instance ]
|
||||||
|
if: github.event_name != 'pull_request' ||
|
||||||
|
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow_ref }}
|
||||||
|
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||||
|
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||||
|
timeout-minutes: 240
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
# explicit include-based build matrix, of known valid options
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- os: ubuntu-22.04
|
||||||
|
cuda: "12.8"
|
||||||
|
gcc: 11
|
||||||
|
steps:
|
||||||
|
- name: Checkout tfhe-rs
|
||||||
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
||||||
|
with:
|
||||||
|
persist-credentials: 'false'
|
||||||
|
token: ${{ env.CHECKOUT_TOKEN }}
|
||||||
|
|
||||||
|
- name: Setup Hyperstack dependencies
|
||||||
|
uses: ./.github/actions/gpu_setup
|
||||||
|
with:
|
||||||
|
cuda-version: ${{ matrix.cuda }}
|
||||||
|
gcc-version: ${{ matrix.gcc }}
|
||||||
|
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||||
|
|
||||||
|
- name: Find tools
|
||||||
|
run: |
|
||||||
|
find /usr -executable -name "compute-sanitizer"
|
||||||
|
|
||||||
|
- name: Install latest stable
|
||||||
|
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||||
|
with:
|
||||||
|
toolchain: stable
|
||||||
|
|
||||||
|
- name: Run memory sanitizer
|
||||||
|
run: |
|
||||||
|
make test_high_level_api_gpu_sanitizer
|
||||||
|
|
||||||
|
slack-notify:
|
||||||
|
name: Slack Notification
|
||||||
|
needs: [ setup-instance, cuda-tests-linux ]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||||
|
continue-on-error: true
|
||||||
|
steps:
|
||||||
|
- name: Set pull-request URL
|
||||||
|
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||||
|
run: |
|
||||||
|
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||||
|
env:
|
||||||
|
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||||
|
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||||
|
|
||||||
|
- name: Send message
|
||||||
|
if: env.SECRETS_AVAILABLE == 'true'
|
||||||
|
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||||
|
env:
|
||||||
|
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||||
|
SLACK_MESSAGE: "GPU Memory Checks tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||||
|
|
||||||
|
teardown-instance:
|
||||||
|
name: Teardown instance (cuda-tests)
|
||||||
|
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||||
|
needs: [ setup-instance, cuda-tests-linux ]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Stop remote instance
|
||||||
|
id: stop-instance
|
||||||
|
if: env.SECRETS_AVAILABLE == 'true'
|
||||||
|
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||||
|
with:
|
||||||
|
mode: stop
|
||||||
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||||
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||||
|
job-secret: ${{ secrets.JOB_SECRET }}
|
||||||
|
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||||
|
|
||||||
|
- name: Slack Notification
|
||||||
|
if: ${{ failure() }}
|
||||||
|
continue-on-error: true
|
||||||
|
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||||
|
env:
|
||||||
|
SLACK_COLOR: ${{ job.status }}
|
||||||
|
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||||
15
Makefile
15
Makefile
@@ -702,12 +702,19 @@ test_integer_gpu_debug: install_rs_build_toolchain
|
|||||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile release_lto_off \
|
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile release_lto_off \
|
||||||
--features=integer,gpu-debug -p tfhe -- integer::gpu::server_key::
|
--features=integer,gpu-debug -p tfhe -- integer::gpu::server_key::
|
||||||
|
|
||||||
.PHONY: test_high_level_api_gpu_debug # Run the tests of the integer module with Debug flags for CUDA
|
.PHONY: test_high_level_api_gpu_valgrind # Run the high-level API GPU tests under valgrind (scripts/check_memory_errors.sh --cpu)
|
||||||
test_high_level_api_gpu_debug: install_rs_build_toolchain install_cargo_nextest
|
test_high_level_api_gpu_valgrind: install_rs_build_toolchain install_cargo_nextest
|
||||||
export RUSTFLAGS="$(RUSTFLAGS)" && \
|
export RUSTFLAGS="-C target-cpu=x86-64" && \
|
||||||
export CARGO_RS_BUILD_TOOLCHAIN="$(CARGO_RS_BUILD_TOOLCHAIN)" && \
|
export CARGO_RS_BUILD_TOOLCHAIN="$(CARGO_RS_BUILD_TOOLCHAIN)" && \
|
||||||
export TFHE_SPEC="tfhe" && \
|
export TFHE_SPEC="tfhe" && \
|
||||||
export CARGO_PROFILE="$(CARGO_PROFILE)" && scripts/check_memory_errors.sh
|
export CARGO_PROFILE="$(CARGO_PROFILE)" && scripts/check_memory_errors.sh --cpu
|
||||||
|
|
||||||
|
.PHONY: test_high_level_api_gpu_sanitizer # Run the high-level API GPU tests with GPU memory checks (scripts/check_memory_errors.sh --gpu)
|
||||||
|
test_high_level_api_gpu_sanitizer: install_rs_build_toolchain install_cargo_nextest
|
||||||
|
export RUSTFLAGS="-C target-cpu=x86-64" && \
|
||||||
|
export CARGO_RS_BUILD_TOOLCHAIN="$(CARGO_RS_BUILD_TOOLCHAIN)" && \
|
||||||
|
export TFHE_SPEC="tfhe" && \
|
||||||
|
export CARGO_PROFILE="$(CARGO_PROFILE)" && scripts/check_memory_errors.sh --gpu
|
||||||
|
|
||||||
.PHONY: test_integer_hl_test_gpu_check_warnings
|
.PHONY: test_integer_hl_test_gpu_check_warnings
|
||||||
test_integer_hl_test_gpu_check_warnings: install_rs_build_toolchain
|
test_integer_hl_test_gpu_check_warnings: install_rs_build_toolchain
|
||||||
|
|||||||
@@ -12,6 +12,8 @@
|
|||||||
#include <functional>
|
#include <functional>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
class NoiseLevel {
|
class NoiseLevel {
|
||||||
public:
|
public:
|
||||||
// Constants equivalent to the Rust code
|
// Constants equivalent to the Rust code
|
||||||
@@ -761,22 +763,20 @@ template <typename Torus> struct int_radix_lut {
|
|||||||
if (!mem_reuse) {
|
if (!mem_reuse) {
|
||||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0],
|
release_radix_ciphertext_async(streams[0], gpu_indexes[0],
|
||||||
tmp_lwe_before_ks, gpu_memory_allocated);
|
tmp_lwe_before_ks, gpu_memory_allocated);
|
||||||
if (gpu_memory_allocated) {
|
for (int i = 0; i < buffer.size(); i++) {
|
||||||
for (int i = 0; i < buffer.size(); i++) {
|
switch (params.pbs_type) {
|
||||||
switch (params.pbs_type) {
|
case MULTI_BIT:
|
||||||
case MULTI_BIT:
|
cleanup_cuda_multi_bit_programmable_bootstrap(
|
||||||
cleanup_cuda_multi_bit_programmable_bootstrap(
|
streams[i], gpu_indexes[i], &buffer[i]);
|
||||||
streams[i], gpu_indexes[i], &buffer[i]);
|
break;
|
||||||
break;
|
case CLASSICAL:
|
||||||
case CLASSICAL:
|
cleanup_cuda_programmable_bootstrap(streams[i], gpu_indexes[i],
|
||||||
cleanup_cuda_programmable_bootstrap(streams[i], gpu_indexes[i],
|
&buffer[i]);
|
||||||
&buffer[i]);
|
break;
|
||||||
break;
|
default:
|
||||||
default:
|
PANIC("Cuda error (PBS): unknown PBS type. ")
|
||||||
PANIC("Cuda error (PBS): unknown PBS type. ")
|
|
||||||
}
|
|
||||||
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
|
||||||
}
|
}
|
||||||
|
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
||||||
}
|
}
|
||||||
delete tmp_lwe_before_ks;
|
delete tmp_lwe_before_ks;
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
@@ -4910,12 +4910,22 @@ template <typename Torus> struct int_scalar_mul_buffer {
|
|||||||
sc_prop_mem->release(streams, gpu_indexes, gpu_count);
|
sc_prop_mem->release(streams, gpu_indexes, gpu_count);
|
||||||
delete sc_prop_mem;
|
delete sc_prop_mem;
|
||||||
delete all_shifted_buffer;
|
delete all_shifted_buffer;
|
||||||
if (!anticipated_buffers_drop) {
|
release_buffers(streams, gpu_indexes, gpu_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
void release_buffers(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||||
|
uint32_t gpu_count) {
|
||||||
|
if (preshifted_buffer) {
|
||||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0],
|
release_radix_ciphertext_async(streams[0], gpu_indexes[0],
|
||||||
preshifted_buffer, gpu_memory_allocated);
|
preshifted_buffer, gpu_memory_allocated);
|
||||||
|
delete preshifted_buffer;
|
||||||
|
preshifted_buffer = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (logical_scalar_shift_buffer) {
|
||||||
logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
|
logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
|
||||||
delete logical_scalar_shift_buffer;
|
delete logical_scalar_shift_buffer;
|
||||||
delete preshifted_buffer;
|
logical_scalar_shift_buffer = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -5197,6 +5207,7 @@ template <typename Torus> struct int_scalar_mul_high_buffer {
|
|||||||
|
|
||||||
scalar_mul_mem->release(streams, gpu_indexes, gpu_count);
|
scalar_mul_mem->release(streams, gpu_indexes, gpu_count);
|
||||||
delete scalar_mul_mem;
|
delete scalar_mul_mem;
|
||||||
|
scalar_mul_mem = nullptr;
|
||||||
|
|
||||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], tmp,
|
release_radix_ciphertext_async(streams[0], gpu_indexes[0], tmp,
|
||||||
allocate_gpu_memory);
|
allocate_gpu_memory);
|
||||||
@@ -5718,23 +5729,29 @@ template <typename Torus> struct int_signed_scalar_div_rem_buffer {
|
|||||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], numerator_ct,
|
release_radix_ciphertext_async(streams[0], gpu_indexes[0], numerator_ct,
|
||||||
allocate_gpu_memory);
|
allocate_gpu_memory);
|
||||||
delete numerator_ct;
|
delete numerator_ct;
|
||||||
|
numerator_ct = nullptr;
|
||||||
|
|
||||||
signed_div_mem->release(streams, gpu_indexes, gpu_count);
|
signed_div_mem->release(streams, gpu_indexes, gpu_count);
|
||||||
delete signed_div_mem;
|
delete signed_div_mem;
|
||||||
|
signed_div_mem = nullptr;
|
||||||
|
|
||||||
scp_mem->release(streams, gpu_indexes, gpu_count);
|
scp_mem->release(streams, gpu_indexes, gpu_count);
|
||||||
delete scp_mem;
|
delete scp_mem;
|
||||||
|
scp_mem = nullptr;
|
||||||
|
|
||||||
if (logical_scalar_shift_mem != nullptr) {
|
if (logical_scalar_shift_mem != nullptr) {
|
||||||
logical_scalar_shift_mem->release(streams, gpu_indexes, gpu_count);
|
logical_scalar_shift_mem->release(streams, gpu_indexes, gpu_count);
|
||||||
delete logical_scalar_shift_mem;
|
delete logical_scalar_shift_mem;
|
||||||
|
logical_scalar_shift_mem = nullptr;
|
||||||
}
|
}
|
||||||
if (scalar_mul_mem != nullptr) {
|
if (scalar_mul_mem != nullptr) {
|
||||||
scalar_mul_mem->release(streams, gpu_indexes, gpu_count);
|
scalar_mul_mem->release(streams, gpu_indexes, gpu_count);
|
||||||
delete scalar_mul_mem;
|
delete scalar_mul_mem;
|
||||||
|
scalar_mul_mem = nullptr;
|
||||||
}
|
}
|
||||||
sub_and_propagate_mem->release(streams, gpu_indexes, gpu_count);
|
sub_and_propagate_mem->release(streams, gpu_indexes, gpu_count);
|
||||||
delete sub_and_propagate_mem;
|
delete sub_and_propagate_mem;
|
||||||
|
sub_and_propagate_mem = nullptr;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -40,4 +40,6 @@ void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
|||||||
int_abs_buffer<uint64_t> *mem_ptr =
|
int_abs_buffer<uint64_t> *mem_ptr =
|
||||||
(int_abs_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_abs_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,6 +41,8 @@ void cleanup_cuda_integer_bitop(void *const *streams,
|
|||||||
int_bitop_buffer<uint64_t> *mem_ptr =
|
int_bitop_buffer<uint64_t> *mem_ptr =
|
||||||
(int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void update_degrees_after_bitand(uint64_t *output_degrees,
|
void update_degrees_after_bitand(uint64_t *output_degrees,
|
||||||
|
|||||||
@@ -61,4 +61,6 @@ void cleanup_cuda_extend_radix_with_sign_msb_64(void *const *streams,
|
|||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -50,5 +50,7 @@ void cleanup_cuda_integer_radix_cmux(void *const *streams,
|
|||||||
int_cmux_buffer<uint64_t> *mem_ptr =
|
int_cmux_buffer<uint64_t> *mem_ptr =
|
||||||
(int_cmux_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_cmux_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -99,6 +99,8 @@ void cleanup_cuda_integer_comparison(void *const *streams,
|
|||||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -146,6 +148,8 @@ void cleanup_cuda_integer_are_all_comparisons_block_true(
|
|||||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||||
@@ -192,4 +196,6 @@ void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
|||||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -75,6 +75,8 @@ void cleanup_cuda_integer_compress_radix_ciphertext_64(
|
|||||||
int_compression<uint64_t> *mem_ptr =
|
int_compression<uint64_t> *mem_ptr =
|
||||||
(int_compression<uint64_t> *)(*mem_ptr_void);
|
(int_compression<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void cleanup_cuda_integer_decompress_radix_ciphertext_64(
|
void cleanup_cuda_integer_decompress_radix_ciphertext_64(
|
||||||
@@ -84,4 +86,6 @@ void cleanup_cuda_integer_decompress_radix_ciphertext_64(
|
|||||||
int_decompression<uint64_t> *mem_ptr =
|
int_decompression<uint64_t> *mem_ptr =
|
||||||
(int_decompression<uint64_t> *)(*mem_ptr_void);
|
(int_decompression<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,5 +46,7 @@ void cleanup_cuda_integer_div_rem(void *const *streams,
|
|||||||
(int_div_rem_memory<uint64_t> *)(*mem_ptr_void);
|
(int_div_rem_memory<uint64_t> *)(*mem_ptr_void);
|
||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,6 +42,8 @@ void cleanup_cuda_full_propagation(void *const *streams,
|
|||||||
(int_fullprop_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_fullprop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t scratch_cuda_propagate_single_carry_kb_64_inplace(
|
uint64_t scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||||
@@ -155,6 +157,8 @@ void cleanup_cuda_propagate_single_carry(void *const *streams,
|
|||||||
int_sc_prop_memory<uint64_t> *mem_ptr =
|
int_sc_prop_memory<uint64_t> *mem_ptr =
|
||||||
(int_sc_prop_memory<uint64_t> *)(*mem_ptr_void);
|
(int_sc_prop_memory<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -166,6 +170,8 @@ void cleanup_cuda_add_and_propagate_single_carry(void *const *streams,
|
|||||||
int_sc_prop_memory<uint64_t> *mem_ptr =
|
int_sc_prop_memory<uint64_t> *mem_ptr =
|
||||||
(int_sc_prop_memory<uint64_t> *)(*mem_ptr_void);
|
(int_sc_prop_memory<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
void cleanup_cuda_integer_overflowing_sub(void *const *streams,
|
void cleanup_cuda_integer_overflowing_sub(void *const *streams,
|
||||||
@@ -176,6 +182,8 @@ void cleanup_cuda_integer_overflowing_sub(void *const *streams,
|
|||||||
int_borrow_prop_memory<uint64_t> *mem_ptr =
|
int_borrow_prop_memory<uint64_t> *mem_ptr =
|
||||||
(int_borrow_prop_memory<uint64_t> *)(*mem_ptr_void);
|
(int_borrow_prop_memory<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -245,6 +253,8 @@ void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
|
|||||||
PUSH_RANGE("cleanup univar lut")
|
PUSH_RANGE("cleanup univar lut")
|
||||||
int_radix_lut<uint64_t> *mem_ptr = (int_radix_lut<uint64_t> *)(*mem_ptr_void);
|
int_radix_lut<uint64_t> *mem_ptr = (int_radix_lut<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -307,6 +317,8 @@ void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
|
|||||||
PUSH_RANGE("cleanup bivar lut")
|
PUSH_RANGE("cleanup bivar lut")
|
||||||
int_radix_lut<uint64_t> *mem_ptr = (int_radix_lut<uint64_t> *)(*mem_ptr_void);
|
int_radix_lut<uint64_t> *mem_ptr = (int_radix_lut<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -351,6 +363,8 @@ void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
|||||||
int8_t **mem_ptr_void) {
|
int8_t **mem_ptr_void) {
|
||||||
int_radix_lut<uint64_t> *mem_ptr = (int_radix_lut<uint64_t> *)(*mem_ptr_void);
|
int_radix_lut<uint64_t> *mem_ptr = (int_radix_lut<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
|
void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
|
||||||
@@ -442,5 +456,7 @@ void cleanup_cuda_apply_noise_squashing_kb(void *const *streams,
|
|||||||
int_noise_squashing_lut<uint64_t> *mem_ptr =
|
int_noise_squashing_lut<uint64_t> *mem_ptr =
|
||||||
(int_noise_squashing_lut<uint64_t> *)(*mem_ptr_void);
|
(int_noise_squashing_lut<uint64_t> *)(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -198,6 +198,8 @@ void cleanup_cuda_integer_mult(void *const *streams,
|
|||||||
(int_mul_memory<uint64_t> *)(*mem_ptr_void);
|
(int_mul_memory<uint64_t> *)(*mem_ptr_void);
|
||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -248,4 +250,6 @@ void cleanup_cuda_integer_radix_partial_sum_ciphertexts_vec(
|
|||||||
(int_sum_ciphertexts_vec_memory<uint64_t> *)(*mem_ptr_void);
|
(int_sum_ciphertexts_vec_memory<uint64_t> *)(*mem_ptr_void);
|
||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,4 +46,6 @@ void cleanup_cuda_integer_radix_scalar_mul(void *const *streams,
|
|||||||
(int_scalar_mul_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_scalar_mul_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -103,12 +103,7 @@ __host__ void host_integer_scalar_mul_radix(
|
|||||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||||
|
|
||||||
if (mem->anticipated_buffers_drop) {
|
if (mem->anticipated_buffers_drop) {
|
||||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0],
|
mem->release_buffers(streams, gpu_indexes, gpu_count);
|
||||||
preshifted_buffer,
|
|
||||||
mem->gpu_memory_allocated);
|
|
||||||
delete preshifted_buffer;
|
|
||||||
mem->logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
|
|
||||||
delete (mem->logical_scalar_shift_buffer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (j == 0) {
|
if (j == 0) {
|
||||||
|
|||||||
@@ -41,4 +41,6 @@ void cleanup_cuda_integer_radix_scalar_rotate(void *const *streams,
|
|||||||
(int_logical_scalar_shift_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_logical_scalar_shift_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -83,6 +83,8 @@ void cleanup_cuda_integer_radix_logical_scalar_shift(
|
|||||||
(int_logical_scalar_shift_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_logical_scalar_shift_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void cleanup_cuda_integer_radix_arithmetic_scalar_shift(
|
void cleanup_cuda_integer_radix_arithmetic_scalar_shift(
|
||||||
@@ -93,4 +95,6 @@ void cleanup_cuda_integer_radix_arithmetic_scalar_shift(
|
|||||||
(int_arithmetic_scalar_shift_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_arithmetic_scalar_shift_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,4 +40,6 @@ void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,
|
|||||||
(int_shift_and_rotate_buffer<uint64_t> *)(*mem_ptr_void);
|
(int_shift_and_rotate_buffer<uint64_t> *)(*mem_ptr_void);
|
||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,4 +45,6 @@ void cleanup_cuda_sub_and_propagate_single_carry(void *const *streams,
|
|||||||
|
|
||||||
mem_ptr->release((cudaStream_t *)streams, gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)streams, gpu_indexes, gpu_count);
|
||||||
POP_RANGE()
|
POP_RANGE()
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -372,4 +372,5 @@ void cleanup_cuda_programmable_bootstrap_amortized(void *stream,
|
|||||||
|
|
||||||
// Free memory
|
// Free memory
|
||||||
cuda_drop_async(*pbs_buffer, static_cast<cudaStream_t>(stream), gpu_index);
|
cuda_drop_async(*pbs_buffer, static_cast<cudaStream_t>(stream), gpu_index);
|
||||||
|
*pbs_buffer = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,8 @@
|
|||||||
#endif
|
#endif
|
||||||
#include "ciphertext.h"
|
#include "ciphertext.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
template <typename Torus>
|
template <typename Torus>
|
||||||
bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
|
bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
|
||||||
uint32_t polynomial_size,
|
uint32_t polynomial_size,
|
||||||
@@ -731,6 +733,8 @@ void cleanup_cuda_programmable_bootstrap(void *stream, uint32_t gpu_index,
|
|||||||
int8_t **buffer) {
|
int8_t **buffer) {
|
||||||
auto x = (pbs_buffer<uint64_t, CLASSICAL> *)(*buffer);
|
auto x = (pbs_buffer<uint64_t, CLASSICAL> *)(*buffer);
|
||||||
x->release(static_cast<cudaStream_t>(stream), gpu_index);
|
x->release(static_cast<cudaStream_t>(stream), gpu_index);
|
||||||
|
delete x;
|
||||||
|
*buffer = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template bool has_support_to_cuda_programmable_bootstrap_cg<uint64_t>(
|
template bool has_support_to_cuda_programmable_bootstrap_cg<uint64_t>(
|
||||||
|
|||||||
@@ -258,4 +258,6 @@ void cleanup_cuda_programmable_bootstrap_128(void *stream, uint32_t gpu_index,
|
|||||||
int8_t **buffer) {
|
int8_t **buffer) {
|
||||||
auto x = (pbs_buffer_128<__uint128_t, PBS_TYPE::CLASSICAL> *)(*buffer);
|
auto x = (pbs_buffer_128<__uint128_t, PBS_TYPE::CLASSICAL> *)(*buffer);
|
||||||
x->release(static_cast<cudaStream_t>(stream), gpu_index);
|
x->release(static_cast<cudaStream_t>(stream), gpu_index);
|
||||||
|
delete x;
|
||||||
|
*buffer = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -440,6 +440,8 @@ void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream,
|
|||||||
int8_t **buffer) {
|
int8_t **buffer) {
|
||||||
auto x = (pbs_buffer<uint64_t, MULTI_BIT> *)(*buffer);
|
auto x = (pbs_buffer<uint64_t, MULTI_BIT> *)(*buffer);
|
||||||
x->release(static_cast<cudaStream_t>(stream), gpu_index);
|
x->release(static_cast<cudaStream_t>(stream), gpu_index);
|
||||||
|
delete x;
|
||||||
|
*buffer = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -294,6 +294,8 @@ void cleanup_cuda_multi_bit_programmable_bootstrap_128(void *stream,
|
|||||||
const auto x =
|
const auto x =
|
||||||
reinterpret_cast<pbs_buffer_128<uint64_t, MULTI_BIT> *>(*buffer);
|
reinterpret_cast<pbs_buffer_128<uint64_t, MULTI_BIT> *>(*buffer);
|
||||||
x->release(static_cast<cudaStream_t>(stream), gpu_index);
|
x->release(static_cast<cudaStream_t>(stream), gpu_index);
|
||||||
|
delete x;
|
||||||
|
*buffer = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -64,4 +64,6 @@ void cleanup_expand_without_verification_64(void *const *streams,
|
|||||||
zk_expand_mem<uint64_t> *mem_ptr =
|
zk_expand_mem<uint64_t> *mem_ptr =
|
||||||
reinterpret_cast<zk_expand_mem<uint64_t> *>(*mem_ptr_void);
|
reinterpret_cast<zk_expand_mem<uint64_t> *>(*mem_ptr_void);
|
||||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||||
|
delete mem_ptr;
|
||||||
|
*mem_ptr_void = nullptr;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,27 +1,79 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
RUN_VALGRIND=0
|
||||||
|
RUN_COMPUTE_SANITIZER=0
|
||||||
|
|
||||||
|
while [ -n "$1" ]
|
||||||
|
do
|
||||||
|
case "$1" in
|
||||||
|
"--cpu" )
|
||||||
|
RUN_VALGRIND=1
|
||||||
|
;;
|
||||||
|
|
||||||
|
"--gpu" )
|
||||||
|
RUN_COMPUTE_SANITIZER=1
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
echo "Unknown param : $1"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
|
||||||
|
if [[ "${RUN_VALGRIND}" == "0" && "${RUN_COMPUTE_SANITIZER}" == "0" ]]; then
|
||||||
|
echo "Usage: check_memory_errors.sh [--gpu] [--cpu]"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# Build the tests but don't run them
|
# Build the tests but don't run them
|
||||||
RUSTFLAGS="$RUSTFLAGS" cargo "${CARGO_RS_BUILD_TOOLCHAIN}" test --no-run --profile "${CARGO_PROFILE}" \
|
RUSTFLAGS="$RUSTFLAGS" cargo "${CARGO_RS_BUILD_TOOLCHAIN}" test --no-run --profile "${CARGO_PROFILE}" \
|
||||||
--features=integer,internal-keycache,gpu,zk-pok -p "${TFHE_SPEC}"
|
--features=integer,internal-keycache,gpu-debug,zk-pok -p "${TFHE_SPEC}"
|
||||||
|
|
||||||
# Find the test executable
|
# Find the test executable -> last one to have been modified
|
||||||
EXECUTABLE=$(find target/release/deps/ -type f -executable -name "tfhe-*")
|
EXECUTABLE=target/release/deps/$(find target/release/deps/ -type f -executable -name "tfhe-*" -printf "%T@ %f\n" |sort -nr|sed 's/^.* //; q;')
|
||||||
|
|
||||||
# List the tests into a temporary file
|
# List the tests into a temporary file
|
||||||
RUSTFLAGS="$RUSTFLAGS" cargo "${CARGO_RS_BUILD_TOOLCHAIN}" nextest list --cargo-profile "${CARGO_PROFILE}" \
|
RUSTFLAGS="$RUSTFLAGS" cargo "${CARGO_RS_BUILD_TOOLCHAIN}" nextest list --cargo-profile "${CARGO_PROFILE}" \
|
||||||
--features=integer,internal-keycache,gpu,zk-pok -p "${TFHE_SPEC}" &> /tmp/test_list.txt
|
--features=integer,internal-keycache,gpu-debug,zk-pok -p "${TFHE_SPEC}" &> /tmp/test_list.txt
|
||||||
|
|
||||||
# Filter the tests to get only the HL ones
|
# Filter the tests to get only the HL ones
|
||||||
TESTS_HL=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep 'high_level_api::.*gpu.*')
|
TESTS_HL=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep 'high_level_api::.*gpu.*' )
|
||||||
|
|
||||||
# Run compute sanitizer on each test individually
|
if [[ "${RUN_VALGRIND}" == "1" ]]; then
|
||||||
# shellcheck disable=SC2181
|
# shellcheck disable=SC2181
|
||||||
RESULT=0 && \
|
RESULT=0 && \
|
||||||
while read -r t; do \
|
while read -r t; do \
|
||||||
echo compute-sanitizer --target-processes=all "$(pwd)"/"${EXECUTABLE}" -- "${t}" && \
|
echo valgrind --leak-check=full --show-leak-kinds=definite "$(pwd)"/"${EXECUTABLE}" -- "${t}" && \
|
||||||
compute-sanitizer --leak-check=full --error-exitcode=1 --target-processes=all "$(pwd)"/"${EXECUTABLE}" -- "${t}" && \
|
valgrind --leak-check=full --show-leak-kinds=definite "$(pwd)"/"${EXECUTABLE}" -- "${t}" && \
|
||||||
if [[ $? != "0" ]]; then \
|
if [[ $? != "0" ]]; then \
|
||||||
RESULT=1; \
|
RESULT=1; \
|
||||||
fi; \
|
fi; \
|
||||||
done <<< "${TESTS_HL}"
|
done <<< "${TESTS_HL}"
|
||||||
|
|
||||||
exit $RESULT
|
if [ $RESULT -ne 0 ]; then \
|
||||||
|
exit $RESULT; \
|
||||||
|
fi;
|
||||||
|
fi
|
||||||
|
|
||||||
|
TESTS_HL=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep 'high_level_api::.*gpu.*' )
|
||||||
|
|
||||||
|
if [[ "${RUN_COMPUTE_SANITIZER}" == "1" ]]; then
|
||||||
|
# Run compute sanitizer on each test individually
|
||||||
|
# shellcheck disable=SC2181
|
||||||
|
RESULT=0 && \
|
||||||
|
while read -r t; do \
|
||||||
|
echo compute-sanitizer --tool memcheck --target-processes=all "$(pwd)"/"${EXECUTABLE}" -- "${t}" && \
|
||||||
|
compute-sanitizer --tool memcheck --leak-check=full --error-exitcode=1 --target-processes=all "$(pwd)"/"${EXECUTABLE}" -- "${t}" && \
|
||||||
|
if [[ $? != "0" ]]; then \
|
||||||
|
RESULT=1; \
|
||||||
|
fi; \
|
||||||
|
done <<< "${TESTS_HL}"
|
||||||
|
|
||||||
|
if [ $RESULT -ne 0 ]; then \
|
||||||
|
exit $RESULT; \
|
||||||
|
fi;
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|||||||
Reference in New Issue
Block a user