mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
23 Commits
as/check_n
...
as/refacto
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
64229ca391 | ||
|
|
8bc080355d | ||
|
|
0cc8d625e4 | ||
|
|
ec5d0da883 | ||
|
|
8ed5633300 | ||
|
|
cf07dcf6a3 | ||
|
|
20dad23256 | ||
|
|
d7380e4264 | ||
|
|
093ffb7699 | ||
|
|
c804b838cb | ||
|
|
7b174b1865 | ||
|
|
79cb6b6066 | ||
|
|
6ff87e94bb | ||
|
|
4c27f48968 | ||
|
|
8bf2a12e9b | ||
|
|
64b5a0fdcd | ||
|
|
49c390edef | ||
|
|
82860a0b01 | ||
|
|
39ca504ce4 | ||
|
|
61c7ffea2e | ||
|
|
48bb3833e7 | ||
|
|
2ad2f522db | ||
|
|
2333a5591e |
@@ -4,9 +4,6 @@ ignore = [
|
||||
"RUSTSEC-2024-0436",
|
||||
# Ignoring unmaintained 'bincode' crate. Getting rid of it would be too complex on the short term.
|
||||
"RUSTSEC-2025-0141",
|
||||
# Ignoring unsoundness in 'rand' with custom logger. Rand update is currently blocked by
|
||||
# arkworks and we do not use custom loggers.
|
||||
"RUSTSEC-2026-0097",
|
||||
]
|
||||
|
||||
[output]
|
||||
|
||||
@@ -79,19 +79,11 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Find existing comment
|
||||
- name: Post/refresh backward-compat report
|
||||
if: steps.report.outputs.has_report == 'true'
|
||||
id: find-comment
|
||||
uses: peter-evans/find-comment@b30e6a3c0ed37e7c023ccd3f1db5c6c0b0c23aad # v4.0.0
|
||||
uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0
|
||||
with:
|
||||
issue-number: ${{ github.event.pull_request.number }}
|
||||
body-includes: '**Backward-compat snapshot:'
|
||||
|
||||
- name: Comment on PR
|
||||
if: steps.report.outputs.has_report == 'true'
|
||||
uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0
|
||||
with:
|
||||
comment-id: ${{ steps.find-comment.outputs.comment-id }}
|
||||
issue-number: ${{ github.event.pull_request.number }}
|
||||
body-path: report.md
|
||||
edit-mode: replace
|
||||
header: backward-compat-snapshot
|
||||
hide_and_recreate: true
|
||||
hide_classify: OUTDATED
|
||||
path: report.md
|
||||
|
||||
2
.github/workflows/benchmark_cpu_common.yml
vendored
2
.github/workflows/benchmark_cpu_common.yml
vendored
@@ -223,7 +223,7 @@ jobs:
|
||||
results_type: ${{ inputs.additional_results_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_ct_key_sizes.yml
vendored
2
.github/workflows/benchmark_ct_key_sizes.yml
vendored
@@ -99,7 +99,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_ct_key_sizes
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
4
.github/workflows/benchmark_gpu_4090.yml
vendored
4
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -89,7 +89,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_multi_bit_gpu_default
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -173,7 +173,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_gpu_common.yml
vendored
2
.github/workflows/benchmark_gpu_common.yml
vendored
@@ -270,7 +270,7 @@ jobs:
|
||||
filenames: ${{ inputs.additional_file_to_parse }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -232,7 +232,7 @@ jobs:
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Use Node.js
|
||||
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: 20.x
|
||||
|
||||
@@ -271,7 +271,7 @@ jobs:
|
||||
- name: Upload profile artifact
|
||||
env:
|
||||
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ env.REPORT_NAME }}
|
||||
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
|
||||
@@ -302,7 +302,7 @@ jobs:
|
||||
working-directory: fhevm/
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
|
||||
path: fhevm/$${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_hpu_common.yml
vendored
2
.github/workflows/benchmark_hpu_common.yml
vendored
@@ -185,7 +185,7 @@ jobs:
|
||||
BENCH_TYPE: ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.bench_type }}_${{ matrix.command }}_benchmarks
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -280,7 +280,7 @@ jobs:
|
||||
BENCH_TYPE: ${{ env.__TFHE_RS_BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_regression_${{ env.RESULTS_FILE_SHA }} # RESULT_FILE_SHA is needed to avoid collision between matrix.command runs
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_tfhe_fft.yml
vendored
2
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -99,7 +99,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_fft
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_tfhe_ntt.yml
vendored
2
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -99,7 +99,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_ntt
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -180,7 +180,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/ci_lint.yml
vendored
2
.github/workflows/ci_lint.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
||||
echo "version=$(make zizmor_version)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Check workflows security
|
||||
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
|
||||
uses: zizmorcore/zizmor-action@b1d7e1fb5de872772f31590499237e7cce841e8e # v0.5.3
|
||||
with:
|
||||
advanced-security: 'false' # Print results directly in logs
|
||||
persona: pedantic
|
||||
|
||||
4
.github/workflows/generate_svg_common.yml
vendored
4
.github/workflows/generate_svg_common.yml
vendored
@@ -87,7 +87,7 @@ jobs:
|
||||
|
||||
- name: Upload tables
|
||||
if: inputs.backend_comparison == false
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_subset_${{inputs.bench_subset}}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
|
||||
# This will upload all the file generated
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
|
||||
- name: Upload comparison tables
|
||||
if: inputs.backend_comparison == true
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
with:
|
||||
name: ${{ github.sha }}_backends_comparison_tables
|
||||
# This will upload all the file generated
|
||||
|
||||
7
.github/workflows/gpu_fast_tests.yml
vendored
7
.github/workflows/gpu_fast_tests.yml
vendored
@@ -133,10 +133,6 @@ jobs:
|
||||
run: |
|
||||
nvidia-cuda-mps-control -d
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
make test_high_level_api_fake_multi_gpu
|
||||
|
||||
- name: Run core crypto and internal CUDA backend tests
|
||||
run: |
|
||||
make test_core_crypto_gpu
|
||||
@@ -151,6 +147,9 @@ jobs:
|
||||
run: |
|
||||
make test_c_api_gpu
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
make test_high_level_api_gpu_fast
|
||||
|
||||
slack-notify:
|
||||
name: gpu_fast_tests/slack-notify
|
||||
|
||||
2
.github/workflows/make_release_common.yml
vendored
2
.github/workflows/make_release_common.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
||||
PACKAGE: ${{ inputs.package-name }}
|
||||
run: |
|
||||
cargo package -p "${PACKAGE}"
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: crate-${{ inputs.package-name }}
|
||||
path: target/package/*.crate
|
||||
|
||||
19
.github/workflows/make_release_common_cuda.yml
vendored
19
.github/workflows/make_release_common_cuda.yml
vendored
@@ -128,7 +128,7 @@ jobs:
|
||||
run: |
|
||||
cargo package -p "${PACKAGE}"
|
||||
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: crate-${{ inputs.package-name }}
|
||||
path: target/package/*.crate
|
||||
@@ -196,6 +196,13 @@ jobs:
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: "false"
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
with:
|
||||
@@ -210,12 +217,12 @@ jobs:
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
PACKAGE: ${{ inputs.package-name }}
|
||||
DRY-RUN: ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
DRY_RUN: ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# dry-run expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since dry-run is handled in the env section above.
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p "${PACKAGE}" ${DRY-RUN}
|
||||
cargo publish -p "${PACKAGE}" ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
@@ -255,7 +262,7 @@ jobs:
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (${{ inputs.package-name }} release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
2
.github/workflows/make_release_tfhe.yml
vendored
2
.github/workflows/make_release_tfhe.yml
vendored
@@ -89,7 +89,7 @@ jobs:
|
||||
make build_web_js_api_parallel
|
||||
|
||||
- name: Authenticate on NPM
|
||||
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: '24'
|
||||
registry-url: 'https://registry.npmjs.org'
|
||||
|
||||
20
Makefile
20
Makefile
@@ -360,7 +360,7 @@ check_fmt_toml: install_taplo
|
||||
|
||||
.PHONY: check_typos # Check for typos in codebase
|
||||
check_typos: install_typos_checker
|
||||
@git ls-files ":!*.png" ":!*.cbor" ":!*.bcode" ":!*.ico" ":!*/twiddles.cu" | typos --file-list - && echo "No typos found"
|
||||
@git ls-files ":!*.png" ":!*.cbor" ":!*.bcode" ":!*.ico" ":!*/twiddles.cu" ":!*.hpu" | typos --file-list - && echo "No typos found"
|
||||
|
||||
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
@@ -587,6 +587,17 @@ clippy_backward_compat_data: install_rs_check_toolchain # the toolchain is selec
|
||||
echo "Cannot run clippy for backward compat crate on non x86 platform for now."; \
|
||||
fi
|
||||
|
||||
.PHONY: check_backward_compat_locks_did_not_change # Check backward compat Cargo.lock files are up to date
|
||||
check_backward_compat_locks_did_not_change: install_rs_check_toolchain
|
||||
@for crate in `ls -1 $(BACKWARD_COMPAT_DATA_DIR)/crates/ | grep generate_`; do \
|
||||
echo "checking Cargo.lock for $$crate"; \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options \
|
||||
-C $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate metadata --locked --format-version 1 > /dev/null || \
|
||||
( echo "Cargo.lock for $$crate is out of date. Update it with:" && \
|
||||
echo " cd $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate && cargo metadata --format-version 1 > /dev/null" && \
|
||||
echo "then commit the updated Cargo.lock." && exit 1 ); \
|
||||
done
|
||||
|
||||
.PHONY: clippy_test_vectors # Run clippy lints on the test vectors app
|
||||
clippy_test_vectors: install_rs_check_toolchain
|
||||
cd apps/test-vectors; RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
@@ -1134,12 +1145,6 @@ test_high_level_api_gpu: install_cargo_nextest # Run all the GPU tests for high_
|
||||
--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p tfhe \
|
||||
-E "test(/high_level_api::.*gpu.*/)"
|
||||
|
||||
.PHONY: test_high_level_api_fake_multi_gpu
|
||||
test_high_level_api_fake_multi_gpu: install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--test-threads=4 --features=integer,internal-keycache,gpu-debug-fake-multi-gpu,zk-pok -p tfhe \
|
||||
-E "test(/high_level_api::.*gpu.*/)"
|
||||
|
||||
test_list_gpu: install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo nextest list --cargo-profile $(CARGO_PROFILE) \
|
||||
--features=integer,internal-keycache,gpu,zk-pok -p tfhe \
|
||||
@@ -2271,6 +2276,7 @@ pcc_batch_5:
|
||||
$(call run_recipe_with_details,clippy_tfhe_lints)
|
||||
$(call run_recipe_with_details,check_compile_tests)
|
||||
$(call run_recipe_with_details,clippy_backward_compat_data)
|
||||
$(call run_recipe_with_details,check_backward_compat_locks_did_not_change)
|
||||
|
||||
.PHONY: pcc_batch_6 # duration: 6'32''
|
||||
pcc_batch_6:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-cuda-backend"
|
||||
version = "0.14.0"
|
||||
version = "0.15.0"
|
||||
edition = "2021"
|
||||
authors = ["Zama team"]
|
||||
license = "BSD-3-Clause-Clear"
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
fn get_linux_distribution_name() -> Option<String> {
|
||||
let content = std::fs::read_to_string("/etc/os-release").ok()?;
|
||||
for line in content.lines() {
|
||||
if let Some(value) = line.strip_prefix("NAME=") {
|
||||
return Some(value.trim_matches('"').to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn main() {
|
||||
if let Ok(val) = std::env::var("DOCS_RS") {
|
||||
@@ -28,9 +37,7 @@ fn main() {
|
||||
println!("cargo::rerun-if-changed=src");
|
||||
|
||||
if std::env::consts::OS == "linux" {
|
||||
let output = Command::new("./get_os_name.sh").output().unwrap();
|
||||
let distribution = String::from_utf8(output.stdout).unwrap();
|
||||
if distribution != "Ubuntu\n" {
|
||||
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
|
||||
println!(
|
||||
"cargo:warning=This Linux distribution is not officially supported. \
|
||||
Only Ubuntu is supported by tfhe-cuda-backend at this time. Build may fail\n"
|
||||
|
||||
@@ -4,22 +4,18 @@
|
||||
|
||||
extern "C" {
|
||||
uint64_t scratch_cuda_integer_aes_ctr_encrypt_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
|
||||
uint32_t sbox_parallelism);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
|
||||
uint32_t num_aes_inputs, uint32_t sbox_parallelism);
|
||||
|
||||
uint64_t scratch_cuda_integer_aes_ctr_256_encrypt_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
|
||||
uint32_t sbox_parallelism);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
|
||||
uint32_t num_aes_inputs, uint32_t sbox_parallelism);
|
||||
|
||||
void cuda_integer_aes_ctr_encrypt_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output,
|
||||
@@ -34,12 +30,10 @@ void cleanup_cuda_integer_aes_ctr_256_encrypt_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_key_expansion_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_key_expansion_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *expanded_keys,
|
||||
@@ -57,12 +51,10 @@ void cuda_integer_aes_ctr_256_encrypt_64_async(
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
|
||||
|
||||
uint64_t scratch_cuda_integer_key_expansion_256_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_key_expansion_256_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *expanded_keys,
|
||||
|
||||
@@ -17,10 +17,9 @@ uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
uint32_t encryption_glwe_dimension, uint32_t encryption_polynomial_size,
|
||||
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
|
||||
uint32_t lwe_dimension, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks_to_decompress,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t num_blocks_to_decompress,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_compress_radix_ciphertext_64_async(
|
||||
CudaStreamsFFI streams, CudaPackedGlweCiphertextListFFI *glwe_array_out,
|
||||
|
||||
@@ -381,16 +381,16 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
bool use_seq = overflow_sub_mem_1->prop_simu_group_carries_mem
|
||||
->use_sequential_algorithm_to_resolve_group_carries;
|
||||
|
||||
cuda_set_device(streams.gpu_index(0));
|
||||
cuda_set_device(0);
|
||||
check_cuda_error(
|
||||
cudaEventCreateWithFlags(&create_indexes_done, cudaEventDisableTiming));
|
||||
create_indexes_for_overflow_sub(streams.get_ith(0), num_blocks, group_size,
|
||||
use_seq, allocate_gpu_memory, size_tracker);
|
||||
check_cuda_error(cudaEventRecord(create_indexes_done, streams.stream(0)));
|
||||
cuda_set_device(streams.gpu_index(1));
|
||||
cuda_set_device(1);
|
||||
check_cuda_error(
|
||||
cudaStreamWaitEvent(streams.stream(1), create_indexes_done, 0));
|
||||
cuda_set_device(streams.gpu_index(2));
|
||||
cuda_set_device(2);
|
||||
check_cuda_error(
|
||||
cudaStreamWaitEvent(streams.stream(2), create_indexes_done, 0));
|
||||
|
||||
|
||||
@@ -105,22 +105,32 @@ typedef struct {
|
||||
uint32_t polynomial_size;
|
||||
} CudaPackedGlweCiphertextListFFI;
|
||||
|
||||
// FFI-boundary parameter struct for a LWE bootstrap key.
|
||||
// All fields are plain uint32_t for safe Rust/C++ interop.
|
||||
// Use crypto_params() (defined below) to obtain the strongly-typed C++ form.
|
||||
typedef struct {
|
||||
uint32_t input_lwe_dimension;
|
||||
uint32_t glwe_dimension;
|
||||
uint32_t polynomial_size;
|
||||
uint32_t base_log;
|
||||
uint32_t level_count;
|
||||
uint32_t big_lwe_dimension;
|
||||
uint32_t pbs_type;
|
||||
uint32_t grouping_factor;
|
||||
} CudaLweBootstrapKeyParamsFFI;
|
||||
|
||||
uint64_t scratch_cuda_apply_univariate_lut_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, void const *input_lut,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint64_t lut_degree,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, uint64_t lut_degree,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
uint64_t scratch_cuda_apply_many_univariate_lut_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, void const *input_lut,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t num_many_lut, uint64_t lut_degree,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
void cuda_apply_univariate_lut_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
@@ -139,12 +149,10 @@ void cuda_apply_many_univariate_lut_64_async(
|
||||
uint32_t lut_stride);
|
||||
|
||||
uint64_t scratch_cuda_full_propagation_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_full_propagation_64_inplace_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *input_blocks,
|
||||
@@ -162,11 +170,9 @@ void cuda_integer_mult_inplace_64_async(
|
||||
uint64_t scratch_cuda_integer_mult_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, bool const is_boolean_left,
|
||||
bool const is_boolean_right, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t glwe_dimension, uint32_t lwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t pbs_base_log, uint32_t pbs_level,
|
||||
uint32_t ks_base_log, uint32_t ks_level, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
uint32_t carry_modulus, CudaLweBootstrapKeyParamsFFI bsk_params,
|
||||
uint32_t ks_base_log, uint32_t ks_level, uint32_t num_blocks,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cleanup_cuda_integer_mult_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
@@ -183,12 +189,10 @@ void cuda_scalar_addition_ciphertext_64_inplace(
|
||||
uint32_t message_modulus, uint32_t carry_modulus);
|
||||
|
||||
uint64_t scratch_cuda_logical_scalar_shift_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_logical_scalar_shift_64_inplace_async(
|
||||
@@ -196,12 +200,10 @@ void cuda_logical_scalar_shift_64_inplace_async(
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
|
||||
|
||||
uint64_t scratch_cuda_arithmetic_scalar_shift_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_arithmetic_scalar_shift_64_inplace_async(
|
||||
@@ -215,12 +217,10 @@ void cleanup_cuda_arithmetic_scalar_shift_64_inplace(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_shift_and_rotate_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type, bool is_signed,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type, bool is_signed,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_shift_and_rotate_64_inplace_async(
|
||||
@@ -232,22 +232,18 @@ void cleanup_cuda_shift_and_rotate_64_inplace(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_comparison_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, COMPARISON_TYPE op_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, COMPARISON_TYPE op_type,
|
||||
bool is_signed, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_comparison_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, COMPARISON_TYPE op_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, COMPARISON_TYPE op_type,
|
||||
bool is_signed, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
@@ -276,12 +272,10 @@ void cuda_boolean_bitop_inplace_64_async(
|
||||
void *const *bsks, void *const *ksks);
|
||||
|
||||
uint64_t scratch_cuda_boolean_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
|
||||
bool is_unchecked, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
@@ -289,11 +283,9 @@ void cleanup_cuda_boolean_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_boolean_bitnot_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
uint32_t lwe_ciphertext_count, bool is_unchecked, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
@@ -311,6 +303,20 @@ void cuda_bitnot_ciphertext_64(CudaStreamsFFI streams,
|
||||
uint32_t param_message_modulus,
|
||||
uint32_t param_carry_modulus);
|
||||
|
||||
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
@@ -322,38 +328,20 @@ void cuda_integer_scalar_bitop_inplace_64_async(
|
||||
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks);
|
||||
|
||||
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cleanup_cuda_integer_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cleanup_cuda_integer_scalar_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_cmux_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
uint64_t scratch_cuda_cmux_64_async(CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params,
|
||||
uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus,
|
||||
uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_cmux_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
@@ -365,12 +353,10 @@ void cuda_cmux_64_async(CudaStreamsFFI streams,
|
||||
void cleanup_cuda_cmux_64(CudaStreamsFFI streams, int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_scalar_rotate_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_scalar_rotate_64_inplace_async(CudaStreamsFFI streams,
|
||||
@@ -382,21 +368,17 @@ void cleanup_cuda_scalar_rotate_64_inplace(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_propagate_single_carry_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
uint64_t scratch_cuda_add_and_propagate_single_carry_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_propagate_single_carry_64_inplace_async(
|
||||
@@ -418,12 +400,10 @@ void cleanup_cuda_add_and_propagate_single_carry_64_inplace(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_overflowing_sub_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t compute_overflow, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t compute_overflow, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_overflowing_sub_64_inplace_async(
|
||||
@@ -438,14 +418,12 @@ void cleanup_cuda_integer_overflowing_sub_64_inplace(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_partial_sum_ciphertexts_vec_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks_in_radix,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks_in_radix,
|
||||
uint32_t max_num_radix_in_vec, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool reduce_degrees_for_single_carry_propagation, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
uint32_t carry_modulus, bool reduce_degrees_for_single_carry_propagation,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_partial_sum_ciphertexts_vec_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
@@ -456,12 +434,11 @@ void cleanup_cuda_partial_sum_ciphertexts_vec_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_mul_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t num_scalar_bits,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t num_scalar_bits, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_scalar_mul_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array,
|
||||
@@ -474,11 +451,9 @@ void cleanup_cuda_integer_scalar_mul_64(CudaStreamsFFI streams,
|
||||
|
||||
uint64_t scratch_cuda_integer_div_rem_64_async(
|
||||
CudaStreamsFFI streams, bool is_signed, int8_t **mem_ptr,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_div_rem_64_async(CudaStreamsFFI streams,
|
||||
@@ -497,11 +472,9 @@ void cuda_integer_reverse_blocks_64_inplace_async(
|
||||
|
||||
uint64_t scratch_cuda_integer_abs_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, bool is_signed,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_abs_inplace_64_async(CudaStreamsFFI streams,
|
||||
@@ -513,12 +486,10 @@ void cleanup_cuda_integer_abs_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_are_all_comparisons_block_true_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_are_all_comparisons_block_true_64_async(
|
||||
@@ -530,12 +501,10 @@ void cleanup_cuda_integer_are_all_comparisons_block_true_64(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_is_at_least_one_comparisons_block_true_64_async(
|
||||
@@ -559,13 +528,11 @@ void trim_radix_blocks_msb_64(CudaRadixCiphertextFFI *output,
|
||||
CudaStreamsFFI streams);
|
||||
|
||||
uint64_t scratch_cuda_apply_noise_squashing_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t input_glwe_dimension, uint32_t input_polynomial_size,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t num_original_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t input_glwe_dimension,
|
||||
uint32_t input_polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t num_radix_blocks, uint32_t num_original_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_apply_noise_squashing_async(
|
||||
@@ -577,12 +544,10 @@ void cleanup_cuda_apply_noise_squashing(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_sub_and_propagate_single_carry_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_sub_and_propagate_single_carry_64_inplace_async(
|
||||
@@ -595,13 +560,11 @@ void cleanup_cuda_sub_and_propagate_single_carry_64_inplace(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_unsigned_scalar_div_radix_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
const CudaScalarDivisorFFI *scalar_divisor_ffi, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_unsigned_scalar_div_radix_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *numerator_ct,
|
||||
@@ -612,13 +575,11 @@ void cleanup_cuda_integer_unsigned_scalar_div_radix_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_signed_scalar_div_radix_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
const CudaScalarDivisorFFI *scalar_divisor_ffi, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_signed_scalar_div_radix_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *numerator_ct,
|
||||
@@ -629,12 +590,10 @@ void cleanup_cuda_integer_signed_scalar_div_radix_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_unsigned_scalar_div_rem_radix_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
@@ -651,12 +610,10 @@ void cleanup_cuda_integer_unsigned_scalar_div_rem_radix_64(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_signed_scalar_div_rem_radix_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
@@ -672,12 +629,11 @@ void cleanup_cuda_integer_signed_scalar_div_rem_radix_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_count_of_consecutive_bits_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t counter_num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
Direction direction, BitValue bit_value, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t counter_num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, Direction direction,
|
||||
BitValue bit_value, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_count_of_consecutive_bits_64_async(
|
||||
@@ -689,13 +645,12 @@ void cleanup_cuda_integer_count_of_consecutive_bits_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_grouped_oprf_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks_to_process,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, uint32_t message_bits_per_block,
|
||||
uint32_t total_random_bits, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks_to_process,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
uint32_t message_bits_per_block, uint32_t total_random_bits,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_grouped_oprf_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
@@ -707,31 +662,28 @@ void cleanup_cuda_integer_grouped_oprf_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_grouped_oprf_custom_range_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks_intermediate,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, uint32_t message_bits_per_block,
|
||||
uint32_t num_input_random_bits, uint32_t num_scalar_bits,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks_intermediate,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
uint32_t message_bits_per_block, uint32_t num_input_random_bits,
|
||||
uint32_t num_scalar_bits, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_grouped_oprf_custom_range_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
|
||||
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
|
||||
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
|
||||
void *const *ksks);
|
||||
void *const *compute_bsks, void *const *ksks);
|
||||
|
||||
void cleanup_cuda_integer_grouped_oprf_custom_range_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_ilog2_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t input_num_blocks, uint32_t counter_num_blocks,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
uint32_t input_num_blocks, uint32_t counter_num_blocks,
|
||||
uint32_t num_bits_in_ciphertext, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
@@ -747,14 +699,12 @@ void cleanup_cuda_integer_ilog2_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_match_value_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_matches, uint32_t num_input_blocks,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_matches, uint32_t num_input_blocks,
|
||||
uint32_t num_output_packed_blocks, uint32_t max_output_is_zero,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_match_value_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out_result,
|
||||
@@ -767,13 +717,11 @@ void cleanup_cuda_unchecked_match_value_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_cast_to_unsigned_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_input_blocks, uint32_t target_num_blocks, bool input_is_signed,
|
||||
bool requires_full_propagate, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_input_blocks, uint32_t target_num_blocks,
|
||||
bool input_is_signed, bool requires_full_propagate,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_cast_to_unsigned_64_async(CudaStreamsFFI streams,
|
||||
@@ -787,14 +735,12 @@ void cleanup_cuda_cast_to_unsigned_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_match_value_or_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_matches, uint32_t num_input_blocks,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_matches, uint32_t num_input_blocks,
|
||||
uint32_t num_match_packed_blocks, uint32_t num_final_blocks,
|
||||
uint32_t max_output_is_zero, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_match_value_or_64_async(
|
||||
@@ -808,12 +754,10 @@ void cleanup_cuda_unchecked_match_value_or_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_contains_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_contains_64_async(CudaStreamsFFI streams,
|
||||
@@ -828,12 +772,10 @@ void cleanup_cuda_unchecked_contains_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_contains_clear_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_contains_clear_64_async(
|
||||
@@ -846,12 +788,10 @@ void cleanup_cuda_unchecked_contains_clear_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_is_in_clears_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_clears, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_clears, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_is_in_clears_64_async(CudaStreamsFFI streams,
|
||||
@@ -866,12 +806,10 @@ void cleanup_cuda_unchecked_is_in_clears_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_index_in_clears_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_clears, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_clears, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_index_in_clears_64_async(
|
||||
@@ -885,12 +823,10 @@ void cleanup_cuda_unchecked_index_in_clears_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_first_index_in_clears_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_unique, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_unique, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_first_index_in_clears_64_async(
|
||||
@@ -908,12 +844,10 @@ void cleanup_cuda_unchecked_first_index_in_clears_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_first_index_of_clear_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_first_index_of_clear_64_async(
|
||||
@@ -927,12 +861,10 @@ void cleanup_cuda_unchecked_first_index_of_clear_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_first_index_of_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_first_index_of_64_async(
|
||||
@@ -946,12 +878,10 @@ void cleanup_cuda_unchecked_first_index_of_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_index_of_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_index_of_64_async(CudaStreamsFFI streams,
|
||||
@@ -967,12 +897,10 @@ void cleanup_cuda_unchecked_index_of_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_index_of_clear_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_index_of_clear_64_async(
|
||||
@@ -987,12 +915,10 @@ void cleanup_cuda_unchecked_index_of_clear_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_all_eq_slices_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_all_eq_slices_64_async(
|
||||
@@ -1005,12 +931,10 @@ void cleanup_cuda_unchecked_all_eq_slices_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_unchecked_contains_sub_slice_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_lhs, uint32_t num_rhs, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_lhs, uint32_t num_rhs,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_unchecked_contains_sub_slice_64_async(
|
||||
@@ -1023,12 +947,10 @@ void cleanup_cuda_unchecked_contains_sub_slice_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_cast_to_signed_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_input_blocks,
|
||||
uint32_t target_num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool input_is_signed,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_input_blocks, uint32_t target_num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool input_is_signed,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_cast_to_signed_64_async(CudaStreamsFFI streams,
|
||||
|
||||
@@ -77,7 +77,7 @@ public:
|
||||
static const uint64_t UNKNOWN = std::numeric_limits<uint64_t>::max();
|
||||
};
|
||||
|
||||
#if defined(DEBUG) || defined(DEBUG_FAKE_MULTI_GPU)
|
||||
#ifdef DEBUG
|
||||
#define CHECK_NOISE_LEVEL(noise_level_expr, msg_mod, carry_mod) \
|
||||
do { \
|
||||
if ((msg_mod) == 2 && (carry_mod) == 2) { \
|
||||
@@ -345,6 +345,21 @@ struct int_radix_params {
|
||||
message_modulus(message_modulus), carry_modulus(carry_modulus),
|
||||
noise_reduction_type(noise_reduction_type){};
|
||||
|
||||
int_radix_params(CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus,
|
||||
uint32_t carry_modulus,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type)
|
||||
: pbs_type((PBS_TYPE)bsk_params.pbs_type),
|
||||
glwe_dimension(bsk_params.glwe_dimension),
|
||||
polynomial_size(bsk_params.polynomial_size),
|
||||
big_lwe_dimension(bsk_params.big_lwe_dimension),
|
||||
small_lwe_dimension(bsk_params.input_lwe_dimension), ks_level(ks_level),
|
||||
ks_base_log(ks_base_log), pbs_level(bsk_params.level_count),
|
||||
pbs_base_log(bsk_params.base_log),
|
||||
grouping_factor(bsk_params.grouping_factor),
|
||||
message_modulus(message_modulus), carry_modulus(carry_modulus),
|
||||
noise_reduction_type(noise_reduction_type){};
|
||||
|
||||
int_radix_params() = default;
|
||||
|
||||
void print() {
|
||||
|
||||
@@ -5,12 +5,11 @@
|
||||
|
||||
extern "C" {
|
||||
uint64_t scratch_cuda_kreyvium_generate_keystream_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
|
||||
uint32_t num_inputs);
|
||||
|
||||
void cuda_kreyvium_generate_keystream_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *keystream_output,
|
||||
|
||||
@@ -7,11 +7,13 @@
|
||||
extern "C" {
|
||||
|
||||
void cuda_negate_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_negate_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_1,
|
||||
@@ -58,8 +60,10 @@ void cuda_glwe_wrapping_polynomial_mul_one_to_many_64_async(
|
||||
int8_t *circulant, void const *poly_rhs, uint32_t polynomial_size,
|
||||
uint32_t glwe_dimension, uint32_t n_rhs);
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_64(
|
||||
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, const uint64_t plaintext_in);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint64_t plaintext_in,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_inplace_32(
|
||||
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
CudaRadixCiphertextFFI const *input_2);
|
||||
|
||||
@@ -5,12 +5,11 @@
|
||||
|
||||
extern "C" {
|
||||
uint64_t scratch_cuda_trivium_generate_keystream_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
|
||||
uint32_t num_inputs);
|
||||
|
||||
void cuda_trivium_generate_keystream_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *keystream_output,
|
||||
|
||||
@@ -2,19 +2,14 @@
|
||||
#include "aes.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_aes_ctr_encrypt_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
|
||||
uint32_t sbox_parallelism) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
|
||||
uint32_t num_aes_inputs, uint32_t sbox_parallelism) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_aes_encrypt<uint64_t>(
|
||||
CudaStreams(streams), (int_aes_encrypt_buffer<uint64_t> **)mem_ptr,
|
||||
@@ -22,19 +17,14 @@ uint64_t scratch_cuda_integer_aes_ctr_encrypt_64_async(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_aes_ctr_256_encrypt_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
|
||||
uint32_t sbox_parallelism) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
|
||||
uint32_t num_aes_inputs, uint32_t sbox_parallelism) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_aes_encrypt<uint64_t>(
|
||||
CudaStreams(streams), (int_aes_encrypt_buffer<uint64_t> **)mem_ptr,
|
||||
@@ -78,18 +68,13 @@ void cleanup_cuda_integer_aes_ctr_256_encrypt_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_key_expansion_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_key_expansion<uint64_t>(
|
||||
CudaStreams(streams), (int_key_expansion_buffer<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -14,18 +14,13 @@ void cuda_integer_aes_ctr_256_encrypt_64_async(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_key_expansion_256_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_key_expansion_256<uint64_t>(
|
||||
CudaStreams(streams), (int_key_expansion_256_buffer<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -2,17 +2,12 @@
|
||||
|
||||
uint64_t scratch_cuda_integer_abs_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, bool is_signed,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_abs<uint64_t>(
|
||||
CudaStreams(streams), (int_abs_buffer<uint64_t> **)mem_ptr, is_signed,
|
||||
|
||||
@@ -11,19 +11,14 @@ void cuda_boolean_bitop_inplace_64_async(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_boolean_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
|
||||
bool is_unchecked, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_boolean_bitop<uint64_t>(
|
||||
CudaStreams(streams), (boolean_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
@@ -41,18 +36,13 @@ void cleanup_cuda_boolean_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_boolean_bitnot_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
uint32_t lwe_ciphertext_count, bool is_unchecked, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_boolean_bitnot<uint64_t>(
|
||||
CudaStreams(streams), (boolean_bitnot_buffer<uint64_t> **)mem_ptr, params,
|
||||
@@ -78,6 +68,34 @@ void cleanup_cuda_boolean_bitnot_64(CudaStreamsFFI streams,
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_bitop<uint64_t>(
|
||||
CudaStreams(streams), (int_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_bitop<uint64_t>(
|
||||
CudaStreams(streams), (int_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_bitnot_ciphertext_64(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *radix_ciphertext,
|
||||
uint32_t ct_message_modulus,
|
||||
@@ -99,25 +117,6 @@ void cuda_integer_bitop_inplace_64_async(
|
||||
(uint64_t **)(ksks));
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_bitop<uint64_t>(
|
||||
CudaStreams(streams), (int_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
@@ -128,25 +127,6 @@ void cleanup_cuda_integer_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_bitop<uint64_t>(
|
||||
CudaStreams(streams), (int_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_scalar_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
|
||||
@@ -176,14 +176,14 @@ host_bitnot(CudaStreams streams, CudaRadixCiphertextFFI *radix_ciphertext,
|
||||
(ct_message_modulus - 1);
|
||||
|
||||
host_negation<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), radix_ciphertext,
|
||||
radix_ciphertext, radix_ciphertext->lwe_dimension,
|
||||
streams.stream(0), streams.gpu_index(0), (Torus *)radix_ciphertext->ptr,
|
||||
(Torus *)radix_ciphertext->ptr, radix_ciphertext->lwe_dimension,
|
||||
radix_ciphertext->num_radix_blocks);
|
||||
|
||||
host_addition_plaintext_scalar<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), radix_ciphertext,
|
||||
radix_ciphertext, encoded_scalar, radix_ciphertext->lwe_dimension,
|
||||
radix_ciphertext->num_radix_blocks);
|
||||
streams.stream(0), streams.gpu_index(0), (Torus *)radix_ciphertext->ptr,
|
||||
(Torus *)radix_ciphertext->ptr, encoded_scalar,
|
||||
radix_ciphertext->lwe_dimension, radix_ciphertext->num_radix_blocks);
|
||||
|
||||
for (size_t i = 0; i < radix_ciphertext->num_radix_blocks; ++i) {
|
||||
radix_ciphertext->degrees[i] = ct_message_modulus - 1;
|
||||
|
||||
@@ -34,19 +34,14 @@ void trim_radix_blocks_msb_64(CudaRadixCiphertextFFI *output,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_cast_to_unsigned_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_input_blocks, uint32_t target_num_blocks, bool input_is_signed,
|
||||
bool requires_full_propagate, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_input_blocks, uint32_t target_num_blocks,
|
||||
bool input_is_signed, bool requires_full_propagate,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_cast_to_unsigned<uint64_t>(
|
||||
CudaStreams(streams), (int_cast_to_unsigned_buffer<uint64_t> **)mem_ptr,
|
||||
@@ -80,19 +75,13 @@ void cleanup_cuda_cast_to_unsigned_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_cast_to_signed_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_input_blocks,
|
||||
uint32_t target_num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool input_is_signed,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_input_blocks, uint32_t target_num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool input_is_signed,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_cast_to_signed<uint64_t>(
|
||||
CudaStreams(streams), (int_cast_to_signed_buffer<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -1,18 +1,16 @@
|
||||
#include "integer/cmux.cuh"
|
||||
|
||||
uint64_t scratch_cuda_cmux_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
uint64_t scratch_cuda_cmux_64_async(CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params,
|
||||
uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus,
|
||||
uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
PUSH_RANGE("scratch cmux")
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
std::function<uint64_t(uint64_t)> predicate_lut_f =
|
||||
[](uint64_t x) -> uint64_t { return x == 1; };
|
||||
|
||||
@@ -1,18 +1,14 @@
|
||||
#include "integer/comparison.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_comparison_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, COMPARISON_TYPE op_type, bool is_signed,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, COMPARISON_TYPE op_type, bool is_signed,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
PUSH_RANGE("scratch comparison")
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
uint64_t size_tracker = 0;
|
||||
switch (op_type) {
|
||||
@@ -38,18 +34,14 @@ uint64_t scratch_cuda_integer_comparison_64_async(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_comparison_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, COMPARISON_TYPE op_type, bool is_signed,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, COMPARISON_TYPE op_type, bool is_signed,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
PUSH_RANGE("scratch scalar comparison")
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
uint64_t size_tracker = 0;
|
||||
switch (op_type) {
|
||||
@@ -151,18 +143,13 @@ void cleanup_cuda_integer_scalar_comparison_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_are_all_comparisons_block_true_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_comparison_check<uint64_t>(
|
||||
CudaStreams(streams), (int_comparison_buffer<uint64_t> **)mem_ptr,
|
||||
@@ -196,18 +183,13 @@ void cleanup_cuda_integer_are_all_comparisons_block_true_64(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_comparison_check<uint64_t>(
|
||||
CudaStreams(streams), (int_comparison_buffer<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -35,8 +35,7 @@ device_accumulate_all_blocks(Torus *output, Torus const *input_block,
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
Torus *output, Torus const *input,
|
||||
uint32_t lwe_dimension,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
@@ -46,8 +45,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
// Add all blocks and store in sum
|
||||
device_accumulate_all_blocks<Torus><<<num_blocks, num_threads, 0, stream>>>(
|
||||
(Torus *)output->ptr, (Torus const *)input->ptr, lwe_dimension,
|
||||
num_radix_blocks);
|
||||
output, input, lwe_dimension, num_radix_blocks);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
@@ -104,33 +102,23 @@ __host__ void are_all_comparisons_block_true(
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
auto input_blocks = (Torus *)tmp_out->ptr;
|
||||
auto accumulator_ptr =
|
||||
(Torus *)are_all_block_true_buffer->tmp_block_accumulated->ptr;
|
||||
auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
|
||||
GPU_ASSERT(are_all_block_true_buffer->tmp_block_accumulated->lwe_dimension ==
|
||||
big_lwe_dimension,
|
||||
"lwe_dimension mismatch between tmp_block_accumulated and "
|
||||
"big_lwe_dimension");
|
||||
GPU_ASSERT(tmp_out->lwe_dimension == big_lwe_dimension,
|
||||
"lwe_dimension mismatch between tmp_out and big_lwe_dimension");
|
||||
uint32_t chunk_lengths[num_chunks];
|
||||
auto begin_remaining_blocks = remaining_blocks;
|
||||
uint32_t acc_offset = 0, inp_offset = 0;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
uint32_t chunk_length =
|
||||
std::min(max_value, begin_remaining_blocks - i * max_value);
|
||||
chunk_lengths[i] = chunk_length;
|
||||
CudaRadixCiphertextFFI acc_slice, inp_slice;
|
||||
as_radix_ciphertext_slice<Torus>(
|
||||
&acc_slice, are_all_block_true_buffer->tmp_block_accumulated,
|
||||
acc_offset, acc_offset + 1);
|
||||
as_radix_ciphertext_slice<Torus>(&inp_slice, tmp_out, inp_offset,
|
||||
inp_offset + chunk_length);
|
||||
accumulate_all_blocks<Torus>(streams.stream(0), streams.gpu_index(0),
|
||||
&acc_slice, &inp_slice, big_lwe_dimension,
|
||||
chunk_length);
|
||||
accumulator_ptr, input_blocks,
|
||||
big_lwe_dimension, chunk_length);
|
||||
|
||||
acc_offset += 1;
|
||||
accumulator_ptr += (big_lwe_dimension + 1);
|
||||
remaining_blocks -= (chunk_length - 1);
|
||||
inp_offset += chunk_length;
|
||||
input_blocks += (big_lwe_dimension + 1) * chunk_length;
|
||||
}
|
||||
auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
|
||||
|
||||
@@ -231,31 +219,21 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
GPU_ASSERT(buffer->tmp_block_accumulated->lwe_dimension == big_lwe_dimension,
|
||||
"lwe_dimension mismatch between tmp_block_accumulated and "
|
||||
"big_lwe_dimension");
|
||||
GPU_ASSERT(mem_ptr->tmp_lwe_array_out->lwe_dimension == big_lwe_dimension,
|
||||
"lwe_dimension mismatch between tmp_lwe_array_out and "
|
||||
"big_lwe_dimension");
|
||||
auto input_blocks = (Torus *)mem_ptr->tmp_lwe_array_out->ptr;
|
||||
auto accumulator = (Torus *)buffer->tmp_block_accumulated->ptr;
|
||||
uint32_t chunk_lengths[num_chunks];
|
||||
auto begin_remaining_blocks = remaining_blocks;
|
||||
uint32_t acc_offset = 0, inp_offset = 0;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
uint32_t chunk_length =
|
||||
std::min(max_value, begin_remaining_blocks - i * max_value);
|
||||
chunk_lengths[i] = chunk_length;
|
||||
CudaRadixCiphertextFFI acc_slice, inp_slice;
|
||||
as_radix_ciphertext_slice<Torus>(&acc_slice, buffer->tmp_block_accumulated,
|
||||
acc_offset, acc_offset + 1);
|
||||
as_radix_ciphertext_slice<Torus>(&inp_slice, mem_ptr->tmp_lwe_array_out,
|
||||
inp_offset, inp_offset + chunk_length);
|
||||
accumulate_all_blocks<Torus>(streams.stream(0), streams.gpu_index(0),
|
||||
&acc_slice, &inp_slice, big_lwe_dimension,
|
||||
accumulator, input_blocks, big_lwe_dimension,
|
||||
chunk_length);
|
||||
|
||||
acc_offset += 1;
|
||||
accumulator += (big_lwe_dimension + 1);
|
||||
remaining_blocks -= (chunk_length - 1);
|
||||
inp_offset += chunk_length;
|
||||
input_blocks += (big_lwe_dimension + 1) * chunk_length;
|
||||
}
|
||||
|
||||
// Selects a LUT
|
||||
@@ -318,31 +296,22 @@ __host__ void host_compare_blocks_with_zero(
|
||||
streams.stream(0), streams.gpu_index(0), sum, 0, 1, lwe_array_in, 0, 1);
|
||||
num_sum_blocks = 1;
|
||||
} else {
|
||||
GPU_ASSERT(sum->lwe_dimension == big_lwe_dimension,
|
||||
"lwe_dimension mismatch between sum and big_lwe_dimension");
|
||||
GPU_ASSERT(lwe_array_in->lwe_dimension == big_lwe_dimension,
|
||||
"lwe_dimension mismatch between lwe_array_in and "
|
||||
"big_lwe_dimension");
|
||||
uint32_t remainder_blocks = num_radix_blocks;
|
||||
uint32_t sum_offset = 0, inp_offset = 0;
|
||||
auto sum_i = (Torus *)sum->ptr;
|
||||
auto chunk = (Torus *)lwe_array_in->ptr;
|
||||
while (remainder_blocks > 1) {
|
||||
uint32_t chunk_size =
|
||||
std::min(remainder_blocks, num_elements_to_fill_carry);
|
||||
CudaRadixCiphertextFFI sum_slice, inp_slice;
|
||||
as_radix_ciphertext_slice<Torus>(&sum_slice, sum, sum_offset,
|
||||
sum_offset + 1);
|
||||
as_radix_ciphertext_slice<Torus>(&inp_slice, lwe_array_in, inp_offset,
|
||||
inp_offset + chunk_size);
|
||||
|
||||
accumulate_all_blocks<Torus>(streams.stream(0), streams.gpu_index(0),
|
||||
&sum_slice, &inp_slice, big_lwe_dimension,
|
||||
chunk_size);
|
||||
sum_i, chunk, big_lwe_dimension, chunk_size);
|
||||
|
||||
num_sum_blocks++;
|
||||
remainder_blocks -= (chunk_size - 1);
|
||||
|
||||
// Update operands
|
||||
inp_offset += chunk_size - 1;
|
||||
sum_offset += 1;
|
||||
chunk += (chunk_size - 1) * big_lwe_size;
|
||||
sum_i += big_lwe_size;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -412,8 +381,9 @@ compare_radix_blocks(CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
|
||||
// Subtract
|
||||
host_subtraction<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), lwe_array_out, lwe_array_left,
|
||||
lwe_array_right, big_lwe_dimension, num_radix_blocks);
|
||||
streams.stream(0), streams.gpu_index(0), (Torus *)lwe_array_out->ptr,
|
||||
(Torus *)lwe_array_left->ptr, (Torus *)lwe_array_right->ptr,
|
||||
big_lwe_dimension, num_radix_blocks);
|
||||
|
||||
// Apply LUT to compare to 0
|
||||
auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
|
||||
@@ -23,22 +23,24 @@ uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
uint32_t encryption_glwe_dimension, uint32_t encryption_polynomial_size,
|
||||
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
|
||||
uint32_t lwe_dimension, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks_to_decompress,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t num_blocks_to_decompress,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
// Decompression doesn't keyswitch, so big and small dimensions are the same
|
||||
int_radix_params encryption_params(
|
||||
pbs_type, encryption_glwe_dimension, encryption_polynomial_size,
|
||||
lwe_dimension, lwe_dimension, 0, 0, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus, noise_reduction_type);
|
||||
(PBS_TYPE)bsk_params.pbs_type, encryption_glwe_dimension,
|
||||
encryption_polynomial_size, bsk_params.big_lwe_dimension,
|
||||
bsk_params.big_lwe_dimension, 0, 0, bsk_params.level_count,
|
||||
bsk_params.base_log, bsk_params.grouping_factor, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
int_radix_params compression_params(
|
||||
pbs_type, compression_glwe_dimension, compression_polynomial_size,
|
||||
lwe_dimension, compression_glwe_dimension * compression_polynomial_size,
|
||||
0, 0, pbs_level, pbs_base_log, grouping_factor, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
(PBS_TYPE)bsk_params.pbs_type, compression_glwe_dimension,
|
||||
compression_polynomial_size, bsk_params.big_lwe_dimension,
|
||||
compression_glwe_dimension * compression_polynomial_size, 0, 0,
|
||||
bsk_params.level_count, bsk_params.base_log, bsk_params.grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_decompress_radix_ciphertext<uint64_t>(
|
||||
CudaStreams(streams), (int_decompression<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -214,7 +214,7 @@ host_integer_compress(CudaStreams streams,
|
||||
|
||||
if constexpr (std::is_same_v<Torus, uint64_t>) {
|
||||
lwe_pksk_input = mem_ptr->tmp_lwe;
|
||||
host_cleartext_multiplication_unsafe_no_degrees<Torus>(
|
||||
host_cleartext_multiplication<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), lwe_pksk_input, lwe_array_in,
|
||||
(uint64_t)compression_params.message_modulus);
|
||||
}
|
||||
|
||||
@@ -2,17 +2,13 @@
|
||||
|
||||
uint64_t scratch_cuda_integer_div_rem_64_async(
|
||||
CudaStreamsFFI streams, bool is_signed, int8_t **mem_ptr,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
PUSH_RANGE("scratch div")
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_div_rem<uint64_t>(
|
||||
CudaStreams(streams), is_signed, (int_div_rem_memory<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -192,7 +192,7 @@ __host__ void host_unsigned_integer_div_rem_block_by_block_2_2(
|
||||
|
||||
host_negation<Torus>(
|
||||
streams.stream(gpu_index), streams.gpu_index(gpu_index),
|
||||
out_boolean_block, out_boolean_block,
|
||||
(Torus *)out_boolean_block->ptr, (Torus *)out_boolean_block->ptr,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
|
||||
// we calculate encoding because this block works only for
|
||||
@@ -200,8 +200,8 @@ __host__ void host_unsigned_integer_div_rem_block_by_block_2_2(
|
||||
const Torus encoded_scalar = 1ULL << (sizeof(Torus) * 8 - 5);
|
||||
host_addition_plaintext_scalar<Torus>(
|
||||
streams.stream(gpu_index), streams.gpu_index(gpu_index),
|
||||
out_boolean_block, out_boolean_block, encoded_scalar,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
(Torus *)out_boolean_block->ptr, (Torus *)out_boolean_block->ptr,
|
||||
encoded_scalar, radix_params.big_lwe_dimension, 1);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -289,32 +289,35 @@ __host__ void host_unsigned_integer_div_rem_block_by_block_2_2(
|
||||
// c3 = !o3
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), c3, 0, 1, o3, 0, 1);
|
||||
host_negation<Torus>(streams.stream(0), streams.gpu_index(0), c3, c3,
|
||||
host_negation<Torus>(streams.stream(0), streams.gpu_index(0),
|
||||
(Torus *)c3->ptr, (Torus *)c3->ptr,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
const Torus encoded_scalar = 1ULL << (sizeof(Torus) * 8 - 5);
|
||||
host_addition_plaintext_scalar<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), c3, c3, encoded_scalar,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
streams.stream(0), streams.gpu_index(0), (Torus *)c3->ptr,
|
||||
(Torus *)c3->ptr, encoded_scalar, radix_params.big_lwe_dimension, 1);
|
||||
|
||||
// c2 = !o2 + o3
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams.stream(1), streams.gpu_index(1), c2, 0, 1, o2, 0, 1);
|
||||
host_negation<Torus>(streams.stream(1), streams.gpu_index(1), c2, c2,
|
||||
host_negation<Torus>(streams.stream(1), streams.gpu_index(1),
|
||||
(Torus *)c2->ptr, (Torus *)c2->ptr,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
host_addition_plaintext_scalar<Torus>(
|
||||
streams.stream(1), streams.gpu_index(1), c2, c2, encoded_scalar,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
streams.stream(1), streams.gpu_index(1), (Torus *)c2->ptr,
|
||||
(Torus *)c2->ptr, encoded_scalar, radix_params.big_lwe_dimension, 1);
|
||||
host_addition<Torus>(streams.stream(1), streams.gpu_index(1), c2, c2,
|
||||
o3_gpu_1, 1, 4, 4);
|
||||
|
||||
// c1 = !o1 + o2
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams.stream(2), streams.gpu_index(2), c1, 0, 1, o1, 0, 1);
|
||||
host_negation<Torus>(streams.stream(2), streams.gpu_index(2), c1, c1,
|
||||
host_negation<Torus>(streams.stream(2), streams.gpu_index(2),
|
||||
(Torus *)c1->ptr, (Torus *)c1->ptr,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
host_addition_plaintext_scalar<Torus>(
|
||||
streams.stream(2), streams.gpu_index(2), c1, c1, encoded_scalar,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
streams.stream(2), streams.gpu_index(2), (Torus *)c1->ptr,
|
||||
(Torus *)c1->ptr, encoded_scalar, radix_params.big_lwe_dimension, 1);
|
||||
host_addition<Torus>(streams.stream(2), streams.gpu_index(2), c1, c1,
|
||||
o2_gpu_2, 1, 4, 4);
|
||||
|
||||
@@ -327,9 +330,10 @@ __host__ void host_unsigned_integer_div_rem_block_by_block_2_2(
|
||||
CudaRadixCiphertextFFI *cx,
|
||||
CudaRadixCiphertextFFI *rx,
|
||||
int_radix_lut<Torus> *lut, Torus factor) {
|
||||
auto rx_list = to_lwe_ciphertext_list(rx);
|
||||
host_cleartext_multiplication<Torus>(streams.stream(gpu_index),
|
||||
streams.gpu_index(gpu_index),
|
||||
rx, rx, factor);
|
||||
(Torus *)rx->ptr, &rx_list, factor);
|
||||
host_add_the_same_block_to_all_blocks<Torus>(streams.stream(gpu_index),
|
||||
streams.gpu_index(gpu_index),
|
||||
rx, rx, cx, 4, 4);
|
||||
@@ -950,7 +954,7 @@ __host__ void host_integer_div_rem(
|
||||
int_mem_ptr->sub_streams_1.synchronize();
|
||||
int_mem_ptr->sub_streams_2.synchronize();
|
||||
|
||||
host_integer_negation<Torus>(
|
||||
host_negation<Torus>(
|
||||
int_mem_ptr->sub_streams_1, int_mem_ptr->negated_quotient, quotient,
|
||||
radix_params.message_modulus, radix_params.carry_modulus, num_blocks);
|
||||
|
||||
@@ -961,7 +965,7 @@ __host__ void host_integer_div_rem(
|
||||
nullptr, int_mem_ptr->scp_mem_1, bsks,
|
||||
ksks, requested_flag, uses_carry);
|
||||
|
||||
host_integer_negation<Torus>(
|
||||
host_negation<Torus>(
|
||||
int_mem_ptr->sub_streams_2, int_mem_ptr->negated_remainder, remainder,
|
||||
radix_params.message_modulus, radix_params.carry_modulus, num_blocks);
|
||||
|
||||
|
||||
@@ -1,19 +1,14 @@
|
||||
#include "ilog2.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_count_of_consecutive_bits_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t counter_num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
Direction direction, BitValue bit_value, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t counter_num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, Direction direction,
|
||||
BitValue bit_value, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_integer_count_of_consecutive_bits<uint64_t>(
|
||||
CudaStreams(streams), params,
|
||||
@@ -53,19 +48,14 @@ void cleanup_cuda_integer_count_of_consecutive_bits_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_ilog2_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t input_num_blocks, uint32_t counter_num_blocks,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
uint32_t input_num_blocks, uint32_t counter_num_blocks,
|
||||
uint32_t num_bits_in_ciphertext, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_integer_ilog2<uint64_t>(
|
||||
CudaStreams(streams), params, (int_ilog2_buffer<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -15,17 +15,12 @@ void cuda_full_propagation_64_inplace_async(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_full_propagation_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_full_propagation<uint64_t>(
|
||||
CudaStreams(streams), (int_fullprop_buffer<uint64_t> **)mem_ptr, params,
|
||||
@@ -44,17 +39,13 @@ void cleanup_cuda_full_propagation_64_inplace(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_propagate_single_carry_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_propagate_single_carry_inplace<uint64_t>(
|
||||
CudaStreams(streams), (int_sc_prop_memory<uint64_t> **)mem_ptr,
|
||||
@@ -62,17 +53,13 @@ uint64_t scratch_cuda_propagate_single_carry_64_inplace_async(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_add_and_propagate_single_carry_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_propagate_single_carry_inplace<uint64_t>(
|
||||
CudaStreams(streams), (int_sc_prop_memory<uint64_t> **)mem_ptr,
|
||||
@@ -80,17 +67,13 @@ uint64_t scratch_cuda_add_and_propagate_single_carry_64_inplace_async(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_overflowing_sub_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t compute_overflow, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t compute_overflow, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_overflowing_sub<uint64_t>(
|
||||
CudaStreams(streams), (int_borrow_prop_memory<uint64_t> **)mem_ptr,
|
||||
@@ -170,17 +153,12 @@ void cleanup_cuda_integer_overflowing_sub_64_inplace(CudaStreamsFFI streams,
|
||||
|
||||
uint64_t scratch_cuda_apply_univariate_lut_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, void const *input_lut,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint64_t lut_degree, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_apply_univariate_lut<uint64_t>(
|
||||
CudaStreams(streams), (int_radix_lut<uint64_t> **)mem_ptr,
|
||||
@@ -190,17 +168,12 @@ uint64_t scratch_cuda_apply_univariate_lut_64_async(
|
||||
|
||||
uint64_t scratch_cuda_apply_many_univariate_lut_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, void const *input_lut,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t num_many_lut, uint64_t lut_degree,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_apply_many_univariate_lut<uint64_t>(
|
||||
CudaStreams(streams), (int_radix_lut<uint64_t> **)mem_ptr,
|
||||
@@ -294,19 +267,14 @@ uint64_t scratch_cuda_apply_noise_squashing_mem(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_apply_noise_squashing_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t input_glwe_dimension, uint32_t input_polynomial_size,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t original_num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t input_glwe_dimension,
|
||||
uint32_t input_polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t num_radix_blocks, uint32_t original_num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_apply_noise_squashing_mem(
|
||||
streams, params, (int_noise_squashing_lut<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -505,8 +505,8 @@ template <typename Torus, typename KSTorus>
|
||||
__host__ void integer_radix_apply_univariate_lookup_table(
|
||||
CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, void *const *bsks,
|
||||
KSTorus *const *ksks, int_radix_lut<Torus> *lut, uint32_t num_radix_blocks,
|
||||
bool skip_input_noise_check = false) {
|
||||
KSTorus *const *ksks, int_radix_lut<Torus> *lut,
|
||||
uint32_t num_radix_blocks) {
|
||||
PUSH_RANGE("apply lut")
|
||||
// apply_lookup_table
|
||||
auto params = lut->params;
|
||||
@@ -531,14 +531,6 @@ __host__ void integer_radix_apply_univariate_lookup_table(
|
||||
PANIC("Cuda error: num radix blocks on which lut is applied should be "
|
||||
"smaller or equal to the number of input & output radix blocks")
|
||||
|
||||
if (!skip_input_noise_check) {
|
||||
for (uint32_t i = 0; i < num_radix_blocks; i++) {
|
||||
auto idx = lut->using_trivial_lwe_indexes ? i : lut->h_lwe_indexes_in[i];
|
||||
CHECK_NOISE_LEVEL(lwe_array_in->noise_levels[idx], params.message_modulus,
|
||||
params.carry_modulus);
|
||||
}
|
||||
}
|
||||
|
||||
// In the case of extracting a single LWE this parameters are dummy
|
||||
uint32_t num_many_lut = 1;
|
||||
uint32_t lut_stride = 0;
|
||||
@@ -746,7 +738,7 @@ __host__ void integer_radix_apply_bivariate_lookup_table(
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, void *const *bsks,
|
||||
KSTorus *const *ksks, int_radix_lut<Torus> *lut, uint32_t num_radix_blocks,
|
||||
uint32_t shift, bool skip_input_noise_check = false) {
|
||||
uint32_t shift) {
|
||||
PUSH_RANGE("apply bivar lut")
|
||||
if (lwe_array_out->lwe_dimension != lwe_array_1->lwe_dimension ||
|
||||
lwe_array_out->lwe_dimension != lwe_array_2->lwe_dimension)
|
||||
@@ -773,16 +765,6 @@ __host__ void integer_radix_apply_bivariate_lookup_table(
|
||||
auto polynomial_size = params.polynomial_size;
|
||||
auto grouping_factor = params.grouping_factor;
|
||||
|
||||
if (!skip_input_noise_check) {
|
||||
for (uint32_t i = 0; i < num_radix_blocks; i++) {
|
||||
auto idx = lut->using_trivial_lwe_indexes ? i : lut->h_lwe_indexes_in[i];
|
||||
CHECK_NOISE_LEVEL(lwe_array_1->noise_levels[idx], params.message_modulus,
|
||||
params.carry_modulus);
|
||||
CHECK_NOISE_LEVEL(lwe_array_2->noise_levels[idx], params.message_modulus,
|
||||
params.carry_modulus);
|
||||
}
|
||||
}
|
||||
|
||||
// In the case of extracting a single LWE this parameters are dummy
|
||||
uint32_t num_many_lut = 1;
|
||||
uint32_t lut_stride = 0;
|
||||
@@ -2267,13 +2249,14 @@ void host_single_borrow_propagate(CudaStreams streams,
|
||||
streams, borrow_states, params, mem->prop_simu_group_carries_mem, bsks,
|
||||
ksks, num_radix_blocks, num_groups);
|
||||
|
||||
auto shifted_blocks =
|
||||
(Torus *)mem->shifted_blocks_borrow_state_mem->shifted_blocks->ptr;
|
||||
auto prepared_blocks = mem->prop_simu_group_carries_mem->prepared_blocks;
|
||||
auto simulators = (Torus *)mem->prop_simu_group_carries_mem->simulators->ptr;
|
||||
|
||||
host_subtraction<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), prepared_blocks,
|
||||
mem->shifted_blocks_borrow_state_mem->shifted_blocks,
|
||||
mem->prop_simu_group_carries_mem->simulators, big_lwe_dimension,
|
||||
num_radix_blocks);
|
||||
host_subtraction<Torus>(streams.stream(0), streams.gpu_index(0),
|
||||
(Torus *)prepared_blocks->ptr, shifted_blocks,
|
||||
simulators, big_lwe_dimension, num_radix_blocks);
|
||||
|
||||
host_add_scalar_one_inplace<Torus>(streams, prepared_blocks, message_modulus,
|
||||
carry_modulus);
|
||||
@@ -2317,7 +2300,8 @@ void host_single_borrow_propagate(CudaStreams streams,
|
||||
|
||||
auto resolved_carries = mem->prop_simu_group_carries_mem->resolved_carries;
|
||||
host_negation<Torus>(sub_streams_2.stream(0), sub_streams_2.gpu_index(0),
|
||||
resolved_carries, resolved_carries, big_lwe_dimension,
|
||||
(Torus *)resolved_carries->ptr,
|
||||
(Torus *)resolved_carries->ptr, big_lwe_dimension,
|
||||
num_groups);
|
||||
|
||||
host_radix_sum_in_groups<Torus>(
|
||||
|
||||
@@ -122,16 +122,12 @@ void cuda_integer_mult_inplace_64_async(
|
||||
uint64_t scratch_cuda_integer_mult_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, bool const is_boolean_left,
|
||||
bool const is_boolean_right, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t glwe_dimension, uint32_t lwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t pbs_base_log, uint32_t pbs_level,
|
||||
uint32_t ks_base_log, uint32_t ks_level, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
polynomial_size * glwe_dimension, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
uint32_t carry_modulus, CudaLweBootstrapKeyParamsFFI bsk_params,
|
||||
uint32_t ks_base_log, uint32_t ks_level, uint32_t num_radix_blocks,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
const uint32_t polynomial_size = bsk_params.polynomial_size;
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
@@ -164,20 +160,14 @@ void cleanup_cuda_integer_mult_inplace_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_partial_sum_ciphertexts_vec_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks_in_radix,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks_in_radix,
|
||||
uint32_t max_num_radix_in_vec, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool reduce_degrees_for_single_carry_propagation, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
uint32_t carry_modulus, bool reduce_degrees_for_single_carry_propagation,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
return scratch_cuda_integer_partial_sum_ciphertexts_vec<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
(int_sum_ciphertexts_vec_memory<uint64_t> **)mem_ptr, num_blocks_in_radix,
|
||||
|
||||
@@ -418,7 +418,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec(
|
||||
|
||||
integer_radix_apply_univariate_lookup_table<Torus>(
|
||||
streams, current_blocks, current_blocks, bsks, ksks,
|
||||
luts_message_carry, total_ciphertexts, true);
|
||||
luts_message_carry, total_ciphertexts);
|
||||
}
|
||||
cuda_set_device(streams.gpu_index(0));
|
||||
std::swap(d_columns, d_new_columns);
|
||||
@@ -471,7 +471,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec(
|
||||
|
||||
integer_radix_apply_univariate_lookup_table<Torus>(
|
||||
active_streams, current_blocks, radix_lwe_out, bsks, ksks,
|
||||
luts_message_carry, num_blocks_in_apply_lut, true);
|
||||
luts_message_carry, num_blocks_in_apply_lut);
|
||||
}
|
||||
calculate_final_degrees(radix_lwe_out->degrees, terms->degrees,
|
||||
num_radix_blocks, num_radix_in_vec, chunk_size,
|
||||
|
||||
@@ -10,7 +10,7 @@ void cuda_negate_ciphertext_64(CudaStreamsFFI streams,
|
||||
"operations");
|
||||
|
||||
auto cuda_streams = CudaStreams(streams);
|
||||
host_integer_negation<uint64_t>(cuda_streams, lwe_array_out, lwe_array_in,
|
||||
host_negation<uint64_t>(cuda_streams, lwe_array_out, lwe_array_in,
|
||||
message_modulus, carry_modulus, num_radix_blocks);
|
||||
cuda_synchronize_stream(cuda_streams.stream(0), cuda_streams.gpu_index(0));
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ __global__ void device_negation(Torus *output, Torus const *input,
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_negation(CudaStreams streams,
|
||||
__host__ void host_negation(CudaStreams streams,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in,
|
||||
uint64_t message_modulus, uint64_t carry_modulus,
|
||||
|
||||
@@ -1,19 +1,14 @@
|
||||
#include "integer/oprf.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_grouped_oprf_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks_to_process,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, uint32_t message_bits_per_block,
|
||||
uint32_t total_random_bits, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks_to_process,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
uint32_t message_bits_per_block, uint32_t total_random_bits,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_grouped_oprf<uint64_t>(
|
||||
CudaStreams(streams), (int_grouped_oprf_memory<uint64_t> **)mem_ptr,
|
||||
@@ -45,20 +40,14 @@ void cleanup_cuda_integer_grouped_oprf_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_grouped_oprf_custom_range_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks_intermediate,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, uint32_t message_bits_per_block,
|
||||
uint32_t num_input_random_bits, uint32_t num_scalar_bits,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks_intermediate,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
uint32_t message_bits_per_block, uint32_t num_input_random_bits,
|
||||
uint32_t num_scalar_bits, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_grouped_oprf_custom_range<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -72,13 +61,13 @@ void cuda_integer_grouped_oprf_custom_range_64_async(
|
||||
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
|
||||
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
|
||||
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
void *const *compute_bsks, void *const *ksks) {
|
||||
|
||||
host_integer_grouped_oprf_custom_range<uint64_t>(
|
||||
CudaStreams(streams), radix_lwe_out, num_blocks_intermediate,
|
||||
(const uint64_t *)seeded_lwe_input, decomposed_scalar,
|
||||
has_at_least_one_set, num_scalars, shift,
|
||||
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks,
|
||||
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks, compute_bsks,
|
||||
(uint64_t *const *)ksks);
|
||||
}
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ void host_integer_grouped_oprf_custom_range(
|
||||
const Torus *decomposed_scalar, const Torus *has_at_least_one_set,
|
||||
uint32_t num_scalars, uint32_t shift,
|
||||
int_grouped_oprf_custom_range_memory<Torus> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks) {
|
||||
void *const *compute_bsks, Torus *const *ksks) {
|
||||
|
||||
CudaRadixCiphertextFFI *computation_buffer = mem_ptr->tmp_oprf_output;
|
||||
set_zero_radix_ciphertext_slice_async<Torus>(
|
||||
@@ -127,12 +127,12 @@ void host_integer_grouped_oprf_custom_range(
|
||||
|
||||
host_integer_scalar_mul_radix<Torus>(
|
||||
streams, computation_buffer, decomposed_scalar, has_at_least_one_set,
|
||||
mem_ptr->scalar_mul_buffer, bsks, ksks, mem_ptr->params.message_modulus,
|
||||
num_scalars);
|
||||
mem_ptr->scalar_mul_buffer, compute_bsks, ksks,
|
||||
mem_ptr->params.message_modulus, num_scalars);
|
||||
|
||||
host_logical_scalar_shift_inplace<Torus>(streams, computation_buffer, shift,
|
||||
mem_ptr->logical_scalar_shift_buffer,
|
||||
bsks, ksks, num_blocks_intermediate);
|
||||
host_logical_scalar_shift_inplace<Torus>(
|
||||
streams, computation_buffer, shift, mem_ptr->logical_scalar_shift_buffer,
|
||||
compute_bsks, ksks, num_blocks_intermediate);
|
||||
|
||||
uint32_t num_blocks_output = radix_lwe_out->num_radix_blocks;
|
||||
uint32_t blocks_to_copy =
|
||||
|
||||
@@ -112,7 +112,7 @@ device_scalar_subtraction_inplace(Torus *lwe_array, Torus *scalar_input,
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_scalar_subtraction_inplace(
|
||||
CudaStreams streams, CudaRadixCiphertextFFI *lwe_array, Torus *scalar_input,
|
||||
CudaStreams streams, Torus *lwe_array, Torus *scalar_input,
|
||||
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus) {
|
||||
cuda_set_device(streams.gpu_index(0));
|
||||
@@ -130,8 +130,7 @@ __host__ void host_scalar_subtraction_inplace(
|
||||
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
|
||||
|
||||
device_scalar_subtraction_inplace<Torus>
|
||||
<<<grid, thds, 0, streams.stream(0)>>>((Torus *)lwe_array->ptr,
|
||||
scalar_input,
|
||||
<<<grid, thds, 0, streams.stream(0)>>>(lwe_array, scalar_input,
|
||||
input_lwe_ciphertext_count,
|
||||
lwe_dimension, delta);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
@@ -63,8 +63,8 @@ __host__ void scalar_compare_radix_blocks(
|
||||
// Subtract
|
||||
// Here we need the true lwe sub, not the one that comes from shortint.
|
||||
host_scalar_subtraction_inplace<Torus>(
|
||||
streams, subtracted_blocks, scalar_blocks, big_lwe_dimension,
|
||||
num_radix_blocks, message_modulus, carry_modulus);
|
||||
streams, (Torus *)subtracted_blocks->ptr, scalar_blocks,
|
||||
big_lwe_dimension, num_radix_blocks, message_modulus, carry_modulus);
|
||||
|
||||
// Apply LUT to compare to 0
|
||||
auto sign_lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
|
||||
@@ -1,19 +1,13 @@
|
||||
#include "scalar_div.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_unsigned_scalar_div_radix_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
const CudaScalarDivisorFFI *scalar_divisor_ffi, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_integer_unsigned_scalar_div_radix<uint64_t>(
|
||||
CudaStreams(streams), params,
|
||||
@@ -45,19 +39,13 @@ void cleanup_cuda_integer_unsigned_scalar_div_radix_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_signed_scalar_div_radix_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
const CudaScalarDivisorFFI *scalar_divisor_ffi, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_integer_signed_scalar_div_radix<uint64_t>(
|
||||
CudaStreams(streams), params,
|
||||
@@ -89,20 +77,14 @@ void cleanup_cuda_integer_signed_scalar_div_radix_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_unsigned_scalar_div_rem_radix_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_integer_unsigned_scalar_div_rem_radix<uint64_t>(
|
||||
CudaStreams(streams), params,
|
||||
@@ -143,20 +125,14 @@ void cleanup_cuda_integer_unsigned_scalar_div_rem_radix_64(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_signed_scalar_div_rem_radix_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
|
||||
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_integer_signed_scalar_div_rem_radix<uint64_t>(
|
||||
CudaStreams(streams), params,
|
||||
|
||||
@@ -129,7 +129,7 @@ __host__ void host_integer_signed_scalar_div_radix(
|
||||
if (scalar_divisor_ffi->is_divisor_negative) {
|
||||
CudaRadixCiphertextFFI *tmp = mem_ptr->tmp_ffi;
|
||||
|
||||
host_integer_negation<Torus>(
|
||||
host_negation<Torus>(
|
||||
streams, tmp, numerator_ct, mem_ptr->params.message_modulus,
|
||||
mem_ptr->params.carry_modulus, numerator_ct->num_radix_blocks);
|
||||
|
||||
@@ -224,7 +224,7 @@ __host__ void host_integer_signed_scalar_div_radix(
|
||||
}
|
||||
|
||||
if (scalar_divisor_ffi->is_divisor_negative) {
|
||||
host_integer_negation<Torus>(
|
||||
host_negation<Torus>(
|
||||
streams, numerator_ct, tmp, mem_ptr->params.message_modulus,
|
||||
mem_ptr->params.carry_modulus, numerator_ct->num_radix_blocks);
|
||||
} else {
|
||||
|
||||
@@ -1,18 +1,13 @@
|
||||
#include "integer/scalar_mul.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_mul_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t num_scalar_bits,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t num_scalar_bits, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_scalar_mul<uint64_t>(
|
||||
CudaStreams(streams), (int_scalar_mul_buffer<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -1,18 +1,13 @@
|
||||
#include "scalar_rotate.cuh"
|
||||
|
||||
uint64_t scratch_cuda_scalar_rotate_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_scalar_rotate<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
|
||||
@@ -1,18 +1,13 @@
|
||||
#include "scalar_shifts.cuh"
|
||||
|
||||
uint64_t scratch_cuda_logical_scalar_shift_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_logical_scalar_shift<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -35,18 +30,13 @@ void cuda_logical_scalar_shift_64_inplace_async(
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_arithmetic_scalar_shift_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_arithmetic_scalar_shift<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
|
||||
@@ -1,18 +1,13 @@
|
||||
#include "shift_and_rotate.cuh"
|
||||
|
||||
uint64_t scratch_cuda_shift_and_rotate_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type, bool is_signed,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type, bool is_signed,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_shift_and_rotate<uint64_t>(
|
||||
CudaStreams(streams), (int_shift_and_rotate_buffer<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -1,18 +1,13 @@
|
||||
#include "subtraction.cuh"
|
||||
|
||||
uint64_t scratch_cuda_sub_and_propagate_single_carry_64_inplace_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_sub_and_propagate_single_carry<uint64_t>(
|
||||
CudaStreams(streams), (int_sub_and_propagate<uint64_t> **)mem_ptr,
|
||||
|
||||
@@ -36,7 +36,7 @@ void host_sub_and_propagate_single_carry(
|
||||
int_sub_and_propagate<Torus> *mem, void *const *bsks, KSTorus *const *ksks,
|
||||
uint32_t requested_flag, uint32_t uses_carry) {
|
||||
|
||||
host_integer_negation<Torus>(streams, mem->neg_rhs_array, rhs_array,
|
||||
host_negation<Torus>(streams, mem->neg_rhs_array, rhs_array,
|
||||
mem->params.message_modulus, mem->params.carry_modulus,
|
||||
mem->neg_rhs_array->num_radix_blocks);
|
||||
|
||||
@@ -46,7 +46,7 @@ void host_sub_and_propagate_single_carry(
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_subtraction(CudaStreams streams,
|
||||
__host__ void host_subtraction(CudaStreams streams,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in_2,
|
||||
@@ -65,7 +65,7 @@ __host__ void host_integer_subtraction(CudaStreams streams,
|
||||
PANIC("Cuda error: lwe_array_in and lwe_array_out lwe_dimension must be "
|
||||
"the same")
|
||||
|
||||
host_integer_negation<Torus>(streams, lwe_array_out, lwe_array_in_2, message_modulus,
|
||||
host_negation<Torus>(streams, lwe_array_out, lwe_array_in_2, message_modulus,
|
||||
carry_modulus, num_radix_blocks);
|
||||
host_addition<Torus>(streams.stream(0), streams.gpu_index(0), lwe_array_out,
|
||||
lwe_array_out, lwe_array_in_1, num_radix_blocks,
|
||||
|
||||
@@ -1,18 +1,13 @@
|
||||
#include "integer/vector_comparison.cuh"
|
||||
|
||||
uint64_t scratch_cuda_unchecked_all_eq_slices_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_all_eq_slices<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -50,18 +45,13 @@ void cleanup_cuda_unchecked_all_eq_slices_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_contains_sub_slice_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_lhs, uint32_t num_rhs, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_lhs, uint32_t num_rhs,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_contains_sub_slice<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
|
||||
@@ -1,19 +1,14 @@
|
||||
#include "integer/vector_find.cuh"
|
||||
|
||||
uint64_t scratch_cuda_unchecked_match_value_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_matches, uint32_t num_input_blocks,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_matches, uint32_t num_input_blocks,
|
||||
uint32_t num_output_packed_blocks, uint32_t max_output_is_zero,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_match_value<uint64_t>(
|
||||
CudaStreams(streams), (int_unchecked_match_buffer<uint64_t> **)mem_ptr,
|
||||
@@ -56,20 +51,15 @@ void cleanup_cuda_unchecked_match_value_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_match_value_or_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_matches, uint32_t num_input_blocks,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_matches, uint32_t num_input_blocks,
|
||||
uint32_t num_match_packed_blocks, uint32_t num_final_blocks,
|
||||
uint32_t max_output_is_zero, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_match_value_or<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -107,18 +97,13 @@ void cleanup_cuda_unchecked_match_value_or_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_contains_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_contains<uint64_t>(
|
||||
CudaStreams(streams), (int_unchecked_contains_buffer<uint64_t> **)mem_ptr,
|
||||
@@ -157,18 +142,13 @@ void cleanup_cuda_unchecked_contains_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_contains_clear_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_contains_clear<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -202,18 +182,13 @@ void cleanup_cuda_unchecked_contains_clear_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_is_in_clears_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_clears, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_clears, uint32_t num_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_is_in_clears<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -247,18 +222,13 @@ void cleanup_cuda_unchecked_is_in_clears_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_index_in_clears_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_clears, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_clears, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_index_in_clears<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -299,18 +269,13 @@ void cleanup_cuda_unchecked_index_in_clears_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_first_index_in_clears_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_unique, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_unique, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_first_index_in_clears<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -351,18 +316,13 @@ void cleanup_cuda_unchecked_first_index_in_clears_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_first_index_of_clear_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_first_index_of_clear<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -403,18 +363,13 @@ void cleanup_cuda_unchecked_first_index_of_clear_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_first_index_of_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_first_index_of<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
@@ -455,18 +410,13 @@ void cleanup_cuda_unchecked_first_index_of_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_index_of_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_index_of<uint64_t>(
|
||||
CudaStreams(streams), (int_unchecked_index_of_buffer<uint64_t> **)mem_ptr,
|
||||
@@ -508,18 +458,13 @@ void cleanup_cuda_unchecked_index_of_64(CudaStreamsFFI streams,
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_unchecked_index_of_clear_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
|
||||
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_unchecked_index_of_clear<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
|
||||
@@ -2,18 +2,14 @@
|
||||
#include "kreyvium.cuh"
|
||||
|
||||
uint64_t scratch_cuda_kreyvium_generate_keystream_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
|
||||
uint32_t num_inputs) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_kreyvium_encrypt<uint64_t>(
|
||||
CudaStreams(streams), (int_kreyvium_buffer<uint64_t> **)mem_ptr, params,
|
||||
|
||||
@@ -182,15 +182,18 @@ void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
|
||||
* performs the operation on the GPU.
|
||||
*/
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_64(
|
||||
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, const uint64_t plaintext_in) {
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint64_t plaintext_in,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
PANIC_IF_FALSE(lwe_array_out != lwe_array_in,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
host_addition_plaintext_scalar<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out, lwe_array_in,
|
||||
plaintext_in, lwe_array_out->lwe_dimension,
|
||||
lwe_array_out->num_radix_blocks);
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), plaintext_in,
|
||||
input_lwe_dimension, input_lwe_ciphertext_count);
|
||||
cuda_synchronize_stream(static_cast<cudaStream_t>(stream), gpu_index);
|
||||
}
|
||||
|
||||
@@ -68,9 +68,9 @@ __host__ void host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index,
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_addition_plaintext_scalar(
|
||||
cudaStream_t stream, uint32_t gpu_index, CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *lwe_input, const T plaintext_input,
|
||||
const uint32_t lwe_dimension, const uint32_t lwe_ciphertext_count) {
|
||||
cudaStream_t stream, uint32_t gpu_index, T *output, T const *lwe_input,
|
||||
const T plaintext_input, const uint32_t lwe_dimension,
|
||||
const uint32_t lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
@@ -79,13 +79,12 @@ __host__ void host_addition_plaintext_scalar(
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu((T *)output->ptr, (T const *)lwe_input->ptr,
|
||||
cuda_memcpy_async_gpu_to_gpu(output, lwe_input,
|
||||
safe_mul_sizeof<T>((size_t)(lwe_dimension + 1),
|
||||
(size_t)lwe_ciphertext_count),
|
||||
stream, gpu_index);
|
||||
plaintext_addition_scalar<T><<<grid, thds, 0, stream>>>(
|
||||
(T *)output->ptr, (T const *)lwe_input->ptr, plaintext_input,
|
||||
lwe_dimension, num_entries);
|
||||
output, lwe_input, plaintext_input, lwe_dimension, num_entries);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
@@ -216,9 +215,7 @@ __global__ void subtraction(T *output, T const *input_1, T const *input_2,
|
||||
// Coefficient-wise subtraction
|
||||
template <typename T>
|
||||
__host__ void host_subtraction(cudaStream_t stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_1,
|
||||
CudaRadixCiphertextFFI const *input_2,
|
||||
T *output, T const *input_1, T const *input_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
@@ -233,9 +230,8 @@ __host__ void host_subtraction(cudaStream_t stream, uint32_t gpu_index,
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
subtraction<T><<<grid, thds, 0, stream>>>(
|
||||
(T *)output->ptr, (T const *)input_1->ptr, (T const *)input_2->ptr,
|
||||
num_entries);
|
||||
subtraction<T>
|
||||
<<<grid, thds, 0, stream>>>(output, input_1, input_2, num_entries);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
|
||||
@@ -68,25 +68,6 @@ __global__ void cleartext_multiplication(T *output, T const *lwe_input,
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_cleartext_multiplication(
|
||||
cudaStream_t stream, uint32_t gpu_index, CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *lwe_input, T cleartext_input) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
uint32_t num_entries =
|
||||
lwe_input->num_radix_blocks * (lwe_input->lwe_dimension + 1);
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
cleartext_multiplication<T><<<grid, thds, 0, stream>>>(
|
||||
(T *)output->ptr, (T const *)lwe_input->ptr, cleartext_input, num_entries);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_cleartext_multiplication_unsafe_no_degrees(
|
||||
cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
CudaLweCiphertextListFFI const *lwe_input, T cleartext_input) {
|
||||
|
||||
|
||||
@@ -5,16 +5,17 @@
|
||||
* See the equivalent operation on u64 ciphertexts for more details.
|
||||
*/
|
||||
void cuda_negate_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in) {
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
PANIC_IF_FALSE(lwe_array_out != lwe_array_in,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
host_negation<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
lwe_array_out, lwe_array_in,
|
||||
lwe_array_out->lwe_dimension,
|
||||
lwe_array_out->num_radix_blocks);
|
||||
static_cast<uint32_t *>(lwe_array_out),
|
||||
static_cast<const uint32_t *>(lwe_array_in),
|
||||
input_lwe_dimension, input_lwe_ciphertext_count);
|
||||
cuda_synchronize_stream(static_cast<cudaStream_t>(stream), gpu_index);
|
||||
}
|
||||
|
||||
@@ -41,15 +42,16 @@ void cuda_negate_lwe_ciphertext_vector_32(
|
||||
* device function that performs the operation on the GPU.
|
||||
*/
|
||||
void cuda_negate_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in) {
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
PANIC_IF_FALSE(lwe_array_out != lwe_array_in,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
host_negation<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
lwe_array_out, lwe_array_in,
|
||||
lwe_array_out->lwe_dimension,
|
||||
lwe_array_out->num_radix_blocks);
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in),
|
||||
input_lwe_dimension, input_lwe_ciphertext_count);
|
||||
cuda_synchronize_stream(static_cast<cudaStream_t>(stream), gpu_index);
|
||||
}
|
||||
|
||||
@@ -22,10 +22,8 @@ __global__ void negation(T *output, T const *input, uint32_t num_entries) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_negation(cudaStream_t stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
const uint32_t input_lwe_dimension,
|
||||
__host__ void host_negation(cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
T const *input, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
@@ -39,7 +37,7 @@ __host__ void host_negation(cudaStream_t stream, uint32_t gpu_index,
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
negation<T><<<grid, thds, 0, stream>>>((T *)output->ptr, (T const *)input->ptr, num_entries);
|
||||
negation<T><<<grid, thds, 0, stream>>>(output, input, num_entries);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
|
||||
@@ -2,18 +2,14 @@
|
||||
#include "trivium.cuh"
|
||||
|
||||
uint64_t scratch_cuda_trivium_generate_keystream_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
|
||||
uint32_t num_inputs) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus,
|
||||
noise_reduction_type);
|
||||
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
|
||||
carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_trivium_encrypt<uint64_t>(
|
||||
CudaStreams(streams), (int_trivium_buffer<uint64_t> **)mem_ptr, params,
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
cat /etc/os-release | grep "\<NAME\>" | sed "s/NAME=\"//g" | sed "s/\"//g"
|
||||
File diff suppressed because it is too large
Load Diff
@@ -156,7 +156,7 @@ impl HpuVarWrapped {
|
||||
{
|
||||
let mut inner = var.inner.lock().unwrap();
|
||||
|
||||
for (slot, ct) in std::iter::zip(inner.bundle.iter_mut(), ct.into_iter()) {
|
||||
for (slot, ct) in std::iter::zip(inner.bundle.iter_mut(), ct) {
|
||||
#[cfg(feature = "io-dump")]
|
||||
let params = ct.params().clone();
|
||||
for (id, cut) in ct.into_container().iter().enumerate() {
|
||||
|
||||
@@ -24,7 +24,7 @@ bindgen.workspace = true
|
||||
[dependencies]
|
||||
ark-ec.workspace = true
|
||||
ark-ff.workspace = true
|
||||
tfhe-cuda-backend = { version = "0.14.0", path = "../tfhe-cuda-backend" }
|
||||
tfhe-cuda-backend = { version = "0.15.0", path = "../tfhe-cuda-backend" }
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
fn get_linux_distribution_name() -> Option<String> {
|
||||
let content = std::fs::read_to_string("/etc/os-release").ok()?;
|
||||
for line in content.lines() {
|
||||
if let Some(value) = line.strip_prefix("NAME=") {
|
||||
return Some(value.trim_matches('"').to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// Handle docs.rs builds (no CUDA available)
|
||||
@@ -29,16 +38,10 @@ fn main() {
|
||||
println!("cargo:rustc-link-arg=-Wl,--allow-multiple-definition");
|
||||
println!("cargo:rustc-link-arg=-Wl,--no-as-needed");
|
||||
|
||||
// Check Linux distribution (reuse script from tfhe-cuda-backend)
|
||||
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
|
||||
.expect("CARGO_MANIFEST_DIR must be set by cargo during build");
|
||||
let script_path = PathBuf::from(&manifest_dir).join("../tfhe-cuda-backend/get_os_name.sh");
|
||||
let output = Command::new(&script_path)
|
||||
.output()
|
||||
.expect("Failed to run get_os_name.sh — is tfhe-cuda-backend present?");
|
||||
let distribution =
|
||||
String::from_utf8(output.stdout).expect("get_os_name.sh output must be valid UTF-8");
|
||||
if distribution != "Ubuntu\n" {
|
||||
|
||||
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
|
||||
println!(
|
||||
"cargo:warning=This Linux distribution is not officially supported. \
|
||||
Only Ubuntu is supported by zk-cuda-backend at this time. Build may fail\n"
|
||||
|
||||
@@ -71,11 +71,6 @@ set(CMAKE_CUDA_FLAGS_DEBUG "-g -O0 -G")
|
||||
# Additional CUDA flags (aligned with tfhe-cuda-backend)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -Xcompiler -Wextra --use_fast_math --expt-relaxed-constexpr")
|
||||
|
||||
# =============================================================================
|
||||
# Path to tfhe-cuda-backend for device utilities
|
||||
# =============================================================================
|
||||
set(TFHE_CUDA_BACKEND_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../tfhe-cuda-backend/cuda)
|
||||
|
||||
# Core source files (without device utilities) Device utilities come from tfhe-cuda-backend.
|
||||
set(FP_CORE_SOURCES src/primitives/fp.cu src/primitives/fp2.cu src/curve.cu src/msm/pippenger/msm_pippenger.cu
|
||||
src/msm/msm.cu)
|
||||
@@ -112,7 +107,7 @@ endif()
|
||||
target_link_libraries(zk_cuda_backend PUBLIC cudart)
|
||||
|
||||
# Include both local headers and tfhe-cuda-backend headers (for device.h)
|
||||
target_include_directories(zk_cuda_backend PUBLIC include ../src/include ${TFHE_CUDA_BACKEND_DIR}/include)
|
||||
target_include_directories(zk_cuda_backend PUBLIC include ../src/include)
|
||||
|
||||
# =============================================================================
|
||||
# Tests and Benchmarks (optional, controlled by ZK_CUDA_BACKEND_BUILD_TESTS/BENCHMARKS)
|
||||
@@ -135,4 +130,3 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
|
||||
message(STATUS "CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||
message(STATUS "C++ standard: ${CMAKE_CXX_STANDARD}")
|
||||
message(STATUS "CUDA standard: ${CMAKE_CUDA_STANDARD}")
|
||||
message(STATUS "tfhe-cuda-backend path: ${TFHE_CUDA_BACKEND_DIR}")
|
||||
|
||||
35
backends/zk-cuda-backend/cuda/include/checked_arithmetic.h
Normal file
35
backends/zk-cuda-backend/cuda/include/checked_arithmetic.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
|
||||
#include "device.h"
|
||||
|
||||
// Variadic checked multiplication of size_t values.
|
||||
// Folds left-to-right using __builtin_mul_overflow, returning true on overflow.
|
||||
// On overflow the value written to *out is unspecified.
|
||||
template <typename... Args>
|
||||
inline bool checked_mul(size_t *out, size_t first, Args... rest) {
|
||||
size_t result = first;
|
||||
for (size_t value : {static_cast<size_t>(rest)...}) {
|
||||
if (__builtin_mul_overflow(result, value, &result))
|
||||
return true;
|
||||
}
|
||||
*out = result;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Variadic safe multiplication: computes the product and panics on overflow.
|
||||
template <typename... Args> inline size_t safe_mul(size_t first, Args... rest) {
|
||||
size_t result;
|
||||
bool overflow = checked_mul(&result, first, rest...);
|
||||
PANIC_IF_FALSE(!overflow, "multiplication overflow wraps size_t");
|
||||
return result;
|
||||
}
|
||||
|
||||
// Variadic safe multiplication with an appended sizeof(T) factor.
|
||||
// Computes (args... * sizeof(T)) with overflow checking.
|
||||
template <typename T, typename... Args>
|
||||
inline size_t safe_mul_sizeof(Args... args) {
|
||||
return safe_mul(args..., sizeof(T));
|
||||
}
|
||||
145
backends/zk-cuda-backend/cuda/include/device.h
Normal file
145
backends/zk-cuda-backend/cuda/include/device.h
Normal file
@@ -0,0 +1,145 @@
|
||||
#ifndef DEVICE_H
|
||||
#define DEVICE_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
extern "C" {
|
||||
|
||||
#define check_cuda_error(ans) \
|
||||
{ cuda_error((ans), __FILE__, __LINE__); }
|
||||
inline void cuda_error(cudaError_t code, const char *file, int line) {
|
||||
if (code != cudaSuccess) {
|
||||
std::fprintf(stderr, "Cuda error: %s %s %d\n", cudaGetErrorString(code),
|
||||
file, line);
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
// The PANIC macro should be used to validate user-inputs to GPU functions
|
||||
// it will execute in all targets, including production settings
|
||||
// e.g., cudaMemCopy to the device should check that the destination pointer is
|
||||
// a device pointer
|
||||
#define PANIC(format, ...) \
|
||||
{ \
|
||||
std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__, \
|
||||
__LINE__, __func__, ##__VA_ARGS__); \
|
||||
std::abort(); \
|
||||
}
|
||||
|
||||
// This is a generic assertion checking macro with user defined printf-style
|
||||
// message
|
||||
#define PANIC_IF_FALSE(cond, format, ...) \
|
||||
do { \
|
||||
if (!(cond)) { \
|
||||
PANIC(format "\n\n %s\n", ##__VA_ARGS__, #cond); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#ifndef GPU_ASSERTS_DISABLE
|
||||
// The GPU assert should be used to validate assumptions in algorithms,
|
||||
// for example, checking that two user-provided quantities have a certain
|
||||
// relationship or that the size of the buffer provided to a function is
|
||||
// sufficient when it is filled with some algorithm that depends on
|
||||
// user-provided inputs e.g., OPRF corrections buffer should not have a size
|
||||
// higher than the number of blocks in the datatype that is generated
|
||||
#define GPU_ASSERT(cond, format, ...) \
|
||||
PANIC_IF_FALSE(cond, format, ##__VA_ARGS__)
|
||||
#else
|
||||
#define GPU_ASSERT(cond) \
|
||||
do { \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
uint32_t cuda_get_device();
|
||||
void cuda_set_device(uint32_t gpu_index);
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index);
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
uint32_t gpu_index);
|
||||
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
|
||||
uint32_t gpu_index);
|
||||
|
||||
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index);
|
||||
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index);
|
||||
|
||||
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
uint32_t cuda_is_available();
|
||||
|
||||
void *cuda_malloc(uint64_t size, uint32_t gpu_index);
|
||||
|
||||
void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
uint64_t &size_tracker,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
bool cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
|
||||
uint64_t cuda_device_total_memory(uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_with_size_tracking_async_to_gpu(void *dest, const void *src,
|
||||
uint64_t size,
|
||||
cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated);
|
||||
|
||||
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
|
||||
void *dest, void const *src, uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index, bool gpu_memory_allocated);
|
||||
|
||||
void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
|
||||
uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated);
|
||||
|
||||
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
int cuda_get_number_of_gpus();
|
||||
|
||||
int cuda_get_number_of_sms();
|
||||
|
||||
void cuda_synchronize_device(uint32_t gpu_index);
|
||||
|
||||
void cuda_drop(void *ptr, uint32_t gpu_index);
|
||||
|
||||
void cuda_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated);
|
||||
|
||||
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
|
||||
}
|
||||
|
||||
uint32_t cuda_get_max_shared_memory(uint32_t gpu_index);
|
||||
|
||||
uint32_t cuda_get_max_shared_memory_per_block(uint32_t gpu_index);
|
||||
|
||||
bool cuda_check_support_cooperative_groups();
|
||||
|
||||
bool cuda_check_support_thread_block_clusters();
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *d_array, Torus value, Torus n);
|
||||
|
||||
#endif
|
||||
16
backends/zk-cuda-backend/cuda/include/helper_profile.cuh
Normal file
16
backends/zk-cuda-backend/cuda/include/helper_profile.cuh
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef HELPER_PROFILE
|
||||
#define HELPER_PROFILE
|
||||
|
||||
#ifdef USE_NVTOOLS
|
||||
#include <nvtx3/nvToolsExt.h>
|
||||
#endif
|
||||
|
||||
void cuda_nvtx_label_with_color(const char *name);
|
||||
void cuda_nvtx_pop();
|
||||
|
||||
#define PUSH_RANGE(name) \
|
||||
{ cuda_nvtx_label_with_color(name); }
|
||||
#define POP_RANGE() \
|
||||
{ cuda_nvtx_pop(); }
|
||||
|
||||
#endif
|
||||
43
backends/zk-cuda-backend/cuda/src/helper_profile.cu
Normal file
43
backends/zk-cuda-backend/cuda/src/helper_profile.cu
Normal file
@@ -0,0 +1,43 @@
|
||||
#include "helper_profile.cuh"
|
||||
#include <stdint.h>
|
||||
|
||||
uint32_t adler32(const unsigned char *data) {
|
||||
const uint32_t MOD_ADLER = 65521;
|
||||
uint32_t a = 1, b = 0;
|
||||
size_t index;
|
||||
for (index = 0; data[index] != 0; ++index) {
|
||||
a = (a + data[index] * 2) % MOD_ADLER;
|
||||
b = (b + a) % MOD_ADLER;
|
||||
}
|
||||
return (b << 16) | a;
|
||||
}
|
||||
|
||||
void cuda_nvtx_label_with_color(const char *name) {
|
||||
#ifdef USE_NVTOOLS
|
||||
int color_id = adler32((const unsigned char *)name);
|
||||
int r, g, b;
|
||||
r = color_id & 0x000000ff;
|
||||
g = (color_id & 0x000ff000) >> 12;
|
||||
b = (color_id & 0x0ff00000) >> 20;
|
||||
if (r < 64 & g < 64 & b < 64) {
|
||||
r = r * 3;
|
||||
g = g * 3 + 64;
|
||||
b = b * 4;
|
||||
}
|
||||
|
||||
color_id = 0xff000000 | (r << 16) | (g << 8) | (b);
|
||||
nvtxEventAttributes_t eventAttrib = {0};
|
||||
eventAttrib.version = NVTX_VERSION;
|
||||
eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
|
||||
eventAttrib.colorType = NVTX_COLOR_ARGB;
|
||||
eventAttrib.color = color_id;
|
||||
eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
|
||||
eventAttrib.message.ascii = name;
|
||||
nvtxRangePushEx(&eventAttrib);
|
||||
#endif
|
||||
}
|
||||
void cuda_nvtx_pop() {
|
||||
#ifdef USE_NVTOOLS
|
||||
nvtxRangePop();
|
||||
#endif
|
||||
}
|
||||
@@ -11,7 +11,7 @@
|
||||
#include <stddef.h>
|
||||
#include <cstring>
|
||||
|
||||
#include "../../tfhe-cuda-backend/cuda/src/utils/helper_profile.cuh"
|
||||
#include "helper_profile.cuh"
|
||||
|
||||
// C++ helper functions (not exported, used internally)
|
||||
// These can call template functions since they have C++ linkage
|
||||
|
||||
@@ -1 +1 @@
|
||||
nightly-2026-01-14
|
||||
nightly-2026-04-22
|
||||
|
||||
@@ -25,7 +25,7 @@ use tfhe::{
|
||||
CompressedKVStore, CompressedPublicKey, CompressedServerKey,
|
||||
CompressedSquashedNoiseCiphertextList, CompressedSquashedNoiseCiphertextListBuilder, FheBool,
|
||||
FheInt8, FheUint32, FheUint64, FheUint8, ReRandomizationContext, ReRandomizationMode,
|
||||
ReRandomizationSupport, ServerKey, SquashedNoiseFheBool, SquashedNoiseFheInt,
|
||||
ReRandomizationSupport, Seed, ServerKey, SquashedNoiseFheBool, SquashedNoiseFheInt,
|
||||
SquashedNoiseFheUint,
|
||||
};
|
||||
use tfhe_backward_compat_data::load::{
|
||||
@@ -748,6 +748,22 @@ fn test_hl_key_features(
|
||||
}
|
||||
}
|
||||
|
||||
// OPRF: check that oblivious pseudo-random generation works with the dedicated key.
|
||||
// The decrypted values only need to be within range; the seed is deterministic but we
|
||||
// don't compare to specific bit values (those are validated in the unit tests).
|
||||
if server_key.supports_oprf() {
|
||||
let seed = Seed(42u128);
|
||||
|
||||
let rand_bool = FheBool::generate_oblivious_pseudo_random(seed);
|
||||
let _: bool = rand_bool.decrypt(client_key);
|
||||
|
||||
let rand_uint = FheUint8::generate_oblivious_pseudo_random(seed);
|
||||
let _: u8 = rand_uint.decrypt(client_key);
|
||||
|
||||
let rand_int = FheInt8::generate_oblivious_pseudo_random(seed);
|
||||
let _: i8 = rand_int.decrypt(client_key);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ use rayon::prelude::*;
|
||||
#[cfg(any(feature = "gpu", feature = "hpu"))]
|
||||
use std::cmp::max;
|
||||
use tfhe::integer::keycache::KEY_CACHE;
|
||||
use tfhe::integer::oprf::{OprfPrivateKey, OprfServerKey};
|
||||
use tfhe::integer::IntegerKeyKind;
|
||||
use tfhe::keycache::NamedParam;
|
||||
#[cfg(any(feature = "gpu", feature = "hpu"))]
|
||||
@@ -35,32 +36,42 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
format!("{bench_name}_bounded::{param_name}::{bit_size}_bits");
|
||||
|
||||
bench_group.bench_function(&bench_id_oprf, |b| {
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let oprf_sk = OprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
));
|
||||
_ = black_box(
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
&sks,
|
||||
),
|
||||
);
|
||||
})
|
||||
});
|
||||
|
||||
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let oprf_sk = OprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&sks,
|
||||
),
|
||||
);
|
||||
})
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let oprf_sk = OprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
|
||||
bench_id_oprf = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
bench_id_oprf_bounded =
|
||||
@@ -71,10 +82,11 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
{
|
||||
// Execute the operation once to know its cost.
|
||||
reset_pbs_count();
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&sks,
|
||||
);
|
||||
let pbs_count = max(get_pbs_count(), 1);
|
||||
throughput_num_threads(num_block, pbs_count)
|
||||
@@ -85,11 +97,13 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
let setup = |_batch_size: usize| ();
|
||||
let run = |_: &mut (), batch_size: usize| {
|
||||
(0..batch_size).into_par_iter().for_each(|_| {
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
);
|
||||
oprf_sk
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&sks,
|
||||
);
|
||||
});
|
||||
};
|
||||
find_optimal_batch(run, setup) as u64
|
||||
@@ -100,9 +114,10 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
bench_group.bench_function(&bench_id_oprf, |b| {
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|_| {
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
&sks,
|
||||
);
|
||||
})
|
||||
})
|
||||
@@ -111,10 +126,11 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|_| {
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&sks,
|
||||
);
|
||||
})
|
||||
})
|
||||
@@ -148,6 +164,8 @@ pub mod cuda {
|
||||
use criterion::black_box;
|
||||
use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
|
||||
use tfhe::integer::gpu::server_key::CudaServerKey;
|
||||
use tfhe::integer::gpu::CudaOprfServerKey;
|
||||
use tfhe::integer::oprf::{CompressedOprfServerKey, OprfPrivateKey};
|
||||
use tfhe::GpuIndex;
|
||||
use tfhe_csprng::seeders::Seed;
|
||||
|
||||
@@ -177,12 +195,18 @@ pub mod cuda {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &streams);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let compressed_oprf_sk =
|
||||
CompressedOprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
let cuda_oprf_sk =
|
||||
CudaOprfServerKey::decompress_from_cpu(&compressed_oprf_sk, &streams);
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(
|
||||
gpu_sks.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
cuda_oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
&gpu_sks,
|
||||
&streams,
|
||||
),
|
||||
);
|
||||
@@ -193,14 +217,20 @@ pub mod cuda {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &streams);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let compressed_oprf_sk =
|
||||
CompressedOprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
let cuda_oprf_sk =
|
||||
CudaOprfServerKey::decompress_from_cpu(&compressed_oprf_sk, &streams);
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(
|
||||
gpu_sks
|
||||
cuda_oprf_sk
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&gpu_sks,
|
||||
&streams,
|
||||
),
|
||||
);
|
||||
@@ -210,13 +240,25 @@ pub mod cuda {
|
||||
BenchmarkType::Throughput => {
|
||||
let (cks, cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks_vec = cuda_local_keys(&cks);
|
||||
let cpu_oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let cpu_oprf_sk = OprfServerKey::new(&cpu_oprf_pk, &cks).unwrap();
|
||||
let compressed_oprf_sk =
|
||||
CompressedOprfServerKey::new(&cpu_oprf_pk, &cks).unwrap();
|
||||
// One CudaOprfServerKey per GPU, matching `gpu_sks_vec`.
|
||||
let cuda_oprf_sks_vec: Vec<CudaOprfServerKey> = (0..get_number_of_gpus())
|
||||
.map(|gpu_index| {
|
||||
let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
CudaOprfServerKey::decompress_from_cpu(&compressed_oprf_sk, &stream)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Execute the operation once to know its cost.
|
||||
reset_pbs_count();
|
||||
cpu_sks.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
cpu_oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&cpu_sks,
|
||||
);
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
@@ -232,10 +274,11 @@ pub mod cuda {
|
||||
(0..elements).into_par_iter().for_each(|i| {
|
||||
let gpu_index: u32 = i as u32 % get_number_of_gpus();
|
||||
let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
gpu_sks_vec[gpu_index as usize]
|
||||
cuda_oprf_sks_vec[gpu_index as usize]
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
&gpu_sks_vec[gpu_index as usize],
|
||||
&stream,
|
||||
);
|
||||
})
|
||||
@@ -247,11 +290,12 @@ pub mod cuda {
|
||||
(0..elements).into_par_iter().for_each(|i| {
|
||||
let gpu_index: u32 = i as u32 % get_number_of_gpus();
|
||||
let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
gpu_sks_vec[gpu_index as usize]
|
||||
cuda_oprf_sks_vec[gpu_index as usize]
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&gpu_sks_vec[gpu_index as usize],
|
||||
&stream,
|
||||
);
|
||||
})
|
||||
|
||||
@@ -2,6 +2,7 @@ use benchmark::params_aliases::*;
|
||||
use criterion::{black_box, criterion_group, Criterion};
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::keycache::KEY_CACHE;
|
||||
use tfhe::shortint::oprf::{OprfPrivateKey, OprfServerKey};
|
||||
use tfhe_csprng::seeders::Seed;
|
||||
|
||||
fn oprf(c: &mut Criterion) {
|
||||
@@ -12,11 +13,15 @@ fn oprf(c: &mut Criterion) {
|
||||
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
|
||||
let keys = KEY_CACHE.get_from_param(param);
|
||||
let cks = keys.client_key();
|
||||
let sks = keys.server_key();
|
||||
|
||||
let oprf_pk = OprfPrivateKey::new(cks);
|
||||
let oprf_sk = OprfServerKey::new(&oprf_pk, cks).unwrap();
|
||||
|
||||
bench_group.bench_function(format!("2-bits-oprf::{}", param.name()), |b| {
|
||||
b.iter(|| {
|
||||
_ = black_box(sks.generate_oblivious_pseudo_random(Seed(0), 2));
|
||||
_ = black_box(oprf_sk.generate_oblivious_pseudo_random(Seed(0), 2, sks));
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
@@ -168,7 +168,7 @@ mod generic_tests {
|
||||
fn test_xof_seed_getters() {
|
||||
let seed_bytes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let bits = u128::from_le_bytes(seed_bytes);
|
||||
let dsep = [b't', b'f', b'h', b'e', b'k', b's', b'p', b's'];
|
||||
let dsep = *b"tfheksps";
|
||||
let seed = XofSeed::new_u128(bits, dsep);
|
||||
|
||||
let s = u128::from_le_bytes(seed.seed().try_into().unwrap());
|
||||
|
||||
@@ -26,7 +26,7 @@ num-bigint = "0.4.5"
|
||||
tfhe-versionable = { version = "0.7.0", path = "../utils/tfhe-versionable" }
|
||||
tfhe-safe-serialize = { version = "0.1.0", path = "../utils/tfhe-safe-serialize" }
|
||||
zk-cuda-backend = { version = "0.1.0", path = "../backends/zk-cuda-backend", optional = true }
|
||||
tfhe-cuda-backend = { version = "=0.14.0", path = "../backends/tfhe-cuda-backend", optional = true }
|
||||
tfhe-cuda-backend = { version = "=0.15.0", path = "../backends/tfhe-cuda-backend", optional = true }
|
||||
itertools.workspace = true
|
||||
[target.'cfg(target_family = "wasm")'.dependencies]
|
||||
getrandom = { workspace = true, features = ["js"] }
|
||||
|
||||
@@ -64,9 +64,9 @@ tfhe-fft = { version = "0.10.1", path = "../tfhe-fft", features = [
|
||||
"serde",
|
||||
"fft128",
|
||||
] }
|
||||
tfhe-ntt = { version = "0.7.0", path = "../tfhe-ntt" }
|
||||
tfhe-ntt = { version = "0.7.1", path = "../tfhe-ntt" }
|
||||
pulp = { workspace = true, features = ["default"] }
|
||||
tfhe-cuda-backend = { version = "0.14.0", path = "../backends/tfhe-cuda-backend", optional = true }
|
||||
tfhe-cuda-backend = { version = "0.15.0", path = "../backends/tfhe-cuda-backend", optional = true }
|
||||
aligned-vec = { workspace = true, features = ["default", "serde"] }
|
||||
dyn-stack = { workspace = true, features = ["default"] }
|
||||
paste = { workspace = true }
|
||||
|
||||
@@ -75,11 +75,11 @@
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">121 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="420.0">165 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="460.0">Leading / Trailing zeros/ones</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="460.0">88.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="460.0">148 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">169 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="460.0">222 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="460.0">275 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="460.0">67.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="460.0">70.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">89.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="460.0">92.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="460.0">113 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="500.0">Log2</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="500.0">110 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="500.0">163 ms</text>
|
||||
|
||||
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
@@ -75,11 +75,11 @@
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">32.5 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="420.0">14.0 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="460.0">Leading / Trailing zeros/ones</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="460.0">625 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="460.0">247 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">108 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="460.0">44.1 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="460.0">19.0 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="460.0">824 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="460.0">487 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">222 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="460.0">119 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="460.0">57.8 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="500.0">Log2</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="500.0">542 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="500.0">220 ops/s</text>
|
||||
|
||||
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
@@ -85,7 +85,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
let public_key = tfhe::CompactPublicKey::try_new(&client_key).unwrap();
|
||||
// This can be left empty, but if provided allows to tie the proof to arbitrary data
|
||||
let metadata = [b'T', b'F', b'H', b'E', b'-', b'r', b's'];
|
||||
let metadata = b"TFHE-rs";
|
||||
|
||||
let clear_a = random::<u64>();
|
||||
let clear_b = random::<u64>();
|
||||
@@ -93,7 +93,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let proven_compact_list = tfhe::ProvenCompactCiphertextList::builder(&public_key)
|
||||
.push(clear_a)
|
||||
.push(clear_b)
|
||||
.build_with_proof_packed(&crs, &metadata, ZkComputeLoad::Verify)?;
|
||||
.build_with_proof_packed(&crs, metadata, ZkComputeLoad::Verify)?;
|
||||
|
||||
// Server side
|
||||
let result = {
|
||||
@@ -101,7 +101,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
// Verify the proofs and expand the ciphertexts
|
||||
let expander =
|
||||
proven_compact_list.verify_and_expand(&crs, &public_key, &metadata)?;
|
||||
proven_compact_list.verify_and_expand(&crs, &public_key, metadata)?;
|
||||
let a: tfhe::FheUint64 = expander.get(0)?.unwrap();
|
||||
let b: tfhe::FheUint64 = expander.get(1)?.unwrap();
|
||||
|
||||
|
||||
@@ -120,7 +120,7 @@ pub fn main() {
|
||||
let rerand_domain_separator = *b"TFHE_Rrd";
|
||||
|
||||
let crs = CompactPkeCrs::from_config(config, 2048).unwrap();
|
||||
let metadata = [b'r', b'e', b'r', b'a', b'n', b'd'];
|
||||
let metadata = b"rerand";
|
||||
|
||||
set_server_key(sks);
|
||||
|
||||
@@ -132,7 +132,7 @@ pub fn main() {
|
||||
.push(clear_a)
|
||||
.push(clear_b)
|
||||
.push(false)
|
||||
.build_with_proof_packed(&crs, &metadata, ZkComputeLoad::Proof)
|
||||
.build_with_proof_packed(&crs, metadata, ZkComputeLoad::Proof)
|
||||
.unwrap();
|
||||
|
||||
// Simulate a 256 bits nonce
|
||||
@@ -151,7 +151,7 @@ pub fn main() {
|
||||
|
||||
// Verify, re_randomize and expand
|
||||
let expander = compact_list
|
||||
.verify_re_randomize_and_expand(&crs, &cpk, &metadata, seed_gen.next_seed().unwrap())
|
||||
.verify_re_randomize_and_expand(&crs, &cpk, metadata, seed_gen.next_seed().unwrap())
|
||||
.unwrap();
|
||||
|
||||
let a: FheUint64 = expander.get(0).unwrap().unwrap();
|
||||
|
||||
@@ -46,7 +46,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let server_key = tfhe::ServerKey::new(&client_key);
|
||||
let public_key = tfhe::CompactPublicKey::try_new(&client_key).unwrap();
|
||||
// This can be left empty, but if provided allows to tie the proof to arbitrary data
|
||||
let metadata = [b'T', b'F', b'H', b'E', b'-', b'r', b's'];
|
||||
let metadata = b"TFHE-rs";
|
||||
|
||||
let clear_a = rng.gen::<u64>();
|
||||
let clear_b = rng.gen::<u64>();
|
||||
@@ -54,7 +54,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let proven_compact_list = tfhe::ProvenCompactCiphertextList::builder(&public_key)
|
||||
.push(clear_a)
|
||||
.push(clear_b)
|
||||
.build_with_proof_packed(&crs, &metadata, ZkComputeLoad::Verify)?;
|
||||
.build_with_proof_packed(&crs, metadata, ZkComputeLoad::Verify)?;
|
||||
|
||||
// Server side
|
||||
let result = {
|
||||
@@ -62,7 +62,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
// Verify the ciphertexts
|
||||
let expander =
|
||||
proven_compact_list.verify_and_expand(&crs, &public_key, &metadata)?;
|
||||
proven_compact_list.verify_and_expand(&crs, &public_key, metadata)?;
|
||||
let a: tfhe::FheUint64 = expander.get(0)?.unwrap();
|
||||
let b: tfhe::FheUint64 = expander.get(1)?.unwrap();
|
||||
|
||||
@@ -118,7 +118,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let server_key = tfhe::ServerKey::new(&client_key);
|
||||
let public_key = tfhe::CompactPublicKey::try_new(&client_key).unwrap();
|
||||
// This can be left empty, but if provided allows to tie the proof to arbitrary data
|
||||
let metadata = [b'T', b'F', b'H', b'E', b'-', b'r', b's'];
|
||||
let metadata = b"TFHE-rs";
|
||||
|
||||
let clear_a = rng.gen::<u64>();
|
||||
let clear_b = rng.gen::<u64>();
|
||||
@@ -126,7 +126,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let proven_compact_list = tfhe::ProvenCompactCiphertextList::builder(&public_key)
|
||||
.push(clear_a)
|
||||
.push(clear_b)
|
||||
.build_with_proof_packed(&crs, &metadata, ZkComputeLoad::Verify)?;
|
||||
.build_with_proof_packed(&crs, metadata, ZkComputeLoad::Verify)?;
|
||||
|
||||
// Server side
|
||||
let result = {
|
||||
@@ -134,7 +134,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
// Verify the ciphertexts
|
||||
let expander =
|
||||
proven_compact_list.verify_and_expand(&crs, &public_key, &metadata)?;
|
||||
proven_compact_list.verify_and_expand(&crs, &public_key, metadata)?;
|
||||
let a: tfhe::FheUint64 = expander.get(0)?.unwrap();
|
||||
let b: tfhe::FheUint64 = expander.get(1)?.unwrap();
|
||||
|
||||
|
||||
@@ -252,9 +252,7 @@ where
|
||||
))
|
||||
}
|
||||
|
||||
const NON_ESCAPABLE_SYMBOLS: [u8; 14] = [
|
||||
b'&', b';', b':', b',', b'`', b'~', b'-', b'_', b'!', b'@', b'#', b'%', b'\'', b'\"',
|
||||
];
|
||||
const NON_ESCAPABLE_SYMBOLS: [u8; 14] = *b"&;:,`~-_!@#%'\"";
|
||||
|
||||
fn atom<Input>() -> impl Parser<Input, Output = RegExpr>
|
||||
where
|
||||
|
||||
@@ -1251,7 +1251,7 @@ pub fn encrypt_lwe_ciphertext_iterator_with_seeded_public_key<Scalar, KeyCont, O
|
||||
}
|
||||
}
|
||||
|
||||
for (output_ct, plaintext) in output.iter_mut().zip(encoded.into_iter()) {
|
||||
for (output_ct, plaintext) in output.iter_mut().zip(encoded) {
|
||||
lwe_ciphertext_plaintext_add_assign(output_ct, plaintext);
|
||||
}
|
||||
}
|
||||
@@ -2123,7 +2123,7 @@ pub fn encrypt_lwe_ciphertext_with_compact_public_key<
|
||||
///
|
||||
/// // We can add custom metadata that will be required for verification, allowing to tie the proof
|
||||
/// // to some arbitrary data.
|
||||
/// let metadata = [b'T', b'F', b'H', b'E', b'-', b'r', b's'];
|
||||
/// let metadata = b"TFHE-rs";
|
||||
///
|
||||
/// // Create the PRNG
|
||||
/// let mut seeder = new_seeder();
|
||||
@@ -2170,14 +2170,14 @@ pub fn encrypt_lwe_ciphertext_with_compact_public_key<
|
||||
/// glwe_noise_distribution,
|
||||
/// encryption_generator.noise_generator_mut(),
|
||||
/// &crs,
|
||||
/// &metadata,
|
||||
/// metadata,
|
||||
/// ZkComputeLoad::Proof,
|
||||
/// )
|
||||
/// .unwrap();
|
||||
///
|
||||
/// // verify the ciphertext list with the proof
|
||||
/// assert!(
|
||||
/// verify_lwe_ciphertext(&lwe, &lwe_compact_public_key, &proof, &crs, &metadata).is_valid()
|
||||
/// verify_lwe_ciphertext(&lwe, &lwe_compact_public_key, &proof, &crs, metadata).is_valid()
|
||||
/// );
|
||||
///
|
||||
/// let decrypted_plaintext = decrypt_lwe_ciphertext(&lwe_secret_key, &lwe);
|
||||
@@ -2572,7 +2572,7 @@ pub fn encrypt_lwe_compact_ciphertext_list_with_compact_public_key<
|
||||
///
|
||||
/// // We can add custom metadata that will be required for verification, allowing to tie the proof
|
||||
/// // to some arbitrary data.
|
||||
/// let metadata = [b'T', b'F', b'H', b'E', b'-', b'r', b's'];
|
||||
/// let metadata = b"TFHE-rs";
|
||||
///
|
||||
/// // Create the PRNG
|
||||
/// let mut seeder = new_seeder();
|
||||
@@ -2623,7 +2623,7 @@ pub fn encrypt_lwe_compact_ciphertext_list_with_compact_public_key<
|
||||
/// glwe_noise_distribution,
|
||||
/// encryption_generator.noise_generator_mut(),
|
||||
/// &crs,
|
||||
/// &metadata,
|
||||
/// metadata,
|
||||
/// ZkComputeLoad::Proof,
|
||||
/// )
|
||||
/// .unwrap();
|
||||
@@ -2634,7 +2634,7 @@ pub fn encrypt_lwe_compact_ciphertext_list_with_compact_public_key<
|
||||
/// &lwe_compact_public_key,
|
||||
/// &proof,
|
||||
/// &crs,
|
||||
/// &metadata,
|
||||
/// metadata,
|
||||
/// )
|
||||
/// .is_valid());
|
||||
///
|
||||
@@ -3045,7 +3045,7 @@ pub fn par_encrypt_lwe_compact_ciphertext_list_with_compact_public_key<
|
||||
///
|
||||
/// // We can add custom metadata that will be required for verification, allowing to tie the proof
|
||||
/// // to some arbitrary data.
|
||||
/// let metadata = [b'T', b'F', b'H', b'E', b'-', b'r', b's'];
|
||||
/// let metadata = b"TFHE-rs";
|
||||
///
|
||||
/// // Create the PRNG
|
||||
/// let mut seeder = new_seeder();
|
||||
@@ -3096,7 +3096,7 @@ pub fn par_encrypt_lwe_compact_ciphertext_list_with_compact_public_key<
|
||||
/// glwe_noise_distribution,
|
||||
/// encryption_generator.noise_generator_mut(),
|
||||
/// &crs,
|
||||
/// &metadata,
|
||||
/// metadata,
|
||||
/// ZkComputeLoad::Proof,
|
||||
/// )
|
||||
/// .unwrap();
|
||||
@@ -3107,7 +3107,7 @@ pub fn par_encrypt_lwe_compact_ciphertext_list_with_compact_public_key<
|
||||
/// &lwe_compact_public_key,
|
||||
/// &proof,
|
||||
/// &crs,
|
||||
/// &metadata,
|
||||
/// metadata,
|
||||
/// )
|
||||
/// .is_valid());
|
||||
///
|
||||
|
||||
@@ -1010,7 +1010,7 @@ fn lwe_compact_public_encrypt_prove_verify_decrypt_custom_mod<Scalar>(
|
||||
let message_modulus_log = params.message_modulus_log;
|
||||
let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus);
|
||||
|
||||
let metadata = [b'c', b'o', b'r', b'e'];
|
||||
let metadata = b"core";
|
||||
|
||||
let mut rsc = TestResources::new();
|
||||
let mut random_generator = RandomGenerator::<DefaultRandomGenerator>::new(rsc.seeder.seed());
|
||||
@@ -1073,7 +1073,7 @@ fn lwe_compact_public_encrypt_prove_verify_decrypt_custom_mod<Scalar>(
|
||||
glwe_noise_distribution,
|
||||
rsc.encryption_random_generator.noise_generator_mut(),
|
||||
crs,
|
||||
&metadata,
|
||||
metadata,
|
||||
ZkComputeLoad::Proof,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -1090,13 +1090,13 @@ fn lwe_compact_public_encrypt_prove_verify_decrypt_custom_mod<Scalar>(
|
||||
assert_eq!(msg, decoded);
|
||||
|
||||
// Verify the proof
|
||||
assert!(verify_lwe_ciphertext(&ct, &pk, &proof, crs, &metadata).is_valid());
|
||||
assert!(verify_lwe_ciphertext(&ct, &pk, &proof, crs, metadata).is_valid());
|
||||
|
||||
// verify proof with invalid ciphertext
|
||||
let index = random_generator.gen::<usize>() % ct.as_ref().len();
|
||||
let value_to_add = random_generator.gen::<Scalar>();
|
||||
ct.as_mut()[index] = ct.as_mut()[index].wrapping_add(value_to_add);
|
||||
assert!(verify_lwe_ciphertext(&ct, &pk, &proof, crs, &metadata).is_invalid());
|
||||
assert!(verify_lwe_ciphertext(&ct, &pk, &proof, crs, metadata).is_invalid());
|
||||
}
|
||||
|
||||
// In coverage, we break after one while loop iteration, changing message values does
|
||||
@@ -1122,7 +1122,7 @@ fn test_par_compact_lwe_list_public_key_encryption_and_proof() {
|
||||
let glwe_noise_distribution = TUniform::new(9);
|
||||
let ciphertext_modulus = CiphertextModulus::new_native();
|
||||
|
||||
let metadata = [b'c', b'o', b'r', b'e'];
|
||||
let metadata = b"core";
|
||||
|
||||
let delta_log = 59;
|
||||
let delta = 1u64 << delta_log;
|
||||
@@ -1195,7 +1195,7 @@ fn test_par_compact_lwe_list_public_key_encryption_and_proof() {
|
||||
glwe_noise_distribution,
|
||||
encryption_random_generator.noise_generator_mut(),
|
||||
&crs,
|
||||
&metadata,
|
||||
metadata,
|
||||
ZkComputeLoad::Proof,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -1205,7 +1205,7 @@ fn test_par_compact_lwe_list_public_key_encryption_and_proof() {
|
||||
&compact_lwe_pk,
|
||||
&proof,
|
||||
&crs,
|
||||
&metadata
|
||||
metadata
|
||||
)
|
||||
.is_valid());
|
||||
|
||||
@@ -1236,7 +1236,7 @@ fn test_par_compact_lwe_list_public_key_encryption_and_proof() {
|
||||
&compact_lwe_pk,
|
||||
&proof,
|
||||
&crs,
|
||||
&metadata
|
||||
metadata
|
||||
)
|
||||
.is_invalid());
|
||||
|
||||
@@ -1284,7 +1284,7 @@ fn test_par_compact_lwe_list_public_key_encryption_and_proof() {
|
||||
glwe_noise_distribution,
|
||||
encryption_random_generator.noise_generator_mut(),
|
||||
&crs,
|
||||
&metadata,
|
||||
metadata,
|
||||
ZkComputeLoad::Proof,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -1294,7 +1294,7 @@ fn test_par_compact_lwe_list_public_key_encryption_and_proof() {
|
||||
&compact_lwe_pk,
|
||||
&proof,
|
||||
&crs,
|
||||
&metadata
|
||||
metadata
|
||||
)
|
||||
.is_valid());
|
||||
|
||||
@@ -1325,7 +1325,7 @@ fn test_par_compact_lwe_list_public_key_encryption_and_proof() {
|
||||
&compact_lwe_pk,
|
||||
&proof,
|
||||
&crs,
|
||||
&metadata
|
||||
metadata
|
||||
)
|
||||
.is_invalid());
|
||||
|
||||
|
||||
@@ -538,4 +538,5 @@ macro_rules! create_parameterized_test_with_non_native_parameters {
|
||||
};
|
||||
}
|
||||
|
||||
pub(crate) use {create_parameterized_test, create_parameterized_test_with_non_native_parameters};
|
||||
pub(crate) use create_parameterized_test;
|
||||
pub(crate) use create_parameterized_test_with_non_native_parameters;
|
||||
|
||||
@@ -19,7 +19,7 @@ pub fn pbs_variance_132_bits_security_gaussian(
|
||||
lwe_dimension.0 as f64,
|
||||
glwe_dimension.0 as f64,
|
||||
polynomial_size.0 as f64,
|
||||
var_min.0 as f64,
|
||||
var_min.0,
|
||||
decomposition_level_count.0 as f64,
|
||||
decomposition_base_log.0 as f64,
|
||||
ciphertext_modulus,
|
||||
@@ -45,7 +45,7 @@ pub fn pbs_variance_132_bits_security_tuniform(
|
||||
lwe_dimension.0 as f64,
|
||||
glwe_dimension.0 as f64,
|
||||
polynomial_size.0 as f64,
|
||||
var_min.0 as f64,
|
||||
var_min.0,
|
||||
decomposition_level_count.0 as f64,
|
||||
decomposition_base_log.0 as f64,
|
||||
ciphertext_modulus,
|
||||
|
||||
@@ -53,7 +53,7 @@ macro_rules! implement_gaussian {
|
||||
for a in uniform_rand_bytes_v.iter_mut() {
|
||||
*a = generator.generate_next();
|
||||
}
|
||||
let size = <$T>::BITS as i32;
|
||||
let size = <$T as crate::core_crypto::commons::numeric::Numeric>::BITS as i32;
|
||||
let mut u: $T = <$S>::from_le_bytes(uniform_rand_bytes_u).cast_into();
|
||||
u *= <$T>::TWO.powi(-size + 1);
|
||||
let mut v: $T = <$S>::from_le_bytes(uniform_rand_bytes_v).cast_into();
|
||||
|
||||
@@ -102,7 +102,7 @@ macro_rules! implement {
|
||||
self.floor()
|
||||
}
|
||||
fn to_bit_string(&self) -> String {
|
||||
if Self::BITS == 32 {
|
||||
if <Self as Numeric>::BITS == 32 {
|
||||
let mut bit_string = format!("{:032b}", self.to_bits());
|
||||
bit_string.insert(1, ' ');
|
||||
bit_string.insert(10, ' ');
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user