mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
13 Commits
tfhe-versi
...
go/refacto
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f3f05f9068 | ||
|
|
3a2bb4470f | ||
|
|
6120fab886 | ||
|
|
53b68619b0 | ||
|
|
e854823233 | ||
|
|
19e00c484b | ||
|
|
818e480dac | ||
|
|
a7fc8a90e1 | ||
|
|
3fad6d194c | ||
|
|
23efcb8dd4 | ||
|
|
33c69d9d1f | ||
|
|
960d287e92 | ||
|
|
662e5402a3 |
1
.github/actionlint.yaml
vendored
1
.github/actionlint.yaml
vendored
@@ -3,6 +3,7 @@ self-hosted-runner:
|
||||
labels:
|
||||
- m1mac
|
||||
- 4090-desktop
|
||||
- large_windows_16_latest
|
||||
# Configuration variables in array of strings defined in your repository or
|
||||
# organization. `null` means disabling configuration variables check.
|
||||
# Empty array means no configuration variable is allowed.
|
||||
|
||||
4
.github/workflows/aws_tfhe_fast_tests.yml
vendored
4
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -140,7 +140,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/aws_tfhe_gpu_tests.yml
vendored
4
.github/workflows/aws_tfhe_gpu_tests.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -187,7 +187,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/aws_tfhe_integer_tests.yml
vendored
4
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -117,7 +117,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/aws_tfhe_tests.yml
vendored
4
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -129,7 +129,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -235,7 +235,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
4
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/cargo_build.yml
vendored
2
.github/workflows/cargo_build.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest-large, windows-latest]
|
||||
os: [ubuntu-latest, macos-latest-large, large_windows_16_latest]
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
|
||||
4
.github/workflows/code_coverage.yml
vendored
4
.github/workflows/code_coverage.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -125,7 +125,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -175,7 +175,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -78,7 +78,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || contains(github.event.label.name, '4090_bench') }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}_cuda_integer_bench
|
||||
cancel-in-progress: true
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
strategy:
|
||||
@@ -114,7 +114,7 @@ jobs:
|
||||
needs: cuda-integer-benchmarks
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}_cuda_core_crypto_bench
|
||||
cancel-in-progress: true
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -106,6 +106,10 @@ jobs:
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name:
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto, integer and internal CUDA backend tests
|
||||
run: |
|
||||
make test_gpu
|
||||
@@ -143,7 +147,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/integer_gpu_benchmark.yml
vendored
4
.github/workflows/integer_gpu_benchmark.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -187,7 +187,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -180,7 +180,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -189,7 +189,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -164,7 +164,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -168,7 +168,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
19
.github/workflows/make_release.yml
vendored
19
.github/workflows/make_release.yml
vendored
@@ -40,8 +40,9 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Create NPM version tag
|
||||
if: ${{ inputs.npm_latest_tag }}
|
||||
run: |
|
||||
echo "NPM_TAG=$(sed -n -e '1,/^version/p' tfhe/Cargo.toml | grep '^version[[:space:]]*=' | cut -d '=' -f 2 | tr -d ' ')" >> "${GITHUB_ENV}"
|
||||
echo "NPM_TAG=latest" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Publish crate.io package
|
||||
if: ${{ inputs.push_to_crates }}
|
||||
@@ -65,14 +66,6 @@ jobs:
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
|
||||
- name: Publish web package as latest
|
||||
if: ${{ inputs.push_web_package && inputs.npm_latest_tag }}
|
||||
uses: JS-DevTools/npm-publish@19c28f1ef146469e409470805ea4279d47c3d35c
|
||||
with:
|
||||
token: ${{ secrets.NPM_TOKEN }}
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
|
||||
- name: Build Node package
|
||||
if: ${{ inputs.push_node_package }}
|
||||
run: |
|
||||
@@ -90,14 +83,6 @@ jobs:
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
|
||||
- name: Publish Node package as latest
|
||||
if: ${{ inputs.push_node_package && inputs.npm_latest_tag }}
|
||||
uses: JS-DevTools/npm-publish@19c28f1ef146469e409470805ea4279d47c3d35c
|
||||
with:
|
||||
token: ${{ secrets.NPM_TOKEN }}
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
|
||||
4
.github/workflows/make_release_cuda.yml
vendored
4
.github/workflows/make_release_cuda.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -112,7 +112,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/wasm_client_benchmark.yml
vendored
4
.github/workflows/wasm_client_benchmark.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -171,7 +171,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/zk_pke_benchmark.yml
vendored
6
.github/workflows/zk_pke_benchmark.yml
vendored
@@ -65,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
needs: [ should-run, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
@@ -182,7 +182,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@58f2cae4bf2c0b6728083f5f009b6dc0eb6dc3ac
|
||||
uses: zama-ai/slab-github-runner@1d4b7b7540118af5f96ac16a1dc4cfd9c5929dc8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -272,6 +272,11 @@ void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *carry_out, int8_t *mem_ptr, void **bsks, void **ksks,
|
||||
uint32_t num_blocks);
|
||||
|
||||
void cuda_propagate_single_carry_get_input_carries_kb_64_inplace(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, void *lwe_array,
|
||||
void *carry_out, void *input_carries, int8_t *mem_ptr, void **bsks,
|
||||
void **ksks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
@@ -294,9 +294,6 @@ template <class params> __device__ void NSMFFT_direct(double2 *A) {
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
// compressed size = 8192 is actual polynomial size = 16384.
|
||||
// from this size, twiddles can't fit in constant memory,
|
||||
// so from here, butterfly operation access device memory.
|
||||
if constexpr (params::degree >= 8192) {
|
||||
// level 13
|
||||
tid = threadIdx.x;
|
||||
@@ -307,7 +304,7 @@ template <class params> __device__ void NSMFFT_direct(double2 *A) {
|
||||
(tid & (params::degree / 8192 - 1));
|
||||
i2 = i1 + params::degree / 8192;
|
||||
|
||||
w = negtwiddles13[twid_id];
|
||||
w = negtwiddles[twid_id + 4096];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
@@ -351,10 +348,6 @@ template <class params> __device__ void NSMFFT_inverse(double2 *A) {
|
||||
// mapping in backward fft is reversed
|
||||
// butterfly operation is started from last level
|
||||
|
||||
// compressed size = 8192 is actual polynomial size = 16384.
|
||||
// twiddles for this size can't fit in constant memory so
|
||||
// butterfly operation for this level access device memory to fetch
|
||||
// twiddles
|
||||
if constexpr (params::degree >= 8192) {
|
||||
// level 13
|
||||
tid = threadIdx.x;
|
||||
@@ -365,7 +358,7 @@ template <class params> __device__ void NSMFFT_inverse(double2 *A) {
|
||||
(tid & (params::degree / 8192 - 1));
|
||||
i2 = i1 + params::degree / 8192;
|
||||
|
||||
w = negtwiddles13[twid_id];
|
||||
w = negtwiddles[twid_id + 4096];
|
||||
u = A[i1] - A[i2];
|
||||
|
||||
A[i1] += A[i2];
|
||||
@@ -722,4 +715,312 @@ __global__ void batch_polynomial_mul(double2 *d_input1, double2 *d_input2,
|
||||
}
|
||||
}
|
||||
|
||||
template <class params> __device__ void NSMFFT_direct_bundle(double2 *A, const double2 regs[4]) {
|
||||
|
||||
/* We don't make bit reverse here, since twiddles are already reversed
|
||||
* Each thread is always in charge of "opt/2" pairs of coefficients,
|
||||
* which is why we always loop through N/2 by N/opt strides
|
||||
* The pragma unroll instruction tells the compiler to unroll the
|
||||
* full loop, which should increase performance
|
||||
*/
|
||||
|
||||
size_t tid = threadIdx.x;
|
||||
size_t twid_id;
|
||||
size_t i1, i2;
|
||||
double2 u, v, w;
|
||||
// level 1
|
||||
// we don't make actual complex multiplication on level1 since we have only
|
||||
// one twiddle, it's real and image parts are equal, so we can multiply
|
||||
// it with simpler operations
|
||||
// degree = 1024, opt = 2 ->
|
||||
#pragma unroll
|
||||
for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
i1 = tid;
|
||||
i2 = tid + params::degree / 2;
|
||||
|
||||
//u = A[i1];
|
||||
//v = A[i2] * (double2){0.707106781186547461715008466854,
|
||||
// 0.707106781186547461715008466854};
|
||||
|
||||
u = regs[i];
|
||||
v = regs[i + params::opt / 2] * (double2){0.707106781186547461715008466854,
|
||||
0.707106781186547461715008466854};
|
||||
//A[i1] += v;
|
||||
A[i1] = u + v;
|
||||
A[i2] = u - v;
|
||||
|
||||
tid += params::degree / params::opt; //256
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// level 2
|
||||
// from this level there are more than one twiddles and none of them has equal
|
||||
// real and imag parts, so complete complex multiplication is needed
|
||||
// for each level params::degree / 2^level represents number of coefficients
|
||||
// inside divided chunk of specific level
|
||||
//
|
||||
#pragma unroll
|
||||
for (int i = params::opt / 2 - 1; i >= 0 ; --i) {
|
||||
tid = threadIdx.x + i * params::degree / params::opt;
|
||||
twid_id = tid / (params::degree / 4);
|
||||
i1 = 2 * (params::degree / 4) * twid_id + (tid & (params::degree / 4 - 1));
|
||||
i2 = i1 + params::degree / 4;
|
||||
|
||||
w = negtwiddles[twid_id + 2];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// level 3
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
twid_id = tid / (params::degree / 8);
|
||||
i1 = 2 * (params::degree / 8) * twid_id + (tid & (params::degree / 8 - 1));
|
||||
i2 = i1 + params::degree / 8;
|
||||
|
||||
w = negtwiddles[twid_id + 4];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// level 4
|
||||
//tid = threadIdx.x;
|
||||
//for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
#pragma unroll
|
||||
for (int i = params::opt / 2 - 1; i >= 0 ; --i) {
|
||||
tid = threadIdx.x + i * params::degree / params::opt;
|
||||
twid_id = tid / (params::degree / 16);
|
||||
i1 =
|
||||
2 * (params::degree / 16) * twid_id + (tid & (params::degree / 16 - 1));
|
||||
i2 = i1 + params::degree / 16;
|
||||
|
||||
w = negtwiddles[twid_id + 8];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
//tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// level 5
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
twid_id = tid / (params::degree / 32);
|
||||
i1 =
|
||||
2 * (params::degree / 32) * twid_id + (tid & (params::degree / 32 - 1));
|
||||
i2 = i1 + params::degree / 32;
|
||||
|
||||
w = negtwiddles[twid_id + 16];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// level 6
|
||||
//tid = threadIdx.x;
|
||||
//for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
#pragma unroll
|
||||
for (int i = params::opt / 2 - 1; i >= 0 ; --i) {
|
||||
tid = threadIdx.x + i * params::degree / params::opt;
|
||||
twid_id = tid / (params::degree / 64);
|
||||
i1 =
|
||||
2 * (params::degree / 64) * twid_id + (tid & (params::degree / 64 - 1));
|
||||
i2 = i1 + params::degree / 64;
|
||||
|
||||
w = negtwiddles[twid_id + 32];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
//tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// level 7
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
twid_id = tid / (params::degree / 128);
|
||||
i1 = 2 * (params::degree / 128) * twid_id +
|
||||
(tid & (params::degree / 128 - 1));
|
||||
i2 = i1 + params::degree / 128;
|
||||
|
||||
w = negtwiddles[twid_id + 64];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// from level 8, we need to check size of params degree, because we support
|
||||
// minimum actual polynomial size = 256, when compressed size is halfed and
|
||||
// minimum supported compressed size is 128, so we always need first 7
|
||||
// levels of butterfly operation, since butterfly levels are hardcoded
|
||||
// we need to check if polynomial size is big enough to require specific level
|
||||
// of butterfly.
|
||||
if constexpr (params::degree >= 256) {
|
||||
// level 8
|
||||
//tid = threadIdx.x;
|
||||
//for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
#pragma unroll
|
||||
for (int i = params::opt / 2 - 1; i >= 0 ; --i) {
|
||||
tid = threadIdx.x + i * params::degree / params::opt;
|
||||
twid_id = tid / (params::degree / 256);
|
||||
i1 = 2 * (params::degree / 256) * twid_id +
|
||||
(tid & (params::degree / 256 - 1));
|
||||
i2 = i1 + params::degree / 256;
|
||||
|
||||
w = negtwiddles[twid_id + 128];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
//tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if constexpr (params::degree >= 512) {
|
||||
// level 9
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
twid_id = tid / (params::degree / 512);
|
||||
i1 = 2 * (params::degree / 512) * twid_id +
|
||||
(tid & (params::degree / 512 - 1));
|
||||
i2 = i1 + params::degree / 512;
|
||||
|
||||
w = negtwiddles[twid_id + 256];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if constexpr (params::degree >= 1024) {
|
||||
// level 10
|
||||
//tid = threadIdx.x;
|
||||
//for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
#pragma unroll
|
||||
for (int i = params::opt / 2 - 1; i >= 0 ; --i) {
|
||||
tid = threadIdx.x + i * params::degree / params::opt;
|
||||
twid_id = tid / (params::degree / 1024);
|
||||
i1 = 2 * (params::degree / 1024) * twid_id +
|
||||
(tid & (params::degree / 1024 - 1));
|
||||
i2 = i1 + params::degree / 1024;
|
||||
|
||||
w = negtwiddles[twid_id + 512];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
//tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if constexpr (params::degree >= 2048) {
|
||||
// level 11
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
twid_id = tid / (params::degree / 2048);
|
||||
i1 = 2 * (params::degree / 2048) * twid_id +
|
||||
(tid & (params::degree / 2048 - 1));
|
||||
i2 = i1 + params::degree / 2048;
|
||||
|
||||
w = negtwiddles[twid_id + 1024];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if constexpr (params::degree >= 4096) {
|
||||
// level 12
|
||||
//tid = threadIdx.x;
|
||||
//for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
#pragma unroll
|
||||
for (int i = params::opt / 2 - 1; i >= 0 ; --i) {
|
||||
tid = threadIdx.x + i * params::degree / params::opt;
|
||||
twid_id = tid / (params::degree / 4096);
|
||||
i1 = 2 * (params::degree / 4096) * twid_id +
|
||||
(tid & (params::degree / 4096 - 1));
|
||||
i2 = i1 + params::degree / 4096;
|
||||
|
||||
w = negtwiddles[twid_id + 2048];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
//tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if constexpr (params::degree >= 8192) {
|
||||
// level 13
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (size_t i = 0; i < params::opt / 2; ++i) {
|
||||
twid_id = tid / (params::degree / 8192);
|
||||
i1 = 2 * (params::degree / 8192) * twid_id +
|
||||
(tid & (params::degree / 8192 - 1));
|
||||
i2 = i1 + params::degree / 8192;
|
||||
|
||||
w = negtwiddles[twid_id + 4096];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
A[i1] += v;
|
||||
A[i2] = u - v;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
}
|
||||
|
||||
#endif // GPU_BOOTSTRAP_FFT_CUH
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "cuComplex.h"
|
||||
|
||||
__constant__ double2 negtwiddles[4096] = {
|
||||
__device__ double2 negtwiddles[8192] = {
|
||||
{0, 0},
|
||||
{0.707106781186547461715008466854, 0.707106781186547572737310929369},
|
||||
{0.92387953251128673848313610506, 0.382683432365089781779232680492},
|
||||
@@ -4096,9 +4096,7 @@ __constant__ double2 negtwiddles[4096] = {
|
||||
{0.70791982920081630847874976098, 0.706292797233758484765075991163},
|
||||
{-0.706292797233758484765075991163, 0.70791982920081630847874976098},
|
||||
{0.00115048533711384847431913325266, 0.99999933819152553304832053982},
|
||||
{-0.99999933819152553304832053982, 0.00115048533711384847431913325266}};
|
||||
|
||||
__device__ double2 negtwiddles13[4096] = {
|
||||
{-0.99999933819152553304832053982, 0.00115048533711384847431913325266},
|
||||
{0.999999981616429334252416083473, 0.000191747597310703291528452552051},
|
||||
{-0.000191747597310703291528452552051, 0.999999981616429334252416083473},
|
||||
{0.706971182161065359039753275283, 0.707242354213734603085583785287},
|
||||
|
||||
@@ -2,12 +2,7 @@
|
||||
#define GPU_BOOTSTRAP_TWIDDLES_CUH
|
||||
|
||||
/*
|
||||
* 'negtwiddles' are stored in constant memory for faster access times
|
||||
* because of it's limited size, only twiddles for up to 2^12 polynomial size
|
||||
* can be stored there, twiddles for 2^13 are stored in device memory
|
||||
* 'negtwiddles13'
|
||||
* 'negtwiddles' are stored in device memory to profit caching
|
||||
*/
|
||||
|
||||
extern __constant__ double2 negtwiddles[4096];
|
||||
extern __device__ double2 negtwiddles13[4096];
|
||||
extern __device__ double2 negtwiddles[8192];
|
||||
#endif
|
||||
|
||||
@@ -68,6 +68,18 @@ void cuda_propagate_single_carry_kb_64_inplace(
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
nullptr, (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), num_blocks);
|
||||
}
|
||||
|
||||
void cuda_propagate_single_carry_get_input_carries_kb_64_inplace(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, void *lwe_array,
|
||||
void *carry_out, void *input_carries, int8_t *mem_ptr, void **bsks,
|
||||
void **ksks, uint32_t num_blocks) {
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
static_cast<uint64_t *>(input_carries),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
@@ -427,7 +427,7 @@ void scratch_cuda_propagate_single_carry_kb_inplace(
|
||||
template <typename Torus>
|
||||
void host_propagate_single_carry(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array,
|
||||
Torus *carry_out,
|
||||
Torus *carry_out, Torus *input_carries,
|
||||
int_sc_prop_memory<Torus> *mem, void **bsks,
|
||||
Torus **ksks, uint32_t num_blocks) {
|
||||
auto params = mem->params;
|
||||
@@ -482,6 +482,12 @@ void host_propagate_single_carry(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
cuda_memset_async(step_output, 0, big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
if (input_carries != nullptr) {
|
||||
cuda_memcpy_async_gpu_to_gpu(input_carries, step_output,
|
||||
big_lwe_size_bytes * num_blocks, streams[0],
|
||||
gpu_indexes[0]);
|
||||
}
|
||||
|
||||
host_addition(streams[0], gpu_indexes[0], lwe_array, lwe_array, step_output,
|
||||
glwe_dimension * polynomial_size, num_blocks);
|
||||
|
||||
|
||||
@@ -368,8 +368,8 @@ __host__ void host_integer_sum_ciphertexts_vec_kb(
|
||||
num_blocks);
|
||||
|
||||
host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count,
|
||||
radix_lwe_out, nullptr, mem_ptr->scp_mem,
|
||||
bsks, ksks, num_blocks);
|
||||
radix_lwe_out, nullptr, nullptr,
|
||||
mem_ptr->scp_mem, bsks, ksks, num_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus, typename STorus, class params>
|
||||
|
||||
@@ -84,6 +84,7 @@ __global__ void device_multi_bit_programmable_bootstrap_keybundle(
|
||||
grouping_factor, 2 * polynomial_size, glwe_dimension, level_count);
|
||||
Torus *bsk_poly = bsk_slice + poly_id * params::degree;
|
||||
|
||||
// opt = 8 degree/opt = 256
|
||||
copy_polynomial<Torus, params::opt, params::degree / params::opt>(
|
||||
bsk_poly, accumulator);
|
||||
|
||||
@@ -114,6 +115,7 @@ __global__ void device_multi_bit_programmable_bootstrap_keybundle(
|
||||
// Move accumulator to local memory
|
||||
double2 temp[params::opt / 2];
|
||||
int tid = threadIdx.x;
|
||||
//opt = 8 degree=2048 degree/opt =256
|
||||
#pragma unroll
|
||||
for (int i = 0; i < params::opt / 2; i++) {
|
||||
temp[i].x = __ll2double_rn((int64_t)accumulator[tid]);
|
||||
@@ -123,17 +125,22 @@ __global__ void device_multi_bit_programmable_bootstrap_keybundle(
|
||||
temp[i].y /= (double)std::numeric_limits<Torus>::max();
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
|
||||
/*
|
||||
synchronize_threads_in_block();
|
||||
|
||||
// Move from local memory back to shared memory but as complex
|
||||
tid = threadIdx.x;
|
||||
//Loop for 4 times ... temp[4]
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < params::opt / 2; i++) {
|
||||
fft[tid] = temp[i];
|
||||
tid += params::degree / params::opt;
|
||||
tid += params::degree / params::opt; // degree 2048 opt 8 degree/opt = 256
|
||||
}
|
||||
synchronize_threads_in_block();
|
||||
NSMFFT_direct<HalfDegree<params>>(fft);
|
||||
*/
|
||||
NSMFFT_direct_bundle<HalfDegree<params>>(fft, temp);
|
||||
|
||||
// lwe iteration
|
||||
auto keybundle_out = get_ith_mask_kth_block(
|
||||
|
||||
@@ -1068,6 +1068,19 @@ extern "C" {
|
||||
num_blocks: u32,
|
||||
);
|
||||
|
||||
pub fn cuda_propagate_single_carry_get_input_carries_kb_64_inplace(
|
||||
streams: *const *mut c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
radix_lwe: *mut c_void,
|
||||
carry_out: *mut c_void,
|
||||
input_carries: *mut c_void,
|
||||
mem_ptr: *mut i8,
|
||||
bsks: *const *mut c_void,
|
||||
ksks: *const *mut c_void,
|
||||
num_blocks: u32,
|
||||
);
|
||||
|
||||
pub fn cleanup_cuda_propagate_single_carry(
|
||||
streams: *const *mut c_void,
|
||||
gpu_indexes: *const u32,
|
||||
|
||||
@@ -13,9 +13,9 @@ description = "tfhe-zk-pok: An implementation of zero-knowledge proofs of encryp
|
||||
|
||||
[dependencies]
|
||||
ark-bls12-381 = { package = "tfhe-ark-bls12-381", version = "0.4.0" }
|
||||
ark-ec = { package = "tfhe-ark-ec", version = "0.4.2" }
|
||||
ark-ff = { package = "tfhe-ark-ff", version = "0.4.3" }
|
||||
ark-poly = { package = "tfhe-ark-poly", version = "0.4.2" }
|
||||
ark-ec = { package = "tfhe-ark-ec", version = "0.4.2", features = ["parallel"] }
|
||||
ark-ff = { package = "tfhe-ark-ff", version = "0.4.3", features = ["parallel"] }
|
||||
ark-poly = { package = "tfhe-ark-poly", version = "0.4.2", features = ["parallel"] }
|
||||
ark-serialize = { version = "0.4.2" }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.8.0"
|
||||
|
||||
@@ -242,6 +242,96 @@ mod g2 {
|
||||
.unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
// m is an intermediate variable that's used in both the curve point addition and pairing
|
||||
// functions. we cache it since it requires a Zp division
|
||||
// https://hackmd.io/@tazAymRSQCGXTUKkbh1BAg/Sk27liTW9#Math-Formula-for-Point-Addition
|
||||
pub(crate) fn compute_m(self, other: G2Affine) -> Option<crate::curve_446::Fq2> {
|
||||
let zero = crate::curve_446::Fq2::ZERO;
|
||||
|
||||
// in the context of elliptic curves, the point at infinity is the zero element of the
|
||||
// group
|
||||
if self.inner.infinity || other.inner.infinity {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self == other {
|
||||
let x = self.inner.x;
|
||||
let y = self.inner.y;
|
||||
if y == zero {
|
||||
None
|
||||
} else {
|
||||
let xx = x.square();
|
||||
Some((xx.double() + xx) / y.double())
|
||||
}
|
||||
} else {
|
||||
let x1 = self.inner.x;
|
||||
let y1 = self.inner.y;
|
||||
let x2 = other.inner.x;
|
||||
let y2 = other.inner.y;
|
||||
|
||||
let x_delta = x2 - x1;
|
||||
let y_delta = y2 - y1;
|
||||
|
||||
if x_delta == zero {
|
||||
None
|
||||
} else {
|
||||
Some(y_delta / x_delta)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn double(self, m: Option<crate::curve_446::Fq2>) -> Self {
|
||||
// in the context of elliptic curves, the point at infinity is the zero element of the
|
||||
// group
|
||||
if self.inner.infinity {
|
||||
return self;
|
||||
}
|
||||
|
||||
let mut result = self;
|
||||
|
||||
let x = self.inner.x;
|
||||
let y = self.inner.y;
|
||||
|
||||
if let Some(m) = m {
|
||||
let x3 = m.square() - x.double();
|
||||
let y3 = m * (x - x3) - y;
|
||||
|
||||
(result.inner.x, result.inner.y) = (x3, y3);
|
||||
} else {
|
||||
result.inner.infinity = true;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
pub(crate) fn add_unequal(self, other: G2Affine, m: Option<crate::curve_446::Fq2>) -> Self {
|
||||
// in the context of elliptic curves, the point at infinity is the zero element of the
|
||||
// group
|
||||
if self.inner.infinity {
|
||||
return other;
|
||||
}
|
||||
if other.inner.infinity {
|
||||
return self;
|
||||
}
|
||||
|
||||
let mut result = self;
|
||||
|
||||
let x1 = self.inner.x;
|
||||
let y1 = self.inner.y;
|
||||
let x2 = other.inner.x;
|
||||
|
||||
if let Some(m) = m {
|
||||
let x3 = m.square() - x1 - x2;
|
||||
let y3 = m * (x1 - x3) - y1;
|
||||
|
||||
(result.inner.x, result.inner.y) = (x3, y3);
|
||||
} else {
|
||||
result.inner.infinity = true;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(
|
||||
@@ -373,9 +463,9 @@ mod g2 {
|
||||
}
|
||||
|
||||
pub fn double(self) -> Self {
|
||||
Self {
|
||||
inner: self.inner.double(),
|
||||
}
|
||||
let mut this = self;
|
||||
this.inner.double_in_place();
|
||||
this
|
||||
}
|
||||
}
|
||||
|
||||
@@ -431,51 +521,79 @@ mod g2 {
|
||||
}
|
||||
|
||||
mod gt {
|
||||
use crate::curve_446::{Fq, Fq12, Fq2};
|
||||
|
||||
use super::*;
|
||||
use ark_ec::bls12::Bls12Config;
|
||||
use ark_ec::pairing::{MillerLoopOutput, Pairing};
|
||||
use ark_ff::{CubicExtField, Fp12, Fp2, QuadExtField};
|
||||
use ark_ff::{CubicExtField, QuadExtField};
|
||||
|
||||
type Bls = crate::curve_446::Bls12_446;
|
||||
type Config = crate::curve_446::Config;
|
||||
|
||||
const ONE: Fp2<<Config as Bls12Config>::Fp2Config> = QuadExtField {
|
||||
c0: MontFp!("1"),
|
||||
c1: MontFp!("0"),
|
||||
};
|
||||
const ZERO: Fp2<<Config as Bls12Config>::Fp2Config> = QuadExtField {
|
||||
const ZERO: Fq2 = QuadExtField {
|
||||
c0: MontFp!("0"),
|
||||
c1: MontFp!("0"),
|
||||
};
|
||||
|
||||
const U1: Fp12<<Config as Bls12Config>::Fp12Config> = QuadExtField {
|
||||
c0: CubicExtField {
|
||||
c0: ZERO,
|
||||
c1: ZERO,
|
||||
c2: ZERO,
|
||||
},
|
||||
c1: CubicExtField {
|
||||
c0: ONE,
|
||||
c1: ZERO,
|
||||
c2: ZERO,
|
||||
},
|
||||
// computed by copying the result from
|
||||
// let two: Fq = MontFp!("2"); println!("{}", two.inverse().unwrap()), which we can't compute in
|
||||
// a const context;
|
||||
const TWO_INV: Fq = {
|
||||
MontFp!("86412351771428577990035638289747981121746346761394949218917418178192828331138736448451251370148591845087981000773214233672031082665302")
|
||||
};
|
||||
const U3: Fp12<<Config as Bls12Config>::Fp12Config> = QuadExtField {
|
||||
c0: CubicExtField {
|
||||
c0: ZERO,
|
||||
c1: ZERO,
|
||||
c2: ZERO,
|
||||
},
|
||||
c1: CubicExtField {
|
||||
c0: ZERO,
|
||||
c1: ONE,
|
||||
c2: ZERO,
|
||||
},
|
||||
const TWO_INV_MINUS_1: Fq = {
|
||||
MontFp!("86412351771428577990035638289747981121746346761394949218917418178192828331138736448451251370148591845087981000773214233672031082665301")
|
||||
};
|
||||
|
||||
const fn fp2_to_fp12(
|
||||
x: Fp2<<Config as Bls12Config>::Fp2Config>,
|
||||
) -> Fp12<<Config as Bls12Config>::Fp12Config> {
|
||||
// the only non zero value in inv(U1) and inv(U3), which come from Olivier's equations.
|
||||
const C: Fq2 = QuadExtField {
|
||||
c0: TWO_INV,
|
||||
c1: TWO_INV_MINUS_1,
|
||||
};
|
||||
|
||||
fn fp2_mul_c(x: Fq2) -> Fq2 {
|
||||
let x0_c0 = x.c0 * C.c0;
|
||||
let x1_c0 = x.c1 * C.c0;
|
||||
|
||||
let x0_c1 = x0_c0 - x.c0;
|
||||
let x1_c1 = x1_c0 - x.c1;
|
||||
|
||||
QuadExtField {
|
||||
c0: x0_c0 - x1_c1,
|
||||
c1: x0_c1 + x1_c0,
|
||||
}
|
||||
}
|
||||
|
||||
fn fp2_mul_u1_inv(x: Fq2) -> Fq12 {
|
||||
QuadExtField {
|
||||
c0: CubicExtField {
|
||||
c0: ZERO,
|
||||
c1: ZERO,
|
||||
c2: ZERO,
|
||||
},
|
||||
c1: CubicExtField {
|
||||
c0: ZERO,
|
||||
c1: ZERO,
|
||||
c2: fp2_mul_c(x),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn fp2_mul_u3_inv(x: Fq2) -> Fq12 {
|
||||
QuadExtField {
|
||||
c0: CubicExtField {
|
||||
c0: ZERO,
|
||||
c1: ZERO,
|
||||
c2: ZERO,
|
||||
},
|
||||
c1: CubicExtField {
|
||||
c0: ZERO,
|
||||
c1: fp2_mul_c(x),
|
||||
c2: ZERO,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
const fn fp2_to_fp12(x: Fq2) -> Fq12 {
|
||||
QuadExtField {
|
||||
c0: CubicExtField {
|
||||
c0: x,
|
||||
@@ -490,52 +608,59 @@ mod gt {
|
||||
}
|
||||
}
|
||||
|
||||
const fn fp_to_fp12(
|
||||
x: <Config as Bls12Config>::Fp,
|
||||
) -> Fp12<<Config as Bls12Config>::Fp12Config> {
|
||||
fp2_to_fp12(QuadExtField {
|
||||
const fn fp_to_fp2(x: Fq) -> Fq2 {
|
||||
QuadExtField {
|
||||
c0: x,
|
||||
c1: MontFp!("0"),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn ate_tangent_ev(qt: G2, evpt: G1) -> Fp12<<Config as Bls12Config>::Fp12Config> {
|
||||
let qt = qt.inner.into_affine();
|
||||
let evpt = evpt.inner.into_affine();
|
||||
const fn fp_to_fp12(x: Fq) -> Fq12 {
|
||||
fp2_to_fp12(fp_to_fp2(x))
|
||||
}
|
||||
|
||||
fn ate_tangent_ev(qt: G2Affine, evpt: G1Affine, m: Fq2) -> Fq12 {
|
||||
let qt = qt.inner;
|
||||
let evpt = evpt.inner;
|
||||
|
||||
let (xt, yt) = (qt.x, qt.y);
|
||||
let (xe, ye) = (evpt.x, evpt.y);
|
||||
|
||||
let xt = fp2_to_fp12(xt);
|
||||
let yt = fp2_to_fp12(yt);
|
||||
let xe = fp_to_fp12(xe);
|
||||
let ye = fp_to_fp12(ye);
|
||||
let l = m;
|
||||
let mut l_xe = l;
|
||||
l_xe.c0 *= xe;
|
||||
l_xe.c1 *= xe;
|
||||
|
||||
let three = fp_to_fp12(MontFp!("3"));
|
||||
let two = fp_to_fp12(MontFp!("2"));
|
||||
let mut r0 = fp_to_fp12(ye);
|
||||
let r1 = fp2_mul_u1_inv(l_xe);
|
||||
let r2 = fp2_mul_u3_inv(l * xt - yt);
|
||||
|
||||
let l = three * xt.square() / (two * yt);
|
||||
ye - (l * xe) / U1 + (l * xt - yt) / U3
|
||||
r0.c1.c1 = r2.c1.c1;
|
||||
r0.c1.c2 = -r1.c1.c2;
|
||||
|
||||
r0
|
||||
}
|
||||
|
||||
fn ate_line_ev(q1: G2, q2: G2, evpt: G1) -> Fp12<<Config as Bls12Config>::Fp12Config> {
|
||||
let q1 = q1.inner.into_affine();
|
||||
let q2 = q2.inner.into_affine();
|
||||
let evpt = evpt.inner.into_affine();
|
||||
fn ate_line_ev(q1: G2Affine, evpt: G1Affine, m: Fq2) -> Fq12 {
|
||||
let q1 = q1.inner;
|
||||
let evpt = evpt.inner;
|
||||
|
||||
let (x1, y1) = (q1.x, q1.y);
|
||||
let (x2, y2) = (q2.x, q2.y);
|
||||
let (xe, ye) = (evpt.x, evpt.y);
|
||||
|
||||
let x1 = fp2_to_fp12(x1);
|
||||
let y1 = fp2_to_fp12(y1);
|
||||
let x2 = fp2_to_fp12(x2);
|
||||
let y2 = fp2_to_fp12(y2);
|
||||
let xe = fp_to_fp12(xe);
|
||||
let ye = fp_to_fp12(ye);
|
||||
let l = m;
|
||||
let mut l_xe = l;
|
||||
l_xe.c0 *= xe;
|
||||
l_xe.c1 *= xe;
|
||||
|
||||
let l = (y2 - y1) / (x2 - x1);
|
||||
ye - (l * xe) / U1 + (l * x1 - y1) / U3
|
||||
let mut r0 = fp_to_fp12(ye);
|
||||
let r1 = fp2_mul_u1_inv(l * fp_to_fp2(xe));
|
||||
let r2 = fp2_mul_u3_inv(l * x1 - y1);
|
||||
|
||||
r0.c1.c1 = r2.c1.c1;
|
||||
r0.c1.c2 = -r1.c1.c2;
|
||||
|
||||
r0
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
@@ -544,22 +669,24 @@ mod gt {
|
||||
let t_bits = b"110000000001000001000000100000000000000000000000000000000100000000000000001";
|
||||
|
||||
let mut fk = fp_to_fp12(MontFp!("1"));
|
||||
let p = p.normalize();
|
||||
let q = q.normalize();
|
||||
|
||||
let mut qk = q;
|
||||
|
||||
for k in 1..t_log2 {
|
||||
let lkk = ate_tangent_ev(qk, p);
|
||||
qk = qk + qk;
|
||||
let m = qk.compute_m(qk).unwrap();
|
||||
let lkk = ate_tangent_ev(qk, p, m);
|
||||
qk = qk.double(Some(m));
|
||||
fk = fk.square() * lkk;
|
||||
|
||||
if t_bits[k] == b'1' {
|
||||
assert_ne!(q, qk);
|
||||
let lkp1 = if q != -qk {
|
||||
ate_line_ev(q, qk, p)
|
||||
} else {
|
||||
fp_to_fp12(MontFp!("1"))
|
||||
};
|
||||
qk += q;
|
||||
fk *= lkp1;
|
||||
let m = q.compute_m(qk);
|
||||
let new_qk = q.add_unequal(qk, m);
|
||||
if !new_qk.inner.infinity {
|
||||
fk *= ate_line_ev(q, p, m.unwrap());
|
||||
}
|
||||
qk = new_qk;
|
||||
}
|
||||
}
|
||||
let mlo = MillerLoopOutput(fk);
|
||||
|
||||
@@ -1720,7 +1720,7 @@ mod cuda {
|
||||
|
||||
define_cuda_server_key_bench_clean_input_scalar_fn!(
|
||||
method_name: unchecked_unsigned_overflowing_scalar_add,
|
||||
display_name: unsigned_overflowing_scalar_add,
|
||||
display_name: unsigned_overflowing_add,
|
||||
rng_func: default_scalar
|
||||
);
|
||||
|
||||
@@ -1981,7 +1981,7 @@ mod cuda {
|
||||
|
||||
define_cuda_server_key_bench_clean_input_scalar_fn!(
|
||||
method_name: unsigned_overflowing_scalar_add,
|
||||
display_name: overflowing_scalar_add,
|
||||
display_name: overflowing_add,
|
||||
rng_func: default_scalar
|
||||
);
|
||||
|
||||
|
||||
@@ -553,7 +553,7 @@ define_server_key_bench_fn!(
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: greater,
|
||||
display_name: greater,
|
||||
display_name: greater_than,
|
||||
BenchParamsSet::Standard
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
@@ -563,7 +563,7 @@ define_server_key_bench_fn!(
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: less,
|
||||
display_name: less,
|
||||
display_name: less_than,
|
||||
BenchParamsSet::Standard
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
@@ -676,7 +676,7 @@ define_server_key_scalar_div_bench_fn!(
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_greater,
|
||||
display_name: greater,
|
||||
display_name: greater_than,
|
||||
BenchParamsSet::Standard
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
@@ -686,7 +686,7 @@ define_server_key_scalar_bench_fn!(
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_less,
|
||||
display_name: less,
|
||||
display_name: less_than,
|
||||
BenchParamsSet::Standard
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
|
||||
@@ -4,7 +4,7 @@ use rayon::prelude::*;
|
||||
use tfhe::shortint::prelude::*;
|
||||
|
||||
pub fn pack_cast_64(c: &mut Criterion) {
|
||||
let bench_name = "pack_cast_64";
|
||||
let bench_name = "shortint::pack_cast_64";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
let (client_key_1, server_key_1): (ClientKey, ServerKey) =
|
||||
@@ -55,7 +55,7 @@ pub fn pack_cast_64(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
pub fn pack_cast(c: &mut Criterion) {
|
||||
let bench_name = "pack_cast";
|
||||
let bench_name = "shortint::pack_cast";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
let (client_key_1, server_key_1): (ClientKey, ServerKey) =
|
||||
@@ -96,7 +96,7 @@ pub fn pack_cast(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
pub fn cast(c: &mut Criterion) {
|
||||
let bench_name = "cast";
|
||||
let bench_name = "shortint::cast";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
let (client_key_1, server_key_1): (ClientKey, ServerKey) =
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
This document explains how to save and load versioned data using the data versioning feature.
|
||||
|
||||
Starting from v0.7.0, **TFHE-rs** supports versioned data types. This allows you to store data and load it in the future without compatibility concerns. This feature is done by the `tfhe-versionable` crate.
|
||||
Starting from v0.6.4, **TFHE-rs** supports versioned data types. This allows you to store data and load it in the future without compatibility concerns. This feature is done by the `tfhe-versionable` crate.
|
||||
|
||||
This versioning scheme is compatible with all the [data formats](https://serde.rs/#data-formats) supported by serde.
|
||||
|
||||
@@ -85,7 +85,7 @@ When possible, data will be upgraded automatically without any kind of interract
|
||||
You will find below a list of breaking changes and how to upgrade them.
|
||||
|
||||
# 0.6 -> 0.7
|
||||
- `crate::integer::ciphertext::CompactCiphertextList`
|
||||
- `tfhe::integer::ciphertext::CompactCiphertextList`:
|
||||
in 0.6, these lists of ciphertext were statically typed and homogenous. Since 0.7, they are heterogeneous. The new version stores for each element an information about its type (Signed, Unsigned or Boolean). Since this information were not stored before, the list is set to be made of `Unsigned` integers by default. If that is not the case, you can set its type using the following snippet:
|
||||
|
||||
```rust
|
||||
@@ -142,3 +142,12 @@ pub fn main() {
|
||||
assert_eq!(-1i8, decrypted);
|
||||
}
|
||||
```
|
||||
|
||||
- `tfhe::{CompactFheInt, CompactFheUint, CompactFheIntList, CompactFheUintList}`:
|
||||
The types have been deprecated, they are only kept in **TFHE-rs** for backward compatibility. They can now be accessed using the `tfhe::high_level_api::backward_compatibility::integers` module. The only functionality that is still supported is to unversionize them and expand them into regular `FheInt`, `FheUint`, `Vec<FehInt>` and `Vec<FheUint>`:
|
||||
|
||||
```Rust
|
||||
let loaded_ct = CompactFheUint8::unversionize(versioned_ct).unwrap();
|
||||
let ct = loaded_ct.expand();
|
||||
```
|
||||
Starting with v0.7, this compact list functionality is provided by the `tfhe::CompactCiphertextList` type.
|
||||
|
||||
@@ -933,6 +933,91 @@ pub unsafe fn propagate_single_carry_assign_async<T: UnsignedInteger, B: Numeric
|
||||
);
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// # Safety
|
||||
///
|
||||
/// - [CudaStreams::synchronize] __must__ be called after this function
|
||||
/// as soon as synchronization is required
|
||||
pub unsafe fn propagate_single_carry_get_input_carries_assign_async<
|
||||
T: UnsignedInteger,
|
||||
B: Numeric,
|
||||
>(
|
||||
streams: &CudaStreams,
|
||||
radix_lwe_input: &mut CudaVec<T>,
|
||||
carry_out: &mut CudaVec<T>,
|
||||
input_carries: &mut CudaVec<T>,
|
||||
bootstrapping_key: &CudaVec<B>,
|
||||
keyswitch_key: &CudaVec<T>,
|
||||
lwe_dimension: LweDimension,
|
||||
glwe_dimension: GlweDimension,
|
||||
polynomial_size: PolynomialSize,
|
||||
ks_level: DecompositionLevelCount,
|
||||
ks_base_log: DecompositionBaseLog,
|
||||
pbs_level: DecompositionLevelCount,
|
||||
pbs_base_log: DecompositionBaseLog,
|
||||
num_blocks: u32,
|
||||
message_modulus: MessageModulus,
|
||||
carry_modulus: CarryModulus,
|
||||
pbs_type: PBSType,
|
||||
grouping_factor: LweBskGroupingFactor,
|
||||
) {
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
radix_lwe_input.gpu_index(0),
|
||||
"GPU error: all data should reside on the same GPU."
|
||||
);
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
bootstrapping_key.gpu_index(0),
|
||||
"GPU error: all data should reside on the same GPU."
|
||||
);
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
keyswitch_key.gpu_index(0),
|
||||
"GPU error: all data should reside on the same GPU."
|
||||
);
|
||||
let mut mem_ptr: *mut i8 = std::ptr::null_mut();
|
||||
let big_lwe_dimension: u32 = glwe_dimension.0 as u32 * polynomial_size.0 as u32;
|
||||
scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
streams.ptr.as_ptr(),
|
||||
streams.gpu_indexes.as_ptr(),
|
||||
streams.len() as u32,
|
||||
std::ptr::addr_of_mut!(mem_ptr),
|
||||
glwe_dimension.0 as u32,
|
||||
polynomial_size.0 as u32,
|
||||
big_lwe_dimension,
|
||||
lwe_dimension.0 as u32,
|
||||
ks_level.0 as u32,
|
||||
ks_base_log.0 as u32,
|
||||
pbs_level.0 as u32,
|
||||
pbs_base_log.0 as u32,
|
||||
grouping_factor.0 as u32,
|
||||
num_blocks,
|
||||
message_modulus.0 as u32,
|
||||
carry_modulus.0 as u32,
|
||||
pbs_type as u32,
|
||||
true,
|
||||
);
|
||||
cuda_propagate_single_carry_get_input_carries_kb_64_inplace(
|
||||
streams.ptr.as_ptr(),
|
||||
streams.gpu_indexes.as_ptr(),
|
||||
streams.len() as u32,
|
||||
radix_lwe_input.as_mut_c_ptr(0),
|
||||
carry_out.as_mut_c_ptr(0),
|
||||
input_carries.as_mut_c_ptr(0),
|
||||
mem_ptr,
|
||||
bootstrapping_key.ptr.as_ptr(),
|
||||
keyswitch_key.ptr.as_ptr(),
|
||||
num_blocks,
|
||||
);
|
||||
cleanup_cuda_propagate_single_carry(
|
||||
streams.ptr.as_ptr(),
|
||||
streams.gpu_indexes.as_ptr(),
|
||||
streams.len() as u32,
|
||||
std::ptr::addr_of_mut!(mem_ptr),
|
||||
);
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// # Safety
|
||||
///
|
||||
|
||||
@@ -29,24 +29,24 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg = 1u64;
|
||||
///
|
||||
/// let ct = cks.encrypt(msg);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut stream);
|
||||
/// let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a bitwise and:
|
||||
/// let d_ct_res = sks.unchecked_bitnot(&d_ct, &mut stream);
|
||||
/// let d_ct_res = sks.unchecked_bitnot(&d_ct, &mut streams);
|
||||
///
|
||||
/// // Copy back to CPU
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec: u64 = cks.decrypt(&ct_res);
|
||||
@@ -55,24 +55,24 @@ impl CudaServerKey {
|
||||
pub fn unchecked_bitnot<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.unchecked_bitnot_assign(&mut result, stream);
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.unchecked_bitnot_assign(&mut result, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_bitnot_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
// We do (-ciphertext) + (msg_mod -1) as it allows to avoid an allocation
|
||||
cuda_lwe_ciphertext_negate_assign(&mut ct.as_mut().d_blocks, stream);
|
||||
cuda_lwe_ciphertext_negate_assign(&mut ct.as_mut().d_blocks, streams);
|
||||
|
||||
let ct_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0;
|
||||
|
||||
@@ -81,14 +81,21 @@ impl CudaServerKey {
|
||||
let shift_plaintext = u64::from(scalar) * delta;
|
||||
|
||||
let scalar_vector = vec![shift_plaintext; ct_blocks];
|
||||
let mut d_decomposed_scalar =
|
||||
CudaVec::<u64>::new_async(ct.as_ref().d_blocks.lwe_ciphertext_count().0, stream, 0);
|
||||
d_decomposed_scalar.copy_from_cpu_async(scalar_vector.as_slice(), stream, 0);
|
||||
let mut d_decomposed_scalar = CudaVec::<u64>::new_async(
|
||||
ct.as_ref().d_blocks.lwe_ciphertext_count().0,
|
||||
streams,
|
||||
streams.gpu_indexes[0],
|
||||
);
|
||||
d_decomposed_scalar.copy_from_cpu_async(
|
||||
scalar_vector.as_slice(),
|
||||
streams,
|
||||
streams.gpu_indexes[0],
|
||||
);
|
||||
|
||||
cuda_lwe_ciphertext_plaintext_add_assign(
|
||||
&mut ct.as_mut().d_blocks,
|
||||
&d_decomposed_scalar,
|
||||
stream,
|
||||
streams,
|
||||
);
|
||||
ct.as_mut().info = ct.as_ref().info.after_bitnot();
|
||||
}
|
||||
@@ -96,12 +103,12 @@ impl CudaServerKey {
|
||||
pub fn unchecked_bitnot_assign<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
unsafe {
|
||||
self.unchecked_bitnot_assign_async(ct, stream);
|
||||
self.unchecked_bitnot_assign_async(ct, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
/// Computes homomorphically bitand between two ciphertexts encrypting integer values.
|
||||
@@ -121,11 +128,11 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg1 = 201u64;
|
||||
/// let msg2 = 1u64;
|
||||
@@ -134,14 +141,14 @@ impl CudaServerKey {
|
||||
/// let ct2 = cks.encrypt(msg2);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream);
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a bitwise and:
|
||||
/// let d_ct_res = sks.unchecked_bitand(&d_ct1, &d_ct2, &mut stream);
|
||||
/// let d_ct_res = sks.unchecked_bitand(&d_ct1, &d_ct2, &mut streams);
|
||||
///
|
||||
/// // Copy back to CPU
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec: u64 = cks.decrypt(&ct_res);
|
||||
@@ -151,23 +158,23 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct_left: &T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let mut result = unsafe { ct_left.duplicate_async(stream) };
|
||||
self.unchecked_bitand_assign(&mut result, ct_right, stream);
|
||||
let mut result = unsafe { ct_left.duplicate_async(streams) };
|
||||
self.unchecked_bitand_assign(&mut result, ct_right, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_bitop_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
op: BitOpType,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
assert_eq!(
|
||||
ct_left.as_ref().d_blocks.lwe_dimension(),
|
||||
@@ -183,7 +190,7 @@ impl CudaServerKey {
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
unchecked_bitop_integer_radix_kb_assign_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut ct_left.as_mut().d_blocks.0.d_vec,
|
||||
&ct_right.as_ref().d_blocks.0.d_vec,
|
||||
&d_bsk.d_vec,
|
||||
@@ -210,7 +217,7 @@ impl CudaServerKey {
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
unchecked_bitop_integer_radix_kb_assign_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut ct_left.as_mut().d_blocks.0.d_vec,
|
||||
&ct_right.as_ref().d_blocks.0.d_vec,
|
||||
&d_multibit_bsk.d_vec,
|
||||
@@ -242,13 +249,13 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
unsafe {
|
||||
self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::And, stream);
|
||||
self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::And, streams);
|
||||
ct_left.as_mut().info = ct_left.as_ref().info.after_bitand(&ct_right.as_ref().info);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
/// Computes homomorphically bitor between two ciphertexts encrypting integer values.
|
||||
@@ -268,11 +275,11 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg1 = 200u64;
|
||||
/// let msg2 = 1u64;
|
||||
@@ -281,14 +288,14 @@ impl CudaServerKey {
|
||||
/// let ct2 = cks.encrypt(msg2);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream);
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a bitwise and:
|
||||
/// let d_ct_res = sks.unchecked_bitor(&d_ct1, &d_ct2, &mut stream);
|
||||
/// let d_ct_res = sks.unchecked_bitor(&d_ct1, &d_ct2, &mut streams);
|
||||
///
|
||||
/// // Copy back to CPU
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec: u64 = cks.decrypt(&ct_res);
|
||||
@@ -298,10 +305,10 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct_left: &T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let mut result = unsafe { ct_left.duplicate_async(stream) };
|
||||
self.unchecked_bitor_assign(&mut result, ct_right, stream);
|
||||
let mut result = unsafe { ct_left.duplicate_async(streams) };
|
||||
self.unchecked_bitor_assign(&mut result, ct_right, streams);
|
||||
result
|
||||
}
|
||||
|
||||
@@ -309,13 +316,13 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
unsafe {
|
||||
self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Or, stream);
|
||||
self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Or, streams);
|
||||
ct_left.as_mut().info = ct_left.as_ref().info.after_bitor(&ct_right.as_ref().info);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
/// Computes homomorphically bitxor between two ciphertexts encrypting integer values.
|
||||
@@ -335,11 +342,11 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg1 = 49;
|
||||
/// let msg2 = 64;
|
||||
@@ -348,14 +355,14 @@ impl CudaServerKey {
|
||||
/// let ct2 = cks.encrypt(msg2);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream);
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a bitwise and:
|
||||
/// let d_ct_res = sks.unchecked_bitxor(&d_ct1, &d_ct2, &mut stream);
|
||||
/// let d_ct_res = sks.unchecked_bitxor(&d_ct1, &d_ct2, &mut streams);
|
||||
///
|
||||
/// // Copy back to CPU
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec: u64 = cks.decrypt(&ct_res);
|
||||
@@ -365,10 +372,10 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct_left: &T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let mut result = unsafe { ct_left.duplicate_async(stream) };
|
||||
self.unchecked_bitxor_assign(&mut result, ct_right, stream);
|
||||
let mut result = unsafe { ct_left.duplicate_async(streams) };
|
||||
self.unchecked_bitxor_assign(&mut result, ct_right, streams);
|
||||
result
|
||||
}
|
||||
|
||||
@@ -376,13 +383,13 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
unsafe {
|
||||
self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Xor, stream);
|
||||
self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Xor, streams);
|
||||
ct_left.as_mut().info = ct_left.as_ref().info.after_bitxor(&ct_right.as_ref().info);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
/// Computes homomorphically bitand between two ciphertexts encrypting integer values.
|
||||
@@ -402,11 +409,11 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg1 = 201u64;
|
||||
/// let msg2 = 1u64;
|
||||
@@ -415,14 +422,14 @@ impl CudaServerKey {
|
||||
/// let ct2 = cks.encrypt(msg2);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream);
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a bitwise and:
|
||||
/// let d_ct_res = sks.bitand(&d_ct1, &d_ct2, &mut stream);
|
||||
/// let d_ct_res = sks.bitand(&d_ct1, &d_ct2, &mut streams);
|
||||
///
|
||||
/// // Copy back to CPU
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec: u64 = cks.decrypt(&ct_res);
|
||||
@@ -432,22 +439,22 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct_left: &T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let mut result = unsafe { ct_left.duplicate_async(stream) };
|
||||
self.bitand_assign(&mut result, ct_right, stream);
|
||||
let mut result = unsafe { ct_left.duplicate_async(streams) };
|
||||
self.bitand_assign(&mut result, ct_right, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn bitand_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
let mut tmp_rhs;
|
||||
|
||||
@@ -458,36 +465,36 @@ impl CudaServerKey {
|
||||
) {
|
||||
(true, true) => (ct_left, ct_right),
|
||||
(true, false) => {
|
||||
tmp_rhs = ct_right.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, stream);
|
||||
tmp_rhs = ct_right.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, streams);
|
||||
(ct_left, &tmp_rhs)
|
||||
}
|
||||
(false, true) => {
|
||||
self.full_propagate_assign_async(ct_left, stream);
|
||||
self.full_propagate_assign_async(ct_left, streams);
|
||||
(ct_left, ct_right)
|
||||
}
|
||||
(false, false) => {
|
||||
tmp_rhs = ct_right.duplicate_async(stream);
|
||||
tmp_rhs = ct_right.duplicate_async(streams);
|
||||
|
||||
self.full_propagate_assign_async(ct_left, stream);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, stream);
|
||||
self.full_propagate_assign_async(ct_left, streams);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, streams);
|
||||
(ct_left, &tmp_rhs)
|
||||
}
|
||||
}
|
||||
};
|
||||
self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::And, stream);
|
||||
self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::And, streams);
|
||||
}
|
||||
|
||||
pub fn bitand_assign<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
unsafe {
|
||||
self.bitand_assign_async(ct_left, ct_right, stream);
|
||||
self.bitand_assign_async(ct_left, ct_right, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
/// Computes homomorphically bitor between two ciphertexts encrypting integer values.
|
||||
@@ -507,11 +514,11 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg1 = 201u64;
|
||||
/// let msg2 = 1u64;
|
||||
@@ -520,14 +527,14 @@ impl CudaServerKey {
|
||||
/// let ct2 = cks.encrypt(msg2);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream);
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a bitwise and:
|
||||
/// let d_ct_res = sks.bitor(&d_ct1, &d_ct2, &mut stream);
|
||||
/// let d_ct_res = sks.bitor(&d_ct1, &d_ct2, &mut streams);
|
||||
///
|
||||
/// // Copy back to CPU
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec: u64 = cks.decrypt(&ct_res);
|
||||
@@ -537,22 +544,22 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct_left: &T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let mut result = unsafe { ct_left.duplicate_async(stream) };
|
||||
self.bitor_assign(&mut result, ct_right, stream);
|
||||
let mut result = unsafe { ct_left.duplicate_async(streams) };
|
||||
self.bitor_assign(&mut result, ct_right, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn bitor_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
let mut tmp_rhs;
|
||||
|
||||
@@ -562,36 +569,36 @@ impl CudaServerKey {
|
||||
) {
|
||||
(true, true) => (ct_left, ct_right),
|
||||
(true, false) => {
|
||||
tmp_rhs = ct_right.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, stream);
|
||||
tmp_rhs = ct_right.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, streams);
|
||||
(ct_left, &tmp_rhs)
|
||||
}
|
||||
(false, true) => {
|
||||
self.full_propagate_assign_async(ct_left, stream);
|
||||
self.full_propagate_assign_async(ct_left, streams);
|
||||
(ct_left, ct_right)
|
||||
}
|
||||
(false, false) => {
|
||||
tmp_rhs = ct_right.duplicate_async(stream);
|
||||
tmp_rhs = ct_right.duplicate_async(streams);
|
||||
|
||||
self.full_propagate_assign_async(ct_left, stream);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, stream);
|
||||
self.full_propagate_assign_async(ct_left, streams);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, streams);
|
||||
(ct_left, &tmp_rhs)
|
||||
}
|
||||
};
|
||||
|
||||
self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Or, stream);
|
||||
self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Or, streams);
|
||||
}
|
||||
|
||||
pub fn bitor_assign<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
unsafe {
|
||||
self.bitor_assign_async(ct_left, ct_right, stream);
|
||||
self.bitor_assign_async(ct_left, ct_right, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
/// Computes homomorphically bitxor between two ciphertexts encrypting integer values.
|
||||
@@ -611,11 +618,11 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg1 = 201u64;
|
||||
/// let msg2 = 1u64;
|
||||
@@ -624,14 +631,14 @@ impl CudaServerKey {
|
||||
/// let ct2 = cks.encrypt(msg2);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream);
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &mut streams);
|
||||
/// let d_ct2 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct2, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a bitwise and:
|
||||
/// let d_ct_res = sks.bitxor(&d_ct1, &d_ct2, &mut stream);
|
||||
/// let d_ct_res = sks.bitxor(&d_ct1, &d_ct2, &mut streams);
|
||||
///
|
||||
/// // Copy back to CPU
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec: u64 = cks.decrypt(&ct_res);
|
||||
@@ -641,22 +648,22 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct_left: &T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let mut result = unsafe { ct_left.duplicate_async(stream) };
|
||||
self.bitxor_assign(&mut result, ct_right, stream);
|
||||
let mut result = unsafe { ct_left.duplicate_async(streams) };
|
||||
self.bitxor_assign(&mut result, ct_right, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn bitxor_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
let mut tmp_rhs;
|
||||
|
||||
@@ -666,36 +673,36 @@ impl CudaServerKey {
|
||||
) {
|
||||
(true, true) => (ct_left, ct_right),
|
||||
(true, false) => {
|
||||
tmp_rhs = ct_right.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, stream);
|
||||
tmp_rhs = ct_right.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, streams);
|
||||
(ct_left, &tmp_rhs)
|
||||
}
|
||||
(false, true) => {
|
||||
self.full_propagate_assign_async(ct_left, stream);
|
||||
self.full_propagate_assign_async(ct_left, streams);
|
||||
(ct_left, ct_right)
|
||||
}
|
||||
(false, false) => {
|
||||
tmp_rhs = ct_right.duplicate_async(stream);
|
||||
tmp_rhs = ct_right.duplicate_async(streams);
|
||||
|
||||
self.full_propagate_assign_async(ct_left, stream);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, stream);
|
||||
self.full_propagate_assign_async(ct_left, streams);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, streams);
|
||||
(ct_left, &tmp_rhs)
|
||||
}
|
||||
};
|
||||
|
||||
self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Xor, stream);
|
||||
self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Xor, streams);
|
||||
}
|
||||
|
||||
pub fn bitxor_assign<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
unsafe {
|
||||
self.bitxor_assign_async(ct_left, ct_right, stream);
|
||||
self.bitxor_assign_async(ct_left, ct_right, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
/// Computes homomorphically bitnot for an encrypted integer value.
|
||||
@@ -716,55 +723,55 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg = 1u64;
|
||||
///
|
||||
/// let ct = cks.encrypt(msg);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut stream);
|
||||
/// let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a bitwise and:
|
||||
/// let d_ct_res = sks.bitnot(&d_ct, &mut stream);
|
||||
/// let d_ct_res = sks.bitnot(&d_ct, &mut streams);
|
||||
///
|
||||
/// // Copy back to CPU
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec: u64 = cks.decrypt(&ct_res);
|
||||
/// assert_eq!(dec, !msg % 256);
|
||||
/// ```
|
||||
pub fn bitnot<T: CudaIntegerRadixCiphertext>(&self, ct: &T, stream: &CudaStreams) -> T {
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.bitnot_assign(&mut result, stream);
|
||||
pub fn bitnot<T: CudaIntegerRadixCiphertext>(&self, ct: &T, streams: &CudaStreams) -> T {
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.bitnot_assign(&mut result, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn bitnot_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
if !ct.block_carries_are_empty() {
|
||||
self.full_propagate_assign_async(ct, stream);
|
||||
self.full_propagate_assign_async(ct, streams);
|
||||
}
|
||||
|
||||
self.unchecked_bitnot_assign_async(ct, stream);
|
||||
self.unchecked_bitnot_assign_async(ct, streams);
|
||||
}
|
||||
|
||||
pub fn bitnot_assign<T: CudaIntegerRadixCiphertext>(&self, ct: &mut T, stream: &CudaStreams) {
|
||||
pub fn bitnot_assign<T: CudaIntegerRadixCiphertext>(&self, ct: &mut T, streams: &CudaStreams) {
|
||||
unsafe {
|
||||
self.bitnot_assign_async(ct, stream);
|
||||
self.bitnot_assign_async(ct, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,8 @@ use crate::integer::gpu::ciphertext::{
|
||||
use crate::integer::gpu::server_key::CudaBootstrappingKey;
|
||||
use crate::integer::gpu::{
|
||||
apply_univariate_lut_kb_async, full_propagate_assign_async,
|
||||
propagate_single_carry_assign_async, CudaServerKey, PBSType,
|
||||
propagate_single_carry_assign_async, propagate_single_carry_get_input_carries_assign_async,
|
||||
CudaServerKey, PBSType,
|
||||
};
|
||||
use crate::shortint::ciphertext::{Degree, NoiseLevel};
|
||||
use crate::shortint::engine::fill_accumulator;
|
||||
@@ -224,6 +225,80 @@ impl CudaServerKey {
|
||||
carry_out
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronized
|
||||
#[allow(dead_code)]
|
||||
pub(crate) unsafe fn propagate_single_carry_get_input_carries_assign_async<T>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
input_carries: &mut T,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut carry_out: T = self.create_trivial_zero_radix(1, streams);
|
||||
let ciphertext = ct.as_mut();
|
||||
let num_blocks = ciphertext.d_blocks.lwe_ciphertext_count().0 as u32;
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
propagate_single_carry_get_input_carries_assign_async(
|
||||
streams,
|
||||
&mut ciphertext.d_blocks.0.d_vec,
|
||||
&mut carry_out.as_mut().d_blocks.0.d_vec,
|
||||
&mut input_carries.as_mut().d_blocks.0.d_vec,
|
||||
&d_bsk.d_vec,
|
||||
&self.key_switching_key.d_vec,
|
||||
d_bsk.input_lwe_dimension(),
|
||||
d_bsk.glwe_dimension(),
|
||||
d_bsk.polynomial_size(),
|
||||
self.key_switching_key.decomposition_level_count(),
|
||||
self.key_switching_key.decomposition_base_log(),
|
||||
d_bsk.decomp_level_count(),
|
||||
d_bsk.decomp_base_log(),
|
||||
num_blocks,
|
||||
ciphertext.info.blocks.first().unwrap().message_modulus,
|
||||
ciphertext.info.blocks.first().unwrap().carry_modulus,
|
||||
PBSType::Classical,
|
||||
LweBskGroupingFactor(0),
|
||||
);
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
propagate_single_carry_get_input_carries_assign_async(
|
||||
streams,
|
||||
&mut ciphertext.d_blocks.0.d_vec,
|
||||
&mut carry_out.as_mut().d_blocks.0.d_vec,
|
||||
&mut input_carries.as_mut().d_blocks.0.d_vec,
|
||||
&d_multibit_bsk.d_vec,
|
||||
&self.key_switching_key.d_vec,
|
||||
d_multibit_bsk.input_lwe_dimension(),
|
||||
d_multibit_bsk.glwe_dimension(),
|
||||
d_multibit_bsk.polynomial_size(),
|
||||
self.key_switching_key.decomposition_level_count(),
|
||||
self.key_switching_key.decomposition_base_log(),
|
||||
d_multibit_bsk.decomp_level_count(),
|
||||
d_multibit_bsk.decomp_base_log(),
|
||||
num_blocks,
|
||||
ciphertext.info.blocks.first().unwrap().message_modulus,
|
||||
ciphertext.info.blocks.first().unwrap().carry_modulus,
|
||||
PBSType::MultiBit,
|
||||
d_multibit_bsk.grouping_factor,
|
||||
);
|
||||
}
|
||||
};
|
||||
ciphertext.info.blocks.iter_mut().for_each(|b| {
|
||||
b.degree = Degree::new(b.message_modulus.0 - 1);
|
||||
b.noise_level = NoiseLevel::NOMINAL;
|
||||
});
|
||||
carry_out.as_mut().info.blocks.iter_mut().for_each(|b| {
|
||||
b.degree = Degree::new(1);
|
||||
b.noise_level = NoiseLevel::NOMINAL;
|
||||
});
|
||||
carry_out
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
@@ -327,25 +402,26 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &T,
|
||||
num_blocks: usize,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let new_num_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0 + num_blocks;
|
||||
let ciphertext_modulus = ct.as_ref().d_blocks.ciphertext_modulus();
|
||||
let lwe_size = ct.as_ref().d_blocks.lwe_dimension().to_lwe_size();
|
||||
let shift = num_blocks * lwe_size.0;
|
||||
|
||||
let mut extended_ct_vec =
|
||||
unsafe { CudaVec::new_async(new_num_blocks * lwe_size.0, stream, 0) };
|
||||
let mut extended_ct_vec = unsafe {
|
||||
CudaVec::new_async(new_num_blocks * lwe_size.0, streams, streams.gpu_indexes[0])
|
||||
};
|
||||
unsafe {
|
||||
extended_ct_vec.memset_async(0u64, stream, 0);
|
||||
extended_ct_vec.memset_async(0u64, streams, streams.gpu_indexes[0]);
|
||||
extended_ct_vec.copy_self_range_gpu_to_gpu_async(
|
||||
shift..,
|
||||
&ct.as_ref().d_blocks.0.d_vec,
|
||||
stream,
|
||||
streams,
|
||||
0,
|
||||
);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
let extended_ct_list = CudaLweCiphertextList::from_cuda_vec(
|
||||
extended_ct_vec,
|
||||
LweCiphertextCount(new_num_blocks),
|
||||
@@ -398,19 +474,24 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &T,
|
||||
num_blocks: usize,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let new_num_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0 + num_blocks;
|
||||
let ciphertext_modulus = ct.as_ref().d_blocks.ciphertext_modulus();
|
||||
let lwe_size = ct.as_ref().d_blocks.lwe_dimension().to_lwe_size();
|
||||
|
||||
let mut extended_ct_vec =
|
||||
unsafe { CudaVec::new_async(new_num_blocks * lwe_size.0, stream, 0) };
|
||||
let mut extended_ct_vec = unsafe {
|
||||
CudaVec::new_async(new_num_blocks * lwe_size.0, streams, streams.gpu_indexes[0])
|
||||
};
|
||||
unsafe {
|
||||
extended_ct_vec.memset_async(0u64, stream, 0);
|
||||
extended_ct_vec.copy_from_gpu_async(&ct.as_ref().d_blocks.0.d_vec, stream, 0);
|
||||
extended_ct_vec.memset_async(0u64, streams, streams.gpu_indexes[0]);
|
||||
extended_ct_vec.copy_from_gpu_async(
|
||||
&ct.as_ref().d_blocks.0.d_vec,
|
||||
streams,
|
||||
streams.gpu_indexes[0],
|
||||
);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
let extended_ct_list = CudaLweCiphertextList::from_cuda_vec(
|
||||
extended_ct_vec,
|
||||
LweCiphertextCount(new_num_blocks),
|
||||
@@ -463,24 +544,25 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &T,
|
||||
num_blocks: usize,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let new_num_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0 - num_blocks;
|
||||
let ciphertext_modulus = ct.as_ref().d_blocks.ciphertext_modulus();
|
||||
let lwe_size = ct.as_ref().d_blocks.lwe_dimension().to_lwe_size();
|
||||
let shift = num_blocks * lwe_size.0;
|
||||
|
||||
let mut trimmed_ct_vec =
|
||||
unsafe { CudaVec::new_async(new_num_blocks * lwe_size.0, stream, 0) };
|
||||
let mut trimmed_ct_vec = unsafe {
|
||||
CudaVec::new_async(new_num_blocks * lwe_size.0, streams, streams.gpu_indexes[0])
|
||||
};
|
||||
unsafe {
|
||||
trimmed_ct_vec.copy_src_range_gpu_to_gpu_async(
|
||||
shift..,
|
||||
&ct.as_ref().d_blocks.0.d_vec,
|
||||
stream,
|
||||
streams,
|
||||
0,
|
||||
);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
let trimmed_ct_list = CudaLweCiphertextList::from_cuda_vec(
|
||||
trimmed_ct_vec,
|
||||
LweCiphertextCount(new_num_blocks),
|
||||
@@ -530,24 +612,25 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &T,
|
||||
num_blocks: usize,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let new_num_blocks = ct.as_ref().d_blocks.lwe_ciphertext_count().0 - num_blocks;
|
||||
let ciphertext_modulus = ct.as_ref().d_blocks.ciphertext_modulus();
|
||||
let lwe_size = ct.as_ref().d_blocks.lwe_dimension().to_lwe_size();
|
||||
let shift = new_num_blocks * lwe_size.0;
|
||||
|
||||
let mut trimmed_ct_vec =
|
||||
unsafe { CudaVec::new_async(new_num_blocks * lwe_size.0, stream, 0) };
|
||||
let mut trimmed_ct_vec = unsafe {
|
||||
CudaVec::new_async(new_num_blocks * lwe_size.0, streams, streams.gpu_indexes[0])
|
||||
};
|
||||
unsafe {
|
||||
trimmed_ct_vec.copy_src_range_gpu_to_gpu_async(
|
||||
0..shift,
|
||||
&ct.as_ref().d_blocks.0.d_vec,
|
||||
stream,
|
||||
streams,
|
||||
0,
|
||||
);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
let trimmed_ct_list = CudaLweCiphertextList::from_cuda_vec(
|
||||
trimmed_ct_vec,
|
||||
LweCiphertextCount(new_num_blocks),
|
||||
@@ -594,7 +677,7 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &T,
|
||||
num_blocks: usize,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let message_modulus = self.message_modulus.0 as u64;
|
||||
let num_bits_in_block = message_modulus.ilog2();
|
||||
@@ -612,28 +695,40 @@ impl CudaServerKey {
|
||||
let lwe_size = ct.as_ref().d_blocks.0.lwe_dimension.to_lwe_size().0;
|
||||
|
||||
// Allocate the necessary amount of memory
|
||||
let mut output_radix = CudaVec::new(new_num_ct_blocks * lwe_size, stream, 0);
|
||||
let mut output_radix = CudaVec::new(
|
||||
new_num_ct_blocks * lwe_size,
|
||||
streams,
|
||||
streams.gpu_indexes[0],
|
||||
);
|
||||
unsafe {
|
||||
output_radix.copy_from_gpu_async(&ct.as_ref().d_blocks.0.d_vec, stream, 0);
|
||||
output_radix.copy_from_gpu_async(
|
||||
&ct.as_ref().d_blocks.0.d_vec,
|
||||
streams,
|
||||
streams.gpu_indexes[0],
|
||||
);
|
||||
// Get the last ct block
|
||||
let last_block = ct
|
||||
.as_ref()
|
||||
.d_blocks
|
||||
.0
|
||||
.d_vec
|
||||
.as_slice(lwe_size * (num_ct_blocks - 1).., 0)
|
||||
.as_slice(lwe_size * (num_ct_blocks - 1).., streams.gpu_indexes[0])
|
||||
.unwrap();
|
||||
let mut output_slice = output_radix
|
||||
.as_mut_slice(lwe_size * num_ct_blocks..lwe_size * new_num_ct_blocks, 0)
|
||||
.as_mut_slice(
|
||||
lwe_size * num_ct_blocks..lwe_size * new_num_ct_blocks,
|
||||
streams.gpu_indexes[0],
|
||||
)
|
||||
.unwrap();
|
||||
let (padding_block, new_blocks) = output_slice.split_at_mut(lwe_size, 0);
|
||||
let (padding_block, new_blocks) =
|
||||
output_slice.split_at_mut(lwe_size, streams.gpu_indexes[0]);
|
||||
let mut padding_block = padding_block.unwrap();
|
||||
let mut new_blocks = new_blocks.unwrap();
|
||||
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
apply_univariate_lut_kb_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut padding_block,
|
||||
&last_block,
|
||||
padding_block_creator_lut.acc.as_ref(),
|
||||
@@ -657,7 +752,7 @@ impl CudaServerKey {
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
apply_univariate_lut_kb_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut padding_block,
|
||||
&last_block,
|
||||
padding_block_creator_lut.acc.as_ref(),
|
||||
@@ -682,12 +777,12 @@ impl CudaServerKey {
|
||||
}
|
||||
for i in 0..num_blocks - 1 {
|
||||
let mut output_block = new_blocks
|
||||
.get_mut(lwe_size * i..lwe_size * (i + 1), 0)
|
||||
.get_mut(lwe_size * i..lwe_size * (i + 1), streams.gpu_indexes[0])
|
||||
.unwrap();
|
||||
output_block.copy_from_gpu_async(&padding_block, stream, 0);
|
||||
output_block.copy_from_gpu_async(&padding_block, streams, streams.gpu_indexes[0]);
|
||||
}
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
let output_lwe_list = CudaLweCiphertextList(CudaLweList {
|
||||
d_vec: output_radix,
|
||||
lwe_ciphertext_count: LweCiphertextCount(new_num_ct_blocks),
|
||||
|
||||
@@ -11,14 +11,14 @@ use crate::integer::gpu::{
|
||||
impl CudaServerKey {
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_bitop_assign_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
rhs: Scalar,
|
||||
op: BitOpType,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
@@ -31,12 +31,13 @@ impl CudaServerKey {
|
||||
.map(|x| x as u64)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let clear_blocks = CudaVec::from_cpu_async(&h_clear_blocks, stream, 0);
|
||||
let clear_blocks =
|
||||
CudaVec::from_cpu_async(&h_clear_blocks, streams, streams.gpu_indexes[0]);
|
||||
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
unchecked_scalar_bitop_integer_radix_kb_assign_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut ct.as_mut().d_blocks.0.d_vec,
|
||||
&clear_blocks,
|
||||
&d_bsk.d_vec,
|
||||
@@ -63,7 +64,7 @@ impl CudaServerKey {
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
unchecked_scalar_bitop_integer_radix_kb_assign_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut ct.as_mut().d_blocks.0.d_vec,
|
||||
&clear_blocks,
|
||||
&d_multibit_bsk.d_vec,
|
||||
@@ -91,13 +92,18 @@ impl CudaServerKey {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_bitand<Scalar, T>(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn unchecked_scalar_bitand<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
rhs: Scalar,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.unchecked_scalar_bitand_assign(&mut result, rhs, stream);
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.unchecked_scalar_bitand_assign(&mut result, rhs, streams);
|
||||
result
|
||||
}
|
||||
|
||||
@@ -105,25 +111,25 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &mut T,
|
||||
rhs: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
unsafe {
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, stream);
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, streams);
|
||||
ct.as_mut().info = ct.as_ref().info.after_scalar_bitand(rhs);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_bitor<Scalar, T>(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn unchecked_scalar_bitor<Scalar, T>(&self, ct: &T, rhs: Scalar, streams: &CudaStreams) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.unchecked_scalar_bitor_assign(&mut result, rhs, stream);
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.unchecked_scalar_bitor_assign(&mut result, rhs, streams);
|
||||
result
|
||||
}
|
||||
|
||||
@@ -131,25 +137,30 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &mut T,
|
||||
rhs: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
unsafe {
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, stream);
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, streams);
|
||||
ct.as_mut().info = ct.as_ref().info.after_scalar_bitor(rhs);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_bitxor<Scalar, T>(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn unchecked_scalar_bitxor<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
rhs: Scalar,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.unchecked_scalar_bitxor_assign(&mut result, rhs, stream);
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.unchecked_scalar_bitxor_assign(&mut result, rhs, streams);
|
||||
result
|
||||
}
|
||||
|
||||
@@ -157,138 +168,138 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &mut T,
|
||||
rhs: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
unsafe {
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, stream);
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, streams);
|
||||
ct.as_mut().info = ct.as_ref().info.after_scalar_bitxor(rhs);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_bitand_assign_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
rhs: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
if !ct.block_carries_are_empty() {
|
||||
self.full_propagate_assign_async(ct, stream);
|
||||
self.full_propagate_assign_async(ct, streams);
|
||||
}
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, stream);
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, streams);
|
||||
ct.as_mut().info = ct.as_ref().info.after_scalar_bitand(rhs);
|
||||
}
|
||||
|
||||
pub fn scalar_bitand_assign<Scalar, T>(&self, ct: &mut T, rhs: Scalar, stream: &CudaStreams)
|
||||
pub fn scalar_bitand_assign<Scalar, T>(&self, ct: &mut T, rhs: Scalar, streams: &CudaStreams)
|
||||
where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
unsafe {
|
||||
self.scalar_bitand_assign_async(ct, rhs, stream);
|
||||
self.scalar_bitand_assign_async(ct, rhs, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
pub fn scalar_bitand<Scalar, T>(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn scalar_bitand<Scalar, T>(&self, ct: &T, rhs: Scalar, streams: &CudaStreams) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.scalar_bitand_assign(&mut result, rhs, stream);
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.scalar_bitand_assign(&mut result, rhs, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_bitor_assign_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
rhs: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
if !ct.block_carries_are_empty() {
|
||||
self.full_propagate_assign_async(ct, stream);
|
||||
self.full_propagate_assign_async(ct, streams);
|
||||
}
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, stream);
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, streams);
|
||||
ct.as_mut().info = ct.as_ref().info.after_scalar_bitor(rhs);
|
||||
}
|
||||
|
||||
pub fn scalar_bitor_assign<Scalar, T>(&self, ct: &mut T, rhs: Scalar, stream: &CudaStreams)
|
||||
pub fn scalar_bitor_assign<Scalar, T>(&self, ct: &mut T, rhs: Scalar, streams: &CudaStreams)
|
||||
where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
unsafe {
|
||||
self.scalar_bitor_assign_async(ct, rhs, stream);
|
||||
self.scalar_bitor_assign_async(ct, rhs, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
pub fn scalar_bitor<Scalar, T>(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn scalar_bitor<Scalar, T>(&self, ct: &T, rhs: Scalar, streams: &CudaStreams) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.scalar_bitor_assign(&mut result, rhs, stream);
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.scalar_bitor_assign(&mut result, rhs, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_bitxor_assign_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
rhs: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
if !ct.block_carries_are_empty() {
|
||||
self.full_propagate_assign_async(ct, stream);
|
||||
self.full_propagate_assign_async(ct, streams);
|
||||
}
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, stream);
|
||||
self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, streams);
|
||||
ct.as_mut().info = ct.as_ref().info.after_scalar_bitxor(rhs);
|
||||
}
|
||||
|
||||
pub fn scalar_bitxor_assign<Scalar, T>(&self, ct: &mut T, rhs: Scalar, stream: &CudaStreams)
|
||||
pub fn scalar_bitxor_assign<Scalar, T>(&self, ct: &mut T, rhs: Scalar, streams: &CudaStreams)
|
||||
where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
unsafe {
|
||||
self.scalar_bitxor_assign_async(ct, rhs, stream);
|
||||
self.scalar_bitxor_assign_async(ct, rhs, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
pub fn scalar_bitxor<Scalar, T>(&self, ct: &T, rhs: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn scalar_bitxor<Scalar, T>(&self, ct: &T, rhs: Scalar, streams: &CudaStreams) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u8>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.scalar_bitxor_assign(&mut result, rhs, stream);
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.scalar_bitxor_assign(&mut result, rhs, streams);
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
@@ -102,15 +102,15 @@ impl CudaServerKey {
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_signed_and_unsigned_scalar_comparison_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
op: ComparisonType,
|
||||
signed_with_positive_scalar: bool,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
@@ -122,7 +122,7 @@ impl CudaServerKey {
|
||||
ComparisonType::GT | ComparisonType::GE | ComparisonType::NE => 1,
|
||||
_ => 0,
|
||||
};
|
||||
let ct_res: T = self.create_trivial_radix(value, 1, stream);
|
||||
let ct_res: T = self.create_trivial_radix(value, 1, streams);
|
||||
return CudaBooleanBlock::from_cuda_radix_ciphertext(ct_res.into_inner());
|
||||
}
|
||||
|
||||
@@ -144,7 +144,7 @@ impl CudaServerKey {
|
||||
ComparisonType::LT | ComparisonType::LE | ComparisonType::NE => 1,
|
||||
_ => 0,
|
||||
};
|
||||
let ct_res: T = self.create_trivial_radix(value, 1, stream);
|
||||
let ct_res: T = self.create_trivial_radix(value, 1, streams);
|
||||
return CudaBooleanBlock::from_cuda_radix_ciphertext(ct_res.into_inner());
|
||||
}
|
||||
|
||||
@@ -153,7 +153,8 @@ impl CudaServerKey {
|
||||
// as we will handle them separately.
|
||||
scalar_blocks.truncate(ct.as_ref().d_blocks.lwe_ciphertext_count().0);
|
||||
|
||||
let d_scalar_blocks: CudaVec<u64> = CudaVec::from_cpu_async(&scalar_blocks, stream, 0);
|
||||
let d_scalar_blocks: CudaVec<u64> =
|
||||
CudaVec::from_cpu_async(&scalar_blocks, streams, streams.gpu_indexes[0]);
|
||||
|
||||
let lwe_ciphertext_count = ct.as_ref().d_blocks.lwe_ciphertext_count();
|
||||
|
||||
@@ -161,7 +162,7 @@ impl CudaServerKey {
|
||||
ct.as_ref().d_blocks.lwe_dimension(),
|
||||
LweCiphertextCount(1),
|
||||
CiphertextModulus::new_native(),
|
||||
stream,
|
||||
streams,
|
||||
);
|
||||
let mut block_info = ct.as_ref().info.blocks[0];
|
||||
block_info.degree = Degree::new(0);
|
||||
@@ -174,7 +175,7 @@ impl CudaServerKey {
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
unchecked_scalar_comparison_integer_radix_kb_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut result.as_mut().ciphertext.d_blocks.0.d_vec,
|
||||
&ct.as_ref().d_blocks.0.d_vec,
|
||||
&d_scalar_blocks,
|
||||
@@ -204,7 +205,7 @@ impl CudaServerKey {
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
unchecked_scalar_comparison_integer_radix_kb_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut result.as_mut().ciphertext.d_blocks.0.d_vec,
|
||||
&ct.as_ref().d_blocks.0.d_vec,
|
||||
&d_scalar_blocks,
|
||||
@@ -239,14 +240,14 @@ impl CudaServerKey {
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_comparison_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
op: ComparisonType,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
@@ -260,12 +261,12 @@ impl CudaServerKey {
|
||||
// Scalar is greater than the bounds, so ciphertext is smaller
|
||||
let result: T = match op {
|
||||
ComparisonType::LT | ComparisonType::LE => {
|
||||
self.create_trivial_radix(1, num_blocks, stream)
|
||||
self.create_trivial_radix(1, num_blocks, streams)
|
||||
}
|
||||
_ => self.create_trivial_radix(
|
||||
0,
|
||||
ct.as_ref().d_blocks.lwe_ciphertext_count().0,
|
||||
stream,
|
||||
streams,
|
||||
),
|
||||
};
|
||||
return CudaBooleanBlock::from_cuda_radix_ciphertext(result.into_inner());
|
||||
@@ -274,12 +275,12 @@ impl CudaServerKey {
|
||||
// Scalar is smaller than the bounds, so ciphertext is bigger
|
||||
let result: T = match op {
|
||||
ComparisonType::GT | ComparisonType::GE => {
|
||||
self.create_trivial_radix(1, num_blocks, stream)
|
||||
self.create_trivial_radix(1, num_blocks, streams)
|
||||
}
|
||||
_ => self.create_trivial_radix(
|
||||
0,
|
||||
ct.as_ref().d_blocks.lwe_ciphertext_count().0,
|
||||
stream,
|
||||
streams,
|
||||
),
|
||||
};
|
||||
return CudaBooleanBlock::from_cuda_radix_ciphertext(result.into_inner());
|
||||
@@ -292,29 +293,29 @@ impl CudaServerKey {
|
||||
|
||||
if scalar >= Scalar::ZERO {
|
||||
self.unchecked_signed_and_unsigned_scalar_comparison_async(
|
||||
ct, scalar, op, true, stream,
|
||||
ct, scalar, op, true, streams,
|
||||
)
|
||||
} else {
|
||||
let scalar_as_trivial = self.create_trivial_radix(scalar, num_blocks, stream);
|
||||
self.unchecked_comparison_async(ct, &scalar_as_trivial, op, stream)
|
||||
let scalar_as_trivial = self.create_trivial_radix(scalar, num_blocks, streams);
|
||||
self.unchecked_comparison_async(ct, &scalar_as_trivial, op, streams)
|
||||
}
|
||||
} else {
|
||||
// Unsigned
|
||||
self.unchecked_signed_and_unsigned_scalar_comparison_async(
|
||||
ct, scalar, op, false, stream,
|
||||
ct, scalar, op, false, streams,
|
||||
)
|
||||
}
|
||||
}
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_minmax_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
op: ComparisonType,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
@@ -327,16 +328,17 @@ impl CudaServerKey {
|
||||
.iter_as::<u64>()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let d_scalar_blocks: CudaVec<u64> = CudaVec::from_cpu_async(&scalar_blocks, stream, 0);
|
||||
let d_scalar_blocks: CudaVec<u64> =
|
||||
CudaVec::from_cpu_async(&scalar_blocks, streams, streams.gpu_indexes[0]);
|
||||
|
||||
let lwe_ciphertext_count = ct.as_ref().d_blocks.lwe_ciphertext_count();
|
||||
|
||||
let mut result = ct.duplicate_async(stream);
|
||||
let mut result = ct.duplicate_async(streams);
|
||||
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
unchecked_scalar_comparison_integer_radix_kb_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut result.as_mut().d_blocks.0.d_vec,
|
||||
&ct.as_ref().d_blocks.0.d_vec,
|
||||
&d_scalar_blocks,
|
||||
@@ -366,7 +368,7 @@ impl CudaServerKey {
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
unchecked_scalar_comparison_integer_radix_kb_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut result.as_mut().d_blocks.0.d_vec,
|
||||
&ct.as_ref().d_blocks.0.d_vec,
|
||||
&d_scalar_blocks,
|
||||
@@ -401,45 +403,45 @@ impl CudaServerKey {
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_eq_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
Scalar: DecomposableInto<u64>,
|
||||
{
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::EQ, stream)
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::EQ, streams)
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_eq<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
Scalar: DecomposableInto<u64>,
|
||||
{
|
||||
let result = unsafe { self.unchecked_scalar_eq_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.unchecked_scalar_eq_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_eq_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
@@ -449,12 +451,12 @@ impl CudaServerKey {
|
||||
let lhs = if ct.block_carries_are_empty() {
|
||||
ct
|
||||
} else {
|
||||
tmp_lhs = ct.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, stream);
|
||||
tmp_lhs = ct.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, streams);
|
||||
&tmp_lhs
|
||||
};
|
||||
|
||||
self.unchecked_scalar_eq_async(lhs, scalar, stream)
|
||||
self.unchecked_scalar_eq_async(lhs, scalar, streams)
|
||||
}
|
||||
|
||||
/// Compares for equality 2 ciphertexts
|
||||
@@ -473,12 +475,12 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// let size = 4;
|
||||
///
|
||||
/// // Generate the client key and the server key:
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &streams);
|
||||
///
|
||||
/// let msg1 = 14u64;
|
||||
/// let msg2 = 97u64;
|
||||
@@ -486,12 +488,12 @@ impl CudaServerKey {
|
||||
/// let ct1 = cks.encrypt(msg1);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &stream);
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &streams);
|
||||
///
|
||||
/// let d_ct_res = sks.scalar_eq(&d_ct1, msg2, &stream);
|
||||
/// let d_ct_res = sks.scalar_eq(&d_ct1, msg2, &streams);
|
||||
///
|
||||
/// // Copy the result back to CPU
|
||||
/// let ct_res = d_ct_res.to_boolean_block(&stream);
|
||||
/// let ct_res = d_ct_res.to_boolean_block(&streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec_result = cks.decrypt_bool(&ct_res);
|
||||
@@ -501,26 +503,26 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
Scalar: DecomposableInto<u64>,
|
||||
{
|
||||
let result = unsafe { self.scalar_eq_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.scalar_eq_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_ne_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
@@ -530,12 +532,12 @@ impl CudaServerKey {
|
||||
let lhs = if ct.block_carries_are_empty() {
|
||||
ct
|
||||
} else {
|
||||
tmp_lhs = ct.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, stream);
|
||||
tmp_lhs = ct.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, streams);
|
||||
&tmp_lhs
|
||||
};
|
||||
|
||||
self.unchecked_scalar_ne_async(lhs, scalar, stream)
|
||||
self.unchecked_scalar_ne_async(lhs, scalar, streams)
|
||||
}
|
||||
|
||||
/// Compares for equality 2 ciphertexts
|
||||
@@ -554,12 +556,12 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// let size = 4;
|
||||
///
|
||||
/// // Generate the client key and the server key:
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &streams);
|
||||
///
|
||||
/// let msg1 = 14u64;
|
||||
/// let msg2 = 97u64;
|
||||
@@ -567,12 +569,12 @@ impl CudaServerKey {
|
||||
/// let ct1 = cks.encrypt(msg1);
|
||||
///
|
||||
/// // Copy to GPU
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &stream);
|
||||
/// let mut d_ct1 = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct1, &streams);
|
||||
///
|
||||
/// let d_ct_res = sks.scalar_ne(&d_ct1, msg2, &stream);
|
||||
/// let d_ct_res = sks.scalar_ne(&d_ct1, msg2, &streams);
|
||||
///
|
||||
/// // Copy the result back to CPU
|
||||
/// let ct_res = d_ct_res.to_boolean_block(&stream);
|
||||
/// let ct_res = d_ct_res.to_boolean_block(&streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec_result = cks.decrypt_bool(&ct_res);
|
||||
@@ -582,185 +584,185 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.scalar_ne_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.scalar_ne_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_ne_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
Scalar: DecomposableInto<u64>,
|
||||
{
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::NE, stream)
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::NE, streams)
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_ne<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
Scalar: DecomposableInto<u64>,
|
||||
{
|
||||
let result = unsafe { self.unchecked_scalar_ne_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.unchecked_scalar_ne_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_gt_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GT, stream)
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GT, streams)
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_gt<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.unchecked_scalar_gt_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.unchecked_scalar_gt_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_ge_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GE, stream)
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GE, streams)
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_ge<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.unchecked_scalar_ge_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.unchecked_scalar_ge_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_lt_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LT, stream)
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LT, streams)
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_lt<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.unchecked_scalar_lt_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.unchecked_scalar_lt_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_le_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LE, stream)
|
||||
self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LE, streams)
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_le<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.unchecked_scalar_le_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.unchecked_scalar_le_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_gt_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
@@ -770,38 +772,38 @@ impl CudaServerKey {
|
||||
let lhs = if ct.block_carries_are_empty() {
|
||||
ct
|
||||
} else {
|
||||
tmp_lhs = ct.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, stream);
|
||||
tmp_lhs = ct.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, streams);
|
||||
&tmp_lhs
|
||||
};
|
||||
|
||||
self.unchecked_scalar_gt_async(lhs, scalar, stream)
|
||||
self.unchecked_scalar_gt_async(lhs, scalar, streams)
|
||||
}
|
||||
|
||||
pub fn scalar_gt<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.scalar_gt_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.scalar_gt_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_ge_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
@@ -811,38 +813,38 @@ impl CudaServerKey {
|
||||
let lhs = if ct.block_carries_are_empty() {
|
||||
ct
|
||||
} else {
|
||||
tmp_lhs = ct.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, stream);
|
||||
tmp_lhs = ct.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, streams);
|
||||
&tmp_lhs
|
||||
};
|
||||
|
||||
self.unchecked_scalar_ge_async(lhs, scalar, stream)
|
||||
self.unchecked_scalar_ge_async(lhs, scalar, streams)
|
||||
}
|
||||
|
||||
pub fn scalar_ge<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.scalar_ge_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.scalar_ge_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_lt_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
@@ -852,37 +854,37 @@ impl CudaServerKey {
|
||||
let lhs = if ct.block_carries_are_empty() {
|
||||
ct
|
||||
} else {
|
||||
tmp_lhs = ct.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, stream);
|
||||
tmp_lhs = ct.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, streams);
|
||||
&tmp_lhs
|
||||
};
|
||||
|
||||
self.unchecked_scalar_lt_async(lhs, scalar, stream)
|
||||
self.unchecked_scalar_lt_async(lhs, scalar, streams)
|
||||
}
|
||||
|
||||
pub fn scalar_lt<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.scalar_lt_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.scalar_lt_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_le_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
@@ -892,92 +894,102 @@ impl CudaServerKey {
|
||||
let lhs = if ct.block_carries_are_empty() {
|
||||
ct
|
||||
} else {
|
||||
tmp_lhs = ct.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, stream);
|
||||
tmp_lhs = ct.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, streams);
|
||||
&tmp_lhs
|
||||
};
|
||||
|
||||
self.unchecked_scalar_le_async(lhs, scalar, stream)
|
||||
self.unchecked_scalar_le_async(lhs, scalar, streams)
|
||||
}
|
||||
|
||||
pub fn scalar_le<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaBooleanBlock
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.scalar_le_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.scalar_le_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_max_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
self.unchecked_scalar_minmax_async(ct, scalar, ComparisonType::MAX, stream)
|
||||
self.unchecked_scalar_minmax_async(ct, scalar, ComparisonType::MAX, streams)
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_max<Scalar, T>(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn unchecked_scalar_max<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.unchecked_scalar_max_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.unchecked_scalar_max_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_min_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
self.unchecked_scalar_minmax_async(ct, scalar, ComparisonType::MIN, stream)
|
||||
self.unchecked_scalar_minmax_async(ct, scalar, ComparisonType::MIN, streams)
|
||||
}
|
||||
|
||||
pub fn unchecked_scalar_min<Scalar, T>(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn unchecked_scalar_min<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.unchecked_scalar_min_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.unchecked_scalar_min_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_max_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
@@ -987,33 +999,33 @@ impl CudaServerKey {
|
||||
let lhs = if ct.block_carries_are_empty() {
|
||||
ct
|
||||
} else {
|
||||
tmp_lhs = ct.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, stream);
|
||||
tmp_lhs = ct.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, streams);
|
||||
&tmp_lhs
|
||||
};
|
||||
|
||||
self.unchecked_scalar_max_async(lhs, scalar, stream)
|
||||
self.unchecked_scalar_max_async(lhs, scalar, streams)
|
||||
}
|
||||
|
||||
pub fn scalar_max<Scalar, T>(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn scalar_max<Scalar, T>(&self, ct: &T, scalar: Scalar, streams: &CudaStreams) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.scalar_max_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.scalar_max_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_min_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
@@ -1023,21 +1035,21 @@ impl CudaServerKey {
|
||||
let lhs = if ct.block_carries_are_empty() {
|
||||
ct
|
||||
} else {
|
||||
tmp_lhs = ct.duplicate_async(stream);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, stream);
|
||||
tmp_lhs = ct.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_lhs, streams);
|
||||
&tmp_lhs
|
||||
};
|
||||
|
||||
self.unchecked_scalar_min_async(lhs, scalar, stream)
|
||||
self.unchecked_scalar_min_async(lhs, scalar, streams)
|
||||
}
|
||||
|
||||
pub fn scalar_min<Scalar, T>(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn scalar_min<Scalar, T>(&self, ct: &T, scalar: Scalar, streams: &CudaStreams) -> T
|
||||
where
|
||||
Scalar: DecomposableInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let result = unsafe { self.scalar_min_async(ct, scalar, stream) };
|
||||
stream.synchronize();
|
||||
let result = unsafe { self.scalar_min_async(ct, scalar, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,50 +26,59 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg = 30;
|
||||
/// let scalar = 3;
|
||||
///
|
||||
/// let ct = cks.encrypt(msg);
|
||||
/// let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut stream);
|
||||
/// let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a scalar multiplication:
|
||||
/// let d_ct_res = sks.unchecked_scalar_mul(&d_ct, scalar, &mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let d_ct_res = sks.unchecked_scalar_mul(&d_ct, scalar, &mut streams);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// let clear: u64 = cks.decrypt(&ct_res);
|
||||
/// assert_eq!(scalar * msg, clear);
|
||||
/// ```
|
||||
pub fn unchecked_scalar_mul<Scalar, T>(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn unchecked_scalar_mul<Scalar, T>(
|
||||
&self,
|
||||
ct: &T,
|
||||
scalar: Scalar,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
Scalar: ScalarMultiplier + DecomposableInto<u8> + CastInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.unchecked_scalar_mul_assign(&mut result, scalar, stream);
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.unchecked_scalar_mul_assign(&mut result, scalar, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unchecked_scalar_mul_assign_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: ScalarMultiplier + DecomposableInto<u8> + CastInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
if scalar == Scalar::ZERO {
|
||||
ct.as_mut().d_blocks.0.d_vec.memset_async(0, stream, 0);
|
||||
ct.as_mut()
|
||||
.d_blocks
|
||||
.0
|
||||
.d_vec
|
||||
.memset_async(0, streams, streams.gpu_indexes[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -80,7 +89,7 @@ impl CudaServerKey {
|
||||
if scalar.is_power_of_two() {
|
||||
// Shifting cost one bivariate PBS so its always faster
|
||||
// than multiplying
|
||||
self.unchecked_scalar_left_shift_assign_async(ct, scalar.ilog2() as u64, stream);
|
||||
self.unchecked_scalar_left_shift_assign_async(ct, scalar.ilog2() as u64, streams);
|
||||
return;
|
||||
}
|
||||
let ciphertext = ct.as_mut();
|
||||
@@ -104,7 +113,7 @@ impl CudaServerKey {
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
unchecked_scalar_mul_integer_radix_kb_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut ct.as_mut().d_blocks.0.d_vec,
|
||||
decomposed_scalar.as_slice(),
|
||||
has_at_least_one_set.as_slice(),
|
||||
@@ -129,7 +138,7 @@ impl CudaServerKey {
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
unchecked_scalar_mul_integer_radix_kb_async(
|
||||
stream,
|
||||
streams,
|
||||
&mut ct.as_mut().d_blocks.0.d_vec,
|
||||
decomposed_scalar.as_slice(),
|
||||
has_at_least_one_set.as_slice(),
|
||||
@@ -161,15 +170,15 @@ impl CudaServerKey {
|
||||
&self,
|
||||
ct: &mut T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: ScalarMultiplier + DecomposableInto<u8> + CastInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
unsafe {
|
||||
self.unchecked_scalar_mul_assign_async(ct, scalar, stream);
|
||||
self.unchecked_scalar_mul_assign_async(ct, scalar, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
/// Computes homomorphically a multiplication between a scalar and a ciphertext.
|
||||
@@ -189,63 +198,63 @@ impl CudaServerKey {
|
||||
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let mut stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
/// let mut streams = CudaStreams::new_single_gpu(gpu_index);
|
||||
///
|
||||
/// // We have 4 * 2 = 8 bits of message
|
||||
/// let size = 4;
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream);
|
||||
/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut streams);
|
||||
///
|
||||
/// let msg = 30;
|
||||
/// let scalar = 3;
|
||||
///
|
||||
/// let ct = cks.encrypt(msg);
|
||||
/// let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut stream);
|
||||
/// let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &mut streams);
|
||||
///
|
||||
/// // Compute homomorphically a scalar multiplication:
|
||||
/// let d_ct_res = sks.scalar_mul(&d_ct, scalar, &mut stream);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream);
|
||||
/// let d_ct_res = sks.scalar_mul(&d_ct, scalar, &mut streams);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&mut streams);
|
||||
///
|
||||
/// let clear: u64 = cks.decrypt(&ct_res);
|
||||
/// assert_eq!(scalar * msg, clear);
|
||||
/// ```
|
||||
pub fn scalar_mul<Scalar, T>(&self, ct: &T, scalar: Scalar, stream: &CudaStreams) -> T
|
||||
pub fn scalar_mul<Scalar, T>(&self, ct: &T, scalar: Scalar, streams: &CudaStreams) -> T
|
||||
where
|
||||
Scalar: ScalarMultiplier + DecomposableInto<u8> + CastInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut result = unsafe { ct.duplicate_async(stream) };
|
||||
self.scalar_mul_assign(&mut result, scalar, stream);
|
||||
let mut result = unsafe { ct.duplicate_async(streams) };
|
||||
self.scalar_mul_assign(&mut result, scalar, streams);
|
||||
result
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn scalar_mul_assign_async<Scalar, T>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
scalar: Scalar,
|
||||
stream: &CudaStreams,
|
||||
streams: &CudaStreams,
|
||||
) where
|
||||
Scalar: ScalarMultiplier + DecomposableInto<u8> + CastInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
if !ct.block_carries_are_empty() {
|
||||
self.full_propagate_assign_async(ct, stream);
|
||||
self.full_propagate_assign_async(ct, streams);
|
||||
};
|
||||
|
||||
self.unchecked_scalar_mul_assign_async(ct, scalar, stream);
|
||||
self.unchecked_scalar_mul_assign_async(ct, scalar, streams);
|
||||
}
|
||||
|
||||
pub fn scalar_mul_assign<Scalar, T>(&self, ct: &mut T, scalar: Scalar, stream: &CudaStreams)
|
||||
pub fn scalar_mul_assign<Scalar, T>(&self, ct: &mut T, scalar: Scalar, streams: &CudaStreams)
|
||||
where
|
||||
Scalar: ScalarMultiplier + DecomposableInto<u8> + CastInto<u64>,
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
unsafe {
|
||||
self.scalar_mul_assign_async(ct, scalar, stream);
|
||||
self.scalar_mul_assign_async(ct, scalar, streams);
|
||||
}
|
||||
stream.synchronize();
|
||||
streams.synchronize();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user