diff --git a/.github/workflows/aws_tfhe_gpu_tests.yml b/.github/workflows/aws_tfhe_gpu_tests.yml index d6d6d4c54..0ac24196a 100644 --- a/.github/workflows/aws_tfhe_gpu_tests.yml +++ b/.github/workflows/aws_tfhe_gpu_tests.yml @@ -1,14 +1,101 @@ # Compile and test Concrete-cuda on an AWS instance name: Concrete Cuda - Full tests +env: + CARGO_TERM_COLOR: always + ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + RUSTFLAGS: "-C target-cpu=native" + on: + # Allows you to run this workflow manually from the Actions tab as an alternative. workflow_dispatch: + # All the inputs are provided by Slab + inputs: + instance_id: + description: "AWS instance ID" + type: string + instance_image_id: + description: "AWS instance AMI ID" + type: string + instance_type: + description: "AWS instance product type" + type: string + runner_name: + description: "Action runner name" + type: string + request_id: + description: 'Slab request ID' + type: string + fork_repo: + description: 'Name of forked repo as user/repo' + type: string + fork_git_sha: + description: 'Git SHA to checkout from fork' + type: string jobs: - placeholder: - name: Placeholder - runs-on: ubuntu-latest + run-cuda-tests-linux: + concurrency: + group: tfhe_cuda_backend_test-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + name: Test code in EC2 + runs-on: ${{ inputs.runner_name }} + strategy: + fail-fast: false + # explicit include-based build matrix, of known valid options + matrix: + include: + - os: ubuntu-22.04 + cuda: "12.2" + gcc: 9 + env: + CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} steps: - - run: | - echo "Hello this is a Placeholder for GPU Workflow" + # Step used for log purpose. 
+ - name: Instance configuration used + run: | + echo "ID: ${{ inputs.instance_id }}" + echo "AMI: ${{ inputs.instance_image_id }}" + echo "Type: ${{ inputs.instance_type }}" + echo "Request ID: ${{ inputs.request_id }}" + echo "Fork repo: ${{ inputs.fork_repo }}" + echo "Fork git sha: ${{ inputs.fork_git_sha }}" + + - name: Checkout tfhe-rs + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 + with: + repository: ${{ inputs.fork_repo }} + ref: ${{ inputs.fork_git_sha }} + + - name: Set up home + run: | + echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" + + - name: Install latest stable + uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af + with: + toolchain: stable + default: true + + - name: Export CUDA variables + if: ${{ !cancelled() }} + run: | + echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}" + echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" + echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}" + echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}" + + # Specify the correct host compilers + - name: Export gcc and g++ variables + if: ${{ !cancelled() }} + run: | + echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" + + - name: Run all tests + run: | + make clippy_gpu + make test_gpu diff --git a/.github/workflows/integer_gpu_benchmark.yml b/.github/workflows/integer_gpu_benchmark.yml new file mode 100644 index 000000000..a45fcc5cf --- /dev/null +++ b/.github/workflows/integer_gpu_benchmark.yml @@ -0,0 +1,157 @@ +# Run integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot. 
+name: Integer GPU benchmarks + +on: + workflow_dispatch: + inputs: + instance_id: + description: "Instance ID" + type: string + instance_image_id: + description: "Instance AMI ID" + type: string + instance_type: + description: "Instance product type" + type: string + runner_name: + description: "Action runner name" + type: string + request_id: + description: "Slab request ID" + type: string + +env: + CARGO_TERM_COLOR: always + RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json + PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv + ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + +jobs: + run-integer-benchmarks: + name: Execute integer benchmarks in EC2 + runs-on: ${{ github.event.inputs.runner_name }} + if: ${{ !cancelled() }} + strategy: + fail-fast: false + # explicit include-based build matrix, of known valid options + matrix: + include: + - os: ubuntu-22.04 + cuda: "12.2" + gcc: 9 + env: + CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} + steps: + - name: Instance configuration used + run: | + echo "IDs: ${{ inputs.instance_id }}" + echo "AMI: ${{ inputs.instance_image_id }}" + echo "Type: ${{ inputs.instance_type }}" + echo "Request ID: ${{ inputs.request_id }}" + + - name: Get benchmark date + run: | + echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}" + + - name: Checkout tfhe-rs repo with tags + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + with: + fetch-depth: 0 + + - name: Set up home + # "Install rust" step require root user to have a HOME directory which is not set. 
+ run: | + echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" + + - name: Install rust + uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af + with: + toolchain: nightly + override: true + + - name: Export CUDA variables + if: ${{ !cancelled() }} + run: | + echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}" + echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" + echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}" + echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}" + + # Specify the correct host compilers + - name: Export gcc and g++ variables + if: ${{ !cancelled() }} + run: | + echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" + + - name: Run benchmarks with AVX512 + run: | + make AVX512_SUPPORT=ON FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_gpu + + - name: Parse benchmarks to csv + run: | + make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \ + parse_integer_benches + + - name: Upload csv results artifact + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce + with: + name: ${{ github.sha }}_csv_integer + path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} + + - name: Parse results + run: | + COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" + COMMIT_HASH="$(git describe --tags --dirty)" + python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ + --database tfhe_rs \ + --hardware ${{ inputs.instance_type }} \ + --backend gpu \ + --project-version "${COMMIT_HASH}" \ + --branch ${{ github.ref_name }} \ + --commit-date "${COMMIT_DATE}" \ + --bench-date "${{ env.BENCH_DATE }}" \ + --walk-subdirs \ + --name-suffix avx512 \ + --throughput + + - name: Upload parsed results artifact + uses: 
actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce + with: + name: ${{ github.sha }}_integer + path: ${{ env.RESULTS_FILENAME }} + + - name: Checkout Slab repo + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + with: + repository: zama-ai/slab + path: slab + token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }} + + - name: Send data to Slab + shell: bash + run: | + echo "Computing HMac on results file" + SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')" + echo "Sending results to Slab..." + curl -v -k \ + -H "Content-Type: application/json" \ + -H "X-Slab-Repository: ${{ github.repository }}" \ + -H "X-Slab-Command: store_data_v2" \ + -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \ + -d @${{ env.RESULTS_FILENAME }} \ + ${{ secrets.SLAB_URL }} + + - name: Slack Notification + if: ${{ failure() }} + continue-on-error: true + uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8 + env: + SLACK_COLOR: ${{ job.status }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_MESSAGE: "Integer GPU benchmarks failed. (${{ env.ACTION_RUN_URL }})" + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} diff --git a/.github/workflows/integer_gpu_full_benchmark.yml b/.github/workflows/integer_gpu_full_benchmark.yml new file mode 100644 index 000000000..05b495038 --- /dev/null +++ b/.github/workflows/integer_gpu_full_benchmark.yml @@ -0,0 +1,154 @@ +# Run all integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot. 
+name: Integer GPU full benchmarks + +on: + workflow_dispatch: + inputs: + instance_id: + description: "Instance ID" + type: string + instance_image_id: + description: "Instance AMI ID" + type: string + instance_type: + description: "Instance product type" + type: string + runner_name: + description: "Action runner name" + type: string + request_id: + description: "Slab request ID" + type: string + +env: + CARGO_TERM_COLOR: always + RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json + ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + +jobs: + integer-benchmarks: + name: Execute integer benchmarks for all operations flavor + runs-on: ${{ github.event.inputs.runner_name }} + if: ${{ !cancelled() }} + continue-on-error: true + strategy: + fail-fast: false + max-parallel: 1 + matrix: + command: [ integer, integer_multi_bit] + op_flavor: [ default, unchecked ] + # explicit include-based build matrix, of known valid options + include: + - os: ubuntu-22.04 + cuda: "12.2" + gcc: 9 + env: + CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} + steps: + - name: Instance configuration used + run: | + echo "IDs: ${{ inputs.instance_id }}" + echo "AMI: ${{ inputs.instance_image_id }}" + echo "Type: ${{ inputs.instance_type }}" + echo "Request ID: ${{ inputs.request_id }}" + + - name: Checkout tfhe-rs repo with tags + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + with: + fetch-depth: 0 + + - name: Get benchmark details + run: | + echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}" + echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}" + echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}" + + - name: Set up home + # "Install rust" step require root user to have a HOME directory which is not set. 
+ run: | + echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" + + - name: Install rust + uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af + with: + toolchain: nightly + override: true + + - name: Export CUDA variables + if: ${{ !cancelled() }} + run: | + echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}" + echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" + echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}" + echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}" + + # Specify the correct host compilers + - name: Export gcc and g++ variables + if: ${{ !cancelled() }} + run: | + echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" + + - name: Checkout Slab repo + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + with: + repository: zama-ai/slab + path: slab + token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }} + + - name: Run benchmarks with AVX512 + run: | + make AVX512_SUPPORT=ON BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu + + - name: Parse results + run: | + python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ + --database tfhe_rs \ + --hardware ${{ inputs.instance_type }} \ + --backend gpu \ + --project-version "${{ env.COMMIT_HASH }}" \ + --branch ${{ github.ref_name }} \ + --commit-date "${{ env.COMMIT_DATE }}" \ + --bench-date "${{ env.BENCH_DATE }}" \ + --walk-subdirs \ + --name-suffix avx512 \ + --throughput + + - name: Upload parsed results artifact + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce + with: + name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }} + path: ${{ env.RESULTS_FILENAME }} + + - name: Send data to Slab + shell: bash + run: | + echo "Computing HMac on results file" + 
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')" + echo "Sending results to Slab..." + curl -v -k \ + -H "Content-Type: application/json" \ + -H "X-Slab-Repository: ${{ github.repository }}" \ + -H "X-Slab-Command: store_data_v2" \ + -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \ + -d @${{ env.RESULTS_FILENAME }} \ + ${{ secrets.SLAB_URL }} + + slack-notification: + name: Slack Notification + runs-on: ${{ github.event.inputs.runner_name }} + if: ${{ failure() }} + needs: integer-benchmarks + steps: + - name: Notify + continue-on-error: true + uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8 + env: + SLACK_COLOR: ${{ job.status }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_MESSAGE: "Integer GPU full benchmarks failed. (${{ env.ACTION_RUN_URL }})" + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} diff --git a/.github/workflows/integer_multi_bit_gpu_benchmark.yml b/.github/workflows/integer_multi_bit_gpu_benchmark.yml new file mode 100644 index 000000000..34c6d317a --- /dev/null +++ b/.github/workflows/integer_multi_bit_gpu_benchmark.yml @@ -0,0 +1,158 @@ +# Run integer benchmarks with multi-bit cryptographic parameters on an AWS instance and return parsed results to Slab CI bot. 
+name: Integer Multi-bit benchmarks + +on: + workflow_dispatch: + inputs: + instance_id: + description: "Instance ID" + type: string + instance_image_id: + description: "Instance AMI ID" + type: string + instance_type: + description: "Instance product type" + type: string + runner_name: + description: "Action runner name" + type: string + request_id: + description: "Slab request ID" + type: string + +env: + CARGO_TERM_COLOR: always + RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json + PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv + ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + +jobs: + run-integer-benchmarks: + name: Execute integer multi-bit benchmarks in EC2 + runs-on: ${{ github.event.inputs.runner_name }} + if: ${{ !cancelled() }} + strategy: + fail-fast: false + # explicit include-based build matrix, of known valid options + matrix: + include: + - os: ubuntu-22.04 + cuda: "11.8" + cuda_arch: "70" + gcc: 9 + env: + CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} + steps: + - name: Instance configuration used + run: | + echo "IDs: ${{ inputs.instance_id }}" + echo "AMI: ${{ inputs.instance_image_id }}" + echo "Type: ${{ inputs.instance_type }}" + echo "Request ID: ${{ inputs.request_id }}" + + - name: Get benchmark date + run: | + echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}" + + - name: Checkout tfhe-rs repo with tags + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + with: + fetch-depth: 0 + + - name: Set up home + # "Install rust" step require root user to have a HOME directory which is not set. 
+ run: | + echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" + + - name: Install rust + uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af + with: + toolchain: nightly + override: true + + - name: Export CUDA variables + if: ${{ !cancelled() }} + run: | + echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}" + echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}" + echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}" + echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}" + + # Specify the correct host compilers + - name: Export gcc and g++ variables + if: ${{ !cancelled() }} + run: | + echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}" + echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}" + + - name: Run multi-bit benchmarks with AVX512 + run: | + make AVX512_SUPPORT=ON FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_multi_bit_gpu + + - name: Parse benchmarks to csv + run: | + make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \ + parse_integer_benches + + - name: Upload csv results artifact + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce + with: + name: ${{ github.sha }}_csv_integer + path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} + + - name: Parse results + run: | + COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" + COMMIT_HASH="$(git describe --tags --dirty)" + python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ + --database tfhe_rs \ + --hardware ${{ inputs.instance_type }} \ + --backend gpu \ + --project-version "${COMMIT_HASH}" \ + --branch ${{ github.ref_name }} \ + --commit-date "${COMMIT_DATE}" \ + --bench-date "${{ env.BENCH_DATE }}" \ + --walk-subdirs \ + --name-suffix avx512 \ + --throughput + + - name: Upload parsed results artifact + uses: 
actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce + with: + name: ${{ github.sha }}_integer + path: ${{ env.RESULTS_FILENAME }} + + - name: Checkout Slab repo + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + with: + repository: zama-ai/slab + path: slab + token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }} + + - name: Send data to Slab + shell: bash + run: | + echo "Computing HMac on results file" + SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')" + echo "Sending results to Slab..." + curl -v -k \ + -H "Content-Type: application/json" \ + -H "X-Slab-Repository: ${{ github.repository }}" \ + -H "X-Slab-Command: store_data_v2" \ + -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \ + -d @${{ env.RESULTS_FILENAME }} \ + ${{ secrets.SLAB_URL }} + + - name: Slack Notification + if: ${{ failure() }} + continue-on-error: true + uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8 + env: + SLACK_COLOR: ${{ job.status }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_MESSAGE: "Integer GPU benchmarks failed. 
(${{ env.ACTION_RUN_URL }})" + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} diff --git a/.github/workflows/start_benchmarks.yml b/.github/workflows/start_benchmarks.yml index 0c9129356..aeb8293e6 100644 --- a/.github/workflows/start_benchmarks.yml +++ b/.github/workflows/start_benchmarks.yml @@ -49,6 +49,7 @@ jobs: command: [ boolean_bench, shortint_bench, integer_bench, integer_multi_bit_bench, signed_integer_bench, signed_integer_multi_bit_bench, + integer_gpu_bench, integer_multi_bit_gpu_bench, pbs_bench, wasm_client_bench ] runs-on: ubuntu-latest steps: diff --git a/.github/workflows/start_full_benchmarks.yml b/.github/workflows/start_full_benchmarks.yml index d49582233..0aea893d9 100644 --- a/.github/workflows/start_full_benchmarks.yml +++ b/.github/workflows/start_full_benchmarks.yml @@ -24,8 +24,8 @@ jobs: if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }} strategy: matrix: - command: [ boolean_bench, shortint_full_bench, integer_full_bench, - signed_integer_full_bench, pbs_bench, wasm_client_bench ] + command: [ boolean_bench, shortint_full_bench, integer_full_bench, signed_integer_full_bench, integer_gpu_full_bench, + pbs_bench, wasm_client_bench ] runs-on: ubuntu-latest steps: - name: Checkout tfhe-rs diff --git a/.github/workflows/trigger_aws_tests_on_pr.yml b/.github/workflows/trigger_aws_tests_on_pr.yml index 0bb487415..3f2e5a586 100644 --- a/.github/workflows/trigger_aws_tests_on_pr.yml +++ b/.github/workflows/trigger_aws_tests_on_pr.yml @@ -29,6 +29,7 @@ jobs: allow-repeats: true message: | @slab-ci cpu_fast_test + @slab-ci gpu_test - name: Add approved label uses: actions-ecosystem/action-add-labels@18f1af5e3544586314bbe15c0273249c770b2daf diff --git a/Makefile b/Makefile index bbae463ef..9e6343d0c 100644 --- a/Makefile +++ b/Makefile @@ -53,6 +53,10 @@ endif REGEX_STRING?='' REGEX_PATTERN?='' +# tfhe-cuda-backend 
+TFHECUDA_SRC="backends/tfhe-cuda-backend/implementation" +TFHECUDA_BUILD=$(TFHECUDA_SRC)/build + # Exclude these files from coverage reports define COVERAGE_EXCLUDED_FILES --exclude-files apps/trivium/src/trivium/* \ @@ -137,10 +141,21 @@ check_linelint_installed: fmt: install_rs_check_toolchain cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt +.PHONY: fmt_gpu # Format rust and cuda code +fmt_gpu: install_rs_check_toolchain + cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt + cd backends/tfhe-cuda-backend/implementation/ && ./format_tfhe_cuda_backend.sh + .PHONY: check_fmt # Check rust code format check_fmt: install_rs_check_toolchain cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check +.PHONY: clippy_gpu # Run clippy lints on the gpu backend +clippy_gpu: install_rs_check_toolchain + RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \ + --features=$(TARGET_ARCH_FEATURE),integer,shortint,gpu \ + -p tfhe -- --no-deps -D warnings + .PHONY: fix_newline # Fix newline at end of file issues to be UNIX compliant fix_newline: check_linelint_installed linelint -a . 
@@ -333,6 +348,23 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst -p $(TFHE_SPEC) -- core_crypto::; \ fi +.PHONY: test_gpu # Run the tests of the core_crypto and integer modules on the gpu backend +test_gpu: test_core_crypto_gpu test_integer_gpu + +.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend +test_core_crypto_gpu: install_rs_build_toolchain install_rs_check_toolchain + RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \ + --features=$(TARGET_ARCH_FEATURE),integer,gpu -p tfhe -- core_crypto::gpu:: + RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \ + --features=$(TARGET_ARCH_FEATURE),integer,gpu -p tfhe -- core_crypto::gpu:: + +.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend +test_integer_gpu: install_rs_build_toolchain install_rs_check_toolchain + RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \ + --features=$(TARGET_ARCH_FEATURE),integer,gpu -p tfhe -- integer::gpu::server_key:: + RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \ + --features=$(TARGET_ARCH_FEATURE),integer,gpu -p tfhe -- integer::gpu::server_key:: + .PHONY: test_boolean # Run the tests of the boolean module test_boolean: install_rs_build_toolchain RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \ @@ -498,7 +530,7 @@ docs: doc lint_doc: install_rs_check_toolchain RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \ cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \ - --features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer --no-deps + --features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p tfhe --no-deps .PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc lint_docs: lint_doc 
@@ -577,6 +609,20 @@ bench_integer: install_rs_check_toolchain --bench integer-bench \ --features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- +.PHONY: bench_signed_integer # Run benchmarks for signed integer +bench_signed_integer: install_rs_check_toolchain + RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \ + cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ + --bench integer-signed-bench \ + --features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- + +.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend +bench_integer_gpu: install_rs_check_toolchain + RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \ + cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ + --bench integer-bench \ + --features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,$(AVX512_FEATURE) -p tfhe -- + .PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters bench_integer_multi_bit: install_rs_check_toolchain RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \ @@ -585,13 +631,6 @@ bench_integer_multi_bit: install_rs_check_toolchain --bench integer-bench \ --features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- -.PHONY: bench_signed_integer # Run benchmarks for signed integer -bench_signed_integer: install_rs_check_toolchain - RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \ - cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ - --bench integer-signed-bench \ - --features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- - .PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters bench_signed_integer_multi_bit: install_rs_check_toolchain RUSTFLAGS="$(RUSTFLAGS)" 
__TFHE_RS_BENCH_TYPE=MULTI_BIT \ @@ -600,6 +639,14 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain --bench integer-signed-bench \ --features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- +.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters +bench_integer_multi_bit_gpu: install_rs_check_toolchain + RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \ + __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \ + cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ + --bench integer-bench \ + --features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,$(AVX512_FEATURE) -p tfhe -- + .PHONY: bench_shortint # Run benchmarks for shortint bench_shortint: install_rs_check_toolchain RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \ @@ -715,9 +762,12 @@ sha256_bool: install_rs_check_toolchain --example sha256_bool \ --features=$(TARGET_ARCH_FEATURE),boolean -.PHONY: pcc # pcc stands for pre commit checks +.PHONY: pcc # pcc stands for pre commit checks (except GPU) pcc: no_tfhe_typo no_dbg_log check_fmt lint_doc clippy_all check_compile_tests +.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation +pcc_gpu: pcc clippy_gpu + .PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast fpcc: no_tfhe_typo no_dbg_log check_fmt lint_doc clippy_fast check_compile_tests diff --git a/backends/tfhe-cuda-backend/implementation/.cmake-format-config.py b/backends/tfhe-cuda-backend/implementation/.cmake-format-config.py new file mode 100644 index 000000000..f6867a8dd --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/.cmake-format-config.py @@ -0,0 +1,10 @@ +# ----------------------------- +# Options effecting formatting. 
+# ----------------------------- +with section("format"): + + # How wide to allow formatted cmake files + line_width = 120 + + # How many spaces to tab for indent + tab_size = 2 diff --git a/backends/tfhe-cuda-backend/implementation/CMakeLists.txt b/backends/tfhe-cuda-backend/implementation/CMakeLists.txt new file mode 100644 index 000000000..7a856bd25 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/CMakeLists.txt @@ -0,0 +1,89 @@ +cmake_minimum_required(VERSION 3.24 FATAL_ERROR) +project(tfhe_cuda_backend LANGUAGES CXX CUDA) + +# See if the minimum CUDA version is available. If not, only enable documentation building. +set(MINIMUM_SUPPORTED_CUDA_VERSION 10.0) +include(CheckLanguage) +# See if CUDA is available +check_language(CUDA) +# If so, enable CUDA to check the version. +if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) +endif() +# If CUDA is not available, or the minimum version is too low do not build +if(NOT CMAKE_CUDA_COMPILER) + message(FATAL_ERROR "Cuda compiler not found.") +endif() + +if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_SUPPORTED_CUDA_VERSION}) + message(FATAL_ERROR "CUDA ${MINIMUM_SUPPORTED_CUDA_VERSION} or greater is required for compilation.") +endif() +# Get CUDA compute capability +set(OUTPUTFILE ${CMAKE_CURRENT_SOURCE_DIR}/cuda_script) # No suffix required +set(CUDAFILE ${CMAKE_CURRENT_SOURCE_DIR}/check_cuda.cu) +execute_process(COMMAND nvcc -lcuda ${CUDAFILE} -o ${OUTPUTFILE}) +execute_process( + COMMAND ${OUTPUTFILE} + RESULT_VARIABLE CUDA_RETURN_CODE + OUTPUT_VARIABLE ARCH) +file(REMOVE ${OUTPUTFILE}) + +if(${CUDA_RETURN_CODE} EQUAL 0) + set(CUDA_SUCCESS "TRUE") +else() + set(CUDA_SUCCESS "FALSE") +endif() + +if(${CUDA_SUCCESS}) + message(STATUS "CUDA Architecture: ${ARCH}") + message(STATUS "CUDA Version: ${CUDA_VERSION_STRING}") + message(STATUS "CUDA Path: ${CUDA_TOOLKIT_ROOT_DIR}") + message(STATUS "CUDA Libraries: ${CUDA_LIBRARIES}") + message(STATUS "CUDA Performance Primitives: ${CUDA_npp_LIBRARY}") 
+else() + message(WARNING ${ARCH}) +endif() + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +# Add OpenMP support +find_package(OpenMP REQUIRED) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler ${OpenMP_CXX_FLAGS}") +set(CMAKE_CUDA_ARCHITECTURES native) +if(NOT CUDA_NVCC_FLAGS) + set(CUDA_NVCC_FLAGS -arch=sm_70) +endif() + +# in production, should use -arch=sm_70 --ptxas-options=-v to see register spills -lineinfo for better debugging +set(CMAKE_CUDA_FLAGS + "${CMAKE_CUDA_FLAGS} -ccbin ${CMAKE_CXX_COMPILER} -O3 \ + -std=c++17 --no-exceptions --expt-relaxed-constexpr -rdc=true \ + --use_fast_math -Xcompiler -fPIC") + +set(INCLUDE_DIR include) + +add_subdirectory(src) +target_include_directories(tfhe_cuda_backend PRIVATE ${INCLUDE_DIR}) + +# This is required for rust cargo build +install(TARGETS tfhe_cuda_backend DESTINATION .) +install(TARGETS tfhe_cuda_backend DESTINATION lib) + +# Define a function to add a lint target. +find_file(CPPLINT NAMES cpplint cpplint.exe) +if(CPPLINT) + # Add a custom target to lint all child projects. Dependencies are specified in child projects. 
+ add_custom_target(all_lint) + # Don't trigger this target on ALL_BUILD or Visual Studio 'Rebuild Solution' + set_target_properties(all_lint PROPERTIES EXCLUDE_FROM_ALL TRUE) + # set_target_properties(all_lint PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE) +endif() + +enable_testing() diff --git a/backends/tfhe-cuda-backend/implementation/CPPLINT.cfg b/backends/tfhe-cuda-backend/implementation/CPPLINT.cfg new file mode 100644 index 000000000..c50c2762a --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/CPPLINT.cfg @@ -0,0 +1,3 @@ +set noparent +linelength=240 +filter=-legal/copyright,-readability/todo,-runtime/references,-build/c++17 diff --git a/backends/tfhe-cuda-backend/implementation/README.md b/backends/tfhe-cuda-backend/implementation/README.md new file mode 100644 index 000000000..184252fc5 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/README.md @@ -0,0 +1,52 @@ +# TFHE Cuda backend + +## Introduction + +The `tfhe-cuda-backend` holds the code for GPU acceleration of Zama's variant of TFHE. +It implements CUDA/C++ functions to perform homomorphic operations on LWE ciphertexts. 
+ +It provides functions to allocate memory on the GPU, to copy data back +and forth between the CPU and the GPU, to create and destroy Cuda streams, etc.: +- `cuda_create_stream`, `cuda_destroy_stream` +- `cuda_malloc`, `cuda_check_valid_malloc` +- `cuda_memcpy_async_to_cpu`, `cuda_memcpy_async_to_gpu` +- `cuda_get_number_of_gpus` +- `cuda_synchronize_device` +The cryptographic operations it provides are: +- an amortized implementation of the TFHE programmable bootstrap: `cuda_bootstrap_amortized_lwe_ciphertext_vector_32` and `cuda_bootstrap_amortized_lwe_ciphertext_vector_64` +- a low latency implementation of the TFHE programmable bootstrap: `cuda_bootstrap_low_latency_lwe_ciphertext_vector_32` and `cuda_bootstrap_low_latency_lwe_ciphertext_vector_64` +- the keyswitch: `cuda_keyswitch_lwe_ciphertext_vector_32` and `cuda_keyswitch_lwe_ciphertext_vector_64` +- the larger precision programmable bootstrap (wop PBS, which supports up to 16 bits of message while the classical PBS only supports up to 8 bits of message) and its sub-components: `cuda_wop_pbs_64`, `cuda_extract_bits_64`, `cuda_circuit_bootstrap_64`, `cuda_cmux_tree_64`, `cuda_blind_rotation_sample_extraction_64` +- acceleration for leveled operations: `cuda_negate_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_plaintext_vector_64`, `cuda_mult_lwe_ciphertext_vector_cleartext_vector`. + +## Dependencies + +**Disclaimer**: Compilation on Windows/Mac is not supported yet. Only Nvidia GPUs are supported. 
+ +- nvidia driver - for example, if you're running Ubuntu 20.04 check this [page](https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-20-04-focal-fossa-linux) for installation +- [nvcc](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) >= 10.0 +- [gcc](https://gcc.gnu.org/) >= 8.0 - check this [page](https://gist.github.com/ax3l/9489132) for more details about nvcc/gcc compatible versions +- [cmake](https://cmake.org/) >= 3.24 + +## Build + +The Cuda project held in `tfhe-cuda-backend` can be compiled independently from TFHE-rs in the +following way: +``` +git clone git@github.com:zama-ai/tfhe-rs +cd tfhe-rs/backends/tfhe-cuda-backend/implementation +mkdir build +cd build +cmake .. +make +``` +The compute capability is detected automatically (with the first GPU information) and set accordingly. + +## Links + +- [TFHE](https://eprint.iacr.org/2018/421.pdf) + +## License + +This software is distributed under the BSD-3-Clause-Clear license. If you have any questions, +please contact us at `hello@zama.ai`. 
diff --git a/backends/tfhe-cuda-backend/implementation/check_cuda.cu b/backends/tfhe-cuda-backend/implementation/check_cuda.cu new file mode 100644 index 000000000..af56ff9a6 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/check_cuda.cu @@ -0,0 +1,22 @@ +#include + +/* Prints the -arch=sm_XY flag for GPU 0, or fails if its compute capability is below min_cc. */ int main(int argc, char **argv) { + cudaDeviceProp dP; + float min_cc = 3.0; + + int rc = cudaGetDeviceProperties(&dP, 0); + if (rc != cudaSuccess) { + cudaError_t error = cudaGetLastError(); + printf("CUDA error: %s", cudaGetErrorString(error)); + return rc; /* Failure */ + } + if ((dP.major + (dP.minor / 10.0f)) < min_cc) { /* float division: integer division would always drop the minor version */ + printf("Min Compute Capability of %2.1f required: %d.%d found\n Not " + "Building CUDA Code", + min_cc, dP.major, dP.minor); + return 1; /* Failure */ + } else { + printf("-arch=sm_%d%d", dP.major, dP.minor); + return 0; /* Success */ + } +} diff --git a/backends/tfhe-cuda-backend/implementation/format_tfhe_cuda_backend.sh b/backends/tfhe-cuda-backend/implementation/format_tfhe_cuda_backend.sh new file mode 100755 index 000000000..12719a8cf --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/format_tfhe_cuda_backend.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +find ./{include,src} -iregex '^.*\.\(cpp\|cu\|h\|cuh\)$' -print | xargs clang-format-15 -i -style='file' +cmake-format -i CMakeLists.txt -c .cmake-format-config.py + +find ./{include,src} -type f -name "CMakeLists.txt" | xargs -I % sh -c 'cmake-format -i % -c .cmake-format-config.py' diff --git a/backends/tfhe-cuda-backend/implementation/include/bootstrap.h b/backends/tfhe-cuda-backend/implementation/include/bootstrap.h new file mode 100644 index 000000000..1e326ae5f --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/include/bootstrap.h @@ -0,0 +1,118 @@ +#ifndef CUDA_BOOTSTRAP_H +#define CUDA_BOOTSTRAP_H + +#include "device.h" +#include + +enum PBS_TYPE { MULTI_BIT = 0, LOW_LAT = 1, AMORTIZED = 2 }; + +extern "C" { +void cuda_fourier_polynomial_mul(void *input1, void *input2, void *output, + 
cuda_stream_t *stream, + uint32_t polynomial_size, + uint32_t total_polynomials); + +void cuda_convert_lwe_bootstrap_key_32(void *dest, void *src, + cuda_stream_t *stream, + uint32_t input_lwe_dim, + uint32_t glwe_dim, uint32_t level_count, + uint32_t polynomial_size); + +void cuda_convert_lwe_bootstrap_key_64(void *dest, void *src, + cuda_stream_t *stream, + uint32_t input_lwe_dim, + uint32_t glwe_dim, uint32_t level_count, + uint32_t polynomial_size); + +void scratch_cuda_bootstrap_amortized_32( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count, + uint32_t max_shared_memory, bool allocate_gpu_memory); + +void scratch_cuda_bootstrap_amortized_64( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count, + uint32_t max_shared_memory, bool allocate_gpu_memory); + +void cuda_bootstrap_amortized_lwe_ciphertext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, uint32_t num_samples, + uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory); + +void cuda_bootstrap_amortized_lwe_ciphertext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, uint32_t num_samples, + uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory); + +void cleanup_cuda_bootstrap_amortized(cuda_stream_t *stream, + int8_t **pbs_buffer); + 
+void scratch_cuda_bootstrap_low_latency_32( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory, + bool allocate_gpu_memory); + +void scratch_cuda_bootstrap_low_latency_64( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory, + bool allocate_gpu_memory); + +void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, uint32_t num_samples, + uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory); + +void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, uint32_t num_samples, + uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory); + +void cleanup_cuda_bootstrap_low_latency(cuda_stream_t *stream, + int8_t **pbs_buffer); + +uint64_t get_buffer_size_bootstrap_amortized_64( + uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory); + +uint64_t get_buffer_size_bootstrap_low_latency_64( + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory); +} + +#ifdef __CUDACC__ +__device__ 
inline int get_start_ith_ggsw(int i, uint32_t polynomial_size, + int glwe_dimension, + uint32_t level_count); + +template +__device__ T *get_ith_mask_kth_block(T *ptr, int i, int k, int level, + uint32_t polynomial_size, + int glwe_dimension, uint32_t level_count); + +template +__device__ T *get_ith_body_kth_block(T *ptr, int i, int k, int level, + uint32_t polynomial_size, + int glwe_dimension, uint32_t level_count); + +template +__device__ T *get_multi_bit_ith_lwe_gth_group_kth_block( + T *ptr, int g, int i, int k, int level, uint32_t grouping_factor, + uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t level_count); + +#endif + +#endif // CUDA_BOOTSTRAP_H diff --git a/backends/tfhe-cuda-backend/implementation/include/bootstrap_multibit.h b/backends/tfhe-cuda-backend/implementation/include/bootstrap_multibit.h new file mode 100644 index 000000000..fe1b964c5 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/include/bootstrap_multibit.h @@ -0,0 +1,45 @@ +#ifndef CUDA_MULTI_BIT_H +#define CUDA_MULTI_BIT_H + +#include + +extern "C" { +void cuda_convert_lwe_multi_bit_bootstrap_key_64( + void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim, + uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size, + uint32_t grouping_factor); + +void cuda_multi_bit_pbs_lwe_ciphertext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t grouping_factor, uint32_t base_log, uint32_t level_count, + uint32_t num_samples, uint32_t num_lut_vectors, uint32_t lwe_idx, + uint32_t max_shared_memory, uint32_t chunk_size = 0); + +void scratch_cuda_multi_bit_pbs_64( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t 
level_count, + uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count, + uint32_t max_shared_memory, bool allocate_gpu_memory, + uint32_t chunk_size = 0); + +void cleanup_cuda_multi_bit_pbs(cuda_stream_t *stream, int8_t **pbs_buffer); +} +#ifdef __CUDACC__ +__host__ uint32_t get_lwe_chunk_size(uint32_t lwe_dimension, + uint32_t level_count, + uint32_t glwe_dimension, + uint32_t num_samples); + +__host__ uint32_t get_average_lwe_chunk_size(uint32_t lwe_dimension, + uint32_t level_count, + uint32_t glwe_dimension); + +__host__ uint64_t get_max_buffer_size_multibit_bootstrap( + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t level_count, uint32_t max_input_lwe_ciphertext_count); +#endif + +#endif // CUDA_MULTI_BIT_H diff --git a/backends/tfhe-cuda-backend/implementation/include/ciphertext.h b/backends/tfhe-cuda-backend/implementation/include/ciphertext.h new file mode 100644 index 000000000..2cc41d3d1 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/include/ciphertext.h @@ -0,0 +1,18 @@ +#ifndef CUDA_CIPHERTEXT_H +#define CUDA_CIPHERTEXT_H + +#include + +extern "C" { +void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src, + void *v_stream, + uint32_t gpu_index, + uint32_t number_of_cts, + uint32_t lwe_dimension); +void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src, + void *v_stream, + uint32_t gpu_index, + uint32_t number_of_cts, + uint32_t lwe_dimension); +}; +#endif diff --git a/backends/tfhe-cuda-backend/implementation/include/device.h b/backends/tfhe-cuda-backend/implementation/include/device.h new file mode 100644 index 000000000..bfe8c64f4 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/include/device.h @@ -0,0 +1,88 @@ +#ifndef DEVICE_H +#define DEVICE_H + +#include +#include +#include +#include +#include + +#define synchronize_threads_in_block() __syncthreads() + +extern "C" { + +struct cuda_stream_t { + cudaStream_t stream; + uint32_t gpu_index; + + 
cuda_stream_t(uint32_t gpu_index) { + this->gpu_index = gpu_index; + + cudaStreamCreate(&stream); + } + + void release() { + cudaSetDevice(gpu_index); + cudaStreamDestroy(stream); + } + + void synchronize() { cudaStreamSynchronize(stream); } +}; + +cuda_stream_t *cuda_create_stream(uint32_t gpu_index); + +int cuda_destroy_stream(cuda_stream_t *stream); + +void *cuda_malloc(uint64_t size, uint32_t gpu_index); + +void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream); + +int cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index); + +int cuda_check_support_cooperative_groups(); + +int cuda_memcpy_to_cpu(void *dest, const void *src, uint64_t size); + +int cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size, + cuda_stream_t *stream); + +int cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size, + cuda_stream_t *stream); + +int cuda_memcpy_to_gpu(void *dest, void *src, uint64_t size); + +int cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size, + cuda_stream_t *stream); + +int cuda_memset_async(void *dest, uint64_t val, uint64_t size, + cuda_stream_t *stream); + +int cuda_get_number_of_gpus(); + +int cuda_synchronize_device(uint32_t gpu_index); + +int cuda_drop(void *ptr, uint32_t gpu_index); + +int cuda_drop_async(void *ptr, cuda_stream_t *stream); + +int cuda_get_max_shared_memory(uint32_t gpu_index); + +int cuda_synchronize_stream(cuda_stream_t *stream); + +#define check_cuda_error(ans) \ + { cuda_error((ans), __FILE__, __LINE__); } +inline void cuda_error(cudaError_t code, const char *file, int line, + bool abort = true) { + if (code != cudaSuccess) { + fprintf(stderr, "Cuda error: %s %s %d\n", cudaGetErrorString(code), file, + line); + if (abort) + exit(code); + } +} +} + +template +void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value, + Torus n); +#endif diff --git a/backends/tfhe-cuda-backend/implementation/include/helper_debug.cuh 
b/backends/tfhe-cuda-backend/implementation/include/helper_debug.cuh new file mode 100644 index 000000000..37555339b --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/include/helper_debug.cuh @@ -0,0 +1,100 @@ +#include "cuComplex.h" +#include "thrust/complex.h" +#include +#include +#include + +#define PRINT_VARS +#ifdef PRINT_VARS +#define PRINT_DEBUG_5(var, begin, end, step, cond) \ + _print_debug(var, #var, begin, end, step, cond, "", false) +#define PRINT_DEBUG_6(var, begin, end, step, cond, text) \ + _print_debug(var, #var, begin, end, step, cond, text, true) +#define CAT(A, B) A##B +#define PRINT_SELECT(NAME, NUM) CAT(NAME##_, NUM) +#define GET_COUNT(_1, _2, _3, _4, _5, _6, COUNT, ...) COUNT +#define VA_SIZE(...) GET_COUNT(__VA_ARGS__, 6, 5, 4, 3, 2, 1) +#define PRINT_DEBUG(...) \ + PRINT_SELECT(PRINT_DEBUG, VA_SIZE(__VA_ARGS__))(__VA_ARGS__) +#else +#define PRINT_DEBUG(...) +#endif + +template +__device__ typename std::enable_if::value, void>::type +_print_debug(T *var, const char *var_name, int start, int end, int step, + bool cond, const char *text, bool has_text) { + __syncthreads(); + if (cond) { + if (has_text) + printf("%s\n", text); + for (int i = start; i < end; i += step) { + printf("%s[%u]: %u\n", var_name, i, var[i]); + } + } + __syncthreads(); +} + +template +__device__ typename std::enable_if::value, void>::type +_print_debug(T *var, const char *var_name, int start, int end, int step, + bool cond, const char *text, bool has_text) { + __syncthreads(); + if (cond) { + if (has_text) + printf("%s\n", text); + for (int i = start; i < end; i += step) { + printf("%s[%u]: %d\n", var_name, i, var[i]); + } + } + __syncthreads(); +} + +template +__device__ typename std::enable_if::value, void>::type +_print_debug(T *var, const char *var_name, int start, int end, int step, + bool cond, const char *text, bool has_text) { + __syncthreads(); + if (cond) { + if (has_text) + printf("%s\n", text); + for (int i = start; i < end; i += step) { + 
printf("%s[%u]: %.15f\n", var_name, i, var[i]); + } + } + __syncthreads(); +} + +template +__device__ + typename std::enable_if>::value, + void>::type + _print_debug(T *var, const char *var_name, int start, int end, int step, + bool cond, const char *text, bool has_text) { + __syncthreads(); + if (cond) { + if (has_text) + printf("%s\n", text); + for (int i = start; i < end; i += step) { + printf("%s[%u]: %.15f , %.15f\n", var_name, i, var[i].real(), + var[i].imag()); + } + } + __syncthreads(); +} + +template +__device__ + typename std::enable_if::value, void>::type + _print_debug(T *var, const char *var_name, int start, int end, int step, + bool cond, const char *text, bool has_text) { + __syncthreads(); + if (cond) { + if (has_text) + printf("%s\n", text); + for (int i = start; i < end; i += step) { + printf("%s[%u]: %.15f , %.15f\n", var_name, i, var[i].x, var[i].y); + } + } + __syncthreads(); +} diff --git a/backends/tfhe-cuda-backend/implementation/include/integer.h b/backends/tfhe-cuda-backend/implementation/include/integer.h new file mode 100644 index 000000000..715483599 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/include/integer.h @@ -0,0 +1,1303 @@ +#ifndef CUDA_INTEGER_H +#define CUDA_INTEGER_H + +#include "bootstrap.h" +#include "bootstrap_multibit.h" +#include +#include +#include + +enum OUTPUT_CARRY { NONE = 0, GENERATED = 1, PROPAGATED = 2 }; +enum SHIFT_TYPE { LEFT_SHIFT = 0, RIGHT_SHIFT = 1 }; +enum LUT_TYPE { OPERATOR = 0, MAXVALUE = 1, ISNONZERO = 2, BLOCKSLEN = 3 }; +enum BITOP_TYPE { + BITAND = 0, + BITOR = 1, + BITXOR = 2, + BITNOT = 3, + SCALAR_BITAND = 4, + SCALAR_BITOR = 5, + SCALAR_BITXOR = 6, +}; + +enum COMPARISON_TYPE { + EQ = 0, + NE = 1, + GT = 2, + GE = 3, + LT = 4, + LE = 5, + MAX = 6, + MIN = 7, +}; +enum IS_RELATIONSHIP { IS_INFERIOR = 0, IS_EQUAL = 1, IS_SUPERIOR = 2 }; + +/* + * generate bivariate accumulator for device pointer + * v_stream - cuda stream + * acc - device pointer for bivariate accumulator + * 
... + * f - wrapping function with two Torus inputs + */ +template +void generate_device_accumulator_bivariate( + cuda_stream_t *stream, Torus *acc_bivariate, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t message_modulus, uint32_t carry_modulus, + std::function f); + +/* + * generate univariate accumulator for device pointer + * v_stream - cuda stream + * acc - device pointer for univariate accumulator + * ... + * f - evaluating function with one Torus input + */ +template +void generate_device_accumulator(cuda_stream_t *stream, Torus *acc, + uint32_t glwe_dimension, + uint32_t polynomial_size, + uint32_t message_modulus, + uint32_t carry_modulus, + std::function f); + +extern "C" { +void scratch_cuda_full_propagation_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, + uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count, + uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type, + bool allocate_gpu_memory); + +void cuda_full_propagation_64_inplace( + cuda_stream_t *stream, void *input_blocks, int8_t *mem_ptr, void *ksk, + void *bsk, uint32_t lwe_dimension, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t ks_base_log, uint32_t ks_level, + uint32_t pbs_base_log, uint32_t pbs_level, uint32_t grouping_factor, + uint32_t num_blocks); + +void cleanup_cuda_full_propagation(cuda_stream_t *stream, + int8_t **mem_ptr_void); + +void scratch_cuda_integer_mult_radix_ciphertext_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t message_modulus, + uint32_t carry_modulus, uint32_t glwe_dimension, uint32_t lwe_dimension, + uint32_t polynomial_size, uint32_t pbs_base_log, uint32_t pbs_level, + uint32_t ks_base_log, uint32_t ks_level, uint32_t grouping_factor, + uint32_t num_blocks, PBS_TYPE pbs_type, uint32_t max_shared_memory, + bool allocate_gpu_memory); + +void cuda_integer_mult_radix_ciphertext_kb_64( + cuda_stream_t *stream, 
void *radix_lwe_out, void *radix_lwe_left, + void *radix_lwe_right, void *bsk, void *ksk, int8_t *mem_ptr, + uint32_t message_modulus, uint32_t carry_modulus, uint32_t glwe_dimension, + uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t pbs_base_log, + uint32_t pbs_level, uint32_t ks_base_log, uint32_t ks_level, + uint32_t grouping_factor, uint32_t num_blocks, PBS_TYPE pbs_type, + uint32_t max_shared_memory); + +void cleanup_cuda_integer_mult(cuda_stream_t *stream, int8_t **mem_ptr_void); + +void cuda_negate_integer_radix_ciphertext_64_inplace( + cuda_stream_t *stream, void *lwe_array, uint32_t lwe_dimension, + uint32_t lwe_ciphertext_count, uint32_t message_modulus, + uint32_t carry_modulus); + +void cuda_scalar_addition_integer_radix_ciphertext_64_inplace( + cuda_stream_t *stream, void *lwe_array, void *scalar_input, + uint32_t lwe_dimension, uint32_t lwe_ciphertext_count, + uint32_t message_modulus, uint32_t carry_modulus); + +void cuda_small_scalar_multiplication_integer_radix_ciphertext_64( + cuda_stream_t *stream, void *output_lwe_array, void *input_lwe_array, + uint64_t scalar, uint32_t lwe_dimension, uint32_t lwe_ciphertext_count); + +void cuda_small_scalar_multiplication_integer_radix_ciphertext_64_inplace( + cuda_stream_t *stream, void *lwe_array, uint64_t scalar, + uint32_t lwe_dimension, uint32_t lwe_ciphertext_count); + +void scratch_cuda_integer_radix_scalar_shift_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus, + PBS_TYPE pbs_type, SHIFT_TYPE shift_type, bool allocate_gpu_memory); + +void cuda_integer_radix_scalar_shift_kb_64_inplace( + cuda_stream_t *stream, void *lwe_array, uint32_t shift, int8_t *mem_ptr, + void *bsk, void *ksk, uint32_t 
num_blocks); + +void cleanup_cuda_integer_radix_scalar_shift(cuda_stream_t *stream, + int8_t **mem_ptr_void); + +void scratch_cuda_integer_radix_comparison_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t lwe_ciphertext_count, uint32_t message_modulus, + uint32_t carry_modulus, PBS_TYPE pbs_type, COMPARISON_TYPE op_type, + bool allocate_gpu_memory); + +void cuda_comparison_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1, + void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk, + uint32_t lwe_ciphertext_count); + +void cuda_scalar_comparison_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + void *scalar_blocks, int8_t *mem_ptr, void *bsk, void *ksk, + uint32_t lwe_ciphertext_count, uint32_t num_scalar_blocks); + +void cleanup_cuda_integer_comparison(cuda_stream_t *stream, + int8_t **mem_ptr_void); + +void scratch_cuda_integer_radix_bitop_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t lwe_ciphertext_count, uint32_t message_modulus, + uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type, + bool allocate_gpu_memory); + +void cuda_bitop_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1, + void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk, + uint32_t lwe_ciphertext_count); + +void cuda_bitnot_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + int8_t *mem_ptr, void *bsk, void *ksk, uint32_t 
lwe_ciphertext_count); + +void cuda_scalar_bitop_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_input, + void *clear_blocks, uint32_t num_clear_blocks, int8_t *mem_ptr, void *bsk, + void *ksk, uint32_t lwe_ciphertext_count, BITOP_TYPE op); + +void cleanup_cuda_integer_bitop(cuda_stream_t *stream, int8_t **mem_ptr_void); + +void scratch_cuda_integer_radix_cmux_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t lwe_ciphertext_count, uint32_t message_modulus, + uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory); + +void cuda_cmux_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_condition, + void *lwe_array_true, void *lwe_array_false, int8_t *mem_ptr, void *bsk, + void *ksk, uint32_t lwe_ciphertext_count); + +void cleanup_cuda_integer_radix_cmux(cuda_stream_t *stream, + int8_t **mem_ptr_void); + +void scratch_cuda_integer_radix_scalar_rotate_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus, + PBS_TYPE pbs_type, SHIFT_TYPE shift_type, bool allocate_gpu_memory); + +void cuda_integer_radix_scalar_rotate_kb_64_inplace(cuda_stream_t *stream, + void *lwe_array, uint32_t n, + int8_t *mem_ptr, void *bsk, + void *ksk, + uint32_t num_blocks); + +void cleanup_cuda_integer_radix_scalar_rotate(cuda_stream_t *stream, + int8_t **mem_ptr_void); + +void scratch_cuda_propagate_single_carry_low_latency_kb_64_inplace( + cuda_stream_t *stream, int8_t **mem_ptr, 
uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus, + PBS_TYPE pbs_type, bool allocate_gpu_memory); + +void cuda_propagate_single_carry_low_latency_kb_64_inplace( + cuda_stream_t *stream, void *lwe_array, int8_t *mem_ptr, void *bsk, + void *ksk, uint32_t num_blocks); + +void cleanup_cuda_propagate_single_carry_low_latency(cuda_stream_t *stream, + int8_t **mem_ptr_void); +} + +struct int_radix_params { + PBS_TYPE pbs_type; + uint32_t glwe_dimension; + uint32_t polynomial_size; + uint32_t big_lwe_dimension; + uint32_t small_lwe_dimension; + uint32_t ks_level; + uint32_t ks_base_log; + uint32_t pbs_level; + uint32_t pbs_base_log; + uint32_t grouping_factor; + uint32_t message_modulus; + uint32_t carry_modulus; + + int_radix_params(){}; + + int_radix_params(PBS_TYPE pbs_type, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, + uint32_t ks_base_log, uint32_t pbs_level, + uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t message_modulus, uint32_t carry_modulus) + : pbs_type(pbs_type), glwe_dimension(glwe_dimension), + polynomial_size(polynomial_size), big_lwe_dimension(big_lwe_dimension), + small_lwe_dimension(small_lwe_dimension), ks_level(ks_level), + ks_base_log(ks_base_log), pbs_level(pbs_level), + pbs_base_log(pbs_base_log), grouping_factor(grouping_factor), + message_modulus(message_modulus), carry_modulus(carry_modulus){}; + + void print() { + printf("pbs_type: %u, glwe_dimension: %u, polynomial_size: %u, " + "big_lwe_dimension: %u, " + "small_lwe_dimension: %u, ks_level: %u, ks_base_log: %u, pbs_level: " + "%u, pbs_base_log: " + "%u, grouping_factor: %u, message_modulus: %u, carry_modulus: %u\n", + pbs_type, glwe_dimension, 
polynomial_size, big_lwe_dimension, + small_lwe_dimension, ks_level, ks_base_log, pbs_level, pbs_base_log, + grouping_factor, message_modulus, carry_modulus); + }; +}; + +// Store things needed to apply LUTs +template struct int_radix_lut { + int_radix_params params; + int8_t *pbs_buffer; + + Torus *lut_indexes; + Torus *lwe_indexes; + + Torus *tmp_lwe_before_ks; + Torus *tmp_lwe_after_ks; + + Torus *lut = nullptr; + + int_radix_lut(cuda_stream_t *stream, int_radix_params params, + uint32_t num_luts, uint32_t num_radix_blocks, + bool allocate_gpu_memory) { + this->params = params; + + Torus lut_indexes_size = num_radix_blocks * sizeof(Torus); + Torus big_size = + (params.big_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus); + Torus small_size = + (params.small_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus); + Torus lut_buffer_size = + (params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus); + + /////////////// + // PBS + if (params.pbs_type == MULTI_BIT) { + // Only 64 bits is supported + static_assert( + sizeof(Torus) == 8, + "Error (GPU multi bit PBS): only 64 bits Torus is supported"); + scratch_cuda_multi_bit_pbs_64( + stream, &pbs_buffer, params.small_lwe_dimension, + params.glwe_dimension, params.polynomial_size, params.pbs_level, + params.grouping_factor, num_radix_blocks, + cuda_get_max_shared_memory(stream->gpu_index), allocate_gpu_memory); + } else { + // Classic + // We only use low latency for classic mode + if (sizeof(Torus) == sizeof(uint32_t)) + scratch_cuda_bootstrap_low_latency_32( + stream, &pbs_buffer, params.glwe_dimension, params.polynomial_size, + params.pbs_level, num_radix_blocks, + cuda_get_max_shared_memory(stream->gpu_index), allocate_gpu_memory); + else + scratch_cuda_bootstrap_low_latency_64( + stream, &pbs_buffer, params.glwe_dimension, params.polynomial_size, + params.pbs_level, num_radix_blocks, + cuda_get_max_shared_memory(stream->gpu_index), allocate_gpu_memory); + } + + if (allocate_gpu_memory) { + // 
Allocate LUT + // LUT is used as a trivial encryption and must be initialized outside + // this contructor + lut = (Torus *)cuda_malloc_async(num_luts * lut_buffer_size, stream); + + lut_indexes = (Torus *)cuda_malloc_async(lut_indexes_size, stream); + + // lut_indexes is initialized to 0 by default + // if a different behavior is wanted, it should be rewritten later + cuda_memset_async(lut_indexes, 0, lut_indexes_size, stream); + + // lwe_(input/output)_indexes are initialized to range(num_radix_blocks) + // by default + lwe_indexes = (Torus *)cuda_malloc(num_radix_blocks * sizeof(Torus), + stream->gpu_index); + auto h_lwe_indexes = (Torus *)malloc(num_radix_blocks * sizeof(Torus)); + + for (int i = 0; i < num_radix_blocks; i++) + h_lwe_indexes[i] = i; + + cuda_memcpy_to_gpu(lwe_indexes, h_lwe_indexes, + num_radix_blocks * sizeof(Torus)); + free(h_lwe_indexes); + + // Keyswitch + tmp_lwe_before_ks = (Torus *)cuda_malloc_async(big_size, stream); + tmp_lwe_after_ks = (Torus *)cuda_malloc_async(small_size, stream); + } + } + + Torus *get_lut(size_t ind) { + assert(lut != nullptr); + return &lut[ind * (params.glwe_dimension + 1) * params.polynomial_size]; + } + + Torus *get_tvi(size_t ind) { return &lut_indexes[ind]; } + void release(cuda_stream_t *stream) { + cuda_drop_async(lut_indexes, stream); + cuda_drop_async(lwe_indexes, stream); + cuda_drop_async(tmp_lwe_before_ks, stream); + cuda_drop_async(tmp_lwe_after_ks, stream); + cuda_drop_async(lut, stream); + cuda_drop_async(pbs_buffer, stream); + } +}; + +template struct int_fullprop_buffer { + PBS_TYPE pbs_type; + int8_t *pbs_buffer; + + Torus *lut_buffer; + Torus *lut_indexes; + Torus *lwe_indexes; + + Torus *tmp_small_lwe_vector; + Torus *tmp_big_lwe_vector; +}; + +template struct int_sc_prop_memory { + Torus *generates_or_propagates; + Torus *step_output; + + // test_vector_array[2] = {lut_does_block_generate_carry, + // lut_does_block_generate_or_propagate} + int_radix_lut *test_vector_array; + int_radix_lut 
*lut_carry_propagation_sum; + int_radix_lut *message_acc; + + int_radix_params params; + + int_sc_prop_memory(cuda_stream_t *stream, int_radix_params params, + uint32_t num_radix_blocks, bool allocate_gpu_memory) { + this->params = params; + auto glwe_dimension = params.glwe_dimension; + auto polynomial_size = params.polynomial_size; + auto message_modulus = params.message_modulus; + auto carry_modulus = params.carry_modulus; + auto big_lwe_size = (polynomial_size * glwe_dimension + 1); + auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus); + + // allocate memory for intermediate calculations + generates_or_propagates = (Torus *)cuda_malloc_async( + num_radix_blocks * big_lwe_size_bytes, stream); + step_output = (Torus *)cuda_malloc_async( + num_radix_blocks * big_lwe_size_bytes, stream); + + // declare functions for test vector generation + auto f_lut_does_block_generate_carry = [message_modulus](Torus x) -> Torus { + if (x >= message_modulus) + return OUTPUT_CARRY::GENERATED; + return OUTPUT_CARRY::NONE; + }; + + auto f_lut_does_block_generate_or_propagate = + [message_modulus](Torus x) -> Torus { + if (x >= message_modulus) + return OUTPUT_CARRY::GENERATED; + else if (x == (message_modulus - 1)) + return OUTPUT_CARRY::PROPAGATED; + return OUTPUT_CARRY::NONE; + }; + + auto f_lut_carry_propagation_sum = [](Torus msb, Torus lsb) -> Torus { + if (msb == OUTPUT_CARRY::PROPAGATED) + return lsb; + return msb; + }; + + auto f_message_acc = [message_modulus](Torus x) -> Torus { + return x % message_modulus; + }; + + // create test vector objects + test_vector_array = new int_radix_lut( + stream, params, 2, num_radix_blocks, allocate_gpu_memory); + lut_carry_propagation_sum = new struct int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + message_acc = new struct int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + auto lut_does_block_generate_carry = test_vector_array->get_lut(0); + auto 
lut_does_block_generate_or_propagate = test_vector_array->get_lut(1); + + // generate test vectors + generate_device_accumulator( + stream, lut_does_block_generate_carry, glwe_dimension, polynomial_size, + message_modulus, carry_modulus, f_lut_does_block_generate_carry); + generate_device_accumulator( + stream, lut_does_block_generate_or_propagate, glwe_dimension, + polynomial_size, message_modulus, carry_modulus, + f_lut_does_block_generate_or_propagate); + cuda_set_value_async(&(stream->stream), + test_vector_array->get_tvi(1), 1, + num_radix_blocks - 1); + + generate_device_accumulator_bivariate( + stream, lut_carry_propagation_sum->lut, glwe_dimension, polynomial_size, + message_modulus, carry_modulus, f_lut_carry_propagation_sum); + + generate_device_accumulator(stream, message_acc->lut, glwe_dimension, + polynomial_size, message_modulus, + carry_modulus, f_message_acc); + } + + void release(cuda_stream_t *stream) { + cuda_drop_async(generates_or_propagates, stream); + cuda_drop_async(step_output, stream); + + test_vector_array->release(stream); + lut_carry_propagation_sum->release(stream); + message_acc->release(stream); + + delete test_vector_array; + delete lut_carry_propagation_sum; + delete message_acc; + } +}; + +template struct int_mul_memory { + Torus *vector_result_sb; + Torus *block_mul_res; + Torus *small_lwe_vector; + Torus *lwe_pbs_out_array; + int_radix_lut *test_vector_array; // lsb msb + int_radix_lut *test_vector_message; + int_radix_lut *test_vector_carry; + int_sc_prop_memory *scp_mem; + int_radix_params params; + + int_mul_memory(cuda_stream_t *stream, int_radix_params params, + uint32_t num_radix_blocks, bool allocate_gpu_memory) { + this->params = params; + auto glwe_dimension = params.glwe_dimension; + auto polynomial_size = params.polynomial_size; + auto message_modulus = params.message_modulus; + auto carry_modulus = params.carry_modulus; + auto lwe_dimension = params.small_lwe_dimension; + + // create single carry propagation memory 
object + scp_mem = new int_sc_prop_memory(stream, params, num_radix_blocks, + allocate_gpu_memory); + // 'vector_result_lsb' contains blocks from all possible shifts of + // radix_lwe_left excluding zero ciphertext blocks + int lsb_vector_block_count = num_radix_blocks * (num_radix_blocks + 1) / 2; + + // 'vector_result_msb' contains blocks from all possible shifts of + // radix_lwe_left except the last blocks of each shift + int msb_vector_block_count = num_radix_blocks * (num_radix_blocks - 1) / 2; + + int total_block_count = lsb_vector_block_count + msb_vector_block_count; + + // allocate memory for intermediate buffers + vector_result_sb = (Torus *)cuda_malloc_async( + 2 * total_block_count * (polynomial_size * glwe_dimension + 1) * + sizeof(Torus), + stream); + block_mul_res = (Torus *)cuda_malloc_async( + 2 * total_block_count * (polynomial_size * glwe_dimension + 1) * + sizeof(Torus), + stream); + small_lwe_vector = (Torus *)cuda_malloc_async( + total_block_count * (lwe_dimension + 1) * sizeof(Torus), stream); + lwe_pbs_out_array = + (Torus *)cuda_malloc_async((glwe_dimension * polynomial_size + 1) * + total_block_count * sizeof(Torus), + stream); + + // create int_radix_lut objects for lsb, msb, message, carry + // test_vector_array -> lut = {lsb_acc, msb_acc} + test_vector_array = new int_radix_lut( + stream, params, 2, total_block_count, allocate_gpu_memory); + test_vector_message = new int_radix_lut( + stream, params, 1, total_block_count, allocate_gpu_memory); + test_vector_carry = new int_radix_lut( + stream, params, 1, total_block_count, allocate_gpu_memory); + + auto lsb_acc = test_vector_array->get_lut(0); + auto msb_acc = test_vector_array->get_lut(1); + auto message_acc = test_vector_message->get_lut(0); + auto carry_acc = test_vector_carry->get_lut(0); + + // define functions for each accumulator + auto lut_f_lsb = [message_modulus](Torus x, Torus y) -> Torus { + return (x * y) % message_modulus; + }; + auto lut_f_msb = [message_modulus](Torus x, 
Torus y) -> Torus { + return (x * y) / message_modulus; + }; + auto lut_f_message = [message_modulus](Torus x) -> Torus { + return x % message_modulus; + }; + auto lut_f_carry = [message_modulus](Torus x) -> Torus { + return x / message_modulus; + }; + + // generate accumulators + generate_device_accumulator(stream, message_acc, glwe_dimension, + polynomial_size, message_modulus, + carry_modulus, lut_f_message); + generate_device_accumulator(stream, carry_acc, glwe_dimension, + polynomial_size, message_modulus, + carry_modulus, lut_f_carry); + generate_device_accumulator_bivariate( + stream, lsb_acc, glwe_dimension, polynomial_size, message_modulus, + carry_modulus, lut_f_lsb); + generate_device_accumulator_bivariate( + stream, msb_acc, glwe_dimension, polynomial_size, message_modulus, + carry_modulus, lut_f_msb); + + // tvi for test_vector_array should be reinitialized + // first lsb_vector_block_count value should reference to lsb_acc + // last msb_vector_block_count values should reference to msb_acc + // for message and carry default tvi is fine + cuda_set_value_async( + &(stream->stream), test_vector_array->get_tvi(lsb_vector_block_count), + 1, msb_vector_block_count); + } + + void release(cuda_stream_t *stream) { + cuda_drop_async(vector_result_sb, stream); + cuda_drop_async(block_mul_res, stream); + cuda_drop_async(small_lwe_vector, stream); + cuda_drop_async(lwe_pbs_out_array, stream); + + test_vector_array->release(stream); + test_vector_message->release(stream); + test_vector_carry->release(stream); + + scp_mem->release(stream); + + delete test_vector_array; + delete test_vector_message; + delete test_vector_carry; + + delete scp_mem; + } +}; + +template struct int_shift_buffer { + int_radix_params params; + std::vector *> lut_buffers_bivariate; + std::vector *> lut_buffers_univariate; + + SHIFT_TYPE shift_type; + + Torus *tmp_rotated; + + int_shift_buffer(cuda_stream_t *stream, SHIFT_TYPE shift_type, + int_radix_params params, uint32_t num_radix_blocks, 
+ bool allocate_gpu_memory) { + this->shift_type = shift_type; + this->params = params; + + if (allocate_gpu_memory) { + uint32_t max_amount_of_pbs = num_radix_blocks; + uint32_t big_lwe_size = params.big_lwe_dimension + 1; + uint32_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus); + + tmp_rotated = (Torus *)cuda_malloc_async( + max_amount_of_pbs * big_lwe_size_bytes, stream); + + uint32_t num_bits_in_block = (uint32_t)std::log2(params.message_modulus); + + // LUT + // pregenerate lut vector and indexes + // lut for left shift + // here we generate 'num_bits_in_block' times test_vector + // one for each 'shift_within_block' = 'shift' % 'num_bits_in_block' + // even though test_vector_left contains 'num_bits_in_block' lut + // tvi will have indexes for single lut only and those indexes will be 0 + // it means for pbs corresponding lut should be selected and pass along + // tvi filled with zeros + + // calculate bivariate lut for each 'shift_within_block' + for (int s_w_b = 1; s_w_b < num_bits_in_block; s_w_b++) { + auto cur_lut_bivariate = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + uint32_t shift_within_block = s_w_b; + + std::function shift_lut_f; + + if (shift_type == LEFT_SHIFT) { + shift_lut_f = [shift_within_block, + params](Torus current_block, + Torus previous_block) -> Torus { + current_block = current_block << shift_within_block; + previous_block = previous_block << shift_within_block; + + Torus message_of_current_block = + current_block % params.message_modulus; + Torus carry_of_previous_block = + previous_block / params.message_modulus; + return message_of_current_block + carry_of_previous_block; + }; + } else { + shift_lut_f = [num_bits_in_block, shift_within_block, params]( + Torus current_block, Torus next_block) -> Torus { + // left shift so as not to lose + // bits when shifting right afterwards + next_block <<= num_bits_in_block; + next_block >>= shift_within_block; + + // The way of getting carry / 
message is reversed compared + // to the usual way but it's normal: + // The message is in the upper bits, the carry in lower bits + Torus message_of_current_block = + current_block >> shift_within_block; + Torus carry_of_previous_block = next_block % params.message_modulus; + + return message_of_current_block + carry_of_previous_block; + }; + } + + // generate the LUT for the selected shift direction + generate_device_accumulator_bivariate( + stream, cur_lut_bivariate->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, + params.carry_modulus, shift_lut_f); + + lut_buffers_bivariate.push_back(cur_lut_bivariate); + } + + // here we generate 'message_modulus' times test_vector + // one for each 'shift' + // tvi will have indexes for single lut only and those indexes will be 0 + // it means for pbs corresponding lut should be selected and pass along + // tvi filled with zeros + + // calculate lut for each 'shift' + for (int shift = 0; shift < params.message_modulus; shift++) { + auto cur_lut = + new int_radix_lut(stream, params, 1, 1, allocate_gpu_memory); + + std::function shift_lut_f; + if (shift_type == LEFT_SHIFT) + shift_lut_f = [shift, params](Torus x) -> Torus { + return (x << shift) % params.message_modulus; + }; + else + shift_lut_f = [shift, params](Torus x) -> Torus { + return (x >> shift) % params.message_modulus; + }; + + generate_device_accumulator( + stream, cur_lut->lut, params.glwe_dimension, params.polynomial_size, + params.message_modulus, params.carry_modulus, shift_lut_f); + + lut_buffers_univariate.push_back(cur_lut); + } + } + } + + void release(cuda_stream_t *stream) { + for (auto &buffer : lut_buffers_bivariate) { + buffer->release(stream); + delete buffer; + } + for (auto &buffer : lut_buffers_univariate) { + buffer->release(stream); + delete buffer; + } + lut_buffers_bivariate.clear(); + lut_buffers_univariate.clear(); + + cuda_drop_async(tmp_rotated, stream); + } +}; + +template struct int_zero_out_if_buffer { + + int_radix_params params; + + Torus
*tmp; + + cuda_stream_t *local_stream; + + int_zero_out_if_buffer(cuda_stream_t *stream, int_radix_params params, + uint32_t num_radix_blocks, bool allocate_gpu_memory) { + this->params = params; + + Torus big_size = + (params.big_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus); + if (allocate_gpu_memory) { + + tmp = (Torus *)cuda_malloc_async(big_size, stream); + // We may use a different stream to allow concurrent operation + local_stream = new cuda_stream_t(stream->gpu_index); + } + } + void release(cuda_stream_t *stream) { + cuda_drop_async(tmp, stream); + local_stream->release(); + } +}; + +template struct int_cmux_buffer { + int_radix_lut *predicate_lut; + int_radix_lut *inverted_predicate_lut; + int_radix_lut *message_extract_lut; + + Torus *tmp_true_ct; + Torus *tmp_false_ct; + + int_zero_out_if_buffer *zero_if_true_buffer; + int_zero_out_if_buffer *zero_if_false_buffer; + + int_radix_params params; + + int_cmux_buffer(cuda_stream_t *stream, + std::function predicate_lut_f, + int_radix_params params, uint32_t num_radix_blocks, + bool allocate_gpu_memory) { + + this->params = params; + + if (allocate_gpu_memory) { + Torus big_size = + (params.big_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus); + Torus small_size = + (params.small_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus); + + tmp_true_ct = (Torus *)cuda_malloc_async(big_size, stream); + tmp_false_ct = (Torus *)cuda_malloc_async(big_size, stream); + + zero_if_true_buffer = new int_zero_out_if_buffer( + stream, params, num_radix_blocks, allocate_gpu_memory); + zero_if_false_buffer = new int_zero_out_if_buffer( + stream, params, num_radix_blocks, allocate_gpu_memory); + + auto lut_f = [predicate_lut_f](Torus block, Torus condition) -> Torus { + return predicate_lut_f(condition) ? 0 : block; + }; + auto inverted_lut_f = [predicate_lut_f](Torus block, + Torus condition) -> Torus { + return predicate_lut_f(condition) ? 
block : 0; + }; + auto message_extract_lut_f = [params](Torus x) -> Torus { + return x % params.message_modulus; + }; + + predicate_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + inverted_predicate_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + message_extract_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + generate_device_accumulator_bivariate( + stream, predicate_lut->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, params.carry_modulus, + lut_f); + + generate_device_accumulator_bivariate( + stream, inverted_predicate_lut->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, params.carry_modulus, + inverted_lut_f); + + generate_device_accumulator( + stream, message_extract_lut->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, params.carry_modulus, + message_extract_lut_f); + } + } + + void release(cuda_stream_t *stream) { + predicate_lut->release(stream); + delete predicate_lut; + inverted_predicate_lut->release(stream); + delete inverted_predicate_lut; + message_extract_lut->release(stream); + delete message_extract_lut; + + zero_if_true_buffer->release(stream); + delete zero_if_true_buffer; + zero_if_false_buffer->release(stream); + delete zero_if_false_buffer; + + cuda_drop_async(tmp_true_ct, stream); + cuda_drop_async(tmp_false_ct, stream); + } +}; + +template struct int_are_all_block_true_buffer { + COMPARISON_TYPE op; + int_radix_params params; + + int_radix_lut *is_max_value_lut; + int_radix_lut *is_equal_to_num_blocks_lut; + + Torus *tmp_block_accumulated; + + int_are_all_block_true_buffer(cuda_stream_t *stream, COMPARISON_TYPE op, + int_radix_params params, + uint32_t num_radix_blocks, + bool allocate_gpu_memory) { + this->params = params; + this->op = op; + + if (allocate_gpu_memory) { + Torus total_modulus = params.message_modulus * 
params.carry_modulus; + uint32_t max_value = total_modulus - 1; + + int max_chunks = (num_radix_blocks + max_value - 1) / max_value; + tmp_block_accumulated = (Torus *)cuda_malloc_async( + (params.big_lwe_dimension + 1) * max_chunks * sizeof(Torus), stream); + + // LUT + // We need three LUTs: + // (x & max_value as u64) == max_value + // x != 0 + // (x & max_value as u64) == blocks.len() + + auto is_max_value_lut_f = [total_modulus](Torus x) -> Torus { + Torus max_value = total_modulus - 1; + return (x & max_value) == max_value; + }; + + is_max_value_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + is_equal_to_num_blocks_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + generate_device_accumulator( + stream, is_max_value_lut->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, params.carry_modulus, + is_max_value_lut_f); + } + } + + void release(cuda_stream_t *stream) { + is_max_value_lut->release(stream); + delete is_max_value_lut; + is_equal_to_num_blocks_lut->release(stream); + delete is_equal_to_num_blocks_lut; + + cuda_drop_async(tmp_block_accumulated, stream); + } +}; + +template struct int_comparison_eq_buffer { + int_radix_params params; + COMPARISON_TYPE op; + + int_radix_lut *operator_lut; + int_radix_lut *is_non_zero_lut; + + int_are_all_block_true_buffer *are_all_block_true_buffer; + + int_comparison_eq_buffer(cuda_stream_t *stream, COMPARISON_TYPE op, + int_radix_params params, uint32_t num_radix_blocks, + bool allocate_gpu_memory) { + this->params = params; + this->op = op; + + if (allocate_gpu_memory) { + + are_all_block_true_buffer = new int_are_all_block_true_buffer( + stream, op, params, num_radix_blocks, allocate_gpu_memory); + + // Operator LUT + auto operator_f = [op](Torus lhs, Torus rhs) -> Torus { + if (op == COMPARISON_TYPE::EQ) { + // EQ + return (lhs == rhs); + } else { + // NE + return (lhs != rhs); + } + }; + operator_lut = new 
int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + generate_device_accumulator_bivariate( + stream, operator_lut->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, params.carry_modulus, + operator_f); + + // f(x) -> x != 0 + Torus total_modulus = params.message_modulus * params.carry_modulus; + auto is_non_zero_lut_f = [total_modulus](Torus x) -> Torus { + return (x % total_modulus) != 0; + }; + + is_non_zero_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + generate_device_accumulator( + stream, is_non_zero_lut->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, params.carry_modulus, + is_non_zero_lut_f); + } + } + + void release(cuda_stream_t *stream) { + operator_lut->release(stream); + delete operator_lut; + is_non_zero_lut->release(stream); + delete is_non_zero_lut; + + are_all_block_true_buffer->release(stream); + delete are_all_block_true_buffer; + } +}; + +template struct int_tree_sign_reduction_buffer { + int_radix_params params; + + std::function block_selector_f; + + int_radix_lut *tree_inner_leaf_lut; + int_radix_lut *tree_last_leaf_lut; + + int_radix_lut *tree_last_leaf_scalar_lut; + + Torus *tmp_x; + Torus *tmp_y; + + int_tree_sign_reduction_buffer(cuda_stream_t *stream, + std::function operator_f, + int_radix_params params, + uint32_t num_radix_blocks, + bool allocate_gpu_memory) { + this->params = params; + + block_selector_f = [](Torus msb, Torus lsb) -> Torus { + if (msb == IS_EQUAL) // EQUAL + return lsb; + else + return msb; + }; + + auto last_leaf_noop_lut_f = [this](Torus x) -> Torus { + int msb = (x >> 2) & 3; + int lsb = x & 3; + + return this->block_selector_f(msb, lsb); + }; + + if (allocate_gpu_memory) { + tmp_x = (Torus *)cuda_malloc_async((params.big_lwe_dimension + 1) * + num_radix_blocks * sizeof(Torus), + stream); + tmp_y = (Torus *)cuda_malloc_async((params.big_lwe_dimension + 1) * + num_radix_blocks *
sizeof(Torus), + stream); + + // LUTs + tree_inner_leaf_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + tree_last_leaf_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + tree_last_leaf_scalar_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + generate_device_accumulator_bivariate( + stream, tree_inner_leaf_lut->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, params.carry_modulus, + block_selector_f); + } + } + + void release(cuda_stream_t *stream) { + tree_inner_leaf_lut->release(stream); + delete tree_inner_leaf_lut; + tree_last_leaf_lut->release(stream); + delete tree_last_leaf_lut; + tree_last_leaf_scalar_lut->release(stream); + delete tree_last_leaf_scalar_lut; + + cuda_drop_async(tmp_x, stream); + cuda_drop_async(tmp_y, stream); + } +}; + +template struct int_comparison_diff_buffer { + int_radix_params params; + COMPARISON_TYPE op; + + Torus *tmp_packed_left; + Torus *tmp_packed_right; + + std::function operator_f; + + int_radix_lut *is_zero_lut; + + int_tree_sign_reduction_buffer *tree_buffer; + + // Used for scalar comparisons + cuda_stream_t *lsb_stream; + cuda_stream_t *msb_stream; + + int_comparison_diff_buffer(cuda_stream_t *stream, COMPARISON_TYPE op, + int_radix_params params, uint32_t num_radix_blocks, + bool allocate_gpu_memory) { + this->params = params; + this->op = op; + + operator_f = [op](Torus x) -> Torus { + switch (op) { + case GT: + return x == IS_SUPERIOR; + case GE: + return (x == IS_SUPERIOR) || (x == IS_EQUAL); + case LT: + return x == IS_INFERIOR; + case LE: + return (x == IS_INFERIOR) || (x == IS_EQUAL); + default: + // We don't need a default case but we need to return something + return 42; + } + }; + + if (allocate_gpu_memory) { + lsb_stream = cuda_create_stream(stream->gpu_index); + msb_stream = cuda_create_stream(stream->gpu_index); + + Torus big_size = (params.big_lwe_dimension + 1) * 
sizeof(Torus); + + tmp_packed_left = + (Torus *)cuda_malloc_async(big_size * (num_radix_blocks / 2), stream); + + tmp_packed_right = + (Torus *)cuda_malloc_async(big_size * (num_radix_blocks / 2), stream); + + // LUTs + uint32_t total_modulus = params.message_modulus * params.carry_modulus; + auto is_zero_f = [total_modulus](Torus x) -> Torus { + return (x % total_modulus) == 0; + }; + + is_zero_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + generate_device_accumulator( + stream, is_zero_lut->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, params.carry_modulus, + is_zero_f); + + tree_buffer = new int_tree_sign_reduction_buffer( + stream, operator_f, params, num_radix_blocks, allocate_gpu_memory); + } + } + + void release(cuda_stream_t *stream) { + is_zero_lut->release(stream); + delete is_zero_lut; + tree_buffer->release(stream); + delete tree_buffer; + + cuda_drop_async(tmp_packed_left, stream); + cuda_drop_async(tmp_packed_right, stream); + + cuda_destroy_stream(lsb_stream); + cuda_destroy_stream(msb_stream); + } +}; + +template struct int_comparison_buffer { + COMPARISON_TYPE op; + + int_radix_params params; + + ////////////////// + int_radix_lut *cleaning_lut; + std::function cleaning_lut_f; + + int_comparison_eq_buffer *eq_buffer; + int_comparison_diff_buffer *diff_buffer; + + Torus *tmp_block_comparisons; + + // Max Min + Torus *tmp_lwe_array_out; + int_cmux_buffer *cmux_buffer; + + int_comparison_buffer(cuda_stream_t *stream, COMPARISON_TYPE op, + int_radix_params params, uint32_t num_radix_blocks, + bool allocate_gpu_memory) { + this->params = params; + this->op = op; + + cleaning_lut_f = [](Torus x) -> Torus { return x; }; + + if (allocate_gpu_memory) { + tmp_lwe_array_out = (Torus *)cuda_malloc_async( + (params.big_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus), + stream); + + // Block comparisons + tmp_block_comparisons = (Torus *)cuda_malloc_async( + 
(params.big_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus), + stream); + + // Cleaning LUT + cleaning_lut = new int_radix_lut( + stream, params, 1, num_radix_blocks, allocate_gpu_memory); + + generate_device_accumulator( + stream, cleaning_lut->lut, params.glwe_dimension, + params.polynomial_size, params.message_modulus, params.carry_modulus, + cleaning_lut_f); + + switch (op) { + case COMPARISON_TYPE::MAX: + case COMPARISON_TYPE::MIN: + cmux_buffer = new int_cmux_buffer( + stream, + [op](Torus x) -> Torus { + if (op == COMPARISON_TYPE::MAX) + return (x == IS_SUPERIOR); + else + return (x == IS_INFERIOR); + }, + params, num_radix_blocks, allocate_gpu_memory); + case COMPARISON_TYPE::GT: + case COMPARISON_TYPE::GE: + case COMPARISON_TYPE::LT: + case COMPARISON_TYPE::LE: + diff_buffer = new int_comparison_diff_buffer( + stream, op, params, num_radix_blocks, allocate_gpu_memory); + case COMPARISON_TYPE::EQ: + case COMPARISON_TYPE::NE: + eq_buffer = new int_comparison_eq_buffer( + stream, op, params, num_radix_blocks, allocate_gpu_memory); + break; + } + } + } + + void release(cuda_stream_t *stream) { + switch (op) { + case COMPARISON_TYPE::MAX: + case COMPARISON_TYPE::MIN: + cmux_buffer->release(stream); + case COMPARISON_TYPE::GT: + case COMPARISON_TYPE::GE: + case COMPARISON_TYPE::LT: + case COMPARISON_TYPE::LE: + diff_buffer->release(stream); + case COMPARISON_TYPE::EQ: + case COMPARISON_TYPE::NE: + eq_buffer->release(stream); + break; + } + cleaning_lut->release(stream); + cuda_drop_async(tmp_lwe_array_out, stream); + cuda_drop_async(tmp_block_comparisons, stream); + } +}; + +template struct int_bitop_buffer { + + int_radix_params params; + int_radix_lut *lut; + + int_bitop_buffer(cuda_stream_t *stream, BITOP_TYPE op, + int_radix_params params, uint32_t num_radix_blocks, + bool allocate_gpu_memory) { + + this->params = params; + + switch (op) { + case BITAND: + case BITOR: + case BITXOR: + lut = new int_radix_lut(stream, params, 1, num_radix_blocks, + 
allocate_gpu_memory); + { + auto lut_bivariate_f = [op](Torus lhs, Torus rhs) -> Torus { + if (op == BITOP_TYPE::BITAND) { + // AND + return lhs & rhs; + } else if (op == BITOP_TYPE::BITOR) { + // OR + return lhs | rhs; + } else { + // XOR + return lhs ^ rhs; + } + }; + + generate_device_accumulator_bivariate( + stream, lut->lut, params.glwe_dimension, params.polynomial_size, + params.message_modulus, params.carry_modulus, lut_bivariate_f); + } + break; + case BITNOT: + lut = new int_radix_lut(stream, params, 1, num_radix_blocks, + allocate_gpu_memory); + { + auto lut_not_f = [params](Torus x) -> Torus { + return (~x) % params.message_modulus; + }; + generate_device_accumulator( + stream, lut->lut, params.glwe_dimension, params.polynomial_size, + params.message_modulus, params.carry_modulus, lut_not_f); + } + break; + default: + // Scalar OP + uint32_t lut_size = (params.glwe_dimension + 1) * params.polynomial_size; + + lut = new int_radix_lut(stream, params, params.message_modulus, + num_radix_blocks, allocate_gpu_memory); + + for (int i = 0; i < params.message_modulus; i++) { + auto lut_block = lut->lut + i * lut_size; + auto rhs = i; + + auto lut_univariate_scalar_f = [op, rhs](Torus x) -> Torus { + if (op == BITOP_TYPE::SCALAR_BITAND) { + // AND + return x & rhs; + } else if (op == BITOP_TYPE::SCALAR_BITOR) { + // OR + return x | rhs; + } else { + // XOR + return x ^ rhs; + } + }; + generate_device_accumulator( + stream, lut_block, params.glwe_dimension, params.polynomial_size, + params.message_modulus, params.carry_modulus, + lut_univariate_scalar_f); + } + } + } + + void release(cuda_stream_t *stream) { + lut->release(stream); + delete lut; + } +}; + +#endif // CUDA_INTEGER_H diff --git a/backends/tfhe-cuda-backend/implementation/include/keyswitch.h b/backends/tfhe-cuda-backend/implementation/include/keyswitch.h new file mode 100644 index 000000000..441afc7f6 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/include/keyswitch.h @@ -0,0 +1,21 @@ 
+#ifndef CNCRT_KS_H_ +#define CNCRT_KS_H_ + +#include + +extern "C" { + +void cuda_keyswitch_lwe_ciphertext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lwe_array_in, void *lwe_input_indexes, void *ksk, + uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log, + uint32_t level_count, uint32_t num_samples); + +void cuda_keyswitch_lwe_ciphertext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lwe_array_in, void *lwe_input_indexes, void *ksk, + uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log, + uint32_t level_count, uint32_t num_samples); +} + +#endif // CNCRT_KS_H_ diff --git a/backends/tfhe-cuda-backend/implementation/include/linear_algebra.h b/backends/tfhe-cuda-backend/implementation/include/linear_algebra.h new file mode 100644 index 000000000..07127553c --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/include/linear_algebra.h @@ -0,0 +1,50 @@ +#ifndef CUDA_LINALG_H_ +#define CUDA_LINALG_H_ + +#include "bootstrap.h" +#include +#include + +extern "C" { + +void cuda_negate_lwe_ciphertext_vector_32(cuda_stream_t *stream, + void *lwe_array_out, + void *lwe_array_in, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count); +void cuda_negate_lwe_ciphertext_vector_64(cuda_stream_t *stream, + void *lwe_array_out, + void *lwe_array_in, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count); +void cuda_add_lwe_ciphertext_vector_32(cuda_stream_t *stream, + void *lwe_array_out, + void *lwe_array_in_1, + void *lwe_array_in_2, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count); +void cuda_add_lwe_ciphertext_vector_64(cuda_stream_t *stream, + void *lwe_array_out, + void *lwe_array_in_1, + void *lwe_array_in_2, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count); +void cuda_add_lwe_ciphertext_vector_plaintext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, 
void *lwe_array_in, + void *plaintext_array_in, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count); +void cuda_add_lwe_ciphertext_vector_plaintext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + void *plaintext_array_in, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count); +void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + void *cleartext_array_in, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count); +void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + void *cleartext_array_in, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count); +} + +#endif // CUDA_LINALG_H_ diff --git a/backends/tfhe-cuda-backend/implementation/src/CMakeLists.txt b/backends/tfhe-cuda-backend/implementation/src/CMakeLists.txt new file mode 100644 index 000000000..58dc0a2cc --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/CMakeLists.txt @@ -0,0 +1,22 @@ +set(SOURCES + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bit_extraction.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bitwise_ops.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap_multibit.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/ciphertext.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/circuit_bootstrap.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/device.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/integer.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/keyswitch.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/linear_algebra.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/shifts.h + ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/vertical_packing.h) +file(GLOB_RECURSE SOURCES "*.cu") +add_library(tfhe_cuda_backend STATIC ${SOURCES}) +set_target_properties( + tfhe_cuda_backend + PROPERTIES CUDA_SEPARABLE_COMPILATION ON + CUDA_RESOLVE_DEVICE_SYMBOLS ON + CUDA_ARCHITECTURES native) 
+target_link_libraries(tfhe_cuda_backend PUBLIC cudart OpenMP::OpenMP_CXX) +target_include_directories(tfhe_cuda_backend PRIVATE .) diff --git a/backends/tfhe-cuda-backend/implementation/src/crypto/ciphertext.cu b/backends/tfhe-cuda-backend/implementation/src/crypto/ciphertext.cu new file mode 100644 index 000000000..1aed30d2c --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/crypto/ciphertext.cu @@ -0,0 +1 @@ +#include "ciphertext.cuh" diff --git a/backends/tfhe-cuda-backend/implementation/src/crypto/ciphertext.cuh b/backends/tfhe-cuda-backend/implementation/src/crypto/ciphertext.cuh new file mode 100644 index 000000000..de85aa483 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/crypto/ciphertext.cuh @@ -0,0 +1,44 @@ +#ifndef CUDA_CIPHERTEXT_CUH +#define CUDA_CIPHERTEXT_CUH + +#include "ciphertext.h" +#include "device.h" +#include + +template +void cuda_convert_lwe_ciphertext_vector_to_gpu(T *dest, T *src, + cuda_stream_t *stream, + uint32_t number_of_cts, + uint32_t lwe_dimension) { + cudaSetDevice(stream->gpu_index); + uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T); + cuda_memcpy_async_to_gpu(dest, src, size, stream); +} + +void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src, + cuda_stream_t *stream, + uint32_t number_of_cts, + uint32_t lwe_dimension) { + cuda_convert_lwe_ciphertext_vector_to_gpu( + (uint64_t *)dest, (uint64_t *)src, stream, number_of_cts, lwe_dimension); +} + +template +void cuda_convert_lwe_ciphertext_vector_to_cpu(T *dest, T *src, + cuda_stream_t *stream, + uint32_t number_of_cts, + uint32_t lwe_dimension) { + cudaSetDevice(stream->gpu_index); + uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T); + cuda_memcpy_async_to_cpu(dest, src, size, stream); +} + +void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src, + cuda_stream_t *stream, + uint32_t number_of_cts, + uint32_t lwe_dimension) { + cuda_convert_lwe_ciphertext_vector_to_cpu( + (uint64_t 
*)dest, (uint64_t *)src, stream, number_of_cts, lwe_dimension); +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/crypto/gadget.cuh b/backends/tfhe-cuda-backend/implementation/src/crypto/gadget.cuh new file mode 100644 index 000000000..94665a7a2 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/crypto/gadget.cuh @@ -0,0 +1,162 @@ +#ifndef CNCRT_CRYPTO_CUH +#define CNCRT_CRPYTO_CUH + +#include "device.h" +#include + +/** + * GadgetMatrix implements the iterator design pattern to decompose a set of + * num_poly consecutive polynomials with degree params::degree. A total of + * level_count levels is expected and each call to decompose_and_compress_next() + * writes to the result the next level. It is also possible to advance an + * arbitrary amount of levels by using decompose_and_compress_level(). + * + * This class always decomposes the entire set of num_poly polynomials. + * By default, it works on a single polynomial. + */ +#pragma once +template class GadgetMatrix { +private: + uint32_t level_count; + uint32_t base_log; + uint32_t mask; + uint32_t halfbg; + uint32_t num_poly; + T offset; + int current_level; + T mask_mod_b; + T *state; + +public: + __device__ GadgetMatrix(uint32_t base_log, uint32_t level_count, T *state, + uint32_t num_poly = 1) + : base_log(base_log), level_count(level_count), num_poly(num_poly), + state(state) { + + mask_mod_b = (1ll << base_log) - 1ll; + current_level = level_count; + int tid = threadIdx.x; + for (int i = 0; i < num_poly * params::opt; i++) { + state[tid] >>= (sizeof(T) * 8 - base_log * level_count); + tid += params::degree / params::opt; + } + synchronize_threads_in_block(); + } + + // Decomposes all polynomials at once + __device__ void decompose_and_compress_next(double2 *result) { + for (int j = 0; j < num_poly; j++) { + auto result_slice = result + j * params::degree / 2; + decompose_and_compress_next_polynomial(result_slice, j); + } + } + + // Decomposes a single polynomial + 
__device__ void decompose_and_compress_next_polynomial(double2 *result, + int j) { + if (j == 0) + current_level -= 1; + + int tid = threadIdx.x; + auto state_slice = state + j * params::degree; + for (int i = 0; i < params::opt / 2; i++) { + T res_re = state_slice[tid] & mask_mod_b; + T res_im = state_slice[tid + params::degree / 2] & mask_mod_b; + state_slice[tid] >>= base_log; + state_slice[tid + params::degree / 2] >>= base_log; + T carry_re = ((res_re - 1ll) | state_slice[tid]) & res_re; + T carry_im = + ((res_im - 1ll) | state_slice[tid + params::degree / 2]) & res_im; + carry_re >>= (base_log - 1); + carry_im >>= (base_log - 1); + state_slice[tid] += carry_re; + state_slice[tid + params::degree / 2] += carry_im; + res_re -= carry_re << base_log; + res_im -= carry_im << base_log; + + result[tid].x = (int32_t)res_re; + result[tid].y = (int32_t)res_im; + + tid += params::degree / params::opt; + } + synchronize_threads_in_block(); + } + + // Decomposes a single polynomial + __device__ void + decompose_and_compress_next_polynomial_elements(double2 *result, int j) { + if (j == 0) + current_level -= 1; + + int tid = threadIdx.x; + auto state_slice = state + j * params::degree; + for (int i = 0; i < params::opt / 2; i++) { + T res_re = state_slice[tid] & mask_mod_b; + T res_im = state_slice[tid + params::degree / 2] & mask_mod_b; + state_slice[tid] >>= base_log; + state_slice[tid + params::degree / 2] >>= base_log; + T carry_re = ((res_re - 1ll) | state_slice[tid]) & res_re; + T carry_im = + ((res_im - 1ll) | state_slice[tid + params::degree / 2]) & res_im; + carry_re >>= (base_log - 1); + carry_im >>= (base_log - 1); + state_slice[tid] += carry_re; + state_slice[tid + params::degree / 2] += carry_im; + res_re -= carry_re << base_log; + res_im -= carry_im << base_log; + + result[i].x = (int32_t)res_re; + result[i].y = (int32_t)res_im; + + tid += params::degree / params::opt; + } + synchronize_threads_in_block(); + } + + __device__ void 
decompose_and_compress_level(double2 *result, int level) { + for (int i = 0; i < level_count - level; i++) + decompose_and_compress_next(result); + } +}; + +template class GadgetMatrixSingle { +private: + uint32_t level_count; + uint32_t base_log; + uint32_t mask; + uint32_t halfbg; + T offset; + +public: + __device__ GadgetMatrixSingle(uint32_t base_log, uint32_t level_count) + : base_log(base_log), level_count(level_count) { + uint32_t bg = 1 << base_log; + this->halfbg = bg / 2; + this->mask = bg - 1; + T temp = 0; + for (int i = 0; i < this->level_count; i++) { + temp += 1ULL << (sizeof(T) * 8 - (i + 1) * this->base_log); + } + this->offset = temp * this->halfbg; + } + + __device__ T decompose_one_level_single(T element, uint32_t level) { + T s = element + this->offset; + uint32_t decal = (sizeof(T) * 8 - (level + 1) * this->base_log); + T temp1 = (s >> decal) & this->mask; + return (T)(temp1 - this->halfbg); + } +}; + +template +__device__ Torus decompose_one(Torus &state, Torus mask_mod_b, int base_log) { + Torus res = state & mask_mod_b; + state >>= base_log; + Torus carry = ((res - 1ll) | state) & res; + carry >>= base_log - 1; + state += carry; + res -= carry << base_log; + return res; +} + +#endif // CNCRT_CRPYTO_H diff --git a/backends/tfhe-cuda-backend/implementation/src/crypto/ggsw.cuh b/backends/tfhe-cuda-backend/implementation/src/crypto/ggsw.cuh new file mode 100644 index 000000000..2735b6eb9 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/crypto/ggsw.cuh @@ -0,0 +1,74 @@ +#ifndef CNCRT_GGSW_CUH +#define CNCRT_GGSW_CUH + +#include "device.h" +#include "fft/bnsmfft.cuh" +#include "polynomial/parameters.cuh" + +template +__global__ void device_batch_fft_ggsw_vector(double2 *dest, T *src, + int8_t *device_mem) { + + extern __shared__ int8_t sharedmem[]; + double2 *selected_memory; + + if constexpr (SMD == FULLSM) + selected_memory = (double2 *)sharedmem; + else + selected_memory = (double2 *)device_mem[blockIdx.x * params::degree]; + 
+ // Compression + int offset = blockIdx.x * blockDim.x; + + int tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + ST x = src[(tid) + params::opt * offset]; + ST y = src[(tid + params::degree / 2) + params::opt * offset]; + selected_memory[tid].x = x / (double)std::numeric_limits::max(); + selected_memory[tid].y = y / (double)std::numeric_limits::max(); + tid += params::degree / params::opt; + } + synchronize_threads_in_block(); + + // Switch to the FFT space + NSMFFT_direct>(selected_memory); + synchronize_threads_in_block(); + + // Write the output to global memory + tid = threadIdx.x; +#pragma unroll + for (int j = 0; j < params::opt / 2; j++) { + dest[tid + (params::opt >> 1) * offset] = selected_memory[tid]; + tid += params::degree / params::opt; + } +} + +/** + * Applies the FFT transform on sequence of GGSW ciphertexts already in the + * global memory + */ +template +void batch_fft_ggsw_vector(cuda_stream_t *stream, double2 *dest, T *src, + int8_t *d_mem, uint32_t r, uint32_t glwe_dim, + uint32_t polynomial_size, uint32_t level_count, + uint32_t gpu_index, uint32_t max_shared_memory) { + cudaSetDevice(stream->gpu_index); + + int shared_memory_size = sizeof(double) * polynomial_size; + + int gridSize = r * (glwe_dim + 1) * (glwe_dim + 1) * level_count; + int blockSize = polynomial_size / params::opt; + + if (max_shared_memory < shared_memory_size) { + device_batch_fft_ggsw_vector + <<stream>>>(dest, src, d_mem); + } else { + device_batch_fft_ggsw_vector + <<stream>>>(dest, src, + d_mem); + } + check_cuda_error(cudaGetLastError()); +} + +#endif // CNCRT_GGSW_CUH diff --git a/backends/tfhe-cuda-backend/implementation/src/crypto/keyswitch.cu b/backends/tfhe-cuda-backend/implementation/src/crypto/keyswitch.cu new file mode 100644 index 000000000..1cb9d3f8b --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/crypto/keyswitch.cu @@ -0,0 +1,48 @@ +#include "keyswitch.cuh" +#include "keyswitch.h" +#include + +/* Perform 
keyswitch on a batch of 32 bits input LWE ciphertexts. + * Refer to the equivalent operation on 64 bits for more details. + */ +void cuda_keyswitch_lwe_ciphertext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lwe_array_in, void *lwe_input_indexes, void *ksk, + uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log, + uint32_t level_count, uint32_t num_samples) { + cuda_keyswitch_lwe_ciphertext_vector( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), static_cast(ksk), + lwe_dimension_in, lwe_dimension_out, base_log, level_count, num_samples); +} + +/* Perform keyswitch on a batch of 64 bits input LWE ciphertexts. + * + * - `stream` is the Cuda stream to be used in the kernel launch; its + * gpu_index selects the GPU on which the kernel is launched, so there is + * no separate `gpu_index` argument + * - lwe_array_out: output batch of num_samples keyswitched ciphertexts c = + * (a0,..an-1,b) where n is the output LWE dimension (lwe_dimension_out) + * - lwe_array_in: input batch of num_samples LWE ciphertexts, containing + * lwe_dimension_in mask values + 1 body value + * - ksk: the keyswitch key to be used in the operation + * - base_log: the log of the base used in the decomposition (should be the one + * used to create the ksk) + * + * This function calls a wrapper to a device kernel that performs the keyswitch + * - num_samples blocks of threads are launched + */ +void cuda_keyswitch_lwe_ciphertext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lwe_array_in, void *lwe_input_indexes, void *ksk, + uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log, + uint32_t level_count, uint32_t num_samples) { + cuda_keyswitch_lwe_ciphertext_vector( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), 
static_cast(ksk), + lwe_dimension_in, lwe_dimension_out, base_log, level_count, num_samples); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/crypto/keyswitch.cuh b/backends/tfhe-cuda-backend/implementation/src/crypto/keyswitch.cuh new file mode 100644 index 000000000..0d9bdad38 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/crypto/keyswitch.cuh @@ -0,0 +1,144 @@ +#ifndef CNCRT_KS_CUH +#define CNCRT_KS_CUH + +#include "device.h" +#include "gadget.cuh" +#include "polynomial/polynomial_math.cuh" +#include "torus.cuh" +#include +#include + +template +__device__ Torus *get_ith_block(Torus *ksk, int i, int level, + uint32_t lwe_dimension_out, + uint32_t level_count) { + int pos = i * level_count * (lwe_dimension_out + 1) + + level * (lwe_dimension_out + 1); + Torus *ptr = &ksk[pos]; + return ptr; +} + +/* + * keyswitch kernel + * Each thread handles a piece of the following equation: + * $$GLWE_s2(\Delta.m+e) = (0,0,..,0,b) - \sum_{i=0}^{k-1} \langle Dec(a_i), + * (GLWE_s2(s1_i q/\beta), .., GLWE_s2(s1_i q/\beta^l)) \rangle$$ where k is the dimension of + * the GLWE ciphertext. If the polynomial dimension in GLWE is > 1, this + * equation is solved for each polynomial coefficient.
where Dec denotes the + * decomposition with base beta and l levels and the inner product is done + * between the decomposition of a_i and l GLWE encryptions of s1_i q/\beta^j, + * with j in [1,l] We obtain a GLWE encryption of Delta.m (with Delta the + * scaling factor) under key s2 instead of s1, with an increased noise + * + */ +template +__global__ void +keyswitch(Torus *lwe_array_out, Torus *lwe_output_indexes, Torus *lwe_array_in, + Torus *lwe_input_indexes, Torus *ksk, uint32_t lwe_dimension_in, + uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count, + int lwe_lower, int lwe_upper, int cutoff) { + int tid = threadIdx.x; + + extern __shared__ int8_t sharedmem[]; + + Torus *local_lwe_array_out = (Torus *)sharedmem; + + auto block_lwe_array_in = get_chunk( + lwe_array_in, lwe_input_indexes[blockIdx.x], lwe_dimension_in + 1); + auto block_lwe_array_out = get_chunk( + lwe_array_out, lwe_output_indexes[blockIdx.x], lwe_dimension_out + 1); + + auto gadget = GadgetMatrixSingle(base_log, level_count); + + int lwe_part_per_thd; + if (tid < cutoff) { + lwe_part_per_thd = lwe_upper; + } else { + lwe_part_per_thd = lwe_lower; + } + __syncthreads(); + + for (int k = 0; k < lwe_part_per_thd; k++) { + int idx = tid + k * blockDim.x; + local_lwe_array_out[idx] = 0; + } + __syncthreads(); + + if (tid == 0) { + local_lwe_array_out[lwe_dimension_out] = + block_lwe_array_in[lwe_dimension_in]; + } + + for (int i = 0; i < lwe_dimension_in; i++) { + + __syncthreads(); + + Torus a_i = + round_to_closest_multiple(block_lwe_array_in[i], base_log, level_count); + + Torus state = a_i >> (sizeof(Torus) * 8 - base_log * level_count); + Torus mask_mod_b = (1ll << base_log) - 1ll; + + for (int j = 0; j < level_count; j++) { + auto ksk_block = get_ith_block(ksk, i, j, lwe_dimension_out, level_count); + Torus decomposed = decompose_one(state, mask_mod_b, base_log); + for (int k = 0; k < lwe_part_per_thd; k++) { + int idx = tid + k * blockDim.x; + local_lwe_array_out[idx] -= 
(Torus)ksk_block[idx] * decomposed; + } + } + } + + for (int k = 0; k < lwe_part_per_thd; k++) { + int idx = tid + k * blockDim.x; + block_lwe_array_out[idx] = local_lwe_array_out[idx]; + } +} + +/// assume lwe_array_in in the gpu +template +__host__ void cuda_keyswitch_lwe_ciphertext_vector( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes, + Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *ksk, + uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log, + uint32_t level_count, uint32_t num_samples) { + + cudaSetDevice(stream->gpu_index); + constexpr int ideal_threads = 128; + + int lwe_dim = lwe_dimension_out + 1; + int lwe_lower, lwe_upper, cutoff; + if (lwe_dim % ideal_threads == 0) { + lwe_lower = lwe_dim / ideal_threads; + lwe_upper = lwe_dim / ideal_threads; + cutoff = 0; + } else { + int y = + ceil((double)lwe_dim / (double)ideal_threads) * ideal_threads - lwe_dim; + cutoff = ideal_threads - y; + lwe_lower = lwe_dim / ideal_threads; + lwe_upper = (int)ceil((double)lwe_dim / (double)ideal_threads); + } + + int lwe_size_after = (lwe_dimension_out + 1) * num_samples; + + int shared_mem = sizeof(Torus) * (lwe_dimension_out + 1); + + cuda_memset_async(lwe_array_out, 0, sizeof(Torus) * lwe_size_after, stream); + check_cuda_error(cudaGetLastError()); + + dim3 grid(num_samples, 1, 1); + dim3 threads(ideal_threads, 1, 1); + + // cudaFuncSetAttribute(keyswitch, + // cudaFuncAttributeMaxDynamicSharedMemorySize, + // shared_mem); + + keyswitch<<stream>>>( + lwe_array_out, lwe_output_indexes, lwe_array_in, lwe_input_indexes, ksk, + lwe_dimension_in, lwe_dimension_out, base_log, level_count, lwe_lower, + lwe_upper, cutoff); + check_cuda_error(cudaGetLastError()); +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/crypto/torus.cuh b/backends/tfhe-cuda-backend/implementation/src/crypto/torus.cuh new file mode 100644 index 000000000..8fce461ca --- /dev/null +++ 
b/backends/tfhe-cuda-backend/implementation/src/crypto/torus.cuh @@ -0,0 +1,74 @@ +#ifndef CNCRT_TORUS_CUH +#define CNCRT_TORUS_CUH + +#include "types/int128.cuh" +#include + +template +__device__ inline void typecast_double_to_torus(double x, T &r) { + r = T(x); +} + +template <> +__device__ inline void typecast_double_to_torus(double x, + uint32_t &r) { + r = __double2uint_rn(x); +} + +template <> +__device__ inline void typecast_double_to_torus(double x, + uint64_t &r) { + // The ull intrinsic does not behave in the same way on all architectures and + // on some platforms this causes the cmux tree test to fail + // Hence the intrinsic is not used here + uint128 nnnn = make_uint128_from_float(x); + uint64_t lll = nnnn.lo_; + r = lll; +} + +template +__device__ inline T round_to_closest_multiple(T x, uint32_t base_log, + uint32_t level_count) { + T shift = sizeof(T) * 8 - level_count * base_log; + T mask = 1ll << (shift - 1); + T b = (x & mask) >> (shift - 1); + T res = x >> shift; + res += b; + res <<= shift; + return res; +} + +template +__device__ __forceinline__ void rescale_torus_element(T element, T &output, + uint32_t log_shift) { + output = + round((double)element / (double(std::numeric_limits::max()) + 1.0) * + (double)log_shift); +} + +template +__device__ __forceinline__ T rescale_torus_element(T element, + uint32_t log_shift) { + return round((double)element / (double(std::numeric_limits::max()) + 1.0) * + (double)log_shift); +} + +template <> +__device__ __forceinline__ void +rescale_torus_element(uint32_t element, uint32_t &output, + uint32_t log_shift) { + output = + round(__uint2double_rn(element) / + (__uint2double_rn(std::numeric_limits::max()) + 1.0) * + __uint2double_rn(log_shift)); +} + +template <> +__device__ __forceinline__ void +rescale_torus_element(uint64_t element, uint64_t &output, + uint32_t log_shift) { + output = round(__ull2double_rn(element) / + (__ull2double_rn(std::numeric_limits::max()) + 1.0) * + __uint2double_rn(log_shift)); 
+} +#endif // CNCRT_TORUS_H diff --git a/backends/tfhe-cuda-backend/implementation/src/device.cu b/backends/tfhe-cuda-backend/implementation/src/device.cu new file mode 100644 index 000000000..7b811c540 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/device.cu @@ -0,0 +1,350 @@ +#include "device.h" +#include +#include + +/// Unsafe function to create a CUDA stream, must check first that GPU exists +cuda_stream_t *cuda_create_stream(uint32_t gpu_index) { + cudaSetDevice(gpu_index); + cuda_stream_t *stream = new cuda_stream_t(gpu_index); + return stream; +} + +/// Unsafe function to destroy CUDA stream, must check first the GPU exists +int cuda_destroy_stream(cuda_stream_t *stream) { + stream->release(); + return 0; +} + +/// Unsafe function that will try to allocate even if gpu_index is invalid +/// or if there's not enough memory. A safe wrapper around it must call +/// cuda_check_valid_malloc() first +void *cuda_malloc(uint64_t size, uint32_t gpu_index) { + cudaSetDevice(gpu_index); + void *ptr; + cudaMalloc((void **)&ptr, size); + check_cuda_error(cudaGetLastError()); + + return ptr; +} + +/// Allocates a size-byte array at the device memory. Tries to do it +/// asynchronously. +void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream) { + cudaSetDevice(stream->gpu_index); + void *ptr; + +#ifndef CUDART_VERSION +#error CUDART_VERSION Undefined! 
+#elif (CUDART_VERSION >= 11020) + int support_async_alloc; + check_cuda_error(cudaDeviceGetAttribute(&support_async_alloc, + cudaDevAttrMemoryPoolsSupported, + stream->gpu_index)); + + if (support_async_alloc) { + check_cuda_error(cudaMallocAsync((void **)&ptr, size, stream->stream)); + } else { + check_cuda_error(cudaMalloc((void **)&ptr, size)); + } +#else + check_cuda_error(cudaMalloc((void **)&ptr, size)); +#endif + return ptr; +} + +/// Checks that allocation is valid +/// 0: valid +/// -1: invalid, not enough memory in device +/// -2: invalid, gpu index doesn't exist +int cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) { + + if (gpu_index >= cuda_get_number_of_gpus()) { + // error code: invalid gpu_index + return -2; + } + cudaSetDevice(gpu_index); + size_t total_mem, free_mem; + cudaMemGetInfo(&free_mem, &total_mem); + if (size > free_mem) { + // error code: not enough memory + return -1; + } + return 0; +} + +/// Returns +/// -> 0 if Cooperative Groups is not supported. 
+/// -> 1 otherwise +int cuda_check_support_cooperative_groups() { + int cooperative_groups_supported = 0; + cudaDeviceGetAttribute(&cooperative_groups_supported, + cudaDevAttrCooperativeLaunch, 0); + + return cooperative_groups_supported > 0; +} + +/// Tries to copy memory to the GPU asynchronously +/// 0: success +/// -1: error, invalid device pointer +/// -2: error, gpu index doesn't exist +/// -3: error, zero copy size +int cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size, + cuda_stream_t *stream) { + if (size == 0) { + // error code: zero copy size + return -3; + } + + if (stream->gpu_index >= cuda_get_number_of_gpus()) { + // error code: invalid gpu_index + return -2; + } + cudaPointerAttributes attr; + cudaPointerGetAttributes(&attr, dest); + if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) { + // error code: invalid device pointer + return -1; + } + + cudaSetDevice(stream->gpu_index); + check_cuda_error( + cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream->stream)); + return 0; +} + +/// Tries to copy memory to the GPU synchronously +/// 0: success +/// -1: error, invalid device pointer +/// -2: error, gpu index doesn't exist +/// -3: error, zero copy size +int cuda_memcpy_to_gpu(void *dest, void *src, uint64_t size) { + if (size == 0) { + // error code: zero copy size + return -3; + } + + cudaPointerAttributes attr; + cudaPointerGetAttributes(&attr, dest); + if (attr.type != cudaMemoryTypeDevice) { + // error code: invalid device pointer + return -1; + } + + check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyHostToDevice)); + return 0; +} + +/// Tries to copy memory to the CPU synchronously +/// 0: success +/// -1: error, invalid device pointer +/// -2: error, gpu index doesn't exist +/// -3: error, zero copy size +int cuda_memcpy_to_cpu(void *dest, void *src, uint64_t size) { + if (size == 0) { + // error code: zero copy size + return -3; + } + + cudaPointerAttributes attr; + 
cudaPointerGetAttributes(&attr, src); + if (attr.type != cudaMemoryTypeDevice) { + // error code: invalid device pointer + return -1; + } + + check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToHost)); + return 0; +} + +/// Tries to copy memory within a GPU asynchronously +/// 0: success +/// -1: error, invalid device pointer +/// -2: error, gpu index doesn't exist +/// -3: error, zero copy size +int cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size, + cuda_stream_t *stream) { + if (size == 0) { + // error code: zero copy size + return -3; + } + + if (stream->gpu_index >= cuda_get_number_of_gpus()) { + // error code: invalid gpu_index + return -2; + } + cudaPointerAttributes attr_dest; + cudaPointerGetAttributes(&attr_dest, dest); + if (attr_dest.device != stream->gpu_index && + attr_dest.type != cudaMemoryTypeDevice) { + // error code: invalid device pointer + return -1; + } + cudaPointerAttributes attr_src; + cudaPointerGetAttributes(&attr_src, src); + if (attr_src.device != stream->gpu_index && + attr_src.type != cudaMemoryTypeDevice) { + // error code: invalid device pointer + return -1; + } + if (attr_src.device != attr_dest.device) { + // error code: different devices + return -1; + } + + cudaSetDevice(stream->gpu_index); + check_cuda_error(cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice, + stream->stream)); + return 0; +} + +/// Synchronizes device +/// 0: success +/// -2: error, gpu index doesn't exist +int cuda_synchronize_device(uint32_t gpu_index) { + if (gpu_index >= cuda_get_number_of_gpus()) { + // error code: invalid gpu_index + return -2; + } + cudaSetDevice(gpu_index); + cudaDeviceSynchronize(); + return 0; +} + +int cuda_memset_async(void *dest, uint64_t val, uint64_t size, + cuda_stream_t *stream) { + if (size == 0) { + // error code: zero copy size + return -3; + } + + if (stream->gpu_index >= cuda_get_number_of_gpus()) { + // error code: invalid gpu_index + return -2; + } + cudaPointerAttributes attr; + 
cudaPointerGetAttributes(&attr, dest); + if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) { + // error code: invalid device pointer + return -1; + } + cudaSetDevice(stream->gpu_index); + check_cuda_error(cudaMemsetAsync(dest, val, size, stream->stream)); + return 0; +} + +template +__global__ void cuda_set_value_kernel(Torus *array, Torus value, Torus n) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index < n) + array[index] = value; +} + +template +void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value, + Torus n) { + int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + + // Launch the kernel + cuda_set_value_kernel<<>>(d_array, value, + n); +} + +/// Explicitly instantiate cuda_set_value_async for 32 and 64 bits +template void cuda_set_value_async(cudaStream_t *stream, uint64_t *d_array, + uint64_t value, uint64_t n); +template void cuda_set_value_async(cudaStream_t *stream, uint32_t *d_array, + uint32_t value, uint32_t n); + +/// Tries to copy memory from the GPU to the CPU asynchronously +/// 0: success +/// -1: error, invalid device pointer +/// -2: error, gpu index doesn't exist +/// -3: error, zero copy size +int cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size, + cuda_stream_t *stream) { + if (size == 0) { + // error code: zero copy size + return -3; + } + + if (stream->gpu_index >= cuda_get_number_of_gpus()) { + // error code: invalid gpu_index + return -2; + } + cudaPointerAttributes attr; + cudaPointerGetAttributes(&attr, src); + if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) { + // error code: invalid device pointer + return -1; + } + + cudaSetDevice(stream->gpu_index); + check_cuda_error( + cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream->stream)); + return 0; +} + +/// Return number of GPUs available +int cuda_get_number_of_gpus() { + int num_gpus; + cudaGetDeviceCount(&num_gpus); + return num_gpus; +} + +/// Drop a 
cuda array +int cuda_drop(void *ptr, uint32_t gpu_index) { + if (gpu_index >= cuda_get_number_of_gpus()) { + // error code: invalid gpu_index + return -2; + } + cudaSetDevice(gpu_index); + check_cuda_error(cudaFree(ptr)); + return 0; +} + +/// Drop a cuda array. Tries to do it asynchronously +int cuda_drop_async(void *ptr, cuda_stream_t *stream) { + + cudaSetDevice(stream->gpu_index); +#ifndef CUDART_VERSION +#error CUDART_VERSION Undefined! +#elif (CUDART_VERSION >= 11020) + int support_async_alloc; + check_cuda_error(cudaDeviceGetAttribute(&support_async_alloc, + cudaDevAttrMemoryPoolsSupported, + stream->gpu_index)); + + if (support_async_alloc) { + check_cuda_error(cudaFreeAsync(ptr, stream->stream)); + } else { + check_cuda_error(cudaFree(ptr)); + } +#else + check_cuda_error(cudaFree(ptr)); +#endif + return 0; +} + +/// Get the maximum size for the shared memory +int cuda_get_max_shared_memory(uint32_t gpu_index) { + if (gpu_index >= cuda_get_number_of_gpus()) { + // error code: invalid gpu_index + return -2; + } + cudaSetDevice(gpu_index); + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, gpu_index); + int max_shared_memory = 0; + if (prop.major >= 6) { + max_shared_memory = prop.sharedMemPerMultiprocessor; + } else { + max_shared_memory = prop.sharedMemPerBlock; + } + return max_shared_memory; +} + +int cuda_synchronize_stream(cuda_stream_t *stream) { + stream->synchronize(); + return 0; +} diff --git a/backends/tfhe-cuda-backend/implementation/src/fft/bnsmfft.cuh b/backends/tfhe-cuda-backend/implementation/src/fft/bnsmfft.cuh new file mode 100644 index 000000000..75bb5dd33 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/fft/bnsmfft.cuh @@ -0,0 +1,725 @@ +#ifndef GPU_BOOTSTRAP_FFT_CUH +#define GPU_BOOTSTRAP_FFT_CUH + +#include "polynomial/functions.cuh" +#include "polynomial/parameters.cuh" +#include "twiddles.cuh" +#include "types/complex/operations.cuh" + +/* + * Direct negacyclic FFT: + * - before the FFT the N real coefficients 
are stored into a + * N/2 sized complex with the even coefficients in the real part + * and the odd coefficients in the imaginary part. This is referred to + * as the half-size FFT + * - when calling NSMFFT_direct for the forward negacyclic FFT of PBS, + * opt is divided by 2 because the butterfly pattern is always applied + * between pairs of coefficients + * - instead of twisting each coefficient A_j before the FFT by + * multiplying by the w^j roots of unity (aka twiddles, w=exp(-i pi /N)), + * the FFT is modified, and for each level k of the FFT the twiddle: + * w_j,k = exp(-i pi j/2^k) + * is replaced with: + * \zeta_j,k = exp(-i pi (2j-1)/2^k) + */ +template __device__ void NSMFFT_direct(double2 *A) { + + /* We don't perform bit reversal here, since twiddles are already reversed + * Each thread is always in charge of "opt/2" pairs of coefficients, + * which is why we always loop through N/2 by N/opt strides + * The pragma unroll instruction tells the compiler to unroll the + * full loop, which should increase performance + */ + + size_t tid = threadIdx.x; + size_t twid_id; + size_t i1, i2; + double2 u, v, w; + // level 1 + // we don't perform an actual complex multiplication on level 1 since we have + // only one twiddle, its real and imaginary parts are equal, so we can multiply + // it with simpler operations +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + i1 = tid; + i2 = tid + params::degree / 2; + + u = A[i1]; + v = A[i2] * (double2){0.707106781186547461715008466854, + 0.707106781186547461715008466854}; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 2 + // from this level on there is more than one twiddle and none of them has equal + // real and imag parts, so complete complex multiplication is needed + // for each level params::degree / 2^level represents number of coefficients + // inside divided chunk of specific level + // + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < 
params::opt / 2; ++i) { + twid_id = tid / (params::degree / 4); + i1 = 2 * (params::degree / 4) * twid_id + (tid & (params::degree / 4 - 1)); + i2 = i1 + params::degree / 4; + + w = negtwiddles[twid_id + 2]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 3 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 8); + i1 = 2 * (params::degree / 8) * twid_id + (tid & (params::degree / 8 - 1)); + i2 = i1 + params::degree / 8; + + w = negtwiddles[twid_id + 4]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 4 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 16); + i1 = + 2 * (params::degree / 16) * twid_id + (tid & (params::degree / 16 - 1)); + i2 = i1 + params::degree / 16; + + w = negtwiddles[twid_id + 8]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 5 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 32); + i1 = + 2 * (params::degree / 32) * twid_id + (tid & (params::degree / 32 - 1)); + i2 = i1 + params::degree / 32; + + w = negtwiddles[twid_id + 16]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 6 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 64); + i1 = + 2 * (params::degree / 64) * twid_id + (tid & (params::degree / 64 - 1)); + i2 = i1 + params::degree / 64; + + w = negtwiddles[twid_id + 32]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + 
__syncthreads(); + + // level 7 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 128); + i1 = 2 * (params::degree / 128) * twid_id + + (tid & (params::degree / 128 - 1)); + i2 = i1 + params::degree / 128; + + w = negtwiddles[twid_id + 64]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + + // from level 8, we need to check size of params degree, because we support + // minimum actual polynomial size = 256, when compressed size is halfed and + // minimum supported compressed size is 128, so we always need first 7 + // levels of butterfy operation, since butterfly levels are hardcoded + // we need to check if polynomial size is big enough to require specific level + // of butterfly. + if constexpr (params::degree >= 256) { + // level 8 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 256); + i1 = 2 * (params::degree / 256) * twid_id + + (tid & (params::degree / 256 - 1)); + i2 = i1 + params::degree / 256; + + w = negtwiddles[twid_id + 128]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + if constexpr (params::degree >= 512) { + // level 9 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 512); + i1 = 2 * (params::degree / 512) * twid_id + + (tid & (params::degree / 512 - 1)); + i2 = i1 + params::degree / 512; + + w = negtwiddles[twid_id + 256]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + if constexpr (params::degree >= 1024) { + // level 10 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 1024); + i1 = 2 * (params::degree / 
1024) * twid_id + + (tid & (params::degree / 1024 - 1)); + i2 = i1 + params::degree / 1024; + + w = negtwiddles[twid_id + 512]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + if constexpr (params::degree >= 2048) { + // level 11 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 2048); + i1 = 2 * (params::degree / 2048) * twid_id + + (tid & (params::degree / 2048 - 1)); + i2 = i1 + params::degree / 2048; + + w = negtwiddles[twid_id + 1024]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + if constexpr (params::degree >= 4096) { + // level 12 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 4096); + i1 = 2 * (params::degree / 4096) * twid_id + + (tid & (params::degree / 4096 - 1)); + i2 = i1 + params::degree / 4096; + + w = negtwiddles[twid_id + 2048]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + // compressed size = 8192 is actual polynomial size = 16384. + // from this size, twiddles can't fit in constant memory, + // so from here, butterfly operation access device memory. 
+ if constexpr (params::degree >= 8192) { + // level 13 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 8192); + i1 = 2 * (params::degree / 8192) * twid_id + + (tid & (params::degree / 8192 - 1)); + i2 = i1 + params::degree / 8192; + + w = negtwiddles13[twid_id]; + u = A[i1]; + v = A[i2] * w; + + A[i1] += v; + A[i2] = u - v; + + tid += params::degree / params::opt; + } + __syncthreads(); + } +} + +/* + * negacyclic inverse fft + */ +template __device__ void NSMFFT_inverse(double2 *A) { + + /* We don't make bit reverse here, since twiddles are already reversed + * Each thread is always in charge of "opt/2" pairs of coefficients, + * which is why we always loop through N/2 by N/opt strides + * The pragma unroll instruction tells the compiler to unroll the + * full loop, which should increase performance + */ + + size_t tid = threadIdx.x; + size_t twid_id; + size_t i1, i2; + double2 u, w; + + // divide input by compressed polynomial size + tid = threadIdx.x; + for (size_t i = 0; i < params::opt; ++i) { + A[tid] /= params::degree; + tid += params::degree / params::opt; + } + __syncthreads(); + + // none of the twiddles have equal real and imag part, so + // complete complex multiplication has to be done + // here we have more than one twiddle + // mapping in backward fft is reversed + // butterfly operation is started from last level + + // compressed size = 8192 is actual polynomial size = 16384. 
+ // twiddles for this size can't fit in constant memory so + // butterfly operation for this level acess device memory to fetch + // twiddles + if constexpr (params::degree >= 8192) { + // level 13 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 8192); + i1 = 2 * (params::degree / 8192) * twid_id + + (tid & (params::degree / 8192 - 1)); + i2 = i1 + params::degree / 8192; + + w = negtwiddles13[twid_id]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + if constexpr (params::degree >= 4096) { + // level 12 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 4096); + i1 = 2 * (params::degree / 4096) * twid_id + + (tid & (params::degree / 4096 - 1)); + i2 = i1 + params::degree / 4096; + + w = negtwiddles[twid_id + 2048]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + if constexpr (params::degree >= 2048) { + // level 11 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 2048); + i1 = 2 * (params::degree / 2048) * twid_id + + (tid & (params::degree / 2048 - 1)); + i2 = i1 + params::degree / 2048; + + w = negtwiddles[twid_id + 1024]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + if constexpr (params::degree >= 1024) { + // level 10 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 1024); + i1 = 2 * (params::degree / 1024) * twid_id + + (tid & (params::degree / 1024 - 1)); + i2 = i1 + params::degree / 1024; + + w = negtwiddles[twid_id + 512]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + 
tid += params::degree / params::opt; + } + __syncthreads(); + } + + if constexpr (params::degree >= 512) { + // level 9 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 512); + i1 = 2 * (params::degree / 512) * twid_id + + (tid & (params::degree / 512 - 1)); + i2 = i1 + params::degree / 512; + + w = negtwiddles[twid_id + 256]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + if constexpr (params::degree >= 256) { + // level 8 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 256); + i1 = 2 * (params::degree / 256) * twid_id + + (tid & (params::degree / 256 - 1)); + i2 = i1 + params::degree / 256; + + w = negtwiddles[twid_id + 128]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + } + + // below level 8, we don't need to check size of params degree, because we + // support minimum actual polynomial size = 256, when compressed size is + // halfed and minimum supported compressed size is 128, so we always need + // last 7 levels of butterfy operation, since butterfly levels are hardcoded + // we don't need to check if polynomial size is big enough to require + // specific level of butterfly. 
+ // level 7 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 128); + i1 = 2 * (params::degree / 128) * twid_id + + (tid & (params::degree / 128 - 1)); + i2 = i1 + params::degree / 128; + + w = negtwiddles[twid_id + 64]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 6 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 64); + i1 = + 2 * (params::degree / 64) * twid_id + (tid & (params::degree / 64 - 1)); + i2 = i1 + params::degree / 64; + + w = negtwiddles[twid_id + 32]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 5 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 32); + i1 = + 2 * (params::degree / 32) * twid_id + (tid & (params::degree / 32 - 1)); + i2 = i1 + params::degree / 32; + + w = negtwiddles[twid_id + 16]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 4 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 16); + i1 = + 2 * (params::degree / 16) * twid_id + (tid & (params::degree / 16 - 1)); + i2 = i1 + params::degree / 16; + + w = negtwiddles[twid_id + 8]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 3 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 8); + i1 = 2 * (params::degree / 8) * twid_id + (tid & (params::degree / 8 - 1)); + i2 = i1 + params::degree / 8; + + w = negtwiddles[twid_id + 4]; + u = A[i1] - 
A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 2 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 4); + i1 = 2 * (params::degree / 4) * twid_id + (tid & (params::degree / 4 - 1)); + i2 = i1 + params::degree / 4; + + w = negtwiddles[twid_id + 2]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); + + // level 1 + tid = threadIdx.x; +#pragma unroll + for (size_t i = 0; i < params::opt / 2; ++i) { + twid_id = tid / (params::degree / 2); + i1 = 2 * (params::degree / 2) * twid_id + (tid & (params::degree / 2 - 1)); + i2 = i1 + params::degree / 2; + + w = negtwiddles[twid_id + 1]; + u = A[i1] - A[i2]; + + A[i1] += A[i2]; + A[i2] = u * conjugate(w); + + tid += params::degree / params::opt; + } + __syncthreads(); +} + +/* + * global batch fft + * does fft in half size + * unrolling half size fft result in half size + 1 elements + * this function must be called with actual degree + * function takes as input already compressed input + */ +template +__global__ void batch_NSMFFT(double2 *d_input, double2 *d_output, + double2 *buffer) { + extern __shared__ double2 sharedMemoryFFT[]; + double2 *fft = (SMD == NOSM) ? 
&buffer[blockIdx.x * params::degree / 2] + : sharedMemoryFFT; + int tid = threadIdx.x; + +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + fft[tid] = d_input[blockIdx.x * (params::degree / 2) + tid]; + tid = tid + params::degree / params::opt; + } + __syncthreads(); + NSMFFT_direct>(fft); + __syncthreads(); + + tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + d_output[blockIdx.x * (params::degree / 2) + tid] = fft[tid]; + tid = tid + params::degree / params::opt; + } +} + +/* + * global batch polynomial multiplication + * only used for fft tests + * d_input1 and d_output must not have the same pointer + * d_input1 can be modified inside the function + */ +template +__global__ void batch_polynomial_mul(double2 *d_input1, double2 *d_input2, + double2 *d_output, double2 *buffer) { + extern __shared__ double2 sharedMemoryFFT[]; + double2 *fft = (SMD == NOSM) ? &buffer[blockIdx.x * params::degree / 2] + : sharedMemoryFFT; + + // Move first polynomial into shared memory(if possible otherwise it will + // be moved in device buffer) + int tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + fft[tid] = d_input1[blockIdx.x * (params::degree / 2) + tid]; + tid = tid + params::degree / params::opt; + } + + // Perform direct negacyclic fourier transform + __syncthreads(); + NSMFFT_direct>(fft); + __syncthreads(); + + // Put the result of direct fft inside input1 + tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + d_input1[blockIdx.x * (params::degree / 2) + tid] = fft[tid]; + tid = tid + params::degree / params::opt; + } + __syncthreads(); + + // Move first polynomial into shared memory(if possible otherwise it will + // be moved in device buffer) + tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + fft[tid] = d_input2[blockIdx.x * (params::degree / 2) + tid]; + tid = tid + params::degree / params::opt; + } + + // Perform direct 
negacyclic fourier transform on the second polynomial + __syncthreads(); + NSMFFT_direct>(fft); + __syncthreads(); + + // calculate pointwise multiplication inside fft buffer + tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + fft[tid] *= d_input1[blockIdx.x * (params::degree / 2) + tid]; + tid = tid + params::degree / params::opt; + } + + // Perform backward negacyclic fourier transform + __syncthreads(); + NSMFFT_inverse>(fft); + __syncthreads(); + + // copy results in output buffer + tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + d_output[blockIdx.x * (params::degree / 2) + tid] = fft[tid]; + tid = tid + params::degree / params::opt; + } +} + +#endif // GPU_BOOTSTRAP_FFT_CUH diff --git a/backends/tfhe-cuda-backend/implementation/src/fft/twiddles.cu b/backends/tfhe-cuda-backend/implementation/src/fft/twiddles.cu new file mode 100644 index 000000000..788f40696 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/fft/twiddles.cu @@ -0,0 +1,8197 @@ +#include "cuComplex.h" + +__constant__ double2 negtwiddles[4096] = { + {0, 0}, + {0.707106781186547461715008466854, 0.707106781186547572737310929369}, + {0.92387953251128673848313610506, 0.382683432365089781779232680492}, + {-0.382683432365089781779232680492, 0.92387953251128673848313610506}, + {0.980785280403230430579242238309, 0.19509032201612824808378832131}, + {-0.19509032201612824808378832131, 0.980785280403230430579242238309}, + {0.555570233019602177648721408332, 0.831469612302545235671402679145}, + {-0.831469612302545235671402679145, 0.555570233019602177648721408332}, + {0.995184726672196928731750631414, 0.0980171403295606036287779261329}, + {-0.0980171403295606036287779261329, 0.995184726672196928731750631414}, + {0.634393284163645487794269683945, 0.773010453362736993376813643408}, + {-0.773010453362736993376813643408, 0.634393284163645487794269683945}, + {0.881921264348355049556005269551, 0.471396736825997642039709489836}, + 
{-0.471396736825997642039709489836, 0.881921264348355049556005269551}, + {0.290284677254462331053019852334, 0.956940335732208824381928025105}, + {-0.956940335732208824381928025105, 0.290284677254462331053019852334}, + {0.998795456205172405006464941835, 0.0490676743274180149345653489945}, + {-0.0490676743274180149345653489945, 0.998795456205172405006464941835}, + {0.671558954847018441114414599724, 0.740951125354959105884233849793}, + {-0.740951125354959105884233849793, 0.671558954847018441114414599724}, + {0.903989293123443338195954765979, 0.427555093430282084909777040593}, + {-0.427555093430282084909777040593, 0.903989293123443338195954765979}, + {0.336889853392220051109262612954, 0.941544065183020806308888950298}, + {-0.941544065183020806308888950298, 0.336889853392220051109262612954}, + {0.970031253194543974238683858857, 0.242980179903263870944130076168}, + {-0.242980179903263870944130076168, 0.970031253194543974238683858857}, + {0.514102744193221772306401362584, 0.857728610000272118085717920621}, + {-0.857728610000272118085717920621, 0.514102744193221772306401362584}, + {0.803207531480644942867286317778, 0.595699304492433356905678465409}, + {-0.595699304492433356905678465409, 0.803207531480644942867286317778}, + {0.1467304744553617479319029826, 0.989176509964781014438983675063}, + {-0.989176509964781014438983675063, 0.1467304744553617479319029826}, + {0.999698818696204249967252053466, 0.0245412285229122881236030195851}, + {-0.0245412285229122881236030195851, 0.999698818696204249967252053466}, + {0.689540544737066940506053924764, 0.724247082951467002764900371403}, + {-0.724247082951467002764900371403, 0.689540544737066940506053924764}, + {0.914209755703530690951197357208, 0.405241314004989860997341111215}, + {-0.405241314004989860997341111215, 0.914209755703530690951197357208}, + {0.359895036534988110865640464908, 0.932992798834738845670244700159}, + {-0.932992798834738845670244700159, 0.359895036534988110865640464908}, + {0.975702130038528570032951847679, 
0.219101240156869797592875670489}, + {-0.219101240156869797592875670489, 0.975702130038528570032951847679}, + {0.534997619887097153323907150479, 0.844853565249707116890931501985}, + {-0.844853565249707116890931501985, 0.534997619887097153323907150479}, + {0.817584813151583711388070696557, 0.575808191417845338655467912758}, + {-0.575808191417845338655467912758, 0.817584813151583711388070696557}, + {0.170961888760301217171644339032, 0.985277642388941221618381405278}, + {-0.985277642388941221618381405278, 0.170961888760301217171644339032}, + {0.992479534598709967063712156232, 0.122410675199216195663254325154}, + {-0.122410675199216195663254325154, 0.992479534598709967063712156232}, + {0.615231590580626819253495796147, 0.788346427626606338634474013816}, + {-0.788346427626606338634474013816, 0.615231590580626819253495796147}, + {0.870086991108711460540803273034, 0.492898192229784037898809856415}, + {-0.492898192229784037898809856415, 0.870086991108711460540803273034}, + {0.26671275747489836538406393629, 0.963776065795439840222513794288}, + {-0.963776065795439840222513794288, 0.26671275747489836538406393629}, + {0.949528180593036674750351266994, 0.313681740398891462096031546025}, + {-0.313681740398891462096031546025, 0.949528180593036674750351266994}, + {0.449611329654606595163102156221, 0.893224301195515324458540362684}, + {-0.893224301195515324458540362684, 0.449611329654606595163102156221}, + {0.757208846506484567484562830941, 0.653172842953776755514638807654}, + {-0.653172842953776755514638807654, 0.757208846506484567484562830941}, + {0.0735645635996674263079597722026, 0.997290456678690206970827603072}, + {-0.997290456678690206970827603072, 0.0735645635996674263079597722026}, + {0.999924701839144502990563978528, 0.0122715382857199253874291855482}, + {-0.0122715382857199253874291855482, 0.999924701839144502990563978528}, + {0.69837624940897291558883352991, 0.715730825283818705706551099865}, + {-0.715730825283818705706551099865, 0.69837624940897291558883352991}, + 
{0.919113851690057770404962411703, 0.393992040061048098831264496766}, + {-0.393992040061048098831264496766, 0.919113851690057770404962411703}, + {0.37131719395183754306444257054, 0.928506080473215589243807244202}, + {-0.928506080473215589243807244202, 0.37131719395183754306444257054}, + {0.97831737071962765472932233024, 0.207111376192218532565902933129}, + {-0.207111376192218532565902933129, 0.97831737071962765472932233024}, + {0.545324988422046463831804885558, 0.838224705554838078747081908659}, + {-0.838224705554838078747081908659, 0.545324988422046463831804885558}, + {0.824589302785025290987164225953, 0.56573181078361323148584460796}, + {-0.56573181078361323148584460796, 0.824589302785025290987164225953}, + {0.183039887955140950781540709613, 0.983105487431216285010293631785}, + {-0.983105487431216285010293631785, 0.183039887955140950781540709613}, + {0.993906970002356060511772284372, 0.110222207293883059375794175594}, + {-0.110222207293883059375794175594, 0.993906970002356060511772284372}, + {0.624859488142386343412226779037, 0.780737228572094488221466690447}, + {-0.780737228572094488221466690447, 0.624859488142386343412226779037}, + {0.876070094195406601222941844753, 0.48218377207912271886769417506}, + {-0.48218377207912271886769417506, 0.876070094195406601222941844753}, + {0.278519689385053115238122245501, 0.960430519415565786545130322338}, + {-0.960430519415565786545130322338, 0.278519689385053115238122245501}, + {0.95330604035419386210747916266, 0.302005949319228084171129466995}, + {-0.302005949319228084171129466995, 0.95330604035419386210747916266}, + {0.460538710958240005144403994564, 0.887639620402853934955089698633}, + {-0.887639620402853934955089698633, 0.460538710958240005144403994564}, + {0.7651672656224589585960416116, 0.643831542889791497152884858224}, + {-0.643831542889791497152884858224, 0.7651672656224589585960416116}, + {0.0857973123444398938497457152152, 0.996312612182778001290728298045}, + {-0.996312612182778001290728298045, 
0.0857973123444398938497457152152}, + {0.99811811290014917918966830257, 0.0613207363022085782944259335636}, + {-0.0613207363022085782944259335636, 0.99811811290014917918966830257}, + {0.662415777590171783728578702721, 0.749136394523459370198281703779}, + {-0.749136394523459370198281703779, 0.662415777590171783728578702721}, + {0.8986744656939538167250702827, 0.438616238538527603019190337363}, + {-0.438616238538527603019190337363, 0.8986744656939538167250702827}, + {0.325310292162262926218829761638, 0.945607325380521279711842908}, + {-0.945607325380521279711842908, 0.325310292162262926218829761638}, + {0.966976471044852070590991388599, 0.254865659604514571690003776894}, + {-0.254865659604514571690003776894, 0.966976471044852070590991388599}, + {0.503538383725717575423175276228, 0.863972856121586696431791096984}, + {-0.863972856121586696431791096984, 0.503538383725717575423175276228}, + {0.795836904608883566325516767392, 0.605511041404325545123299434636}, + {-0.605511041404325545123299434636, 0.795836904608883566325516767392}, + {0.134580708507126195483394326402, 0.990902635427780009713671915961}, + {-0.990902635427780009713671915961, 0.134580708507126195483394326402}, + {0.987301418157858434732077057561, 0.158858143333861445700705417039}, + {-0.158858143333861445700705417039, 0.987301418157858434732077057561}, + {0.58579785745643886407663103455, 0.810457198252594768206336084404}, + {-0.810457198252594768206336084404, 0.58579785745643886407663103455}, + {0.851355193105265195541164757742, 0.524589682678468949283967504016}, + {-0.524589682678468949283967504016, 0.851355193105265195541164757742}, + {0.231058108280671109513448868711, 0.972939952205560176778931236186}, + {-0.972939952205560176778931236186, 0.231058108280671109513448868711}, + {0.937339011912574959772825877735, 0.348418680249434564721866536274}, + {-0.348418680249434564721866536274, 0.937339011912574959772825877735}, + {0.416429560097637152527028092663, 0.909167983090522380251741196844}, + 
{-0.909167983090522380251741196844, 0.416429560097637152527028092663}, + {0.732654271672412815696873167326, 0.680600997795453022121137109934}, + {-0.680600997795453022121137109934, 0.732654271672412815696873167326}, + {0.0368072229413588317137318028927, 0.999322384588349543754759451986}, + {-0.999322384588349543754759451986, 0.0368072229413588317137318028927}, + {0.999981175282601109088886914833, 0.0061358846491544752690949771079}, + {-0.0061358846491544752690949771079, 0.999981175282601109088886914833}, + {0.702754744457225299925084982533, 0.711432195745216433557800428389}, + {-0.711432195745216433557800428389, 0.702754744457225299925084982533}, + {0.921514039342041901825552940863, 0.388345046698826301678764139069}, + {-0.388345046698826301678764139069, 0.921514039342041901825552940863}, + {0.377007410216418259452098027396, 0.926210242138311379278547974536}, + {-0.926210242138311379278547974536, 0.377007410216418259452098027396}, + {0.979569765685440518865334524889, 0.201104634842091900548410876581}, + {-0.201104634842091900548410876581, 0.979569765685440518865334524889}, + {0.550457972936604811309280194109, 0.834862874986380010255970773869}, + {-0.834862874986380010255970773869, 0.550457972936604811309280194109}, + {0.828045045257755796264120817796, 0.560661576197336031235352038493}, + {-0.560661576197336031235352038493, 0.828045045257755796264120817796}, + {0.18906866414980622037589341744, 0.981963869109555242964404442318}, + {-0.981963869109555242964404442318, 0.18906866414980622037589341744}, + {0.994564570734255415374036601861, 0.104121633872054572544918471522}, + {-0.104121633872054572544918471522, 0.994564570734255415374036601861}, + {0.629638238914926984257647291088, 0.776888465673232442298967725947}, + {-0.776888465673232442298967725947, 0.629638238914926984257647291088}, + {0.879012226428633525188161002006, 0.476799230063322143635673455719}, + {-0.476799230063322143635673455719, 0.879012226428633525188161002006}, + {0.284407537211271821409042104278, 
0.958703474895871599059660184139}, + {-0.958703474895871599059660184139, 0.284407537211271821409042104278}, + {0.955141168305770671409504757321, 0.296150888243623788831371257402}, + {-0.296150888243623788831371257402, 0.955141168305770671409504757321}, + {0.465976495767966181205110842711, 0.884797098430937789537154003483}, + {-0.884797098430937789537154003483, 0.465976495767966181205110842711}, + {0.769103337645579698822473346809, 0.639124444863775731384691880521}, + {-0.639124444863775731384691880521, 0.769103337645579698822473346809}, + {0.0919089564971327238618314936502, 0.995767414467659817134403965611}, + {-0.995767414467659817134403965611, 0.0919089564971327238618314936502}, + {0.998475580573294774211490221205, 0.0551952443496899411434775117868}, + {-0.0551952443496899411434775117868, 0.998475580573294774211490221205}, + {0.666999922303637471365789224365, 0.745057785441465947329220398387}, + {-0.745057785441465947329220398387, 0.666999922303637471365789224365}, + {0.901348847046022028095535461034, 0.433093818853151957259939308642}, + {-0.433093818853151957259939308642, 0.901348847046022028095535461034}, + {0.331106305759876373695504980788, 0.943593458161960385588429289783}, + {-0.943593458161960385588429289783, 0.331106305759876373695504980788}, + {0.968522094274417266746013410739, 0.248927605745720148533450810646}, + {-0.248927605745720148533450810646, 0.968522094274417266746013410739}, + {0.508830142543106989094781056338, 0.860866938637767309394632775366}, + {-0.860866938637767309394632775366, 0.508830142543106989094781056338}, + {0.799537269107905013143522410246, 0.600616479383868862029771662492}, + {-0.600616479383868862029771662492, 0.799537269107905013143522410246}, + {0.14065823933284923863418214296, 0.990058210262297122561392370699}, + {-0.990058210262297122561392370699, 0.14065823933284923863418214296}, + {0.988257567730749464374184753979, 0.152797185258443435351694006386}, + {-0.152797185258443435351694006386, 0.988257567730749464374184753979}, + 
{0.590759701858874164415169616404, 0.806847553543799334008213008929}, + {-0.806847553543799334008213008929, 0.590759701858874164415169616404}, + {0.854557988365400533758986512112, 0.519355990165589531670775613748}, + {-0.519355990165589531670775613748, 0.854557988365400533758986512112}, + {0.237023605994367198013250686017, 0.971503890986251783523641734064}, + {-0.971503890986251783523641734064, 0.237023605994367198013250686017}, + {0.93945922360218991897795604018, 0.342660717311994378331263533255}, + {-0.342660717311994378331263533255, 0.93945922360218991897795604018}, + {0.422000270799799681586961241919, 0.906595704514915334826241632982}, + {-0.906595704514915334826241632982, 0.422000270799799681586961241919}, + {0.736816568877369904022600621829, 0.676092703575315923103516979609}, + {-0.676092703575315923103516979609, 0.736816568877369904022600621829}, + {0.0429382569349408202419304814157, 0.999077727752645361469774343277}, + {-0.999077727752645361469774343277, 0.0429382569349408202419304814157}, + {0.999529417501093142561785498401, 0.0306748031766366259509570824093}, + {-0.0306748031766366259509570824093, 0.999529417501093142561785498401}, + {0.685083667772700355413917350234, 0.728464390448225196372789014276}, + {-0.728464390448225196372789014276, 0.685083667772700355413917350234}, + {0.91170603200542987831767050011, 0.410843171057903910892150634027}, + {-0.410843171057903910892150634027, 0.91170603200542987831767050011}, + {0.354163525420490343798007870646, 0.935183509938947610251602782228}, + {-0.935183509938947610251602782228, 0.354163525420490343798007870646}, + {0.974339382785575858214599520579, 0.225083911359792832040938037608}, + {-0.225083911359792832040938037608, 0.974339382785575858214599520579}, + {0.529803624686294716283896377718, 0.848120344803297232516570147709}, + {-0.848120344803297232516570147709, 0.529803624686294716283896377718}, + {0.81403632970594841378186856673, 0.580813958095764526490256685065}, + {-0.580813958095764526490256685065, 
0.81403632970594841378186856673}, + {0.164913120489969922122241996476, 0.986308097244598669384174627339}, + {-0.986308097244598669384174627339, 0.164913120489969922122241996476}, + {0.991709753669099525197339062288, 0.128498110793793168804555193674}, + {-0.128498110793793168804555193674, 0.991709753669099525197339062288}, + {0.61038280627630947527961779997, 0.792106577300212388870193080948}, + {-0.792106577300212388870193080948, 0.61038280627630947527961779997}, + {0.867046245515692648453409674403, 0.498227666972781868537367699901}, + {-0.498227666972781868537367699901, 0.867046245515692648453409674403}, + {0.26079411791527551400804441073, 0.965394441697689398296233775909}, + {-0.965394441697689398296233775909, 0.26079411791527551400804441073}, + {0.947585591017741091235393469105, 0.319502030816015691883080762636}, + {-0.319502030816015691883080762636, 0.947585591017741091235393469105}, + {0.444122144570429200349792608904, 0.895966249756185106889461167157}, + {-0.895966249756185106889461167157, 0.444122144570429200349792608904}, + {0.753186799043612520421220324351, 0.657806693297078637350239205261}, + {-0.657806693297078637350239205261, 0.753186799043612520421220324351}, + {0.0674439195636640509423642697584, 0.997723066644191636243022003327}, + {-0.997723066644191636243022003327, 0.0674439195636640509423642697584}, + {0.996820299291165667909808689728, 0.0796824379714301117560992793187}, + {-0.0796824379714301117560992793187, 0.996820299291165667909808689728}, + {0.648514401022112441097533519496, 0.761202385484261889736501416337}, + {-0.761202385484261889736501416337, 0.648514401022112441097533519496}, + {0.890448723244757878170219100866, 0.455083587126343835915776026013}, + {-0.455083587126343835915776026013, 0.890448723244757878170219100866}, + {0.307849640041534866607975118313, 0.951435020969008338198591445689}, + {-0.951435020969008338198591445689, 0.307849640041534866607975118313}, + {0.962121404269041580192833862384, 0.272621355449948976623630869653}, + 
{-0.272621355449948976623630869653, 0.962121404269041580192833862384}, + {0.487550160148435940410394096034, 0.87309497841829009079361867407}, + {-0.87309497841829009079361867407, 0.487550160148435940410394096034}, + {0.784556597155575241586689116957, 0.620057211763289206629679028993}, + {-0.620057211763289206629679028993, 0.784556597155575241586689116957}, + {0.11631863091190476622305283172, 0.993211949234794500007694750821}, + {-0.993211949234794500007694750821, 0.11631863091190476622305283172}, + {0.984210092386929025209951760189, 0.177004220412148749463909780388}, + {-0.177004220412148749463909780388, 0.984210092386929025209951760189}, + {0.570780745886967255664501408319, 0.821102514991104648345299210632}, + {-0.821102514991104648345299210632, 0.570780745886967255664501408319}, + {0.841554977436898443698964911164, 0.540171472729892854225397513801}, + {-0.540171472729892854225397513801, 0.841554977436898443698964911164}, + {0.213110319916091361935883696788, 0.977028142657754394839741962642}, + {-0.977028142657754394839741962642, 0.213110319916091361935883696788}, + {0.930766961078983712241097236983, 0.365612997804773853793847138149}, + {-0.365612997804773853793847138149, 0.930766961078983712241097236983}, + {0.399624199845646843609614506931, 0.916679059921042704850435711705}, + {-0.916679059921042704850435711705, 0.399624199845646843609614506931}, + {0.720002507961381654766341853247, 0.693971460889654001569226693391}, + {-0.693971460889654001569226693391, 0.720002507961381654766341853247}, + {0.0184067299058048201854109748865, 0.999830581795823403190581757372}, + {-0.999830581795823403190581757372, 0.0184067299058048201854109748865}, + {0.999995293809576191179644411022, 0.00306795676296597614324257463636}, + {-0.00306795676296597614324257463636, 0.999995293809576191179644411022}, + {0.704934080375904881243798172363, 0.709272826438865688913892881828}, + {-0.709272826438865688913892881828, 0.704934080375904881243798172363}, + {0.922701128333878628495767770801, 
0.385516053843918848897942552867}, + {-0.385516053843918848897942552867, 0.922701128333878628495767770801}, + {0.379847208924051160661150561282, 0.925049240782677584249427127361}, + {-0.925049240782677584249427127361, 0.379847208924051160661150561282}, + {0.980182135968117429491996972502, 0.19809841071795358802276609822}, + {-0.19809841071795358802276609822, 0.980182135968117429491996972502}, + {0.553016705580027578825763612258, 0.833170164701913185112402970844}, + {-0.833170164701913185112402970844, 0.553016705580027578825763612258}, + {0.829761233794523045403934702335, 0.55811853122055610221252663905}, + {-0.55811853122055610221252663905, 0.829761233794523045403934702335}, + {0.19208039704989243734445381051, 0.981379193313754560890060929523}, + {-0.981379193313754560890060929523, 0.19208039704989243734445381051}, + {0.994879330794805616378084778262, 0.10106986275482782167145501262}, + {-0.10106986275482782167145501262, 0.994879330794805616378084778262}, + {0.632018735939809062074346002191, 0.774953106594873930568212472281}, + {-0.774953106594873930568212472281, 0.632018735939809062074346002191}, + {0.880470889052160754495446326473, 0.474100214650550022543740169567}, + {-0.474100214650550022543740169567, 0.880470889052160754495446326473}, + {0.287347459544729511016214473784, 0.957826413027532908017747104168}, + {-0.957826413027532908017747104168, 0.287347459544729511016214473784}, + {0.95604525134999640556543454295, 0.293219162694258628221177787054}, + {-0.293219162694258628221177787054, 0.95604525134999640556543454295}, + {0.468688822035827901135718320802, 0.883363338665731578913664634456}, + {-0.883363338665731578913664634456, 0.468688822035827901135718320802}, + {0.771060524261813817759048106382, 0.636761861236284198994894723}, + {-0.636761861236284198994894723, 0.771060524261813817759048106382}, + {0.0949634953296389916488351445878, 0.995480755491926938560709459125}, + {-0.995480755491926938560709459125, 0.0949634953296389916488351445878}, + 
{0.998640218180265271108453362103, 0.0521317046802833167218338417115}, + {-0.0521317046802833167218338417115, 0.998640218180265271108453362103}, + {0.669282588346636120313348783384, 0.743007952135121718661991963017}, + {-0.743007952135121718661991963017, 0.669282588346636120313348783384}, + {0.90267331823725882600228942465, 0.43032648134008261164851205649}, + {-0.43032648134008261164851205649, 0.90267331823725882600228942465}, + {0.333999651442009382051878674247, 0.942573197601446866045193928585}, + {-0.942573197601446866045193928585, 0.333999651442009382051878674247}, + {0.969281235356548531711951000034, 0.245955050335794594973393145665}, + {-0.245955050335794594973393145665, 0.969281235356548531711951000034}, + {0.511468850437970412592392221995, 0.8593018183570083623479263224}, + {-0.8593018183570083623479263224, 0.511468850437970412592392221995}, + {0.801376171723140240388261190674, 0.598160706996342272923072869162}, + {-0.598160706996342272923072869162, 0.801376171723140240388261190674}, + {0.143695033150294443347050332704, 0.989622017463200887021912421915}, + {-0.989622017463200887021912421915, 0.143695033150294443347050332704}, + {0.988721691960323778580743692146, 0.149764534677321509148484324214}, + {-0.149764534677321509148484324214, 0.988721691960323778580743692146}, + {0.593232295039799795155488482123, 0.805031331142963546554369713704}, + {-0.805031331142963546554369713704, 0.593232295039799795155488482123}, + {0.856147328375194471838938170549, 0.516731799017649873206892152666}, + {-0.516731799017649873206892152666, 0.856147328375194471838938170549}, + {0.240003022448741470951105725362, 0.970772140728950350130332935805}, + {-0.970772140728950350130332935805, 0.240003022448741470951105725362}, + {0.940506070593268295176869742136, 0.339776884406826851225957852876}, + {-0.339776884406826851225957852876, 0.940506070593268295176869742136}, + {0.424779681209108805894913984957, 0.90529675931811881550714815603}, + {-0.90529675931811881550714815603, 
0.424779681209108805894913984957}, + {0.738887324460615113608241699694, 0.673829000378756037825667135621}, + {-0.673829000378756037825667135621, 0.738887324460615113608241699694}, + {0.046003182130914629932583181926, 0.998941293186856871244572175783}, + {-0.998941293186856871244572175783, 0.046003182130914629932583181926}, + {0.999618822495178638298796158779, 0.0276081457789657397361438029293}, + {-0.0276081457789657397361438029293, 0.999618822495178638298796158779}, + {0.687315340891759163355345663149, 0.726359155084346008734996757994}, + {-0.726359155084346008734996757994, 0.687315340891759163355345663149}, + {0.912962190428398212560523461434, 0.408044162864978687821349012665}, + {-0.408044162864978687821349012665, 0.912962190428398212560523461434}, + {0.357030961233430033097135947173, 0.93409255040425887006705352178}, + {-0.93409255040425887006705352178, 0.357030961233430033097135947173}, + {0.975025345066994120202252815943, 0.222093620973203537127815820895}, + {-0.222093620973203537127815820895, 0.975025345066994120202252815943}, + {0.532403127877198012463111354009, 0.846490938774052126269964446692}, + {-0.846490938774052126269964446692, 0.532403127877198012463111354009}, + {0.815814410806733780745503281651, 0.578313796411655589579936531663}, + {-0.578313796411655589579936531663, 0.815814410806733780745503281651}, + {0.167938294974731172626292163841, 0.98579750916756747614044797956}, + {-0.98579750916756747614044797956, 0.167938294974731172626292163841}, + {0.99209931314219179654401159496, 0.125454983411546233673661276953}, + {-0.125454983411546233673661276953, 0.99209931314219179654401159496}, + {0.612810082429409708204559592559, 0.790230221437310031973311197362}, + {-0.790230221437310031973311197362, 0.612810082429409708204559592559}, + {0.868570705971340895068522058864, 0.495565261825772540582590863778}, + {-0.495565261825772540582590863778, 0.868570705971340895068522058864}, + {0.263754678974831402449297002022, 0.964589793289812758025902894587}, + 
{-0.964589793289812758025902894587, 0.263754678974831402449297002022}, + {0.948561349915730267490232563432, 0.316593375556165845807754521957}, + {-0.316593375556165845807754521957, 0.948561349915730267490232563432}, + {0.44686884016237415906402929977, 0.894599485631382695949298522464}, + {-0.894599485631382695949298522464, 0.44686884016237415906402929977}, + {0.755201376896536547000948758068, 0.655492852999615349673945274844}, + {-0.655492852999615349673945274844, 0.755201376896536547000948758068}, + {0.0705045733896138560048427734728, 0.997511456140303454098727797827}, + {-0.997511456140303454098727797827, 0.0705045733896138560048427734728}, + {0.997060070339482962253896403126, 0.0766238613920314920457954599442}, + {-0.0766238613920314920457954599442, 0.997060070339482962253896403126}, + {0.650846684996380875354304862412, 0.759209188978388072044367618219}, + {-0.759209188978388072044367618219, 0.650846684996380875354304862412}, + {0.891840709392342723127455883514, 0.452349587233770888961004175144}, + {-0.452349587233770888961004175144, 0.891840709392342723127455883514}, + {0.310767152749611474948920886163, 0.95048607394948170234982853799}, + {-0.95048607394948170234982853799, 0.310767152749611474948920886163}, + {0.96295326687368387741372544042, 0.269668325572915090759806844289}, + {-0.269668325572915090759806844289, 0.96295326687368387741372544042}, + {0.490226483288291159379213013381, 0.871595086655950979093177011237}, + {-0.871595086655950979093177011237, 0.490226483288291159379213013381}, + {0.786455213599085767306462457782, 0.617647307937803979882573912619}, + {-0.617647307937803979882573912619, 0.786455213599085767306462457782}, + {0.11936521481099135466585892118, 0.992850414459865104888081077661}, + {-0.992850414459865104888081077661, 0.11936521481099135466585892118}, + {0.984748501801904208008409113972, 0.173983873387463822135501345656}, + {-0.173983873387463822135501345656, 0.984748501801904208008409113972}, + {0.573297166698042204302510072012, 
0.819347520076797009025426632434}, + {-0.819347520076797009025426632434, 0.573297166698042204302510072012}, + {0.843208239641845436196376795124, 0.537587076295645505119580320752}, + {-0.537587076295645505119580320752, 0.843208239641845436196376795124}, + {0.216106797076219492304005598271, 0.976369731330021140003339041868}, + {-0.976369731330021140003339041868, 0.216106797076219492304005598271}, + {0.931884265581668147504501575895, 0.36275572436739722537168972849}, + {-0.36275572436739722537168972849, 0.931884265581668147504501575895}, + {0.402434650859418430179914594191, 0.915448716088267833157487984863}, + {-0.915448716088267833157487984863, 0.402434650859418430179914594191}, + {0.72212819392921534511486925112, 0.691759258364157747500655659678}, + {-0.691759258364157747500655659678, 0.72212819392921534511486925112}, + {0.0214740802754695078724544998749, 0.999769405351215278976440004044}, + {-0.999769405351215278976440004044, 0.0214740802754695078724544998749}, + {0.999882347454212561110864498914, 0.0153392062849881001540541802797}, + {-0.0153392062849881001540541802797, 0.999882347454212561110864498914}, + {0.696177131491462986012663805013, 0.717870045055731709204849266825}, + {-0.717870045055731709204849266825, 0.696177131491462986012663805013}, + {0.917900775621390496716855977866, 0.396809987416710308050227240528}, + {-0.396809987416710308050227240528, 0.917900775621390496716855977866}, + {0.368466829953372321249105425522, 0.929640895843181214175388049625}, + {-0.929640895843181214175388049625, 0.368466829953372321249105425522}, + {0.977677357824509929429268595413, 0.210111836880469610155941495577}, + {-0.210111836880469610155941495577, 0.977677357824509929429268595413}, + {0.54275078486451588943850765645, 0.839893794195999521257078868075}, + {-0.839893794195999521257078868075, 0.54275078486451588943850765645}, + {0.822849781375826316853760999948, 0.568258952670131600726222131925}, + {-0.568258952670131600726222131925, 0.822849781375826316853760999948}, + 
{0.180022901405699514709723985106, 0.983662419211730254531289574516}, + {-0.983662419211730254531289574516, 0.180022901405699514709723985106}, + {0.993564135520595304029711769545, 0.113270952177564346308180631695}, + {-0.113270952177564346308180631695, 0.993564135520595304029711769545}, + {0.622461279374149967225093860179, 0.782650596166575729384362603014}, + {-0.782650596166575729384362603014, 0.622461279374149967225093860179}, + {0.874586652278176113206598074612, 0.484869248000791119856955901923}, + {-0.484869248000791119856955901923, 0.874586652278176113206598074612}, + {0.275571819310958143756096205834, 0.961280485811320639655264130852}, + {-0.961280485811320639655264130852, 0.275571819310958143756096205834}, + {0.95237501271976587879919406987, 0.30492922973540237396861130037}, + {-0.30492922973540237396861130037, 0.95237501271976587879919406987}, + {0.45781330359887723036038664759, 0.889048355854664573705292696104}, + {-0.889048355854664573705292696104, 0.45781330359887723036038664759}, + {0.763188417263381269073363455391, 0.646176012983316394588939601817}, + {-0.646176012983316394588939601817, 0.763188417263381269073363455391}, + {0.0827402645493756916383887300981, 0.996571145790554835386387821927}, + {-0.996571145790554835386387821927, 0.0827402645493756916383887300981}, + {0.997925286198595995479365683423, 0.0643826309298574650519242368318}, + {-0.0643826309298574650519242368318, 0.997925286198595995479365683423}, + {0.660114342067420478699091290764, 0.751165131909686478728360725654}, + {-0.751165131909686478728360725654, 0.660114342067420478699091290764}, + {0.897324580705418317627675151016, 0.441371268731716670519205081291}, + {-0.441371268731716670519205081291, 0.897324580705418317627675151016}, + {0.322407678801069852436711471455, 0.946600913083283534987799612281}, + {-0.946600913083283534987799612281, 0.322407678801069852436711471455}, + {0.966190003445412504134992559557, 0.257831102162158987134432663879}, + {-0.257831102162158987134432663879, 
0.966190003445412504134992559557}, + {0.500885382611240825845300150831, 0.865513624090569089197799712565}, + {-0.865513624090569089197799712565, 0.500885382611240825845300150831}, + {0.793975477554337172314546933194, 0.60794978496777363208281030893}, + {-0.60794978496777363208281030893, 0.793975477554337172314546933194}, + {0.131540028702883116107358318914, 0.99131085984611544414946138204}, + {-0.99131085984611544414946138204, 0.131540028702883116107358318914}, + {0.986809401814185527257450303296, 0.16188639378011182579086835176}, + {-0.16188639378011182579086835176, 0.986809401814185527257450303296}, + {0.58330865293769829094117085333, 0.812250586585203881995198571531}, + {-0.812250586585203881995198571531, 0.58330865293769829094117085333}, + {0.849741768000852548681223197491, 0.527199134781901390667258056055}, + {-0.527199134781901390667258056055, 0.849741768000852548681223197491}, + {0.228072083170885731018273645532, 0.973644249650811977048192602524}, + {-0.973644249650811977048192602524, 0.228072083170885731018273645532}, + {0.936265667170278259590077141183, 0.351292756085567092760868490586}, + {-0.351292756085567092760868490586, 0.936265667170278259590077141183}, + {0.413638312238434557865929264153, 0.910441292258067247367137042602}, + {-0.910441292258067247367137042602, 0.413638312238434557865929264153}, + {0.730562769227827590867718754453, 0.682845546385248081122654184583}, + {-0.682845546385248081122654184583, 0.730562769227827590867718754453}, + {0.0337411718513775868433235416433, 0.999430604555461732374510575028}, + {-0.999430604555461732374510575028, 0.0337411718513775868433235416433}, + {0.999204758618363886313318289467, 0.039872927587739810662004202868}, + {-0.039872927587739810662004202868, 0.999204758618363886313318289467}, + {0.678350043129861468571561999852, 0.734738878095963499070819580083}, + {-0.734738878095963499070819580083, 0.678350043129861468571561999852}, + {0.907886116487666261498645781103, 0.419216888363223960656256394941}, + 
{-0.419216888363223960656256394941, 0.907886116487666261498645781103}, + {0.345541324963989038288048050163, 0.938403534063108057949875728809}, + {-0.938403534063108057949875728809, 0.345541324963989038288048050163}, + {0.972226497078936269247151358286, 0.234041958583543402427906698904}, + {-0.234041958583543402427906698904, 0.972226497078936269247151358286}, + {0.521975292937154389250054009608, 0.852960604930363630593603829766}, + {-0.852960604930363630593603829766, 0.521975292937154389250054009608}, + {0.8086561815881749826218083399, 0.588281548222645334078606538242}, + {-0.588281548222645334078606538242, 0.8086561815881749826218083399}, + {0.155828397654265232707970767478, 0.987784141644572177831662429526}, + {-0.987784141644572177831662429526, 0.155828397654265232707970767478}, + {0.990485084256457093410119796317, 0.137620121586486038323116076754}, + {-0.137620121586486038323116076754, 0.990485084256457093410119796317}, + {0.603066598540348164370072936435, 0.797690840943391155093422639766}, + {-0.797690840943391155093422639766, 0.603066598540348164370072936435}, + {0.862423956111040501681941350398, 0.50618664534515533937053533009}, + {-0.50618664534515533937053533009, 0.862423956111040501681941350398}, + {0.251897818154216968089542660891, 0.96775383709347551075552473776}, + {-0.96775383709347551075552473776, 0.251897818154216968089542660891}, + {0.944604837261480256849210945802, 0.328209843579092497289906305014}, + {-0.328209843579092497289906305014, 0.944604837261480256849210945802}, + {0.435857079922255474802028629711, 0.900015892016160279354153317399}, + {-0.900015892016160279354153317399, 0.435857079922255474802028629711}, + {0.747100605980180132448253971233, 0.664710978203344904358118583332}, + {-0.664710978203344904358118583332, 0.747100605980180132448253971233}, + {0.0582582645004357593809807269736, 0.998301544933892892608184865821}, + {-0.998301544933892892608184865821, 0.0582582645004357593809807269736}, + {0.996044700901251967017913102609, 
0.0888535525825246003117641180324}, + {-0.0888535525825246003117641180324, 0.996044700901251967017913102609}, + {0.641481012808583161977082909289, 0.76713891193582040006759825701}, + {-0.76713891193582040006759825701, 0.641481012808583161977082909289}, + {0.886222530148880638378727780946, 0.463259783551860204742212090423}, + {-0.463259783551860204742212090423, 0.886222530148880638378727780946}, + {0.299079826308040475080218811854, 0.954228095109105667326332422817}, + {-0.954228095109105667326332422817, 0.299079826308040475080218811854}, + {0.959571513081984517334888096229, 0.281464937925757996417530648614}, + {-0.281464937925757996417530648614, 0.959571513081984517334888096229}, + {0.479493757660153008259840134997, 0.877545290207261352577461366309}, + {-0.877545290207261352577461366309, 0.479493757660153008259840134997}, + {0.778816512381475978266109905235, 0.627251815495144082746037383913}, + {-0.627251815495144082746037383913, 0.778816512381475978266109905235}, + {0.107172424956808842733124720326, 0.994240449453187902228989969444}, + {-0.994240449453187902228989969444, 0.107172424956808842733124720326}, + {0.982539302287441240757459581801, 0.18605515166344663291475569622}, + {-0.18605515166344663291475569622, 0.982539302287441240757459581801}, + {0.563199344013834091171588625002, 0.826321062845663423246378442855}, + {-0.826321062845663423246378442855, 0.563199344013834091171588625002}, + {0.836547727223512005423344817245, 0.547894059173100189674698867748}, + {-0.547894059173100189674698867748, 0.836547727223512005423344817245}, + {0.204108966092816868087567172552, 0.978948175319062197097252919775}, + {-0.978948175319062197097252919775, 0.204108966092816868087567172552}, + {0.927362525650401114951648651186, 0.374164062971457989092982643342}, + {-0.374164062971457989092982643342, 0.927362525650401114951648651186}, + {0.391170384302253870689725090415, 0.920318276709110594246965320053}, + {-0.920318276709110594246965320053, 0.391170384302253870689725090415}, + 
{0.713584868780793635245629502606, 0.700568793943248335764906187251}, + {-0.700568793943248335764906187251, 0.713584868780793635245629502606}, + {0.00920375478205981943646829535055, 0.999957644551963897860957786179}, + {-0.999957644551963897860957786179, 0.00920375478205981943646829535055}, + {0.99999882345170187925020854891, 0.00153398018628476550014039236913}, + {-0.00153398018628476550014039236913, 0.99999882345170187925020854891}, + {0.706021261449339854188167464599, 0.70819063703319529157198530811}, + {-0.70819063703319529157198530811, 0.706021261449339854188167464599}, + {0.923291416719527635592612568871, 0.384100195016935042069405881193}, + {-0.384100195016935042069405881193, 0.923291416719527635592612568871}, + {0.381265769222162376195228716824, 0.924465474325262603905173364183}, + {-0.924465474325262603905173364183, 0.381265769222162376195228716824}, + {0.980484861773469384971235740522, 0.196594597670080223350552728334}, + {-0.196594597670080223350552728334, 0.980484861773469384971235740522}, + {0.554294121453620114436944277259, 0.832320867767929684077898855321}, + {-0.832320867767929684077898855321, 0.554294121453620114436944277259}, + {0.830616400308846314359811913164, 0.556845037275160104073279399017}, + {-0.556845037275160104073279399017, 0.830616400308846314359811913164}, + {0.193585587295803607243982469299, 0.981083391150486705534206066659}, + {-0.981083391150486705534206066659, 0.193585587295803607243982469299}, + {0.995033199438118631796612589824, 0.0995436186600693329040723256185}, + {-0.0995436186600693329040723256185, 0.995033199438118631796612589824}, + {0.633206755050057301659194308741, 0.773982690606822787415808306832}, + {-0.773982690606822787415808306832, 0.633206755050057301659194308741}, + {0.881197113471222093217249948793, 0.472749031950342790686647731491}, + {-0.472749031950342790686647731491, 0.881197113471222093217249948793}, + {0.288816408206049479723276363075, 0.957384500788975856266915798187}, + {-0.957384500788975856266915798187, 
0.288816408206049479723276363075}, + {0.956493918902395101611091376981, 0.291752263234989261952989636484}, + {-0.291752263234989261952989636484, 0.956493918902395101611091376981}, + {0.470043332459595619710057690099, 0.882643339979562790986733489262}, + {-0.882643339979562790986733489262, 0.470043332459595619710057690099}, + {0.772036397150384523513366730185, 0.63557832048855611439819313091}, + {-0.63557832048855611439819313091, 0.772036397150384523513366730185}, + {0.0964904313552525927377701009391, 0.995333912140482279795605791151}, + {-0.995333912140482279795605791151, 0.0964904313552525927377701009391}, + {0.99871901223387293811128984089, 0.050599749036899281662282845673}, + {-0.050599749036899281662282845673, 0.99871901223387293811128984089}, + {0.670421560380173087168031997862, 0.741980411720831067867720776121}, + {-0.741980411720831067867720776121, 0.670421560380173087168031997862}, + {0.903332368494511817047509794065, 0.428941292055329492782789202465}, + {-0.428941292055329492782789202465, 0.903332368494511817047509794065}, + {0.335445147084531603010049138902, 0.94205973977101731264838235802}, + {-0.94205973977101731264838235802, 0.335445147084531603010049138902}, + {0.969657385124292447997618182853, 0.244467902747824150644362362073}, + {-0.244467902747824150644362362073, 0.969657385124292447997618182853}, + {0.512786400633562955420075013535, 0.858516224264442739944058757828}, + {-0.858516224264442739944058757828, 0.512786400633562955420075013535}, + {0.802292795538115721676319935796, 0.59693070806219650226154271877}, + {-0.59693070806219650226154271877, 0.802292795538115721676319935796}, + {0.145212924652847463757865398293, 0.989400427791380376874030844192}, + {-0.989400427791380376874030844192, 0.145212924652847463757865398293}, + {0.98895026451030298986211164447, 0.148247678986896030961517567448}, + {-0.148247678986896030961517567448, 0.98895026451030298986211164447}, + {0.594466499184664431965074982145, 0.804120377398265695489953941433}, + 
{-0.804120377398265695489953941433, 0.594466499184664431965074982145}, + {0.856938977417828762206397641421, 0.515417878019463038263836551778}, + {-0.515417878019463038263836551778, 0.856938977417828762206397641421}, + {0.241491885302869330187647278763, 0.970402838687555502339421309443}, + {-0.970402838687555502339421309443, 0.241491885302869330187647278763}, + {0.941026175050889257533981435699, 0.338333766965541127280658884047}, + {-0.338333766965541127280658884047, 0.941026175050889257533981435699}, + {0.426167888726799615195517390021, 0.904644090578246240497151120508}, + {-0.904644090578246240497151120508, 0.426167888726799615195517390021}, + {0.739920095459516202751615310262, 0.672694769070772968788674006646}, + {-0.672694769070772968788674006646, 0.739920095459516202751615310262}, + {0.0475354841569593025707440858696, 0.998869549914283560987371402007}, + {-0.998869549914283560987371402007, 0.0475354841569593025707440858696}, + {0.999659996743959222698094890802, 0.0260747178291038973763082964297}, + {-0.0260747178291038973763082964297, 0.999659996743959222698094890802}, + {0.688428752784090436378505728499, 0.72530397237306076796414799901}, + {-0.72530397237306076796414799901, 0.688428752784090436378505728499}, + {0.913587047945250807501338385919, 0.406643216870369028637099972912}, + {-0.406643216870369028637099972912, 0.913587047945250807501338385919}, + {0.358463420633736540299452144609, 0.93354377297883617270457534687}, + {-0.93354377297883617270457534687, 0.358463420633736540299452144609}, + {0.975364885116656976649096577603, 0.220597690108873506487086046945}, + {-0.220597690108873506487086046945, 0.975364885116656976649096577603}, + {0.53370100180715296378508583075, 0.845673246987299065402510223066}, + {-0.845673246987299065402510223066, 0.53370100180715296378508583075}, + {0.816700572866827845253112627688, 0.577061672855679552718299873959}, + {-0.577061672855679552718299873959, 0.816700572866827845253112627688}, + {0.169450291233967959003692271835, 
0.985538735312176061853506325861}, + {-0.985538735312176061853506325861, 0.169450291233967959003692271835}, + {0.992290591348257366988150351972, 0.123932975118512173073881399432}, + {-0.123932975118512173073881399432, 0.992290591348257366988150351972}, + {0.614021558931038380357847472624, 0.789289253168885651668063019315}, + {-0.789289253168885651668063019315, 0.614021558931038380357847472624}, + {0.869329871348606730840913314751, 0.494232308515959728456579114209}, + {-0.494232308515959728456579114209, 0.869329871348606730840913314751}, + {0.265234030285511790392405373495, 0.964184063951745828902062385168}, + {-0.964184063951745828902062385168, 0.265234030285511790392405373495}, + {0.949045881852700556891022642958, 0.31513792875252238934180581964}, + {-0.31513792875252238934180581964, 0.949045881852700556891022642958}, + {0.448240612285219885979614673488, 0.89391294514520325265038991347}, + {-0.89391294514520325265038991347, 0.448240612285219885979614673488}, + {0.75620600141439453523162228521, 0.654333617831800551378762520471}, + {-0.654333617831800551378762520471, 0.75620600141439453523162228521}, + {0.0720346532468893185896519071321, 0.997402129901275302792384991335}, + {-0.997402129901275302792384991335, 0.0720346532468893185896519071321}, + {0.997176436735326188198769159499, 0.0750943008479213192085666150888}, + {-0.0750943008479213192085666150888, 0.997176436735326188198769159499}, + {0.652010531096959500274579113466, 0.758209909813015281443426829355}, + {-0.758209909813015281443426829355, 0.652010531096959500274579113466}, + {0.892533555402764577912932963955, 0.450980989045103863865904259001}, + {-0.450980989045103863865904259001, 0.892533555402764577912932963955}, + {0.312224813921824939644267260519, 0.950008245001842999144514578802}, + {-0.950008245001842999144514578802, 0.312224813921824939644267260519}, + {0.963365799780954046305225801916, 0.26819085706340317631912739671}, + {-0.26819085706340317631912739671, 0.963365799780954046305225801916}, + 
{0.491562916106549951944515441937, 0.870842063470078864284573683108}, + {-0.870842063470078864284573683108, 0.491562916106549951944515441937}, + {0.787401747029031429114809270686, 0.616440174530853646217565255938}, + {-0.616440174530853646217565255938, 0.787401747029031429114809270686}, + {0.120888087235777069716746723316, 0.992666142448948018994769881829}, + {-0.992666142448948018994769881829, 0.120888087235777069716746723316}, + {0.985014231012239838136679281888, 0.172473083996795978345417665878}, + {-0.172473083996795978345417665878, 0.985014231012239838136679281888}, + {0.574553355047715763603832783701, 0.818467129580298657920423011092}, + {-0.818467129580298657920423011092, 0.574553355047715763603832783701}, + {0.844031895490066408349605353578, 0.536292979065963182350174065505}, + {-0.536292979065963182350174065505, 0.844031895490066408349605353578}, + {0.21760427463848364126874912472, 0.976037079039039023875545808551}, + {-0.976037079039039023875545808551, 0.21760427463848364126874912472}, + {0.932439629268462355504709648812, 0.36132580556845428354506566393}, + {-0.36132580556845428354506566393, 0.932439629268462355504709648812}, + {0.403838457567654074420460119654, 0.914830312237946197129190295527}, + {-0.914830312237946197129190295527, 0.403838457567654074420460119654}, + {0.723188489306527459987705697131, 0.690650714134534604582427164132}, + {-0.690650714134534604582427164132, 0.723188489306527459987705697131}, + {0.023007681468839372151968802882, 0.999735288260561683060245741217}, + {-0.999735288260561683060245741217, 0.023007681468839372151968802882}, + {0.999904701082852898075259417965, 0.013805388528060390587737238377}, + {-0.013805388528060390587737238377, 0.999904701082852898075259417965}, + {0.697277510830886515513782342168, 0.716801278521099538565408693103}, + {-0.716801278521099538565408693103, 0.697277510830886515513782342168}, + {0.918508394325212251807499796996, 0.395401478947816298337158968934}, + {-0.395401478947816298337158968934, 
0.918508394325212251807499796996}, + {0.369892447148934100376038713875, 0.929074581259315857018066253659}, + {-0.929074581259315857018066253659, 0.369892447148934100376038713875}, + {0.977998514934557139355320032337, 0.208611851978263485030140600429}, + {-0.208611851978263485030140600429, 0.977998514934557139355320032337}, + {0.544038526730883931215032589535, 0.839060237070312742169164721417}, + {-0.839060237070312742169164721417, 0.544038526730883931215032589535}, + {0.823720511227391427588884198485, 0.566996048825108678315132237913}, + {-0.566996048825108678315132237913, 0.823720511227391427588884198485}, + {0.181531608261124993708435226836, 0.98338511032155118130049231695}, + {-0.98338511032155118130049231695, 0.181531608261124993708435226836}, + {0.993736721940724598844951742649, 0.111746711211126587004471844011}, + {-0.111746711211126587004471844011, 0.993736721940724598844951742649}, + {0.623661117525694530527857750712, 0.781694832071059386713329786289}, + {-0.781694832071059386713329786289, 0.623661117525694530527857750712}, + {0.875329403104110892464007065428, 0.483527078932918741305257981367}, + {-0.483527078932918741305257981367, 0.875329403104110892464007065428}, + {0.277046080306099895551597001031, 0.960856633107679658500899222418}, + {-0.960856633107679658500899222418, 0.277046080306099895551597001031}, + {0.952841647601198715733517019544, 0.303467946572011315620187588138}, + {-0.303467946572011315620187588138, 0.952841647601198715733517019544}, + {0.459176547521944145024974659464, 0.888345033309596354698101094982}, + {-0.888345033309596354698101094982, 0.459176547521944145024974659464}, + {0.764178740536116674064714970882, 0.645004536815543927374960730958}, + {-0.645004536815543927374960730958, 0.764178740536116674064714970882}, + {0.0842688875933240710836003017903, 0.996443051350042630076586647192}, + {-0.996443051350042630076586647192, 0.0842688875933240710836003017903}, + {0.998022873771486240812578216719, 0.0628517575641614062442741328596}, + 
{-0.0628517575641614062442741328596, 0.998022873771486240812578216719}, + {0.661265837839992265401178883621, 0.750151645806215072731504278636}, + {-0.750151645806215072731504278636, 0.661265837839992265401178883621}, + {0.898000579740739879319733063312, 0.43999427130963325582868606034}, + {-0.43999427130963325582868606034, 0.898000579740739879319733063312}, + {0.323859366517852853561976189667, 0.946105232370403448349804875761}, + {-0.946105232370403448349804875761, 0.323859366517852853561976189667}, + {0.96658437447833311928491184517, 0.256348682489942858442333317726}, + {-0.256348682489942858442333317726, 0.96658437447833311928491184517}, + {0.502212474045710788317364858813, 0.864744257519462378169805560901}, + {-0.864744257519462378169805560901, 0.502212474045710788317364858813}, + {0.794907126328237012558020069264, 0.606731127034524475583054936578}, + {-0.606731127034524475583054936578, 0.794907126328237012558020069264}, + {0.133060525157139064589273402817, 0.991107913723276889861324434605}, + {-0.991107913723276889861324434605, 0.133060525157139064589273402817}, + {0.987056571305750973799320036051, 0.160372457242928256881953075208}, + {-0.160372457242928256881953075208, 0.987056571305750973799320036051}, + {0.584553942953015326366994486307, 0.811354847017063729452956977184}, + {-0.811354847017063729452956977184, 0.584553942953015326366994486307}, + {0.850549481265603479762660299457, 0.525895027471084630654729608068}, + {-0.525895027471084630654729608068, 0.850549481265603479762660299457}, + {0.229565365820518868522626121376, 0.973293246054698246716441190074}, + {-0.973293246054698246716441190074, 0.229565365820518868522626121376}, + {0.936803441735921560429289911553, 0.349856129790134917634247813112}, + {-0.349856129790134917634247813112, 0.936803441735921560429289911553}, + {0.415034424476081631460999687988, 0.909805708104652222090180657688}, + {-0.909805708104652222090180657688, 0.415034424476081631460999687988}, + {0.731609381223892518697482501011, 
0.681724074171649818687512834003}, + {-0.681724074171649818687512834003, 0.731609381223892518697482501011}, + {0.0352742388982139470909871192816, 0.999377670388002847801089956192}, + {-0.999377670388002847801089956192, 0.0352742388982139470909871192816}, + {0.999264747286594423592021030345, 0.0383401203735526940885591784536}, + {-0.0383401203735526940885591784536, 0.999264747286594423592021030345}, + {0.679476319899365077681352431682, 0.733697438114660260843891137483}, + {-0.733697438114660260843891137483, 0.679476319899365077681352431682}, + {0.908528118716306121172010534792, 0.417823715820212326921279100134}, + {-0.417823715820212326921279100134, 0.908528118716306121172010534792}, + {0.346980410845923681328883958486, 0.937872376439989885454906470841}, + {-0.937872376439989885454906470841, 0.346980410845923681328883958486}, + {0.97258436893473221296346764575, 0.232550307038775244672379471922}, + {-0.232550307038775244672379471922, 0.97258436893473221296346764575}, + {0.523283103475656430347839886963, 0.852158901623919828871578374674}, + {-0.852158901623919828871578374674, 0.523283103475656430347839886963}, + {0.809557642404051258644415156596, 0.587040393520917969105710199074}, + {-0.587040393520917969105710199074, 0.809557642404051258644415156596}, + {0.157343455616238248051530490557, 0.987543941794359225738730856392}, + {-0.987543941794359225738730856392, 0.157343455616238248051530490557}, + {0.990695025442664634063305584277, 0.136100575175706201003222872714}, + {-0.136100575175706201003222872714, 0.990695025442664634063305584277}, + {0.604289530948156072831523033528, 0.796764810208418827741638779116}, + {-0.796764810208418827741638779116, 0.604289530948156072831523033528}, + {0.863199421712124159711265747319, 0.504863108531267479328619174339}, + {-0.504863108531267479328619174339, 0.863199421712124159711265747319}, + {0.253382036995570159021440304059, 0.967366292222328505445716473332}, + {-0.967366292222328505445716473332, 0.253382036995570159021440304059}, + 
{0.945107193285260605009057144343, 0.326760452320131788983559317785}, + {-0.326760452320131788983559317785, 0.945107193285260605009057144343}, + {0.43723717366104408732496722223, 0.899346236979341573380963836826}, + {-0.899346236979341573380963836826, 0.43723717366104408732496722223}, + {0.748119380450403603788345208159, 0.66356415861203976724880249094}, + {-0.66356415861203976724880249094, 0.748119380450403603788345208159}, + {0.0597895707466398751428471314284, 0.998211003360478188461968329648}, + {-0.998211003360478188461968329648, 0.0597895707466398751428471314284}, + {0.996179828595696981174967277184, 0.0873255352061920731010502549907}, + {-0.0873255352061920731010502549907, 0.996179828595696981174967277184}, + {0.642657033966226864940551877226, 0.766153990196312917326793012762}, + {-0.766153990196312917326793012762, 0.642657033966226864940551877226}, + {0.886932118794342194689761527115, 0.461899790702462731406541251999}, + {-0.461899790702462731406541251999, 0.886932118794342194689761527115}, + {0.300543241417273454541003729901, 0.953768189885990325116438270925}, + {-0.953768189885990325116438270925, 0.300543241417273454541003729901}, + {0.960002145737665957270223771047, 0.279992643080273218014752956151}, + {-0.279992643080273218014752956151, 0.960002145737665957270223771047}, + {0.480839330600333958454228877599, 0.876808723809145651451046887814}, + {-0.876808723809145651451046887814, 0.480839330600333958454228877599}, + {0.779777787923014553683742633439, 0.626056388404343522324779769406}, + {-0.626056388404343522324779769406, 0.779777787923014553683742633439}, + {0.108697444013138716512045789386, 0.994074879304879366337388546526}, + {-0.994074879304879366337388546526, 0.108697444013138716512045789386}, + {0.982823551198705236409125518549, 0.184547736938619616475776297193}, + {-0.184547736938619616475776297193, 0.982823551198705236409125518549}, + {0.564466241520519496077668009093, 0.825456154004377551380855493335}, + {-0.825456154004377551380855493335, 
0.564466241520519496077668009093}, + {0.837387201615661935782952696172, 0.546610166910834860409806879034}, + {-0.546610166910834860409806879034, 0.837387201615661935782952696172}, + {0.205610413053099239100163231342, 0.97863392442942320759158292276}, + {-0.97863392442942320759158292276, 0.205610413053099239100163231342}, + {0.927935394822617887200522091007, 0.372741067009515758545745711672}, + {-0.372741067009515758545745711672, 0.927935394822617887200522091007}, + {0.392581674072951469778303135172, 0.919717146291227360954678715643}, + {-0.919717146291227360954678715643, 0.392581674072951469778303135172}, + {0.714658687862769093079862159357, 0.699473344640283767326138786302}, + {-0.699473344640283767326138786302, 0.714658687862769093079862159357}, + {0.0107376591672644905450795249635, 0.9999423496760239116198931697}, + {-0.9999423496760239116198931697, 0.0107376591672644905450795249635}, + {0.999970586430974139879879203363, 0.00766982873953109701298247458112}, + {-0.00766982873953109701298247458112, 0.999970586430974139879879203363}, + {0.701662594740168565898841279704, 0.712509370564692323668509743584}, + {-0.712509370564692323668509743584, 0.701662594740168565898841279704}, + {0.92091724152918941204148950419, 0.38975817406985646673689416275}, + {-0.38975817406985646673689416275, 0.92091724152918941204148950419}, + {0.375586178489217215048512343856, 0.926787474304581859740892468835}, + {-0.926787474304581859740892468835, 0.375586178489217215048512343856}, + {0.979260122649082020984678820241, 0.202607038844421133427786685388}, + {-0.202607038844421133427786685388, 0.979260122649082020984678820241}, + {0.549176662187719766272664401185, 0.835706284353752604232568046427}, + {-0.835706284353752604232568046427, 0.549176662187719766272664401185}, + {0.827184027273669131297140211245, 0.561931121244689357752122305101}, + {-0.561931121244689357752122305101, 0.827184027273669131297140211245}, + {0.187562128582529602516260069933, 0.9822527413662893724932700934}, + 
{-0.9822527413662893724932700934, 0.187562128582529602516260069933}, + {0.994403680057679095760647669522, 0.105647153713410615893941724153}, + {-0.105647153713410615893941724153, 0.994403680057679095760647669522}, + {0.628445766601832711550912335952, 0.777853404209453036521892954624}, + {-0.777853404209453036521892954624, 0.628445766601832711550912335952}, + {0.878279791656541464206497948908, 0.478147056424843064359464506197}, + {-0.478147056424843064359464506197, 0.878279791656541464206497948908}, + {0.282936570457055336369478482084, 0.959138622461841894306644462631}, + {-0.959138622461841894306644462631, 0.282936570457055336369478482084}, + {0.954685754941338338142031716416, 0.297615707435086196408491332477}, + {-0.297615707435086196408491332477, 0.954685754941338338142031716416}, + {0.464618686306237815841768679093, 0.885510856136199953070331503113}, + {-0.885510856136199953070331503113, 0.464618686306237815841768679093}, + {0.768122028523365418806179150124, 0.640303482184151673273220239935}, + {-0.640303482184151673273220239935, 0.768122028523365418806179150124}, + {0.0903813608778649829611495647441, 0.995907229417411721250630307622}, + {-0.995907229417411721250630307622, 0.0903813608778649829611495647441}, + {0.998389737407340160935120820795, 0.0567268211669077482284251345845}, + {-0.0567268211669077482284251345845, 0.998389737407340160935120820795}, + {0.66585623366550972246358242046, 0.746080073510063779274048556545}, + {-0.746080073510063779274048556545, 0.66585623366550972246358242046}, + {0.900683429228646859066031993279, 0.434475960569655705878489015959}, + {-0.434475960569655705878489015959, 0.900683429228646859066031993279}, + {0.3296584625285874925459950191, 0.944100258491272659178150661319}, + {-0.944100258491272659178150661319, 0.3296584625285874925459950191}, + {0.968139104746362444409157888003, 0.250413006572965279872278188122}, + {-0.250413006572965279872278188122, 0.968139104746362444409157888003}, + {0.5075089910529708703279538895, 
0.861646461143081299205448431167}, + {-0.861646461143081299205448431167, 0.5075089910529708703279538895}, + {0.798614994634760821945462794247, 0.601842247058580026575214105833}, + {-0.601842247058580026575214105833, 0.798614994634760821945462794247}, + {0.139139344163826200739819682894, 0.990272812363169108174076882278}, + {-0.990272812363169108174076882278, 0.139139344163826200739819682894}, + {0.988022017143283526330321819842, 0.154312973013020104939840848601}, + {-0.154312973013020104939840848601, 0.988022017143283526330321819842}, + {0.589521318641063940546587218705, 0.807752817926190358477356312505}, + {-0.807752817926190358477356312505, 0.589521318641063940546587218705}, + {0.853760301138111410423903180344, 0.520666254140367157354774008127}, + {-0.520666254140367157354774008127, 0.853760301138111410423903180344}, + {0.235533059404975486650357652252, 0.971866337480279396388027635112}, + {-0.971866337480279396388027635112, 0.235533059404975486650357652252}, + {0.938932483532064487974366784329, 0.344101425989938813909674308888}, + {-0.344101425989938813909674308888, 0.938932483532064487974366784329}, + {0.4206090744484025090166312566, 0.907241977915295816359275704599}, + {-0.907241977915295816359275704599, 0.4206090744484025090166312566}, + {0.735778589165713481357045111508, 0.677222170137180445870228595595}, + {-0.677222170137180445870228595595, 0.735778589165713481357045111508}, + {0.0414056409770767394618040668774, 0.999142418724816905317709370138}, + {-0.999142418724816905317709370138, 0.0414056409770767394618040668774}, + {0.999481186966166945673251120752, 0.0322080254083045858237710490357}, + {-0.0322080254083045858237710490357, 0.999481186966166945673251120752}, + {0.683965411797315403497066199634, 0.729514438146997012957228889718}, + {-0.729514438146997012957228889718, 0.683965411797315403497066199634}, + {0.91107473405517636066974773712, 0.412241226669882887545526273243}, + {-0.412241226669882887545526273243, 0.91107473405517636066974773712}, + 
{0.352728555755210726463388937191, 0.935725689481080369347409941838}, + {-0.935725689481080369347409941838, 0.352728555755210726463388937191}, + {0.973992962167955833585608615977, 0.226578263845610000659647198518}, + {-0.226578263845610000659647198518, 0.973992962167955833585608615977}, + {0.52850200154222848336615925291, 0.84893205521163961346786663853}, + {-0.84893205521163961346786663853, 0.52850200154222848336615925291}, + {0.81314441484925348291312729998, 0.582061990340775547991825078498}, + {-0.582061990340775547991825078498, 0.81314441484925348291312729998}, + {0.163399949382973225242565717963, 0.986559910264775408172965853737}, + {-0.986559910264775408172965853737, 0.163399949382973225242565717963}, + {0.991511473318743896676608073903, 0.130019222722233346312137314271}, + {-0.130019222722233346312137314271, 0.991511473318743896676608073903}, + {0.609167012336453206344799582439, 0.793041960479443641673924503266}, + {-0.793041960479443641673924503266, 0.609167012336453206344799582439}, + {0.866280954024512994671169963112, 0.49955711254508189389511585432}, + {-0.49955711254508189389511585432, 0.866280954024512994671169963112}, + {0.2593129151328862347369863528, 0.965793358874083684995071052981}, + {-0.965793358874083684995071052981, 0.2593129151328862347369863528}, + {0.947094366352777217166192258446, 0.320955232427875214451518104397}, + {-0.320955232427875214451518104397, 0.947094366352777217166192258446}, + {0.442747227564570022817491690148, 0.896646470178680154994310669281}, + {-0.896646470178680154994310669281, 0.442747227564570022817491690148}, + {0.752176850449042699864321548375, 0.658961292982037316612320410059}, + {-0.658961292982037316612320410059, 0.752176850449042699864321548375}, + {0.0659133527970038185506140848702, 0.997825350411111644532979880751}, + {-0.997825350411111644532979880751, 0.0659133527970038185506140848702}, + {0.996696895202896060439456960012, 0.0812114468095924274537722453715}, + {-0.0812114468095924274537722453715, 
0.996696895202896060439456960012}, + {0.647345968636512059113385930686, 0.762196298134578897887081438967}, + {-0.762196298134578897887081438967, 0.647345968636512059113385930686}, + {0.889749586383072776918368163024, 0.456448982396883917722618662083}, + {-0.456448982396883917722618662083, 0.889749586383072776918368163024}, + {0.306389795370860917866906447671, 0.951906136807932345966776210844}, + {-0.951906136807932345966776210844, 0.306389795370860917866906447671}, + {0.961702076529122540371474769927, 0.274096909868706384294512190536}, + {-0.274096909868706384294512190536, 0.961702076529122540371474769927}, + {0.486210276124486417970160800905, 0.873841843465366863163978905504}, + {-0.873841843465366863163978905504, 0.486210276124486417970160800905}, + {0.783604518609638200921096995444, 0.621259976511087663730847907573}, + {-0.621259976511087663730847907573, 0.783604518609638200921096995444}, + {0.114794926606510083733070359813, 0.993389211148080653046577026544}, + {-0.993389211148080653046577026544, 0.114794926606510083733070359813}, + {0.983937413449218922778527485207, 0.178513770938997506920742353032}, + {-0.178513770938997506920742353032, 0.983937413449218922778527485207}, + {0.56952051934694714052653807812, 0.821977115279241554723910212488}, + {-0.821977115279241554723910212488, 0.56952051934694714052653807812}, + {0.840725374970458072532153437351, 0.54146176585312344453626565155}, + {-0.54146176585312344453626565155, 0.840725374970458072532153437351}, + {0.211611327369227553152342125031, 0.977353900145199960824982099439}, + {-0.977353900145199960824982099439, 0.211611327369227553152342125031}, + {0.930205022892219068886277000274, 0.367040345719767180376891246851}, + {-0.367040345719767180376891246851, 0.930205022892219068886277000274}, + {0.398217562153373616506968346584, 0.917290997008377906318798977736}, + {-0.917290997008377906318798977736, 0.398217562153373616506968346584}, + {0.718937122372804382486322083423, 0.695075113980000880431475707155}, + 
{-0.695075113980000880431475707155, 0.718937122372804382486322083423}, + {0.0168729879472817138885698540207, 0.999857641005823860602674812981}, + {-0.999857641005823860602674812981, 0.0168729879472817138885698540207}, + {0.999801169887884255693677459931, 0.0199404285515144379103968930167}, + {-0.0199404285515144379103968930167, 0.999801169887884255693677459931}, + {0.692866174817424740339788513666, 0.721066199314508105011611860391}, + {-0.721066199314508105011611860391, 0.692866174817424740339788513666}, + {0.916064965799331720752718410949, 0.401029897183575623209605964803}, + {-0.401029897183575623209605964803, 0.916064965799331720752718410949}, + {0.364184789567079891803302871267, 0.931326709081180426075263767416}, + {-0.931326709081180426075263767416, 0.364184789567079891803302871267}, + {0.97670008612871184183745754126, 0.214608810993786758292145577798}, + {-0.214608810993786758292145577798, 0.97670008612871184183745754126}, + {0.538879908531008422478691954893, 0.842382599643185847604343052808}, + {-0.842382599643185847604343052808, 0.538879908531008422478691954893}, + {0.820225982569434686197951123177, 0.572039629324757048500771361432}, + {-0.572039629324757048500771361432, 0.820225982569434686197951123177}, + {0.175494253377271425264893878193, 0.984480455383220931508958528866}, + {-0.984480455383220931508958528866, 0.175494253377271425264893878193}, + {0.993032350197851410023019980144, 0.117842061508324977281425560705}, + {-0.117842061508324977281425560705, 0.993032350197851410023019980144}, + {0.618852987960976319570249870594, 0.785506829564053932202227770176}, + {-0.785506829564053932202227770176, 0.618852987960976319570249870594}, + {0.872346058894391540583512778539, 0.488888896919763171755590747125}, + {-0.488888896919763171755590747125, 0.872346058894391540583512778539}, + {0.271145159526808010586051977953, 0.96253846804435916340025869431}, + {-0.96253846804435916340025869431, 0.271145159526808010586051977953}, + {0.950961666311575082310980633338, 
0.309308760312268726799800333538}, + {-0.309308760312268726799800333538, 0.950961666311575082310980633338}, + {0.453717121000163869926069537541, 0.891145764794583183920906321873}, + {-0.891145764794583183920906321873, 0.453717121000163869926069537541}, + {0.760206681651202420546553639724, 0.649681307390683193681013563037}, + {-0.649681307390683193681013563037, 0.760206681651202420546553639724}, + {0.0781532416327942319744792598613, 0.996941357764982161171474217554}, + {-0.996941357764982161171474217554, 0.0781532416327942319744792598613}, + {0.997618435138519554783442799817, 0.0689743276282667461263287123074}, + {-0.0689743276282667461263287123074, 0.997618435138519554783442799817}, + {0.656650545729429047092651217099, 0.754194975316889171246259593318}, + {-0.754194975316889171246259593318, 0.656650545729429047092651217099}, + {0.895283921038557473082164506195, 0.445496016513981740736483061482}, + {-0.445496016513981740736483061482, 0.895283921038557473082164506195}, + {0.318048077385014893447845452101, 0.948074585922276225069538213575}, + {-0.948074585922276225069538213575, 0.318048077385014893447845452101}, + {0.964993252854920324779186557862, 0.262274707023913644654555810121}, + {-0.262274707023913644654555810121, 0.964993252854920324779186557862}, + {0.496897049022654524463860070682, 0.867809496763303211963602734613}, + {-0.867809496763303211963602734613, 0.496897049022654524463860070682}, + {0.791169330217690203177482999308, 0.611597163926461906413578617503}, + {-0.611597163926461906413578617503, 0.791169330217690203177482999308}, + {0.126976696496885865794723713407, 0.991905700430609327256092910829}, + {-0.991905700430609327256092910829, 0.126976696496885865794723713407}, + {0.986053963346195438965935409215, 0.166425903540464104057861050023}, + {-0.166425903540464104057861050023, 0.986053963346195438965935409215}, + {0.57956455913940574387055448824, 0.814926329056526621563705248263}, + {-0.814926329056526621563705248263, 0.57956455913940574387055448824}, + 
{0.847306638685858315440668775409, 0.531104001151255000756634672143}, + {-0.531104001151255000756634672143, 0.847306638685858315440668775409}, + {0.223589029229789987285315078225, 0.974683510688510668096284916828}, + {-0.974683510688510668096284916828, 0.223589029229789987285315078225}, + {0.934639129819680780641988349089, 0.355597661704783907232751971605}, + {-0.355597661704783907232751971605, 0.934639129819680780641988349089}, + {0.409444148692257592347942818378, 0.912335184623322748009854876727}, + {-0.912335184623322748009854876727, 0.409444148692257592347942818378}, + {0.727412628602375765929366480123, 0.686200311680038588235674978932}, + {-0.686200311680038588235674978932, 0.727412628602375765929366480123}, + {0.0291415087641937221862864504374, 0.999575296046749217637739093334}, + {-0.999575296046749217637739093334, 0.0291415087641937221862864504374}, + {0.999010685854073376965800434846, 0.0444707718549386676887280600567}, + {-0.0444707718549386676887280600567, 0.999010685854073376965800434846}, + {0.674961646102012036152473228867, 0.737852814788465982687171162979}, + {-0.737852814788465982687171162979, 0.674961646102012036152473228867}, + {0.905947297807268459024498952203, 0.423390474143796047279408867325}, + {-0.423390474143796047279408867325, 0.905947297807268459024498952203}, + {0.341219202320282355422875752993, 0.939983753034013935767632119678}, + {-0.939983753034013935767632119678, 0.341219202320282355422875752993}, + {0.97113915844972509283650197176, 0.238513594844318416177131325639}, + {-0.238513594844318416177131325639, 0.97113915844972509283650197176}, + {0.518044504095999336357181164203, 0.855353664735196028701125214866}, + {-0.855353664735196028701125214866, 0.518044504095999336357181164203}, + {0.805940390571176390466234806809, 0.591996694962040992393781380088}, + {-0.591996694962040992393781380088, 0.805940390571176390466234806809}, + {0.151281037957330222187835033765, 0.988490792852696587011962492397}, + {-0.988490792852696587011962492397, 
0.151281037957330222187835033765}, + {0.989841278458820528207695588208, 0.142176803519448058388263689267}, + {-0.142176803519448058388263689267, 0.989841278458820528207695588208}, + {0.599389298400564540791890522087, 0.800457662192622820818144191435}, + {-0.800457662192622820818144191435, 0.599389298400564540791890522087}, + {0.860085390429390139743759391422, 0.510150096706766809084854230605}, + {-0.510150096706766809084854230605, 0.860085390429390139743759391422}, + {0.247441619167773296794621273875, 0.968902804776428872024496286031}, + {-0.968902804776428872024496286031, 0.247441619167773296794621273875}, + {0.943084437466093494784047379653, 0.332553369866044223890355624462}, + {-0.332553369866044223890355624462, 0.943084437466093494784047379653}, + {0.431710658025057258946333149652, 0.902012143902493179759005670348}, + {-0.902012143902493179759005670348, 0.431710658025057258946333149652}, + {0.74403374417992929057419360106, 0.668142041426518451530114361958}, + {-0.668142041426518451530114361958, 0.74403374417992929057419360106}, + {0.0536635376527305266169953767985, 0.998559074229759313645615748101}, + {-0.998559074229759313645615748101, 0.0536635376527305266169953767985}, + {0.995625256380994305693832302495, 0.0934363358457477727325724004004}, + {-0.0934363358457477727325724004004, 0.995625256380994305693832302495}, + {0.637943903621844055074063817301, 0.770082836993348007759152551444}, + {-0.770082836993348007759152551444, 0.637943903621844055074063817301}, + {0.884081258712634987517731133266, 0.467333208741988415102497356202}, + {-0.467333208741988415102497356202, 0.884081258712634987517731133266}, + {0.294685372180514326689859672115, 0.95559433413077110586186790897}, + {-0.95559433413077110586186790897, 0.294685372180514326689859672115}, + {0.958266071408017672261792085919, 0.285877834727080559762413258795}, + {-0.285877834727080559762413258795, 0.958266071408017672261792085919}, + {0.475450281747155867329013290146, 0.879742592800047407131103227584}, + 
{-0.879742592800047407131103227584, 0.475450281747155867329013290146}, + {0.775921699043407575580033608276, 0.63082922962842458147747493058}, + {-0.63082922962842458147747493058, 0.775921699043407575580033608276}, + {0.102595869022436281259302859326, 0.994723121104325702646065110457}, + {-0.994723121104325702646065110457, 0.102595869022436281259302859326}, + {0.981672686196983113049441271869, 0.190574754820252767473220956163}, + {-0.190574754820252767473220956163, 0.981672686196983113049441271869}, + {0.559390711859136025019267890457, 0.828904114771864874988693827618}, + {-0.828904114771864874988693827618, 0.559390711859136025019267890457}, + {0.834017501106018133150143967214, 0.551737988404707446754571265046}, + {-0.551737988404707446754571265046, 0.834017501106018133150143967214}, + {0.199601757621130970754563804803, 0.979877103699517637558358273964}, + {-0.979877103699517637558358273964, 0.199601757621130970754563804803}, + {0.925630830509872715161634459946, 0.378427754808765559602079520118}, + {-0.378427754808765559602079520118, 0.925630830509872715161634459946}, + {0.386931005514388581811857648063, 0.922108668743345072371653259324}, + {-0.922108668743345072371653259324, 0.386931005514388581811857648063}, + {0.71035334685706230661850213437, 0.703845240524484938582361337467}, + {-0.703845240524484938582361337467, 0.71035334685706230661850213437}, + {0.00460192612044857049524715364441, 0.999989411081928403213225919899}, + {-0.999989411081928403213225919899, 0.00460192612044857049524715364441}, + {0.99999970586288222662574298738, 0.00076699031874270448549957279738}, + {-0.00076699031874270448549957279738, 0.99999970586288222662574298738}, + {0.706564229144709510244126704492, 0.707648917255684350990918574098}, + {-0.707648917255684350990918574098, 0.706564229144709510244126704492}, + {0.923585746276256669418103228963, 0.383391926460808663001955665095}, + {-0.383391926460808663001955665095, 0.923585746276256669418103228963}, + {0.381974713146567224075766944225, 
0.924172775251791089878850016248}, + {-0.924172775251791089878850016248, 0.381974713146567224075766944225}, + {0.980635359529608119366628216085, 0.195842517447657876727973302877}, + {-0.195842517447657876727973302877, 0.980635359529608119366628216085}, + {0.554932340462810369530188836507, 0.83189548472657759425885615201}, + {-0.83189548472657759425885615201, 0.554932340462810369530188836507}, + {0.831043250746362316405679848685, 0.55620779874873993442463415704}, + {-0.55620779874873993442463415704, 0.831043250746362316405679848685}, + {0.194338011817988598473050387838, 0.980934624306141644822787384328}, + {-0.980934624306141644822787384328, 0.194338011817988598473050387838}, + {0.995109255753726107407430845342, 0.0987804085497996225972983097563}, + {-0.0987804085497996225972983097563, 0.995109255753726107407430845342}, + {0.633800206031017276941952331981, 0.773496799498899045843813837564}, + {-0.773496799498899045843813837564, 0.633800206031017276941952331981}, + {0.881559448209143781127750116866, 0.472073023242368661200885071594}, + {-0.472073023242368661200885071594, 0.881559448209143781127750116866}, + {0.289550627897843082525497493407, 0.957162699797670102341839992732}, + {-0.957162699797670102341839992732, 0.289550627897843082525497493407}, + {0.956717408723403051062916802039, 0.291018555844085036188317872075}, + {-0.291018555844085036188317872075, 0.956717408723403051062916802039}, + {0.470720173099071603761700544055, 0.882282561676008714179886283091}, + {-0.882282561676008714179886283091, 0.470720173099071603761700544055}, + {0.772523652484441325505315489863, 0.634985989099049463746382571117}, + {-0.634985989099049463746382571117, 0.772523652484441325505315489863}, + {0.0972538144483632710501908036349, 0.995259612149133388037114400504}, + {-0.995259612149133388037114400504, 0.0972538144483632710501908036349}, + {0.998757527991183335913660812366, 0.0498337263401072774748890026331}, + {-0.0498337263401072774748890026331, 0.998757527991183335913660812366}, + 
{0.670990454976794215014024302945, 0.741465986630563289594419984496}, + {-0.741465986630563289594419984496, 0.670990454976794215014024302945}, + {0.903661096609247982414103717019, 0.42824831870653196075338087212}, + {-0.42824831870653196075338087212, 0.903661096609247982414103717019}, + {0.336167599117744519965356175817, 0.941802179495997648928096168675}, + {-0.941802179495997648928096168675, 0.336167599117744519965356175817}, + {0.969844604426714829159550390614, 0.243724113013852161646610738899}, + {-0.243724113013852161646610738899, 0.969844604426714829159550390614}, + {0.513444723436543459804681788228, 0.858122669538086135787580133183}, + {-0.858122669538086135787580133183, 0.513444723436543459804681788228}, + {0.802750399628069155610887719376, 0.596315181675743710698611721455}, + {-0.596315181675743710698611721455, 0.802750399628069155610887719376}, + {0.145971742489812206233779079412, 0.989288759864625166784435350564}, + {-0.989288759864625166784435350564, 0.145971742489812206233779079412}, + {0.989063678157881542851725953369, 0.147489120103153570351395273974}, + {-0.147489120103153570351395273974, 0.989063678157881542851725953369}, + {0.595083076874569960601490947738, 0.803664190826924085264693076169}, + {-0.803664190826924085264693076169, 0.595083076874569960601490947738}, + {0.857334045882815587447112193331, 0.514760462516501093865883831313}, + {-0.514760462516501093865883831313, 0.857334045882815587447112193331}, + {0.242236103853696010945739658382, 0.970217331317979159166497993283}, + {-0.970217331317979159166497993283, 0.242236103853696010945739658382}, + {0.941285396983928657199669487454, 0.337611909483074568161242723363}, + {-0.337611909483074568161242723363, 0.941285396983928657199669487454}, + {0.426861616634386487056929126993, 0.904316957844028324053908818314}, + {-0.904316957844028324053908818314, 0.426861616634386487056929126993}, + {0.74043582819689801599594147774, 0.672127059656411729449132508307}, + {-0.672127059656411729449132508307, 
0.74043582819689801599594147774}, + {0.0483015934494801374432526586133, 0.998832796853527993263810458302}, + {-0.998832796853527993263810458302, 0.0483015934494801374432526586133}, + {0.999679701762987926727532794757, 0.0253079806200245706337970119648}, + {-0.0253079806200245706337970119648, 0.999679701762987926727532794757}, + {0.688984851416597043893830232264, 0.724775740845711280435637036135}, + {-0.724775740845711280435637036135, 0.688984851416597043893830232264}, + {0.913898670635911680726337635861, 0.405942384840402514800672406636}, + {-0.405942384840402514800672406636, 0.913898670635911680726337635861}, + {0.359179334232336500143389912409, 0.933268560415712045141845010221}, + {-0.933268560415712045141845010221, 0.359179334232336500143389912409}, + {0.975533794518291363928597093036, 0.219849529798778697831096451409}, + {-0.219849529798778697831096451409, 0.975533794518291363928597093036}, + {0.53434946801913751901480509332, 0.845263654741918224466701303754}, + {-0.845263654741918224466701303754, 0.53434946801913751901480509332}, + {0.817142933361272971737321313412, 0.576435101687721829222255109926}, + {-0.576435101687721829222255109926, 0.817142933361272971737321313412}, + {0.170206140061078065039978923778, 0.985408478695768419441947116866}, + {-0.985408478695768419441947116866, 0.170206140061078065039978923778}, + {0.992385354870851665864961432817, 0.123171861388280484694845995364}, + {-0.123171861388280484694845995364, 0.992385354870851665864961432817}, + {0.614626755540375047104362238315, 0.78881807241842027966782779913}, + {-0.78881807241842027966782779913, 0.614626755540375047104362238315}, + {0.869708687042265560229736820474, 0.493565395548774765721589119494}, + {-0.493565395548774765721589119494, 0.869708687042265560229736820474}, + {0.265973472112875586326197208109, 0.963980348415994114930072100833}, + {-0.963980348415994114930072100833, 0.265973472112875586326197208109}, + {0.949287310443502119206016232056, 0.314409927055336657630846275424}, + 
{-0.314409927055336657630846275424, 0.949287310443502119206016232056}, + {0.448926103015743316326791045867, 0.893568886002136020252351045201}, + {-0.893568886002136020252351045201, 0.448926103015743316326791045867}, + {0.756707646536245670532139229181, 0.653753422685936169678200258204}, + {-0.653753422685936169678200258204, 0.756707646536245670532139229181}, + {0.0727996298363516730622890804625, 0.997346586646633226358460433403}, + {-0.997346586646633226358460433403, 0.0727996298363516730622890804625}, + {0.997233740030466275783282981138, 0.0743294540868457559446014215609}, + {-0.0743294540868457559446014215609, 0.997233740030466275783282981138}, + {0.65259187897686254942186678818, 0.757709601030268076193863180379}, + {-0.757709601030268076193863180379, 0.65259187897686254942186678818}, + {0.892879190928051680309351922915, 0.450296291798708669951167848922}, + {-0.450296291798708669951167848922, 0.892879190928051680309351922915}, + {0.312953369211560195051191612947, 0.949768492159606680935723943549}, + {-0.949768492159606680935723943549, 0.312953369211560195051191612947}, + {0.963571216210257319723098135, 0.267451885936677680177808724693}, + {-0.267451885936677680177808724693, 0.963571216210257319723098135}, + {0.492230698951486078662753698154, 0.870464783325397672975043406041}, + {-0.870464783325397672975043406041, 0.492230698951486078662753698154}, + {0.787874319070900219763586846966, 0.615836063695984980981279477419}, + {-0.615836063695984980981279477419, 0.787874319070900219763586846966}, + {0.121649416999105530745595160624, 0.992573130476428810986533335381}, + {-0.992573130476428810986533335381, 0.121649416999105530745595160624}, + {0.985146226468662233877182643482, 0.171717536887049965210749746802}, + {-0.171717536887049965210749746802, 0.985146226468662233877182643482}, + {0.575180942414845186583249869727, 0.818026211977813444420348787389}, + {-0.818026211977813444420348787389, 0.575180942414845186583249869727}, + {0.844442978751910655610402045568, 
0.535645457029741089982621815579}, + {-0.535645457029741089982621815579, 0.844442978751910655610402045568}, + {0.218352821623346321500136468785, 0.975869891578341031035392916237}, + {-0.975869891578341031035392916237, 0.218352821623346321500136468785}, + {0.932716488398140253224255502573, 0.36061052712066227199372292489}, + {-0.36061052712066227199372292489, 0.932716488398140253224255502573}, + {0.40454000477655299716772674401, 0.914520302965104447956434796652}, + {-0.914520302965104447956434796652, 0.40454000477655299716772674401}, + {0.723717999001323497587634392403, 0.690095832418599952617910275876}, + {-0.690095832418599952617910275876, 0.723717999001323497587634392403}, + {0.0237744619888275547647804586404, 0.999717347532362188289312143752}, + {-0.999717347532362188289312143752, 0.0237744619888275547647804586404}, + {0.999914995573113474236492947966, 0.0130384672419873327148254205099}, + {-0.0130384672419873327148254205099, 0.999914995573113474236492947966}, + {0.697827085376777289660310543695, 0.716266262582953117110662333289}, + {-0.716266262582953117110662333289, 0.697827085376777289660310543695}, + {0.918811393264169939953944776789, 0.394696875599433616432776261718}, + {-0.394696875599433616432776261718, 0.918811393264169939953944776789}, + {0.37060492955905161016616489178, 0.928790604058057023273420327314}, + {-0.928790604058057023273420327314, 0.37060492955905161016616489178}, + {0.978158230539735051856098380085, 0.207861675225075065442936761428}, + {-0.207861675225075065442936761428, 0.978158230539735051856098380085}, + {0.544681917787634528593798677321, 0.838642717988527297556800021994}, + {-0.838642717988527297556800021994, 0.544681917787634528593798677321}, + {0.824155149420828569972741206584, 0.566364096393063842782567007816}, + {-0.566364096393063842782567007816, 0.824155149420828569972741206584}, + {0.182285801725153295826942212443, 0.983245588085407073997146198963}, + {-0.983245588085407073997146198963, 0.182285801725153295826942212443}, + 
{0.993822138291519663333417611284, 0.110984491897163389806379996116}, + {-0.110984491897163389806379996116, 0.993822138291519663333417611284}, + {0.624260486452220764164167121635, 0.781216260106276094710153756751}, + {-0.781216260106276094710153756751, 0.624260486452220764164167121635}, + {0.875700006225634597356588528783, 0.482855567531765672573129677403}, + {-0.482855567531765672573129677403, 0.875700006225634597356588528783}, + {0.27778296655185763519568808988, 0.960643858822638585515107934043}, + {-0.960643858822638585515107934043, 0.27778296655185763519568808988}, + {0.953074124312172199502413150185, 0.302737036991819197240261019033}, + {-0.302737036991819197240261019033, 0.953074124312172199502413150185}, + {0.459857764501329535633544765005, 0.887992588047805564421821600263}, + {-0.887992588047805564421821600263, 0.459857764501329535633544765005}, + {0.764673227998067139843385575659, 0.644418229399988384820119335927}, + {-0.644418229399988384820119335927, 0.764673227998067139843385575659}, + {0.0850331249802802752180141965255, 0.996378124838200207591398793738}, + {-0.996378124838200207591398793738, 0.0850331249802802752180141965255}, + {0.998070786905482343343010143144, 0.0620862651950600946682001790577}, + {-0.0620862651950600946682001790577, 0.998070786905482343343010143144}, + {0.661841002387086874136912229005, 0.749644240663033478710985946236}, + {-0.749644240663033478710985946236, 0.661841002387086874136912229005}, + {0.89833778695183430507142929855, 0.439305384140099952627878110434}, + {-0.439305384140099952627878110434, 0.89833778695183430507142929855}, + {0.324584924812532149562827044065, 0.945856557086983906756927353854}, + {-0.945856557086983906756927353854, 0.324584924812532149562827044065}, + {0.966780707127683269774820473685, 0.255607246230807438891474703269}, + {-0.255607246230807438891474703269, 0.966780707127683269774820473685}, + {0.502875576800086876438911076548, 0.864358811060533915870962573536}, + {-0.864358811060533915870962573536, 
0.502875576800086876438911076548}, + {0.795372249417061305543086291436, 0.606121262502186231024836615688}, + {-0.606121262502186231024836615688, 0.795372249417061305543086291436}, + {0.133820656193754744522905753001, 0.991005566067049370460040336184}, + {-0.991005566067049370460040336184, 0.133820656193754744522905753001}, + {0.987179285097874337218115670112, 0.159615347237193033747004733414}, + {-0.159615347237193033747004733414, 0.987179285097874337218115670112}, + {0.585176072326730412065387554321, 0.810906261152459784113943896955}, + {-0.810906261152459784113943896955, 0.585176072326730412065387554321}, + {0.850952587482175726307787044789, 0.525242509568094706473573296535}, + {-0.525242509568094706473573296535, 0.850952587482175726307787044789}, + {0.230311804793845442684840918446, 0.973116885359925132270575431903}, + {-0.973116885359925132270575431903, 0.230311804793845442684840918446}, + {0.937071502451759186236301957251, 0.349137507714084971421897307664}, + {-0.349137507714084971421897307664, 0.937071502451759186236301957251}, + {0.415732114569105359880296646224, 0.909487113111505429685621493263}, + {-0.909487113111505429685621493263, 0.415732114569105359880296646224}, + {0.732132041795361288016863454686, 0.681162736338795427037950958038}, + {-0.681162736338795427037950958038, 0.732132041795361288016863454686}, + {0.0360407415207062223339029571889, 0.999350321434199440062684516306}, + {-0.999350321434199440062684516306, 0.0360407415207062223339029571889}, + {0.999293859866887790310840955499, 0.037573682709270500579279428166}, + {-0.037573682709270500579279428166, 0.999293859866887790310840955499}, + {0.68003885887207893290451465873, 0.733176070547832736679083609488}, + {-0.733176070547832736679083609488, 0.68003885887207893290451465873}, + {0.908848318229439122717394639039, 0.417126760651387873402029526915}, + {-0.417126760651387873402029526915, 0.908848318229439122717394639039}, + {0.347699647819051382846566866647, 0.937605969960999985346461471636}, + 
{-0.937605969960999985346461471636, 0.347699647819051382846566866647}, + {0.972762446695688565156956428837, 0.231804275841964751991852722313}, + {-0.231804275841964751991852722313, 0.972762446695688565156956428837}, + {0.523936547186248602336888779973, 0.851757297898029119842533418705}, + {-0.851757297898029119842533418705, 0.523936547186248602336888779973}, + {0.810007658581641143413776262605, 0.586419297976360498481085414824}, + {-0.586419297976360498481085414824, 0.810007658581641143413776262605}, + {0.158100845978377008149706739459, 0.987422970413855405347192117915}, + {-0.987422970413855405347192117915, 0.158100845978377008149706739459}, + {0.990799121866020371385275211651, 0.135340681650134214697445145248}, + {-0.135340681650134214697445145248, 0.990799121866020371385275211651}, + {0.604900464099919821236994721403, 0.796301091630359114681425580784}, + {-0.796301091630359114681425580784, 0.604900464099919821236994721403}, + {0.863586392929667989726283394702, 0.504200894432690449598055693059}, + {-0.504200894432690449598055693059, 0.863586392929667989726283394702}, + {0.254123923047320621204647750346, 0.967171666114676642500569414551}, + {-0.967171666114676642500569414551, 0.254123923047320621204647750346}, + {0.945357537397632285980364486022, 0.326035468140330242370339419722}, + {-0.326035468140330242370339419722, 0.945357537397632285980364486022}, + {0.437926834910322859695241959344, 0.899010615769039067579626589577}, + {-0.899010615769039067579626589577, 0.437926834910322859695241959344}, + {0.748628107686245325425034025102, 0.662990163111121466599229279382}, + {-0.662990163111121466599229279382, 0.748628107686245325425034025102}, + {0.0605551713359477883358295002836, 0.99816485172764624067553995701}, + {-0.99816485172764624067553995701, 0.0605551713359477883358295002836}, + {0.996246513422315516095295606647, 0.0865614492362511700473959308511}, + {-0.0865614492362511700473959308511, 0.996246513422315516095295606647}, + {0.643244477630085853547825536225, 
0.765660853118662498850710562692}, + {-0.765660853118662498850710562692, 0.643244477630085853547825536225}, + {0.887286130582383147924474542378, 0.461219386492092375817719585029}, + {-0.461219386492092375817719585029, 0.887286130582383147924474542378}, + {0.301274683984317948048925472904, 0.953537395590833281033837920404}, + {-0.953537395590833281033837920404, 0.301274683984317948048925472904}, + {0.960216615011963425807550720492, 0.279256248372291182580084978326}, + {-0.279256248372291182580084978326, 0.960216615011963425807550720492}, + {0.481511692970189919549284240929, 0.876439666795713612224005828466}, + {-0.876439666795713612224005828466, 0.481511692970189919549284240929}, + {0.780257737750316593405841558706, 0.6254581222438143628394868756}, + {-0.6254581222438143628394868756, 0.780257737750316593405841558706}, + {0.109459857849717984157322803185, 0.993991217023329376445417437935}, + {-0.993991217023329376445417437935, 0.109459857849717984157322803185}, + {0.982964808441396442617588036228, 0.183793866507478448335533016689}, + {-0.183793866507478448335533016689, 0.982964808441396442617588036228}, + {0.565099192368713976186711533956, 0.825022971064580223909956657735}, + {-0.825022971064580223909956657735, 0.565099192368713976186711533956}, + {0.837806200015150936977192941413, 0.54596773825581756955926948649}, + {-0.54596773825581756955926948649, 0.837806200015150936977192941413}, + {0.206360955321075512092221515559, 0.978475935380616834713407570234}, + {-0.978475935380616834713407570234, 0.206360955321075512092221515559}, + {0.928221010672169444255530379451, 0.37202923990828501432659436432}, + {-0.37202923990828501432659436432, 0.928221010672169444255530379451}, + {0.393286972747296403873917824967, 0.919415769424946960342026613944}, + {-0.919415769424946960342026613944, 0.393286972747296403873917824967}, + {0.715194966938680010137829867745, 0.698925002604414147278077962255}, + {-0.698925002604414147278077962255, 0.715194966938680010137829867745}, + 
{0.0115046021104227135650743818474, 0.999933819875235996299522867048}, + {-0.999933819875235996299522867048, 0.0115046021104227135650743818474}, + {0.999976174986897614616054852377, 0.00690285872472975580577125853665}, + {-0.00690285872472975580577125853665, 0.999976174986897614616054852377}, + {0.702208876144391869189576027566, 0.711970992572050098701197384798}, + {-0.711970992572050098701197384798, 0.702208876144391869189576027566}, + {0.921215911399408726722981555213, 0.389051724818894384405609798705}, + {-0.389051724818894384405609798705, 0.921215911399408726722981555213}, + {0.376296905035704787323425080103, 0.926499130739230514208770728146}, + {-0.926499130739230514208770728146, 0.376296905035704787323425080103}, + {0.979415232249634781780400771822, 0.201855896216568020395953908519}, + {-0.201855896216568020395953908519, 0.979415232249634781780400771822}, + {0.54981747928389090862566490614, 0.835284825358337368328420780017}, + {-0.835284825358337368328420780017, 0.54981747928389090862566490614}, + {0.827614779697938396374468084105, 0.56129651381915146579615338851}, + {-0.56129651381915146579615338851, 0.827614779697938396374468084105}, + {0.188315451756732116228576501271, 0.982108594112513610951964437845}, + {-0.982108594112513610951964437845, 0.188315451756732116228576501271}, + {0.994484417910747597879606018978, 0.10488442464313495194883074646}, + {-0.10488442464313495194883074646, 0.994484417910747597879606018978}, + {0.629042187783035999792957682075, 0.777371163595056313688758109492}, + {-0.777371163595056313688758109492, 0.629042187783035999792957682075}, + {0.878646267485068133140657664626, 0.477473283686698057870501088473}, + {-0.477473283686698057870501088473, 0.878646267485068133140657664626}, + {0.283672137272668434260225467369, 0.958921330733213173047602140286}, + {-0.958921330733213173047602140286, 0.283672137272668434260225467369}, + {0.954913742499130524521433471818, 0.296883385163778212856300342537}, + {-0.296883385163778212856300342537, 
0.954913742499130524521433471818}, + {0.465297727898434598792221095209, 0.885154237640285113108973291673}, + {-0.885154237640285113108973291673, 0.465297727898434598792221095209}, + {0.768612909162058266510086923518, 0.639714151687640453225469627796}, + {-0.639714151687640453225469627796, 0.768612909162058266510086923518}, + {0.091145185496681019321485450746, 0.995837614855341612951633578632}, + {-0.995837614855341612951633578632, 0.091145185496681019321485450746}, + {0.998432952666508444217186024616, 0.0559610492185205685156113020184}, + {-0.0559610492185205685156113020184, 0.998432952666508444217186024616}, + {0.666428274005865350915200906456, 0.745569148775325429845395319717}, + {-0.745569148775325429845395319717, 0.666428274005865350915200906456}, + {0.901016403159702328196090093115, 0.43378501730367857724957048049}, + {-0.43378501730367857724957048049, 0.901016403159702328196090093115}, + {0.330382481321982779398638285784, 0.94384713594709268580373873192}, + {-0.94384713594709268580373873192, 0.330382481321982779398638285784}, + {0.968330884332445185336268878018, 0.249670379596668545740811850919}, + {-0.249670379596668545740811850919, 0.968330884332445185336268878018}, + {0.508169716269614601955595389882, 0.861256953218062171195867904316}, + {-0.861256953218062171195867904316, 0.508169716269614601955595389882}, + {0.799076366909352353573581240198, 0.60122954006514850444631292703}, + {-0.60122954006514850444631292703, 0.799076366909352353573581240198}, + {0.13989883289777721442348479286, 0.990165802557248397874900547322}, + {-0.990165802557248397874900547322, 0.13989883289777721442348479286}, + {0.988140083085692566555735538714, 0.153555124301993445312675135028}, + {-0.153555124301993445312675135028, 0.988140083085692566555735538714}, + {0.590140683832248935658526534098, 0.807300423192014449114140006714}, + {-0.807300423192014449114140006714, 0.590140683832248935658526534098}, + {0.854159395991738845665963708598, 0.520011275107596038225210577366}, + 
{-0.520011275107596038225210577366, 0.854159395991738845665963708598}, + {0.236278402197919568106243559669, 0.97168540004200854021121358528}, + {-0.97168540004200854021121358528, 0.236278402197919568106243559669}, + {0.939196129819569902608122902166, 0.343381172652115040921927402451}, + {-0.343381172652115040921927402451, 0.939196129819569902608122902166}, + {0.421304796545479642855269730717, 0.906919107973678140233175781759}, + {-0.906919107973678140233175781759, 0.421304796545479642855269730717}, + {0.736297795594053172685278241261, 0.676657635886374952960409245861}, + {-0.676657635886374952960409245861, 0.736297795594053172685278241261}, + {0.0421719613603479467900392307911, 0.999110367114174890978972598532}, + {-0.999110367114174890978972598532, 0.0421719613603479467900392307911}, + {0.999505596225325310122400424007, 0.0314414235405603009754216259353}, + {-0.0314414235405603009754216259353, 0.999505596225325310122400424007}, + {0.68452474112914230008897220614, 0.728989628720519422522272634524}, + {-0.728989628720519422522272634524, 0.68452474112914230008897220614}, + {0.911390651104122428982634573913, 0.411542319913765219929757677164}, + {-0.411542319913765219929757677164, 0.911390651104122428982634573913}, + {0.353446144549480811836161819883, 0.935454874862014729153258940642}, + {-0.935454874862014729153258940642, 0.353446144549480811836161819883}, + {0.974166459015280317146334709832, 0.225831154028026170887955004218}, + {-0.225831154028026170887955004218, 0.974166459015280317146334709832}, + {0.529152968757790609366509215761, 0.848526449590592646288200739946}, + {-0.848526449590592646288200739946, 0.529152968757790609366509215761}, + {0.813590611584798506505933346489, 0.581438145240810277947218764893}, + {-0.581438145240810277947218764893, 0.813590611584798506505933346489}, + {0.164156583221015839324508078789, 0.986434293901627179401714329288}, + {-0.986434293901627179401714329288, 0.164156583221015839324508078789}, + {0.991610905163495370828741215519, 
0.129258704777796135099166008331}, + {-0.129258704777796135099166008331, 0.991610905163495370828741215519}, + {0.609775088663868425342684531643, 0.792574502015407689192727502814}, + {-0.792574502015407689192727502814, 0.609775088663868425342684531643}, + {0.866663854688111134905170729326, 0.498892536501744643384625987892}, + {-0.498892536501744643384625987892, 0.866663854688111134905170729326}, + {0.260053593015495188023322725712, 0.965594184302976832334763912513}, + {-0.965594184302976832334763912513, 0.260053593015495188023322725712}, + {0.947340257333192048427861209348, 0.320228725813099912578252315143}, + {-0.320228725813099912578252315143, 0.947340257333192048427861209348}, + {0.443434816498138484330837627567, 0.896306623604479546507661780197}, + {-0.896306623604479546507661780197, 0.443434816498138484330837627567}, + {0.752682046138055227402219315991, 0.658384186794785053464806878765}, + {-0.658384186794785053464806878765, 0.752682046138055227402219315991}, + {0.0666786557930015705286663774132, 0.997774502010167818610852918937}, + {-0.997774502010167818610852918937, 0.0666786557930015705286663774132}, + {0.996758890430818000893964381248, 0.0804469660529500002477121256561}, + {-0.0804469660529500002477121256561, 0.996758890430818000893964381248}, + {0.647930375409685455068142800883, 0.761699565853535265347318272688}, + {-0.761699565853535265347318272688, 0.647930375409685455068142800883}, + {0.890099416625192318974768568296, 0.455766418819434693254777357652}, + {-0.455766418819434693254777357652, 0.890099416625192318974768568296}, + {0.307119808041533048914573100774, 0.951670858810193864840698552143}, + {-0.951670858810193864840698552143, 0.307119808041533048914573100774}, + {0.961912023333112209400042047491, 0.273359213064418737904048839482}, + {-0.273359213064418737904048839482, 0.961912023333112209400042047491}, + {0.486880361346047396686742558813, 0.873468667861384884254505323042}, + {-0.873468667861384884254505323042, 0.486880361346047396686742558813}, + 
{0.784080788509869952562780781591, 0.620658776695972136394630069844}, + {-0.620658776695972136394630069844, 0.784080788509869952562780781591}, + {0.11555681274875526098888656179, 0.993300872358093278613466736715}, + {-0.993300872358093278613466736715, 0.11555681274875526098888656179}, + {0.984074042370776447263835962076, 0.177759047961107169433248031964}, + {-0.177759047961107169433248031964, 0.984074042370776447263835962076}, + {0.570150800319470296706469980563, 0.821540056780597605090576962539}, + {-0.821540056780597605090576962539, 0.570150800319470296706469980563}, + {0.841140423614298082810591949965, 0.540816778365796668737175423303}, + {-0.540816778365796668737175423303, 0.841140423614298082810591949965}, + {0.21236088610587844360821918599, 0.977191308829712279582224709884}, + {-0.977191308829712279582224709884, 0.21236088610587844360821918599}, + {0.930486265676149670866834640037, 0.366326779512573585950718779713}, + {-0.366326779512573585950718779713, 0.930486265676149670866834640037}, + {0.398920998336982857157551052296, 0.91698529818412299885466154592}, + {-0.91698529818412299885466154592, 0.398920998336982857157551052296}, + {0.71947002678993310365029856257, 0.694523491719965524460178585286}, + {-0.694523491719965524460178585286, 0.71947002678993310365029856257}, + {0.0176398641150820566225743135647, 0.999844405492175236638274782308}, + {-0.999844405492175236638274782308, 0.0176398641150820566225743135647}, + {0.999816169924900410848067622283, 0.0191735848683226191291328888155}, + {-0.0191735848683226191291328888155, 0.999816169924900410848067622283}, + {0.693419021813811875531996520294, 0.720534565573905272373167463229}, + {-0.720534565573905272373167463229, 0.693419021813811875531996520294}, + {0.916372282399289139753761901375, 0.400327166265690093105433788878}, + {-0.400327166265690093105433788878, 0.916372282399289139753761901375}, + {0.364899001016267321428898640079, 0.931047108935595280065911083511}, + {-0.931047108935595280065911083511, 
0.364899001016267321428898640079}, + {0.976864401725312636592946091696, 0.213859628358993775210095122929}, + {-0.213859628358993775210095122929, 0.976864401725312636592946091696}, + {0.539525849325028894476474761177, 0.841969036194387676630412897794}, + {-0.841969036194387676630412897794, 0.539525849325028894476474761177}, + {0.820664490168157456650988024194, 0.571410355678857229122513672337}, + {-0.571410355678857229122513672337, 0.820664490168157456650988024194}, + {0.176249288736167880609073677078, 0.984345563417641900016974432219}, + {-0.984345563417641900016974432219, 0.176249288736167880609073677078}, + {0.9931224418304955836589442697, 0.117080380647800574855921240669}, + {-0.117080380647800574855921240669, 0.9931224418304955836589442697}, + {0.619455282066924017847497907496, 0.785031944266848080715703872556}, + {-0.785031944266848080715703872556, 0.619455282066924017847497907496}, + {0.872720775355914302195969867171, 0.488219672137626792274289755369}, + {-0.488219672137626792274289755369, 0.872720775355914302195969867171}, + {0.271883337459359775145628645987, 0.96233021921373740337202207229}, + {-0.96233021921373740337202207229, 0.271883337459359775145628645987}, + {0.951198623423113232000503103336, 0.308579290941525030689263076056}, + {-0.308579290941525030689263076056, 0.951198623423113232000503103336}, + {0.45440048771930363624704796166, 0.890797506036281494523620949622}, + {-0.890797506036281494523620949622, 0.45440048771930363624704796166}, + {0.760704757319236923862604271562, 0.649098045130225953514013781387}, + {-0.649098045130225953514013781387, 0.760704757319236923862604271562}, + {0.0789178630147849419218530897524, 0.99688112174781384755561930433}, + {-0.99688112174781384755561930433, 0.0789178630147849419218530897524}, + {0.99767104434344100472031868776, 0.0682091436588063149137894924934}, + {-0.0682091436588063149137894924934, 0.99767104434344100472031868776}, + {0.657228812828642539045631565386, 0.753691108868781323160135343642}, + 
{-0.753691108868781323160135343642, 0.657228812828642539045631565386}, + {0.895625348834030110545256775367, 0.444809211377104884999056366723}, + {-0.444809211377104884999056366723, 0.895625348834030110545256775367}, + {0.31877514786411847991232093591, 0.947830367262101014524944275763}, + {-0.947830367262101014524944275763, 0.31877514786411847991232093591}, + {0.965194131175724723270548111032, 0.261534489396595459798078309177}, + {-0.261534489396595459798078309177, 0.965194131175724723270548111032}, + {0.497562504349319145724450663693, 0.867428126282306921623899143015}, + {-0.867428126282306921623899143015, 0.497562504349319145724450663693}, + {0.791638186609125771298067775206, 0.610990164816271774661515792104}, + {-0.610990164816271774661515792104, 0.791638186609125771298067775206}, + {0.127737441217662311965241883627, 0.991808018777406430466214715125}, + {-0.991808018777406430466214715125, 0.127737441217662311965241883627}, + {0.986181320367928271330981715437, 0.165669560744784116756989078567}, + {-0.165669560744784116756989078567, 0.986181320367928271330981715437}, + {0.580189429272831680428623712942, 0.814481568950498613368438327598}, + {-0.814481568950498613368438327598, 0.580189429272831680428623712942}, + {0.847713741088654271216284996626, 0.53045396894497631734566311934}, + {-0.53045396894497631734566311934, 0.847713741088654271216284996626}, + {0.224336536280493603623398257696, 0.97451173337711571864616644234}, + {-0.97451173337711571864616644234, 0.224336536280493603623398257696}, + {0.934911594871516093974150862778, 0.354880697946222789518344598036}, + {-0.354880697946222789518344598036, 0.934911594871516093974150862778}, + {0.410143780513590239245047541772, 0.912020876573568339829023443599}, + {-0.912020876573568339829023443599, 0.410143780513590239245047541772}, + {0.727938723639098617113063482975, 0.685642191399187472811149746121}, + {-0.685642191399187472811149746121, 0.727938723639098617113063482975}, + {0.0299081647675165582245249140669, 
0.999552650779456985929982693051}, + {-0.999552650779456985929982693051, 0.0299081647675165582245249140669}, + {0.999044500659429290934099299193, 0.043704527250063421317527456722}, + {-0.043704527250063421317527456722, 0.999044500659429290934099299193}, + {0.675527373536338626713870780804, 0.737334908710482905824790123006}, + {-0.737334908710482905824790123006, 0.675527373536338626713870780804}, + {0.906271767729257660128894258378, 0.422695496802233006139459803308}, + {-0.422695496802233006139459803308, 0.906271767729257660128894258378}, + {0.341940060393402189831135729037, 0.939721764725153341224483938277}, + {-0.939721764725153341224483938277, 0.341940060393402189831135729037}, + {0.971321810419786157986266061926, 0.237768670355934214066806475785}, + {-0.237768670355934214066806475785, 0.971321810419786157986266061926}, + {0.518700399699835057454322395643, 0.854956078024614929411484354205}, + {-0.854956078024614929411484354205, 0.518700399699835057454322395643}, + {0.806394209247956350594677132904, 0.591378372356787584962489745521}, + {-0.591378372356787584962489745521, 0.806394209247956350594677132904}, + {0.15203915632824605008721619015, 0.988374471009341282190518995776}, + {-0.988374471009341282190518995776, 0.15203915632824605008721619015}, + {0.989950035541608985845130064263, 0.141417563022303016673220099619}, + {-0.141417563022303016673220099619, 0.989950035541608985845130064263}, + {0.600003065375389055269295113249, 0.799997700959281909938169974339}, + {-0.799997700959281909938169974339, 0.600003065375389055269295113249}, + {0.860476417631632073401704019489, 0.509490269484936253441276221565}, + {-0.509490269484936253441276221565, 0.860476417631632073401704019489}, + {0.248184685457074782899411502513, 0.968712734459794777563956813538}, + {-0.968712734459794777563956813538, 0.248184685457074782899411502513}, + {0.943339225285107718654842301476, 0.331829935416461108133034940693}, + {-0.331829935416461108133034940693, 0.943339225285107718654842301476}, + 
{0.432402365624690143697250732657, 0.901680760692037730485992597096}, + {-0.901680760692037730485992597096, 0.432402365624690143697250732657}, + {0.744545983809307365675067558186, 0.667571178222540306812504695699}, + {-0.667571178222540306812504695699, 0.744545983809307365675067558186}, + {0.054429407010919132747783777404, 0.998517621102622210393917612237}, + {-0.998517621102622210393917612237, 0.054429407010919132747783777404}, + {0.995696628295663521690528341423, 0.092672673429913310361172307239}, + {-0.092672673429913310361172307239, 0.995696628295663521690528341423}, + {0.638534362059466786831762874499, 0.769593313685422941716751665808}, + {-0.769593313685422941716751665808, 0.638534362059466786831762874499}, + {0.88443943871825381197737669936, 0.466654989515530915777929976684}, + {-0.466654989515530915777929976684, 0.88443943871825381197737669936}, + {0.295418217105532010524626684855, 0.955368032227470354023068921379}, + {-0.955368032227470354023068921379, 0.295418217105532010524626684855}, + {0.958485055077976100257330926979, 0.285142769840248666568527369236}, + {-0.285142769840248666568527369236, 0.958485055077976100257330926979}, + {0.476124895951243631841975911811, 0.879377668271953294443221693655}, + {-0.879377668271953294443221693655, 0.476124895951243631841975911811}, + {0.77640531072794038980333652944, 0.630233919646864482189130285406}, + {-0.630233919646864482189130285406, 0.77640531072794038980333652944}, + {0.103358781848899627942017787063, 0.994644138481050710254294244805}, + {-0.994644138481050710254294244805, 0.103358781848899627942017787063}, + {0.981818566442552498330087473732, 0.189821765318656437981914564261}, + {-0.189821765318656437981914564261, 0.981818566442552498330087473732}, + {0.560026308752760382247970483149, 0.82847482370000713469693209845}, + {-0.82847482370000713469693209845, 0.560026308752760382247970483149}, + {0.834440433486103194660188364651, 0.551098142769075427516156651109}, + {-0.551098142769075427516156651109, 
0.834440433486103194660188364651}, + {0.200353255162940446787445125665, 0.97972372286559117338100577399}, + {-0.97972372286559117338100577399, 0.200353255162940446787445125665}, + {0.925920808671769957065578182664, 0.377717693613385641082658139567}, + {-0.377717693613385641082658139567, 0.925920808671769957065578182664}, + {0.387638140125372732125441643802, 0.921811625181708116372192307608}, + {-0.921811625181708116372192307608, 0.387638140125372732125441643802}, + {0.710892980401151675096116377972, 0.703300199357548727618905104464}, + {-0.703300199357548727618905104464, 0.710892980401151675096116377972}, + {0.00536890696399634249902366178731, 0.999985587315143198416933500994}, + {-0.999985587315143198416933500994, 0.00536890696399634249902366178731}, + {0.999992646580707189585268679366, 0.00383494256970622754421285094395}, + {-0.00383494256970622754421285094395, 0.999992646580707189585268679366}, + {0.704389867637400413080683847511, 0.709813295430400947871873995609}, + {-0.709813295430400947871873995609, 0.704389867637400413080683847511}, + {0.922405169852209882996874057426, 0.386223643281862982767904668435}, + {-0.386223643281862982767904668435, 0.922405169852209882996874057426}, + {0.379137593384847315647334653477, 0.925340307823206309478791808942}, + {-0.925340307823206309478791808942, 0.379137593384847315647334653477}, + {0.980029908096990087784661227488, 0.198850142658750117519161904056}, + {-0.198850142658750117519161904056, 0.980029908096990087784661227488}, + {0.552377509467096072803826700692, 0.833594078094925250432822849689}, + {-0.833594078094925250432822849689, 0.552377509467096072803826700692}, + {0.829332918220788251062458584784, 0.558754785890368310674602980725}, + {-0.558754785890368310674602980725, 0.829332918220788251062458584784}, + {0.191327632211630876968300185581, 0.981526228458664773413033799443}, + {-0.981526228458664773413033799443, 0.191327632211630876968300185581}, + {0.994801518557617114879576547537, 0.101832895841466528064067631476}, + 
{-0.101832895841466528064067631476, 0.994801518557617114879576547537}, + {0.631424168509401750881693260453, 0.775437630904130537068397188705}, + {-0.775437630904130537068397188705, 0.631424168509401750881693260453}, + {0.880106999798240363652723772248, 0.474775387847917118566698491122}, + {-0.474775387847917118566698491122, 0.880106999798240363652723772248}, + {0.286612731439347789841320945925, 0.958046524014818601244769524783}, + {-0.958046524014818601244769524783, 0.286612731439347789841320945925}, + {0.95582007388254541790928442424, 0.293952353899684604154884937088}, + {-0.293952353899684604154884937088, 0.95582007388254541790928442424}, + {0.468011153048359829220714800613, 0.883722558624789655823406064883}, + {-0.883722558624789655823406064883, 0.468011153048359829220714800613}, + {0.770571907281380696908001937118, 0.637353069898259128045481247682}, + {-0.637353069898259128045481247682, 0.770571907281380696908001937118}, + {0.0941999432953932042122957568608, 0.995553298765638472467287556356}, + {-0.995553298765638472467287556356, 0.0941999432953932042122957568608}, + {0.998599939930320368297600452934, 0.0528976367256653243198449843021}, + {-0.0528976367256653243198449843021, 0.998599939930320368297600452934}, + {0.668712511579748092316322072293, 0.743521066854669121504173290305}, + {-0.743521066854669121504173290305, 0.668712511579748092316322072293}, + {0.902342996482444203998340981343, 0.431018696461167027944583196586}, + {-0.431018696461167027944583196586, 0.902342996482444203998340981343}, + {0.333276608683047925740794426019, 0.942829094854802707281749007961}, + {-0.942829094854802707281749007961, 0.333276608683047925740794426019}, + {0.969092305112506213760070750141, 0.246698407314942441681537843579}, + {-0.246698407314942441681537843579, 0.969092305112506213760070750141}, + {0.510809623820439040464691515808, 0.85969385726107261369577372534}, + {-0.85969385726107261369577372534, 0.510809623820439040464691515808}, + {0.800917152537344301244104372017, 
0.598775178820458719997077423614}, + {-0.598775178820458719997077423614, 0.800917152537344301244104372017}, + {0.142935960377642667928910213959, 0.989731939077910571889162838488}, + {-0.989731939077910571889162838488, 0.142935960377642667928910213959}, + {0.988606533192386449648836332926, 0.150522830591677397871208654578}, + {-0.150522830591677397871208654578, 0.988606533192386449648836332926}, + {0.592614669310891128972684782639, 0.805486097780429233239374298137}, + {-0.805486097780429233239374298137, 0.592614669310891128972684782639}, + {0.855750748263253924186244603334, 0.517388303739929056312973898457}, + {-0.517388303739929056312973898457, 0.855750748263253924186244603334}, + {0.239258379021299955047297203237, 0.970955935183517970799016438832}, + {-0.970955935183517970799016438832, 0.239258379021299955047297203237}, + {0.940245188374650875395843740989, 0.340498143516697160393391641264}, + {-0.340498143516697160393391641264, 0.940245188374650875395843740989}, + {0.424085202415651563168097482048, 0.905622294939825267512389928015}, + {-0.905622294939825267512389928015, 0.424085202415651563168097482048}, + {0.738370286806648623567639333487, 0.674395521605139047771615423699}, + {-0.674395521605139047771615423699, 0.738370286806648623567639333487}, + {0.04523699029880458993835290471, 0.998976283356469818563994067517}, + {-0.998976283356469818563994067517, 0.04523699029880458993835290471}, + {0.999597353289648382634879908437, 0.0283748356176720985255546025883}, + {-0.0283748356176720985255546025883, 0.999597353289648382634879908437}, + {0.686758028286925892302861029748, 0.726886105647544966679163280787}, + {-0.726886105647544966679163280787, 0.686758028286925892302861029748}, + {0.912648955969793895803832128877, 0.408744276005481410596331670604}, + {-0.408744276005481410596331670604, 0.912648955969793895803832128877}, + {0.356314416274402356865635965733, 0.934366114943725789565576178575}, + {-0.934366114943725789565576178575, 0.356314416274402356865635965733}, + 
{0.974854714618708428908178120764, 0.222841390647421117021664827007}, + {-0.222841390647421117021664827007, 0.974854714618708428908178120764}, + {0.531753720922733319476094493439, 0.846899037834397239166150939127}, + {-0.846899037834397239166150939127, 0.531753720922733319476094493439}, + {0.815370609762391285180171962566, 0.57893934806308189333634572904}, + {-0.57893934806308189333634572904, 0.815370609762391285180171962566}, + {0.16718214843207293562876714077, 0.985926026254321130615210222459}, + {-0.985926026254321130615210222459, 0.16718214843207293562876714077}, + {0.992002798571244515102307559573, 0.12621587707899034613845401509}, + {-0.12621587707899034613845401509, 0.992002798571244515102307559573}, + {0.612203803249797950947197477944, 0.790700008401721610162837805547}, + {-0.790700008401721610162837805547, 0.612203803249797950947197477944}, + {0.868190356734331314392250078527, 0.496231301384258249864700474063}, + {-0.496231301384258249864700474063, 0.868190356734331314392250078527}, + {0.263014770361779004481661559112, 0.9647918068534478974740409285}, + {-0.9647918068534478974740409285, 0.263014770361779004481661559112}, + {0.948318246854599089523674138036, 0.317320819806421738906720975137}, + {-0.317320819806421738906720975137, 0.948318246854599089523674138036}, + {0.446182559577030068975744825366, 0.894941966570620750509590379806}, + {-0.894941966570620750509590379806, 0.446182559577030068975744825366}, + {0.754698398091524502717675204622, 0.656071892339617712686106187903}, + {-0.656071892339617712686106187903, 0.754698398091524502717675204622}, + {0.0697394710219073066248540726519, 0.997565239060375752444542740704}, + {-0.997565239060375752444542740704, 0.0697394710219073066248540726519}, + {0.997001007307235287413504920551, 0.0773885742752650485076770792148}, + {-0.0773885742752650485076770792148, 0.997001007307235287413504920551}, + {0.650264187460365961079844510095, 0.759708158773163444443810021767}, + {-0.759708158773163444443810021767, 
0.650264187460365961079844510095}, + {0.891493499314791382204248293419, 0.453033487370931575721755280028}, + {-0.453033487370931575721755280028, 0.891493499314791382204248293419}, + {0.31003804772463783301361672784, 0.950724149773789606321372502862}, + {-0.950724149773789606321372502862, 0.31003804772463783301361672784}, + {0.962746150638399411647583292506, 0.27040682208654481799570135081}, + {-0.27040682208654481799570135081, 0.962746150638399411647583292506}, + {0.489557834101157496320411155466, 0.871970829254157808740899326949}, + {-0.871970829254157808740899326949, 0.489557834101157496320411155466}, + {0.785981252767830151917394232441, 0.618250329799760134363850738737}, + {-0.618250329799760134363850738737, 0.785981252767830151917394232441}, + {0.118603673045400717644959343033, 0.992941674389860473581848054891}, + {-0.992941674389860473581848054891, 0.118603673045400717644959343033}, + {0.984614768204312595933913598856, 0.174739114779627197071576460985}, + {-0.174739114779627197071576460985, 0.984614768204312595933913598856}, + {0.572668566454481275940224804799, 0.819786992452898988226195342577}, + {-0.819786992452898988226195342577, 0.572668566454481275940224804799}, + {0.842795667540004234119521697721, 0.538233650727821699710773373226}, + {-0.538233650727821699710773373226, 0.842795667540004234119521697721}, + {0.215357867379745548941372135232, 0.976535195964614466390685265651}, + {-0.976535195964614466390685265651, 0.215357867379745548941372135232}, + {0.931605761351257832814098946983, 0.363470363877363755733540529036}, + {-0.363470363877363755733540529036, 0.931605761351257832814098946983}, + {0.401732392185905007320201320908, 0.915757110301956722686611556128}, + {-0.915757110301956722686611556128, 0.401732392185905007320201320908}, + {0.72159740887044376833614478528, 0.692312920225718220201827080018}, + {-0.692312920225718220201827080018, 0.72159740887044376833614478528}, + {0.0207072605042658945684319604652, 0.999785581693599212371736939531}, + 
{-0.999785581693599212371736939531, 0.0207072605042658945684319604652}, + {0.999870288328982947945178239024, 0.0161061018535372836579799837864}, + {-0.0161061018535372836579799837864, 0.999870288328982947945178239024}, + {0.695626327345254868994572916563, 0.718403795023489721849330180703}, + {-0.718403795023489721849330180703, 0.695626327345254868994572916563}, + {0.917596156213972835580250375642, 0.397513891708632327581796062077}, + {-0.397513891708632327581796062077, 0.917596156213972835580250375642}, + {0.367753696006581920485700720747, 0.929923232892639672897416858177}, + {-0.929923232892639672897416858177, 0.367753696006581920485700720747}, + {0.977515916508569282505902720004, 0.210861644147084859035601311916}, + {-0.210861644147084859035601311916, 0.977515916508569282505902720004}, + {0.542106434812443915838287011866, 0.840309831749540769862960587488}, + {-0.840309831749540769862960587488, 0.542106434812443915838287011866}, + {0.82241369022992638626590178319, 0.568889903340175862034300280357}, + {-0.568889903340175862034300280357, 0.82241369022992638626590178319}, + {0.179268388901835745707913361002, 0.983800205702631602733276849904}, + {-0.983800205702631602733276849904, 0.179268388901835745707913361002}, + {0.993476965552789192948068830447, 0.114032972933367199308563044724}, + {-0.114032972933367199308563044724, 0.993476965552789192948068830447}, + {0.621860810854965362359791924973, 0.783127787735057312445974275761}, + {-0.783127787735057312445974275761, 0.621860810854965362359791924973}, + {0.874214505010706299792389017966, 0.485539904877946959516066272045}, + {-0.485539904877946959516066272045, 0.874214505010706299792389017966}, + {0.274834445428843887615499852473, 0.961491563980579000414650181483}, + {-0.961491563980579000414650181483, 0.274834445428843887615499852473}, + {0.952140854823815829810484956397, 0.305659602458966173088583673234}, + {-0.305659602458966173088583673234, 0.952140854823815829810484956397}, + {0.457131277457156981469665879558, 
0.889399232724195520916055102134}, + {-0.889399232724195520916055102134, 0.457131277457156981469665879558}, + {0.762692582035177868426956138137, 0.646761181046383915038688883214}, + {-0.646761181046383915038688883214, 0.762692582035177868426956138137}, + {0.0819758797916330661514905386866, 0.996634313643869895749105580762}, + {-0.996634313643869895749105580762, 0.0819758797916330661514905386866}, + {0.997875611817110153012322371069, 0.0651480110258788325250378647979}, + {-0.0651480110258788325250378647979, 0.997875611817110153012322371069}, + {0.659538011519338662758116242912, 0.751671212273768429845688388014}, + {-0.751671212273768429845688388014, 0.659538011519338662758116242912}, + {0.896985789278863965279242620454, 0.442059378174214756551663185746}, + {-0.442059378174214756551663185746, 0.896985789278863965279242620454}, + {0.321681550232956581236720694505, 0.946847918221147999418008112116}, + {-0.946847918221147999418008112116, 0.321681550232956581236720694505}, + {0.965991965293840570971894976537, 0.258572084703170335107103028349}, + {-0.258572084703170335107103028349, 0.965991965293840570971894976537}, + {0.500221394711840572355754375167, 0.865897543750148823704648748389}, + {-0.865897543750148823704648748389, 0.500221394711840572355754375167}, + {0.793508952417326662853724883462, 0.608558577651779453177027789934}, + {-0.608558577651779453177027789934, 0.793508952417326662853724883462}, + {0.130779664179711707650000107606, 0.991411458193338535060945559962}, + {-0.991411458193338535060945559962, 0.130779664179711707650000107606}, + {0.986684946260146689134273856325, 0.162643219420950307929629730097}, + {-0.162643219420950307929629730097, 0.986684946260146689134273856325}, + {0.58268549302866845529536021786, 0.812697739761799486934990000009}, + {-0.812697739761799486934990000009, 0.58268549302866845529536021786}, + {0.849337161427830777959968600044, 0.52785072342255534572075248434}, + {-0.52785072342255534572075248434, 0.849337161427830777959968600044}, + 
{0.227325240373038861552146272516, 0.973818892345666098364631579898}, + {-0.973818892345666098364631579898, 0.227325240373038861552146272516}, + {0.935995953636831412580932010314, 0.352010759459819133621039100035}, + {-0.352010759459819133621039100035, 0.935995953636831412580932010314}, + {0.412939890915108021030022200648, 0.91075828104443756760844053133}, + {-0.91075828104443756760844053133, 0.412939890915108021030022200648}, + {0.730038818418926149789172086457, 0.683405680106258794914708687429}, + {-0.683405680106258794914708687429, 0.730038818418926149789172086457}, + {0.0329746083288973354519235670068, 0.999456189737977340747931975784}, + {-0.999456189737977340747931975784, 0.0329746083288973354519235670068}, + {0.999173882565716375836473162053, 0.0406392962359337361899491725126}, + {-0.0406392962359337361899491725126, 0.999173882565716375836473162053}, + {0.677786305995631499499154415389, 0.735258949897786839855484686268}, + {-0.735258949897786839855484686268, 0.677786305995631499499154415389}, + {0.907564314149832629396996708238, 0.419913104917843615915273858263}, + {-0.419913104917843615915273858263, 0.907564314149832629396996708238}, + {0.344821476901759293998139810355, 0.938668284894770166282285117632}, + {-0.938668284894770166282285117632, 0.344821476901759293998139810355}, + {0.972046703194623495924986400496, 0.234787578054000967142656008946}, + {-0.234787578054000967142656008946, 0.972046703194623495924986400496}, + {0.521320926878595658493509290565, 0.853360704039295425715749843221}, + {-0.853360704039295425715749843221, 0.521320926878595658493509290565}, + {0.80820473748019472370884841439, 0.588901606649675835036816806678}, + {-0.588901606649675835036816806678, 0.80820473748019472370884841439}, + {0.155070730945700507863449502111, 0.987903369972977785096190928016}, + {-0.987903369972977785096190928016, 0.155070730945700507863449502111}, + {0.990379239617108164672742987023, 0.13837977357778388776132771909}, + {-0.13837977357778388776132771909, 
0.990379239617108164672742987023}, + {0.602454600003723750312190077238, 0.798153152555543865531717528938}, + {-0.798153152555543865531717528938, 0.602454600003723750312190077238}, + {0.862035462183687206660920310242, 0.506847967281863320820889384777}, + {-0.506847967281863320820889384777, 0.862035462183687206660920310242}, + {0.251155486237741920607646761709, 0.967946755628987798303342060535}, + {-0.967946755628987798303342060535, 0.251155486237741920607646761709}, + {0.944352825645594751158284907433, 0.328934249805612199946125429051}, + {-0.328934249805612199946125429051, 0.944352825645594751158284907433}, + {0.435166648244619258534271466488, 0.900349925448735599609051405423}, + {-0.900349925448735599609051405423, 0.435166648244619258534271466488}, + {0.746590559345117310563466617168, 0.665283801619087178380596014904}, + {-0.665283801619087178380596014904, 0.746590559345117310563466617168}, + {0.0574925597443675662212569932308, 0.998345934821212366294673756784}, + {-0.998345934821212366294673756784, 0.0574925597443675662212569932308}, + {0.995976258112917789411255853338, 0.0896174830900229590913141919373}, + {-0.0896174830900229590913141919373, 0.995976258112917789411255853338}, + {0.640892436006621379895875634247, 0.767630696018273384062524655747}, + {-0.767630696018273384062524655747, 0.640892436006621379895875634247}, + {0.885866953708892790331219657674, 0.463939371390838517505983418232}, + {-0.463939371390838517505983418232, 0.885866953708892790331219657674}, + {0.298347854626741404437950677675, 0.954457205766513605560419364338}, + {-0.954457205766513605560419364338, 0.298347854626741404437950677675}, + {0.959355349953930791606637740188, 0.282200837197147558210730267092}, + {-0.282200837197147558210730267092, 0.959355349953930791606637740188}, + {0.478820547881393943079331165791, 0.877912799158641843355610490107}, + {-0.877912799158641843355610490107, 0.478820547881393943079331165791}, + {0.778335187232733205497936523898, 0.627848975722176572134003436076}, + 
{-0.627848975722176572134003436076, 0.778335187232733205497936523898}, + {0.10640982063418767677998033605, 0.99432235722254580512213806287}, + {-0.99432235722254580512213806287, 0.10640982063418767677998033605}, + {0.982396310786084692168174115068, 0.186808695070359270795634643036}, + {-0.186808695070359270795634643036, 0.982396310786084692168174115068}, + {0.562565398100626556932013500045, 0.826752788238348523819354340958}, + {-0.826752788238348523819354340958, 0.562565398100626556932013500045}, + {0.836127251724692155399054627196, 0.548535522025067390217145657516}, + {-0.548535522025067390217145657516, 0.836127251724692155399054627196}, + {0.203358062283773316503143746559, 0.979104436975029246426061035891}, + {-0.979104436975029246426061035891, 0.203358062283773316503143746559}, + {0.92707527266474010207986111709, 0.374875230995057595606567701907}, + {-0.374875230995057595606567701907, 0.92707527266474010207986111709}, + {0.390464394036126594134117340218, 0.920618029907083967877667873836}, + {-0.920618029907083967877667873836, 0.390464394036126594134117340218}, + {0.71304732940642923111340678588, 0.701115900565918659381736688374}, + {-0.701115900565918659381736688374, 0.71304732940642923111340678588}, + {0.00843679424236980050888501381223, 0.99996440961811827730087998134}, + {-0.99996440961811827730087998134, 0.00843679424236980050888501381223}, + {0.999950291236490484969579028984, 0.00997070990741802908041702124819}, + {-0.00997070990741802908041702124819, 0.999950291236490484969579028984}, + {0.700021275194006364905874306714, 0.714121988371564708764083206916}, + {-0.714121988371564708764083206916, 0.700021275194006364905874306714}, + {0.920017982111606569262107768736, 0.391876144452922348104806360425}, + {-0.391876144452922348104806360425, 0.920017982111606569262107768736}, + {0.373452674836780296185878569304, 0.927649233092581182447133869573}, + {-0.927649233092581182447133869573, 0.373452674836780296185878569304}, + {0.978791337773105674102680495707, 
0.204859749829814419275209047555}, + {-0.204859749829814419275209047555, 0.978791337773105674102680495707}, + {0.547252274009174088931217738718, 0.836967710602857017931910377229}, + {-0.836967710602857017931910377229, 0.547252274009174088931217738718}, + {0.825888851349586783356926389388, 0.56383295861137816551433843415}, + {-0.56383295861137816551433843415, 0.825888851349586783356926389388}, + {0.185301498805081898968438736119, 0.982681715786240861376654720516}, + {-0.982681715786240861376654720516, 0.185301498805081898968438736119}, + {0.994157956797789732483749958192, 0.107934966232653653528394954719}, + {-0.107934966232653653528394954719, 0.994157956797789732483749958192}, + {0.626654286272029459325949574122, 0.779297379372530296670618099597}, + {-0.779297379372530296670618099597, 0.626654286272029459325949574122}, + {0.877177265018596052925659023458, 0.480166685365088385939458248686}, + {-0.480166685365088385939458248686, 0.877177265018596052925659023458}, + {0.280728873075797191738445235387, 0.959787111718839902607669500867}, + {-0.959787111718839902607669500867, 0.280728873075797191738445235387}, + {0.953998423103894488406240270706, 0.299811622048383352723277539553}, + {-0.299811622048383352723277539553, 0.953998423103894488406240270706}, + {0.462579923189086805734859808581, 0.886577585246987043277044904244}, + {-0.886577585246987043277044904244, 0.462579923189086805734859808581}, + {0.766646676565310492001970033016, 0.642069212243792541983111732407}, + {-0.642069212243792541983111732407, 0.766646676565310492001970033016}, + {0.0880895698047704928113077471608, 0.996112557742151127904151053372}, + {-0.996112557742151127904151053372, 0.0880895698047704928113077471608}, + {0.99825656777149518461555999238, 0.0590239349846679306477170712242}, + {-0.0590239349846679306477170712242, 0.99825656777149518461555999238}, + {0.664137763755260013276426889206, 0.747610213115205146650055212376}, + {-0.747610213115205146650055212376, 0.664137763755260013276426889206}, + 
{0.899681329127423934366447610955, 0.436547255196401196020161705746}, + {-0.436547255196401196020161705746, 0.899681329127423934366447610955}, + {0.327485244275178000172843439941, 0.944856293190677209992145435535}, + {-0.944856293190677209992145435535, 0.327485244275178000172843439941}, + {0.967560349253314355699728821492, 0.252640001885695519856511737089}, + {-0.252640001885695519856511737089, 0.967560349253314355699728821492}, + {0.505525025631885394084008567006, 0.862811942696600331359491065086}, + {-0.862811942696600331359491065086, 0.505525025631885394084008567006}, + {0.797228060070268695902484523685, 0.603678242308430368368021845527}, + {-0.603678242308430368368021845527, 0.797228060070268695902484523685}, + {0.136860388636816376894955737953, 0.99059034621895014627313003075}, + {-0.99059034621895014627313003075, 0.136860388636816376894955737953}, + {0.987664332228205710251245363906, 0.15658597269299842635881248043}, + {-0.15658597269299842635881248043, 0.987664332228205710251245363906}, + {0.587661143724736656501761444815, 0.809107149984558238209331193502}, + {-0.809107149984558238209331193502, 0.587661143724736656501761444815}, + {0.85256000404668408343411556416, 0.522629351931096608474547338119}, + {-0.522629351931096608474547338119, 0.85256000404668408343411556416}, + {0.233296201432231592454158430883, 0.972405719027449766400650332798}, + {-0.972405719027449766400650332798, 0.233296201432231592454158430883}, + {0.938138231192824356696746690432, 0.346260969753160008366421607207}, + {-0.346260969753160008366421607207, 0.938138231192824356696746690432}, + {0.418520425194109702005107465084, 0.908207384739488698954801293439}, + {-0.908207384739488698954801293439, 0.418520425194109702005107465084}, + {0.734218374066188284032818955893, 0.678913381208238408959232401685}, + {-0.678913381208238408959232401685, 0.734218374066188284032818955893}, + {0.0391065354833298878256542252529, 0.999235046864595854998469803832}, + {-0.999235046864595854998469803832, 
0.0391065354833298878256542252529}, + {0.999404431433671303075527703186, 0.034507715524795749828523838687}, + {-0.034507715524795749828523838687, 0.999404431433671303075527703186}, + {0.682285010963795568450507289526, 0.731086290265474336713680258981}, + {-0.731086290265474336713680258981, 0.682285010963795568450507289526}, + {0.910123767882541678808649976418, 0.414336490228999099194595601148}, + {-0.414336490228999099194595601148, 0.910123767882541678808649976418}, + {0.350574546054837565822737133203, 0.936534829922755496234287875268}, + {-0.936534829922755496234287875268, 0.350574546054837565822737133203}, + {0.973469034186131065844449494762, 0.228818791799802218056569813598}, + {-0.228818791799802218056569813598, 0.973469034186131065844449494762}, + {0.526547236003579333107893489796, 0.850145874692685210582965282811}, + {-0.850145874692685210582965282811, 0.526547236003579333107893489796}, + {0.811802955582515362031870154169, 0.583931469701276295580782971228}, + {-0.583931469701276295580782971228, 0.811802955582515362031870154169}, + {0.161129472905678805538798314956, 0.986933276853677710072076934011}, + {-0.986933276853677710072076934011, 0.161129472905678805538798314956}, + {0.991209678336254063069077346881, 0.132300315844444654755918122646}, + {-0.132300315844444654755918122646, 0.991209678336254063069077346881}, + {0.607340634642572818613359686424, 0.794441535616030591882008593529}, + {-0.794441535616030591882008593529, 0.607340634642572818613359686424}, + {0.865129195271623685492556887766, 0.5015490758526753856116897623}, + {-0.5015490758526753856116897623, 0.865129195271623685492556887766}, + {0.257089967945753117284368727269, 0.966387473212298897529137775564}, + {-0.966387473212298897529137775564, 0.257089967945753117284368727269}, + {0.946353351084490590494624484563, 0.323133617705052333946014186949}, + {-0.323133617705052333946014186949, 0.946353351084490590494624484563}, + {0.440682899641872904972217384056, 0.897662844259040859640208509518}, + 
{-0.897662844259040859640208509518, 0.440682899641872904972217384056}, + {0.75065860965451058905983927616, 0.660690284287242302063702936721}, + {-0.660690284287242302063702936721, 0.75065860965451058905983927616}, + {0.0636172129591930923808718034707, 0.997974373526346991702951072511}, + {-0.997974373526346991702951072511, 0.0636172129591930923808718034707}, + {0.996507391680110821141624910524, 0.0835046006331524315324799090376}, + {-0.0835046006331524315324799090376, 0.996507391680110821141624910524}, + {0.64559046479154880149309292392, 0.763683803527501869901072950597}, + {-0.763683803527501869901072950597, 0.64559046479154880149309292392}, + {0.888696955980891600823667886289, 0.458495060420826272551408919753}, + {-0.458495060420826272551408919753, 0.888696955980891600823667886289}, + {0.304198677629829106194847554434, 0.952608610358033347509376653761}, + {-0.952608610358033347509376653761, 0.304198677629829106194847554434}, + {0.961068842145519353081795088656, 0.276309031081271028185142313305}, + {-0.276309031081271028185142313305, 0.961068842145519353081795088656}, + {0.484198305887549029780103637677, 0.874958285048851647225376382266}, + {-0.874958285048851647225376382266, 0.484198305887549029780103637677}, + {0.782172944184913010445825420902, 0.623061381715401263470255344146}, + {-0.623061381715401263470255344146, 0.782172944184913010445825420902}, + {0.112508864787378676242113328954, 0.993650721000219117051699413423}, + {-0.993650721000219117051699413423, 0.112508864787378676242113328954}, + {0.983524054057571261999726175418, 0.180777308006728587574940547711}, + {-0.180777308006728587574940547711, 0.983524054057571261999726175418}, + {0.567627667707986227618732755218, 0.823285388460400113785908615682}, + {-0.823285388460400113785908615682, 0.567627667707986227618732755218}, + {0.839477262554578551601025537821, 0.543394815630284799823357388959}, + {-0.543394815630284799823357388959, 0.839477262554578551601025537821}, + {0.209361906010474163597478991505, 
0.977838223998050426466477347276}, + {-0.977838223998050426466477347276, 0.209361906010474163597478991505}, + {0.929358011909935499694768168411, 0.369179747140619962664231934468}, + {-0.369179747140619962664231934468, 0.929358011909935499694768168411}, + {0.396105849691696265679752286815, 0.918204855051430901546893892373}, + {-0.918204855051430901546893892373, 0.396105849691696265679752286815}, + {0.717335872783521732998224251787, 0.696727526094601201656075772917}, + {-0.696727526094601201656075772917, 0.717335872783521732998224251787}, + {0.0145723016927790643276763304925, 0.999893818374418485994681304874}, + {-0.999893818374418485994681304874, 0.0145723016927790643276763304925}, + {0.999752640870248843185663645272, 0.0222408874140249609996367041731}, + {-0.0222408874140249609996367041731, 0.999752640870248843185663645272}, + {0.691205189558448451769834264269, 0.722658554178575607274126468837}, + {-0.722658554178575607274126468837, 0.691205189558448451769834264269}, + {0.915139783339685264351714977238, 0.403136672790995298498728516279}, + {-0.403136672790995298498728516279, 0.915139783339685264351714977238}, + {0.36204087145758417909391368994, 0.932162221608574426134907753294}, + {-0.932162221608574426134907753294, 0.36204087145758417909391368994}, + {0.976203692322270555337127007078, 0.21685559964263262378025842736}, + {-0.21685559964263262378025842736, 0.976203692322270555337127007078}, + {0.536940185614842913075506203313, 0.843620315706004153533115186292}, + {-0.843620315706004153533115186292, 0.536940185614842913075506203313}, + {0.818907565699658945845840207767, 0.573925429685650745348368673149}, + {-0.573925429685650745348368673149, 0.818907565699658945845840207767}, + {0.173228529645070322695588060924, 0.984881656097323698872969544027}, + {-0.984881656097323698872969544027, 0.173228529645070322695588060924}, + {0.992758570461551137498190655606, 0.120126686357101511437583951647}, + {-0.120126686357101511437583951647, 0.992758570461551137498190655606}, + 
{0.617043922729849758646025748021, 0.78692871177900181045572480798}, + {-0.78692871177900181045572480798, 0.617043922729849758646025748021}, + {0.871218831320811015750393835333, 0.49089484408781514090946984652}, + {-0.49089484408781514090946984652, 0.871218831320811015750393835333}, + {0.268929670420357258553423207559, 0.963159816628371356905802258552}, + {-0.963159816628371356905802258552, 0.268929670420357258553423207559}, + {0.950247438978705227796694998688, 0.311496074958275914745797763317}, + {-0.311496074958275914745797763317, 0.950247438978705227796694998688}, + {0.451665420991002486417187355983, 0.892187394822982482445183904929}, + {-0.892187394822982482445183904929, 0.451665420991002486417187355983}, + {0.758709772560407391672754329193, 0.651428799656059820399889304099}, + {-0.651428799656059820399889304099, 0.758709772560407391672754329193}, + {0.0758591034329544472436523960823, 0.997118546826979978980887153739}, + {-0.997118546826979978980887153739, 0.0758591034329544472436523960823}, + {0.99745708640994190652406814479, 0.0712696342812964012125576118706}, + {-0.0712696342812964012125576118706, 0.99745708640994190652406814479}, + {0.654913428050056034557258044515, 0.755703911436035880022643596021}, + {-0.755703911436035880022643596021, 0.654913428050056034557258044515}, + {0.894256478422316036791528404137, 0.447554857866293009927005641657}, + {-0.447554857866293009927005641657, 0.894256478422316036791528404137}, + {0.315865745062184011260342231253, 0.948803894962658489475870737806}, + {-0.948803894962658489475870737806, 0.315865745062184011260342231253}, + {0.964387212282854289213673837367, 0.264494432427801628993080385044}, + {-0.264494432427801628993080385044, 0.964387212282854289213673837367}, + {0.494898930739011200241606047712, 0.868950544250582379568470514641}, + {-0.868950544250582379568470514641, 0.494898930739011200241606047712}, + {0.789759969600819067281349816767, 0.613416001108638586636345735315}, + {-0.613416001108638586636345735315, 
0.789759969600819067281349816767}, + {0.124694015942167654720229563736, 0.992195244086673922012664661452}, + {-0.992195244086673922012664661452, 0.124694015942167654720229563736}, + {0.985668412161537554894152890483, 0.168694342723617329848906365442}, + {-0.168694342723617329848906365442, 0.985668412161537554894152890483}, + {0.577687904553122799633513295703, 0.816257731928477392457921268942}, + {-0.816257731928477392457921268942, 0.577687904553122799633513295703}, + {0.846082341744896937463238373311, 0.533052221632619560587329488044}, + {-0.533052221632619560587329488044, 0.846082341744896937463238373311}, + {0.221345720647030841377045362606, 0.975195401932990368898401811748}, + {-0.975195401932990368898401811748, 0.221345720647030841377045362606}, + {0.933818436362210957391027932317, 0.357747296160341898829670981286}, + {-0.357747296160341898829670981286, 0.933818436362210957391027932317}, + {0.407343809682607971289769466239, 0.91327488781486776403539806779}, + {-0.91327488781486776403539806779, 0.407343809682607971289769466239}, + {0.72583177722277025800678984524, 0.687872249166685545418431502185}, + {-0.687872249166685545418431502185, 0.72583177722277025800678984524}, + {0.0268414396990985307245303204127, 0.99963970365071019852365452607}, + {-0.99963970365071019852365452607, 0.0268414396990985307245303204127}, + {0.998905715365818291928690086934, 0.0467693469005378628655655859347}, + {-0.0467693469005378628655655859347, 0.998905715365818291928690086934}, + {0.673262082756132973493379267893, 0.739403927446205755380503887864}, + {-0.739403927446205755380503887864, 0.673262082756132973493379267893}, + {0.904970691133653248883206288156, 0.425473910115623854544253390486}, + {-0.425473910115623854544253390486, 0.904970691133653248883206288156}, + {0.339055425414969580089064038475, 0.940766399536396069613886083971}, + {-0.940766399536396069613886083971, 0.339055425414969580089064038475}, + {0.970587775194143631551924045198, 0.24074752468858842679999554548}, + 
{-0.24074752468858842679999554548, 0.970587775194143631551924045198}, + {0.516074990315366632920301981358, 0.856543404837719957178876484249}, + {-0.856543404837719957178876484249, 0.516074990315366632920301981358}, + {0.80457609092630710811988592468, 0.593849571785433516524221886357}, + {-0.593849571785433516524221886357, 0.80457609092630710811988592468}, + {0.149006150660348474223937387251, 0.988836269088763542001174755569}, + {-0.988836269088763542001174755569, 0.149006150660348474223937387251}, + {0.989511513679355192429909493512, 0.144454021390860470885897370863}, + {-0.144454021390860470885897370863, 0.989511513679355192429909493512}, + {0.597545883289693269269093889307, 0.801834719479981306022864373517}, + {-0.801834719479981306022864373517, 0.597545883289693269269093889307}, + {0.858909273947823903583298488229, 0.512127776171554693895870968845}, + {-0.512127776171554693895870968845, 0.858909273947823903583298488229}, + {0.245211548667627565745164019972, 0.969469595397413064219449552184}, + {-0.969469595397413064219449552184, 0.245211548667627565745164019972}, + {0.942316745856563775518566217215, 0.334722497717581224385696714307}, + {-0.334722497717581224385696714307, 0.942316745856563775518566217215}, + {0.429634013069016384989140533435, 0.903003108972617152261364026344}, + {-0.903003108972617152261364026344, 0.429634013069016384989140533435}, + {0.742494400323139291941743067582, 0.669852271391821019186352259567}, + {-0.669852271391821019186352259567, 0.742494400323139291941743067582}, + {0.0513657419671625925516877941845, 0.998679908955899087175112072146}, + {-0.998679908955899087175112072146, 0.0513657419671625925516877941845}, + {0.995407626602534900683849627967, 0.0957269914993071624476073111509}, + {-0.0957269914993071624476073111509, 0.995407626602534900683849627967}, + {0.636170277983712173508479281736, 0.771548687647206299367041992809}, + {-0.771548687647206299367041992809, 0.636170277983712173508479281736}, + {0.883003599046780829340264062921, 
0.469366215305737521923390431766}, + {-0.469366215305737521923390431766, 0.883003599046780829340264062921}, + {0.29248579899555388061926919363, 0.95626986640065803069177263751}, + {-0.95626986640065803069177263751, 0.29248579899555388061926919363}, + {0.957605738575646348031966681447, 0.288082018611004131436459374527}, + {-0.288082018611004131436459374527, 0.957605738575646348031966681447}, + {0.473424762552241529256491503475, 0.880834260347742037389195957076}, + {-0.880834260347742037389195957076, 0.473424762552241529256491503475}, + {0.774468126400670864306619023409, 0.632612931569877523507727801189}, + {-0.632612931569877523507727801189, 0.774468126400670864306619023409}, + {0.100306770211392864977639760582, 0.994956557770116378769387210923}, + {-0.994956557770116378769387210923, 0.100306770211392864977639760582}, + {0.981231580848749729284463683143, 0.192833048892205233260099817016}, + {-0.192833048892205233260099817016, 0.981231580848749729284463683143}, + {0.55748194822399155246017699028, 0.830189061241102366217603503173}, + {-0.830189061241102366217603503173, 0.55748194822399155246017699028}, + {0.832745761176359455824069755181, 0.553655576367479307364760643395}, + {-0.553655576367479307364760643395, 0.832745761176359455824069755181}, + {0.197346562240965917034429821797, 0.980333787223347963291075757297}, + {-0.980333787223347963291075757297, 0.197346562240965917034429821797}, + {0.924757629559513905093126595602, 0.380556601008928518936613727419}, + {-0.380556601008928518936613727419, 0.924757629559513905093126595602}, + {0.384808237616812875980798480668, 0.922996544014246245168919813295}, + {-0.922996544014246245168919813295, 0.384808237616812875980798480668}, + {0.708731940200400645224476647854, 0.705477878419852211244744921714}, + {-0.705477878419852211244744921714, 0.708731940200400645224476647854}, + {0.00230096915142580498575552994112, 0.999997352766978209182013870304}, + {-0.999997352766978209182013870304, 0.00230096915142580498575552994112}, + 
{0.999999926465717892121176646469, 0.000383495187571395563207177215048}, + {-0.000383495187571395563207177215048, 0.999999926465717892121176646469}, + {0.706835557142273862574199938535, 0.707377901237642103815517202747}, + {-0.707377901237642103815517202747, 0.706835557142273862574199938535}, + {0.92373270731979317815785179846, 0.383037707579352071363132381521}, + {-0.383037707579352071363132381521, 0.92373270731979317815785179846}, + {0.382329100870124505284763927193, 0.9240262218291438456319042416}, + {-0.9240262218291438456319042416, 0.382329100870124505284763927193}, + {0.980710392082253967771521274699, 0.195466434105376979379897761646}, + {-0.195466434105376979379897761646, 0.980710392082253967771521274699}, + {0.555251327571214092770901515905, 0.831682609671745121104891040886}, + {-0.831682609671745121104891040886, 0.555251327571214092770901515905}, + {0.831256492650303213665097246121, 0.555889056761073807599871088314}, + {-0.555889056761073807599871088314, 0.831256492650303213665097246121}, + {0.194714181235225991528636768635, 0.980860024481523873340904629003}, + {-0.980860024481523873340904629003, 0.194714181235225991528636768635}, + {0.995147064390386471011140656628, 0.0983987816753638944167192903478}, + {-0.0983987816753638944167192903478, 0.995147064390386471011140656628}, + {0.634096791725183739352189604688, 0.773253683291472593275273084146}, + {-0.773253683291472593275273084146, 0.634096791725183739352189604688}, + {0.881740421116898320796906318719, 0.471734914722871490067035438187}, + {-0.471734914722871490067035438187, 0.881740421116898320796906318719}, + {0.289917673895040750586815647694, 0.957051588141041076340798099409}, + {-0.957051588141041076340798099409, 0.289917673895040750586815647694}, + {0.95682894258753536931294547685, 0.290651637922133276870084728216}, + {-0.290651637922133276870084728216, 0.95682894258753536931294547685}, + {0.471058489601482499598006370434, 0.882101977876917575649429181794}, + {-0.882101977876917575649429181794, 
0.471058489601482499598006370434}, + {0.772767109748463854046462984115, 0.63468968330279773581992230902}, + {-0.63468968330279773581992230902, 0.772767109748463854046462984115}, + {0.0976354845685172001434892763427, 0.995222242593618355854800938687}, + {-0.995222242593618355854800938687, 0.0976354845685172001434892763427}, + {0.998776565542495609051343308238, 0.0494507039700846640073450544151}, + {-0.0494507039700846640073450544151, 0.998776565542495609051343308238}, + {0.671274754273613494248706956569, 0.741208610497004261041809058952}, + {-0.741208610497004261041809058952, 0.671274754273613494248706956569}, + {0.903825261328487394862918336003, 0.427901737533854131800836739785}, + {-0.427901737533854131800836739785, 0.903825261328487394862918336003}, + {0.336528751001382409047124610879, 0.941673191584771362983019571402}, + {-0.941673191584771362983019571402, 0.336528751001382409047124610879}, + {0.969938000134323963230542631209, 0.243352164353284744491290325641}, + {-0.243352164353284744491290325641, 0.969938000134323963230542631209}, + {0.513773771594868033929515149794, 0.857925702856129790419004166324}, + {-0.857925702856129790419004166324, 0.513773771594868033929515149794}, + {0.802979024600843249714898774982, 0.596007286911056533007524649292}, + {-0.596007286911056533007524649292, 0.802979024600843249714898774982}, + {0.146351119234411464198331032094, 0.989232707657220045049939471937}, + {-0.989232707657220045049939471937, 0.146351119234411464198331032094}, + {0.989120166795572686169180087745, 0.147109808096871796934124176914}, + {-0.147109808096871796934124176914, 0.989120166795572686169180087745}, + {0.595391234465168728284822918795, 0.803435920233868117179554246832}, + {-0.803435920233868117179554246832, 0.595391234465168728284822918795}, + {0.857531390999499154581542370579, 0.514431641183222931879015504819}, + {-0.514431641183222931879015504819, 0.857531390999499154581542370579}, + {0.24260815971849680749983235728, 0.970124363593660277160779514816}, + 
{-0.970124363593660277160779514816, 0.24260815971849680749983235728}, + {0.94141480030973623271961514547, 0.337250906237150593902640594024}, + {-0.337250906237150593902640594024, 0.94141480030973623271961514547}, + {0.427208386446796317681418031498, 0.904153191969991776311132980481}, + {-0.904153191969991776311132980481, 0.427208386446796317681418031498}, + {0.740693531242295755134819046361, 0.671843056655211934291571651556}, + {-0.671843056655211934291571651556, 0.740693531242295755134819046361}, + {0.0486846374684389432418996079832, 0.998814199976435390659901258914}, + {-0.998814199976435390659901258914, 0.0486846374684389432418996079832}, + {0.99968933374103363664886501283, 0.0249246064042814678696213803732}, + {-0.0249246064042814678696213803732, 0.99968933374103363664886501283}, + {0.689262748761273469355614906817, 0.724511465175019631068664693885}, + {-0.724511465175019631068664693885, 0.689262748761273469355614906817}, + {0.914054280384046569096767598239, 0.405591879247603870339844434056}, + {-0.405591879247603870339844434056, 0.914054280384046569096767598239}, + {0.359537211821973068381907978619, 0.933130748242325225305648928043}, + {-0.933130748242325225305648928043, 0.359537211821973068381907978619}, + {0.975618034019781754651035043935, 0.219475401116790314048898835608}, + {-0.219475401116790314048898835608, 0.975618034019781754651035043935}, + {0.53467358326995551021099117861, 0.845058672136595467883068977244}, + {-0.845058672136595467883068977244, 0.53467358326995551021099117861}, + {0.817363933360698458052695514198, 0.57612168891747839172268186303}, + {-0.57612168891747839172268186303, 0.817363933360698458052695514198}, + {0.170584026954463618963586668542, 0.985343132998854787096831842064}, + {-0.985343132998854787096831842064, 0.170584026954463618963586668542}, + {0.992432517712593664782616542652, 0.122791277323116773678712831952}, + {-0.122791277323116773678712831952, 0.992432517712593664782616542652}, + {0.614929218278879585746210523212, 
0.788582308010347232674064343882}, + {-0.788582308010347232674064343882, 0.614929218278879585746210523212}, + {0.869897903042806341922243973386, 0.493231830158727957424957821786}, + {-0.493231830158727957424957821786, 0.869897903042806341922243973386}, + {0.26634313437923817780017543555, 0.96387827798381420230100502522}, + {-0.96387827798381420230100502522, 0.26634313437923817780017543555}, + {0.949407815332291571408518393582, 0.314045856820250712804210024842}, + {-0.314045856820250712804210024842, 0.949407815332291571408518393582}, + {0.449268749371829922978349713958, 0.893396659294107720050703846937}, + {-0.893396659294107720050703846937, 0.449268749371829922978349713958}, + {0.756958302183750486591407025116, 0.653463180871802329363617900526}, + {-0.653463180871802329363617900526, 0.756958302183750486591407025116}, + {0.0731821020994028875739090267416, 0.997318594999768603948098188994}, + {-0.997318594999768603948098188994, 0.0731821020994028875739090267416}, + {0.997262171687536169706334021612, 0.0739470142808971997450129265417}, + {-0.0739470142808971997450129265417, 0.997262171687536169706334021612}, + {0.652882408974558958725253887678, 0.757459279467600721247322326235}, + {-0.757459279467600721247322326235, 0.652882408974558958725253887678}, + {0.893051811731707445574102166574, 0.449953843813690523845139068726}, + {-0.449953843813690523845139068726, 0.893051811731707445574102166574}, + {0.313317577844809014298022020739, 0.949648406208035478215379043831}, + {-0.949648406208035478215379043831, 0.313317577844809014298022020739}, + {0.963673711865903226403418102564, 0.267082341345496243611989939382}, + {-0.267082341345496243611989939382, 0.963673711865903226403418102564}, + {0.492564481811010645984083566873, 0.870275951212171938742301335878}, + {-0.870275951212171938742301335878, 0.492564481811010645984083566873}, + {0.788110431301888070265704300255, 0.615533872401147430508672186988}, + {-0.615533872401147430508672186988, 0.788110431301888070265704300255}, + 
{0.12203005507255336448135807359, 0.992526405522286103710882798623}, + {-0.992526405522286103710882798623, 0.12203005507255336448135807359}, + {0.985212006875659351834428889561, 0.171339725423019312300354499712}, + {-0.171339725423019312300354499712, 0.985212006875659351834428889561}, + {0.575494609234928233831851684954, 0.817805572701444272176729555213}, + {-0.817805572701444272176729555213, 0.575494609234928233831851684954}, + {0.844648334111417820047051918664, 0.535321577822907124222240327072}, + {-0.535321577822907124222240327072, 0.844648334111417820047051918664}, + {0.218727046974044436744577524223, 0.975786082562163925580023260409}, + {-0.975786082562163925580023260409, 0.218727046974044436744577524223}, + {0.93285471221324112178763243719, 0.360252808318756889693901257488}, + {-0.360252808318756889693901257488, 0.93285471221324112178763243719}, + {0.404890689164117634213369001372, 0.914365096571498559008261963754}, + {-0.914365096571498559008261963754, 0.404890689164117634213369001372}, + {0.723982594213935515270463838533, 0.68981823930312247128426861309}, + {-0.68981823930312247128426861309, 0.723982594213935515270463838533}, + {0.0241578470322998603569963904647, 0.999708156627104882474554869987}, + {-0.999708156627104882474554869987, 0.0241578470322998603569963904647}, + {0.999919922234522751125496142777, 0.0126550036944302422142749620093}, + {-0.0126550036944302422142749620093, 0.999919922234522751125496142777}, + {0.698101718727283881982259572396, 0.715998596583828694761564293003}, + {-0.715998596583828694761564293003, 0.698101718727283881982259572396}, + {0.91896269005237563032295611265, 0.394344486828079598961238616539}, + {-0.394344486828079598961238616539, 0.91896269005237563032295611265}, + {0.370961089033801982850491185673, 0.928648410553130521094544747029}, + {-0.928648410553130521094544747029, 0.370961089033801982850491185673}, + {0.978237872563701094108523648174, 0.207486540966020649445766821373}, + {-0.207486540966020649445766821373, 
0.978237872563701094108523648174}, + {0.545003493181281162272000528901, 0.838433773425308337401418157242}, + {-0.838433773425308337401418157242, 0.545003493181281162272000528901}, + {0.824372286722551250726098714949, 0.566047995212271559672956300346}, + {-0.566047995212271559672956300346, 0.824372286722551250726098714949}, + {0.182662858272129274839201684699, 0.983175610055424420430369991664}, + {-0.983175610055424420430369991664, 0.182662858272129274839201684699}, + {0.993864627230059749507518063183, 0.110603357728661741421483100112}, + {-0.110603357728661741421483100112, 0.993864627230059749507518063183}, + {0.62456003322387720899655505491, 0.780976801767753747718359136343}, + {-0.780976801767753747718359136343, 0.62456003322387720899655505491}, + {0.875885114618103810535387765412, 0.482519705287184352826557187655}, + {-0.482519705287184352826557187655, 0.875885114618103810535387765412}, + {0.278151348422115085590178296115, 0.960537259751520045014672177786}, + {-0.960537259751520045014672177786, 0.278151348422115085590178296115}, + {0.95319015242533666754098931051, 0.302371515390195966244135661327}, + {-0.302371515390195966244135661327, 0.95319015242533666754098931051}, + {0.460198271570134320729295041019, 0.887816169510254438179686076182}, + {-0.887816169510254438179686076182, 0.460198271570134320729295041019}, + {0.764920303058128414619432078325, 0.644124933510154540350356455747}, + {-0.644124933510154540350356455747, 0.764920303058128414619432078325}, + {0.0854152249433073329498711245833, 0.996345441776035900538488476741}, + {-0.996345441776035900538488476741, 0.0854152249433073329498711245833}, + {0.998094523296980007387446676148, 0.0617035052859572982764113646681}, + {-0.0617035052859572982764113646681, 0.998094523296980007387446676148}, + {0.66212843867776871586983133966, 0.749390372699129558853314847511}, + {-0.749390372699129558853314847511, 0.66212843867776871586983133966}, + {0.898506192393901947923495754367, 0.438960843617984319831037964832}, + 
{-0.438960843617984319831037964832, 0.898506192393901947923495754367}, + {0.324947632382188433819436568228, 0.945732010777477150043068832019}, + {-0.945732010777477150043068832019, 0.324947632382188433819436568228}, + {0.966878660184995908366545336321, 0.255236471686291710447846980969}, + {-0.255236471686291710447846980969, 0.966878660184995908366545336321}, + {0.503207017265869027689006998116, 0.864165897136879301854150980944}, + {-0.864165897136879301854150980944, 0.503207017265869027689006998116}, + {0.795604635517188185644954501186, 0.60581619650151496969670006365}, + {-0.60581619650151496969670006365, 0.795604635517188185644954501186}, + {0.134200692218792022591955515054, 0.990954173616518496636729196325}, + {-0.990954173616518496636729196325, 0.134200692218792022591955515054}, + {0.987240424223882251375528085191, 0.159236756994887845850783492097}, + {-0.159236756994887845850783492097, 0.987240424223882251375528085191}, + {0.585487007944951454163629023242, 0.810681789315430667564044142637}, + {-0.810681789315430667564044142637, 0.585487007944951454163629023242}, + {0.85115395288271533669188784188, 0.524916134722612892637982895394}, + {-0.524916134722612892637982895394, 0.85115395288271533669188784188}, + {0.230684973500512202626566704566, 0.973028490333694207059522796044}, + {-0.973028490333694207059522796044, 0.230684973500512202626566704566}, + {0.937205326098887958607974724146, 0.348778119628908422900082086926}, + {-0.348778119628908422900082086926, 0.937205326098887958607974724146}, + {0.416080867929579212294299850328, 0.909327614967767261511255583173}, + {-0.909327614967767261511255583173, 0.416080867929579212294299850328}, + {0.732393210589896037632229308656, 0.680881917135287229037032830092}, + {-0.680881917135287229037032830092, 0.732393210589896037632229308656}, + {0.0364239849094441098253938093876, 0.999336426496761243143396313826}, + {-0.999336426496761243143396313826, 0.0364239849094441098253938093876}, + {0.999308195711029467744879184465, 
0.0371904555600881189802997539573}, + {-0.0371904555600881189802997539573, 0.999308195711029467744879184465}, + {0.680319978360607202638732360356, 0.7329152250045177785509054047}, + {-0.7329152250045177785509054047, 0.680319978360607202638732360356}, + {0.909008217503247450963499431964, 0.416778191021997590492986773825}, + {-0.416778191021997590492986773825, 0.909008217503247450963499431964}, + {0.348059189628525611492904090483, 0.93747255987315913916546605833}, + {-0.93747255987315913916546605833, 0.348059189628525611492904090483}, + {0.972851270988544181150814438297, 0.231431209079445754372983401481}, + {-0.231431209079445754372983401481, 0.972851270988544181150814438297}, + {0.524263153483673360888417391834, 0.85155630812022897746516036932}, + {-0.85155630812022897746516036932, 0.524263153483673360888417391834}, + {0.810232487996982331246442754491, 0.586108620815476433207891204802}, + {-0.586108620815476433207891204802, 0.810232487996982331246442754491}, + {0.158479506309795958873820609369, 0.98736226689083239627109378489}, + {-0.98736226689083239627109378489, 0.158479506309795958873820609369}, + {0.990850951508413624324589363823, 0.134960705002868747159894269316}, + {-0.134960705002868747159894269316, 0.990850951508413624324589363823}, + {0.605205797255496502629057431477, 0.796069056657987994540803811105}, + {-0.796069056657987994540803811105, 0.605205797255496502629057431477}, + {0.863779688043046722789597424708, 0.503869676130898946908587276994}, + {-0.503869676130898946908587276994, 0.863779688043046722789597424708}, + {0.254494810040010788210196324144, 0.967074139692867040807300327288}, + {-0.967074139692867040807300327288, 0.254494810040010788210196324144}, + {0.945482500914453738438680829859, 0.325672904099419791013048097739}, + {-0.325672904099419791013048097739, 0.945482500914453738438680829859}, + {0.438271568952410484065751461458, 0.898842606827242263101140906656}, + {-0.898842606827242263101140906656, 0.438271568952410484065751461458}, + 
{0.748882306173375145164072819171, 0.66270301908203743668224205976}, + {-0.66270301908203743668224205976, 0.748882306173375145164072819171}, + {0.0609379583001072033798806160121, 0.998141555711520522820023870736}, + {-0.998141555711520522820023870736, 0.0609379583001072033798806160121}, + {0.996279636063254647737608138414, 0.0861793871274848938268675624386}, + {-0.0861793871274848938268675624386, 0.996279636063254647737608138414}, + {0.643538057582047740012853864755, 0.765414115654738269611812029325}, + {-0.765414115654738269611812029325, 0.643538057582047740012853864755}, + {0.887462940751568840624941003625, 0.460879082615578694603897247362}, + {-0.460879082615578694603897247362, 0.887462940751568840624941003625}, + {0.3016403388326787671225304166, 0.95342178808170030546165207852}, + {-0.95342178808170030546165207852, 0.3016403388326787671225304166}, + {0.96032363783047391958547223112, 0.278887989386500223520926056153}, + {-0.278887989386500223520926056153, 0.96032363783047391958547223112}, + {0.481847767956986028359267493215, 0.876254944930338508513045781001}, + {-0.876254944930338508513045781001, 0.481847767956986028359267493215}, + {0.780497540554531910039770536969, 0.625158851163707729448049121856}, + {-0.625158851163707729448049121856, 0.780497540554531910039770536969}, + {0.109841040648882587449364223176, 0.993949166602181133356452846783}, + {-0.993949166602181133356452846783, 0.109841040648882587449364223176}, + {0.983035220223095640434962660947, 0.18341689071873909511189992827}, + {-0.18341689071873909511189992827, 0.983035220223095640434962660947}, + {0.565415543153589661429236912227, 0.824806197576334443333223589434}, + {-0.824806197576334443333223589434, 0.565415543153589661429236912227}, + {0.838015514407863815193877599086, 0.545646403462648588167382968095}, + {-0.545646403462648588167382968095, 0.838015514407863815193877599086}, + {0.206736180958843690502746426318, 0.978396724995823086068469365273}, + {-0.978396724995823086068469365273, 
0.206736180958843690502746426318}, + {0.92836361383924448364979298276, 0.371673244260786461712342543251}, + {-0.371673244260786461712342543251, 0.92836361383924448364979298276}, + {0.39363953535017293106079705467, 0.919264878154985254354869539384}, + {-0.919264878154985254354869539384, 0.39363953535017293106079705467}, + {0.71546294872230364880749675649, 0.698650677381469575877304123424}, + {-0.698650677381469575877304123424, 0.71546294872230364880749675649}, + {0.0118880710722520915173516797836, 0.999929334386276069679411193647}, + {-0.999929334386276069679411193647, 0.0118880710722520915173516797836}, + {0.999978748667468830824134329305, 0.00651937216633946808180821719247}, + {-0.00651937216633946808180821719247, 0.999978748667468830824134329305}, + {0.702481861957307995858457161376, 0.711701646493102968449306899856}, + {-0.711701646493102968449306899856, 0.702481861957307995858457161376}, + {0.921365043122642446427050799684, 0.388698414341519193904161966202}, + {-0.388698414341519193904161966202, 0.921365043122642446427050799684}, + {0.376652185322909616171926927564, 0.926354754557602855236098093883}, + {-0.926354754557602855236098093883, 0.376652185322909616171926927564}, + {0.979492570993820810265617637924, 0.201480280345037759959225809325}, + {-0.201480280345037759959225809325, 0.979492570993820810265617637924}, + {0.550137766564233632315961131098, 0.835073911578919303444479282916}, + {-0.835073911578919303444479282916, 0.550137766564233632315961131098}, + {0.827829973351729919706087912346, 0.560979086259438153305723062658}, + {-0.560979086259438153305723062658, 0.827829973351729919706087912346}, + {0.188692071828605201222472942391, 0.982036303824369016801654197479}, + {-0.982036303824369016801654197479, 0.188692071828605201222472942391}, + {0.994524567454151742218471099477, 0.104503036942150573374021860218}, + {-0.104503036942150573374021860218, 0.994524567454151742218471099477}, + {0.629340259627065745640095428826, 0.777129871779831615796751975722}, + 
{-0.777129871779831615796751975722, 0.629340259627065745640095428826}, + {0.878829311580933358882816719415, 0.477136291960884806329090679355}, + {-0.477136291960884806329090679355, 0.878829311580933358882816719415}, + {0.284039858128637190404219836637, 0.958812473320129310394577260013}, + {-0.958812473320129310394577260013, 0.284039858128637190404219836637}, + {0.955027525629714157950900244032, 0.296517158507877409689967862505}, + {-0.296517158507877409689967862505, 0.955027525629714157950900244032}, + {0.465637146073493657372210918766, 0.884975733111666662544791961409}, + {-0.884975733111666662544791961409, 0.465637146073493657372210918766}, + {0.76885817994125327246024426131, 0.639419345294950702829339661548}, + {-0.639419345294950702829339661548, 0.76885817994125327246024426131}, + {0.0915270777272848279348949063206, 0.99580258788712916473429004327}, + {-0.99580258788712916473429004327, 0.0915270777272848279348949063206}, + {0.998454340040524801480614769389, 0.0555781508710046778798385957998}, + {-0.0555781508710046778798385957998, 0.998454340040524801480614769389}, + {0.666714147181097671612803878816, 0.745313521914490517694673599181}, + {-0.745313521914490517694673599181, 0.666714147181097671612803878816}, + {0.901182691370684518794575978973, 0.43343944995107408502121870697}, + {-0.43343944995107408502121870697, 0.901182691370684518794575978973}, + {0.330744417861982942241638738778, 0.94372036645032619794903894217}, + {-0.94372036645032619794903894217, 0.330744417861982942241638738778}, + {0.968426560515983192445332861098, 0.249299011003218162763062082377}, + {-0.249299011003218162763062082377, 0.968426560515983192445332861098}, + {0.508499966798540814494344886043, 0.861062009245491477571476934827}, + {-0.861062009245491477571476934827, 0.508499966798540814494344886043}, + {0.799306876785086162229276851576, 0.600923053912954086008824106102}, + {-0.600923053912954086008824106102, 0.799306876785086162229276851576}, + {0.140278546430595424387988146009, 
0.990112079216953766547248960705}, + {-0.990112079216953766547248960705, 0.140278546430595424387988146009}, + {0.988198898074717613226880530419, 0.153176166043917844072907996633}, + {-0.153176166043917844072907996633, 0.988198898074717613226880530419}, + {0.590450236263895811283930470381, 0.807074047715517606249591153755}, + {-0.807074047715517606249591153755, 0.590450236263895811283930470381}, + {0.854358755003227443580726685468, 0.519683670851158518999568514118}, + {-0.519683670851158518999568514118, 0.854358755003227443580726685468}, + {0.236651021498106378659898041406, 0.97159471695965016202478636842}, + {-0.97159471695965016202478636842, 0.236651021498106378659898041406}, + {0.939327745783671397283853821136, 0.343020970205855535439809500531}, + {-0.343020970205855535439809500531, 0.939327745783671397283853821136}, + {0.421652564678558328115087761034, 0.906757472922056551034586391324}, + {-0.906757472922056551034586391324, 0.421652564678558328115087761034}, + {0.736557236397919146142498902918, 0.676375219467611699108999800956}, + {-0.676375219467611699108999800956, 0.736557236397919146142498902918}, + {0.0425551122769040196525125452354, 0.999094120901079074670292357041}, + {-0.999094120901079074670292357041, 0.0425551122769040196525125452354}, + {0.999517580362016988537732231634, 0.0310581156424347033795374528609}, + {-0.0310581156424347033795374528609, 0.999517580362016988537732231634}, + {0.684804254807510615066235004633, 0.728727063170793831758942360466}, + {-0.728727063170793831758942360466, 0.684804254807510615066235004633}, + {0.911548408584833991241680450912, 0.411192775722600156740327292937}, + {-0.411192775722600156740327292937, 0.911548408584833991241680450912}, + {0.353804861001772052997438322564, 0.935319261178511607290886331612}, + {-0.935319261178511607290886331612, 0.353804861001772052997438322564}, + {0.974252992541422502270620498166, 0.225457549272768537074185246638}, + {-0.225457549272768537074185246638, 0.974252992541422502270620498166}, + 
{0.529478335656851983870296862733, 0.848323459577801641806615862151}, + {-0.848323459577801641806615862151, 0.529478335656851983870296862733}, + {0.813813530488567193899029916793, 0.581126094400977621923232163681}, + {-0.581126094400977621923232163681, 0.813813530488567193899029916793}, + {0.164534863954445970124496056997, 0.986371268105216025823267500527}, + {-0.986371268105216025823267500527, 0.164534863954445970124496056997}, + {0.991660402337333213296233225265, 0.128878417262776545637592562343}, + {-0.128878417262776545637592562343, 0.991660402337333213296233225265}, + {0.61007899233180962195177698959, 0.792340597922007061626459289982}, + {-0.792340597922007061626459289982, 0.61007899233180962195177698959}, + {0.866855113845470426348072123801, 0.498560138398525143355044519922}, + {-0.498560138398525143355044519922, 0.866855113845470426348072123801}, + {0.260423874615468009530161452858, 0.965494383997269500774507378082}, + {-0.965494383997269500774507378082, 0.260423874615468009530161452858}, + {0.947462993846477696813224156358, 0.319865401835630502880292169721}, + {-0.319865401835630502880292169721, 0.947462993846477696813224156358}, + {0.443778513167218224833021622544, 0.896136502577086768717151699093}, + {-0.896136502577086768717151699093, 0.443778513167218224833021622544}, + {0.752934477957330150488246545137, 0.658095488438511178053147432365}, + {-0.658095488438511178053147432365, 0.752934477957330150488246545137}, + {0.0670612926096368217043774961894, 0.99774885769592569495500811172}, + {-0.99774885769592569495500811172, 0.0670612926096368217043774961894}, + {0.996789668159204556019403753453, 0.080064707899690876202747347179}, + {-0.080064707899690876202747347179, 0.996789668159204556019403753453}, + {0.648222435882470415791090090352, 0.761451031661653510163034752622}, + {-0.761451031661653510163034752622, 0.648222435882470415791090090352}, + {0.890274135400644595073060827417, 0.45542503646224236080186642539}, + {-0.45542503646224236080186642539, 
0.890274135400644595073060827417}, + {0.307484746652204099515159896328, 0.951553009861368592758879003668}, + {-0.951553009861368592758879003668, 0.307484746652204099515159896328}, + {0.962016784542290559478772138391, 0.272990304331329924902860284419}, + {-0.272990304331329924902860284419, 0.962016784542290559478772138391}, + {0.487215296574268763585280339612, 0.873281887355994212995824454993}, + {-0.873281887355994212995824454993, 0.487215296574268763585280339612}, + {0.784318750507038919828062262241, 0.620358039847213826867289299116}, + {-0.620358039847213826867289299116, 0.784318750507038919828062262241}, + {0.11593773035572778329349574733, 0.993256483834846437552812403737}, + {-0.993256483834846437552812403737, 0.11593773035572778329349574733}, + {0.984142139747038569019821352413, 0.177381647230260064418416732224}, + {-0.177381647230260064418416732224, 0.984142139747038569019821352413}, + {0.570465815052012992225627385778, 0.821321346281126740684896958555}, + {-0.821321346281126740684896958555, 0.570465815052012992225627385778}, + {0.841347762393501952260521647986, 0.540494165292695227797992174601}, + {-0.540494165292695227797992174601, 0.841347762393501952260521647986}, + {0.212735618654345898237423284627, 0.97710979759480087736278619559}, + {-0.97710979759480087736278619559, 0.212735618654345898237423284627}, + {0.930626681810531763971994223539, 0.365969915570008741401153429251}, + {-0.365969915570008741401153429251, 0.930626681810531763971994223539}, + {0.39927262845154098958033728195, 0.916832246471183887059908101946}, + {-0.916832246471183887059908101946, 0.39927262845154098958033728195}, + {0.719736320300951026851521419303, 0.694247527355803306647885619896}, + {-0.694247527355803306647885619896, 0.719736320300951026851521419303}, + {0.0180232983357737422402955473899, 0.99983756716633709338282187673}, + {-0.99983756716633709338282187673, 0.0180232983357737422402955473899}, + {0.999823449381661566448542544094, 0.0187901587687845580965095848569}, + 
{-0.0187901587687845580965095848569, 0.999823449381661566448542544094}, + {0.693695292362118243190138855425, 0.720268589732077080256544832082}, + {-0.720268589732077080256544832082, 0.693695292362118243190138855425}, + {0.916525738556228208864240514231, 0.399975712467595334587144861871}, + {-0.399975712467595334587144861871, 0.916525738556228208864240514231}, + {0.365256026269360267733077307639, 0.930907103460875129385954096506}, + {-0.930907103460875129385954096506, 0.365256026269360267733077307639}, + {0.976946344030581670381252479274, 0.213484989836008054453486693092}, + {-0.213484989836008054453486693092, 0.976946344030581670381252479274}, + {0.539848700724847585519228232442, 0.841762068714012490211473505042}, + {-0.841762068714012490211473505042, 0.539848700724847585519228232442}, + {0.820883562942714584131920219079, 0.571095592778016691859477305115}, + {-0.571095592778016691859477305115, 0.820883562942714584131920219079}, + {0.176626767562280878598457434236, 0.984277900280454365322668763838}, + {-0.984277900280454365322668763838, 0.176626767562280878598457434236}, + {0.993167268564487226711889888975, 0.116699514361267686624046291399}, + {-0.116699514361267686624046291399, 0.993167268564487226711889888975}, + {0.619756292488440663213111747609, 0.784794328420499232024099001137}, + {-0.784794328420499232024099001137, 0.619756292488440663213111747609}, + {0.872907941075761084626094543637, 0.487884952019301099124248821681}, + {-0.487884952019301099124248821681, 0.872907941075761084626094543637}, + {0.27225236647453671112728557091, 0.962225882497979023710854562523}, + {-0.962225882497979023710854562523, 0.27225236647453671112728557091}, + {0.951316892150465553967819687387, 0.308214488155861110474376118873}, + {-0.308214488155861110474376118873, 0.951316892150465553967819687387}, + {0.454742070861955449689872921226, 0.890623180131855929353434930817}, + {-0.890623180131855929353434930817, 0.454742070861955449689872921226}, + {0.760953627357928152896704432351, 
0.648806270785672545287070533959}, + {-0.648806270785672545287070533959, 0.760953627357928152896704432351}, + {0.0793001563243875967623708334031, 0.996850783822196606642762617412}, + {-0.996850783822196606642762617412, 0.0793001563243875967623708334031}, + {0.99769712885875849739392151605, 0.0678265365988108687167468247026}, + {-0.0678265365988108687167468247026, 0.99769712885875849739392151605}, + {0.657517801412960123386142186064, 0.753439009359793576692254646332}, + {-0.753439009359793576692254646332, 0.657517801412960123386142186064}, + {0.89579586516681353192126380236, 0.444465710657234003289062229669}, + {-0.444465710657234003289062229669, 0.89579586516681353192126380236}, + {0.31913861280769589834349631019, 0.947708048828952209774456605373}, + {-0.947708048828952209774456605373, 0.31913861280769589834349631019}, + {0.965294357418934656500653090916, 0.261164322860466480147323409255}, + {-0.261164322860466480147323409255, 0.965294357418934656500653090916}, + {0.497895122273410872804078053377, 0.867237249670668397527606430231}, + {-0.867237249670668397527606430231, 0.497895122273410872804078053377}, + {0.791872440184440473665006265946, 0.610686530452686282544050300203}, + {-0.610686530452686282544050300203, 0.791872440184440473665006265946}, + {0.128117785426777125445951810434, 0.99175895915153611248626930319}, + {-0.99175895915153611248626930319, 0.128117785426777125445951810434}, + {0.986244781329065456354499019653, 0.165291352771958000023033719117}, + {-0.165291352771958000023033719117, 0.986244781329065456354499019653}, + {0.580501736371076604292795764195, 0.814259009204175265850267351198}, + {-0.814259009204175265850267351198, 0.580501736371076604292795764195}, + {0.847917105296951412185535446042, 0.530128835798278852386999915325}, + {-0.530128835798278852386999915325, 0.847917105296951412185535446042}, + {0.22471024034404943336973303758, 0.974425629735034992684461485624}, + {-0.974425629735034992684461485624, 0.22471024034404943336973303758}, + 
{0.935047621163287434598032632493, 0.354522137752887489536846032934}, + {-0.354522137752887489536846032934, 0.935047621163287434598032632493}, + {0.410493505971092353945550712524, 0.911863521342728522434128990426}, + {-0.911863521342728522434128990426, 0.410493505971092353945550712524}, + {0.728201610591444614684064617904, 0.685362979983618725299265861395}, + {-0.685362979983618725299265861395, 0.728201610591444614684064617904}, + {0.0302914861995392838134311119802, 0.999541107640812942491947978851}, + {-0.999541107640812942491947978851, 0.0302914861995392838134311119802}, + {0.999061187671284600675392084668, 0.0433213952781098254884994958047}, + {-0.0433213952781098254884994958047, 0.999061187671284600675392084668}, + {0.675810088251037055506742490252, 0.737075792994265621693728007813}, + {-0.737075792994265621693728007813, 0.675810088251037055506742490252}, + {0.906433802776045460802833986236, 0.422347914858067052801260388151}, + {-0.422347914858067052801260388151, 0.906433802776045460802833986236}, + {0.34230041402351352175514875853, 0.939590563255789157359743057896}, + {-0.939590563255789157359743057896, 0.34230041402351352175514875853}, + {0.971412922135170942006254790613, 0.237396155631906580207868273646}, + {-0.237396155631906580207868273646, 0.971412922135170942006254790613}, + {0.51902823309908086013564343375, 0.854757096048957221157138519629}, + {-0.854757096048957221157138519629, 0.51902823309908086013564343375}, + {0.806620940710169653797834143916, 0.591069080571671401358457842434}, + {-0.591069080571671401358457842434, 0.806620940710169653797834143916}, + {0.152418182001306329320655663651, 0.988316092045159688694866417791}, + {-0.988316092045159688694866417791, 0.152418182001306329320655663651}, + {0.990004195701200906398753431858, 0.141037911548697714181344053941}, + {-0.141037911548697714181344053941, 0.990004195701200906398753431858}, + {0.600309816522980432829115216009, 0.799767543843925676760875376203}, + {-0.799767543843925676760875376203, 
0.600309816522980432829115216009}, + {0.860671741423578384733161783515, 0.509160243454754635195058654062}, + {-0.509160243454754635195058654062, 0.860671741423578384733161783515}, + {0.248556163878796559929540421763, 0.968617485593697535861679170921}, + {-0.968617485593697535861679170921, 0.248556163878796559929540421763}, + {0.943466411100659319011185743875, 0.331468144962440869338848870029}, + {-0.331468144962440869338848870029, 0.943466411100659319011185743875}, + {0.432748124060743699637043846451, 0.901514870161278736304666381329}, + {-0.901514870161278736304666381329, 0.432748124060743699637043846451}, + {0.744801939393862633131959682942, 0.667285599331456480420854404656}, + {-0.667285599331456480420854404656, 0.744801939393862633131959682942}, + {0.054812329710889853839894669818, 0.998496674261694638907727039623}, + {-0.998496674261694638907727039623, 0.054812329710889853839894669818}, + {0.995732094602106432290611337521, 0.0922908217500623406781556923306}, + {-0.0922908217500623406781556923306, 0.995732094602106432290611337521}, + {0.638829450437486290326205562451, 0.769348382238982275715954983752}, + {-0.769348382238982275715954983752, 0.638829450437486290326205562451}, + {0.884618333624369923562369422143, 0.466315776931944481198399898858}, + {-0.466315776931944481198399898858, 0.884618333624369923562369422143}, + {0.295784574424884261212298497412, 0.955254670510586989529144830158}, + {-0.955254670510586989529144830158, 0.295784574424884261212298497412}, + {0.958594335476470216228506160405, 0.284775174466498304237660477156}, + {-0.284775174466498304237660477156, 0.958594335476470216228506160405}, + {0.476462098043581194772855269548, 0.879195012001267484080813119363}, + {-0.879195012001267484080813119363, 0.476462098043581194772855269548}, + {0.776646945310762060188380928594, 0.629936125602796548328399239836}, + {-0.629936125602796548328399239836, 0.776646945310762060188380928594}, + {0.103740215488939371835108715914, 0.994604427745175656561116284138}, + 
{-0.994604427745175656561116284138, 0.103740215488939371835108715914}, + {0.981891289978725101406098474399, 0.189445228664950227059904364069}, + {-0.189445228664950227059904364069, 0.981891289978725101406098474399}, + {0.560343983679540746933867012558, 0.828259995384385661054693628103}, + {-0.828259995384385661054693628103, 0.560343983679540746933867012558}, + {0.834651715611756328527803816542, 0.550778098353912226592399292713}, + {-0.550778098353912226592399292713, 0.834651715611756328527803816542}, + {0.200728959762976139069579062379, 0.979646816313141211018944431999}, + {-0.979646816313141211018944431999, 0.200728959762976139069579062379}, + {0.926065593502609307741124666791, 0.377362579663988340072933169722}, + {-0.377362579663988340072933169722, 0.926065593502609307741124666791}, + {0.387991621942784914445212507417, 0.921662900035694732103763726627}, + {-0.921662900035694732103763726627, 0.387991621942784914445212507417}, + {0.711162640368018350578438457887, 0.703027523604011328473006869899}, + {-0.703027523604011328473006869899, 0.711162640368018350578438457887}, + {0.00575239622957373665512736948813, 0.999983454831937734752500546165}, + {-0.999983454831937734752500546165, 0.00575239622957373665512736948813}, + {0.99999404372898581527806527447, 0.00345144992013599397062684204229}, + {-0.00345144992013599397062684204229, 0.99999404372898581527806527447}, + {0.704662025823468929353055045794, 0.709543113110376766350384514226}, + {-0.709543113110376766350384514226, 0.704662025823468929353055045794}, + {0.922553216932332831312635335053, 0.385869876937555311702254812189}, + {-0.385869876937555311702254812189, 0.922553216932332831312635335053}, + {0.379492429060152569597619276465, 0.925194842336480527400510709413}, + {-0.925194842336480527400510709413, 0.379492429060152569597619276465}, + {0.980106094103951774876293256966, 0.198474291283016385234461154141}, + {-0.198474291283016385234461154141, 0.980106094103951774876293256966}, + {0.552697148165749774229027480033, 
0.83338218268057973059370624469}, + {-0.83338218268057973059370624469, 0.552697148165749774229027480033}, + {0.82954713700780891016961504647, 0.558436699619704102204309492663}, + {-0.558436699619704102204309492663, 0.82954713700780891016961504647}, + {0.191704028727579800506219953604, 0.981452783056635524872035603039}, + {-0.981452783056635524872035603039, 0.191704028727579800506219953604}, + {0.994840497831093184544215546339, 0.101451386758302078416882352485}, + {-0.101451386758302078416882352485, 0.994840497831093184544215546339}, + {0.631721498677792370202155325387, 0.775195425752941313923827237886}, + {-0.775195425752941313923827237886, 0.631721498677792370202155325387}, + {0.880289009156620894103184582491, 0.474437836136679280674144365548}, + {-0.474437836136679280674144365548, 0.880289009156620894103184582491}, + {0.286980116594915513061891942925, 0.957936538962351424864039017848}, + {-0.957936538962351424864039017848, 0.286980116594915513061891942925}, + {0.955932732910098170719948029728, 0.293585779885591202642558528169}, + {-0.293585779885591202642558528169, 0.955932732910098170719948029728}, + {0.468350021981876529775234985209, 0.883543013615961880802274208691}, + {-0.883543013615961880802274208691, 0.468350021981876529775234985209}, + {0.77081627245301853612602371868, 0.637057512412838589099806085869}, + {-0.637057512412838589099806085869, 0.77081627245301853612602371868}, + {0.0945817262675154452056247578184, 0.99551710033341811456608638764}, + {-0.99551710033341811456608638764, 0.0945817262675154452056247578184}, + {0.998620152488108869803795641928, 0.0525146745646032156451532557639}, + {-0.0525146745646032156451532557639, 0.998620152488108869803795641928}, + {0.668997599157450273388292316668, 0.743264564150321604962812216399}, + {-0.743264564150321604962812216399, 0.668997599157450273388292316668}, + {0.902508223725145941607195254619, 0.430672620569826802849888736091}, + {-0.430672620569826802849888736091, 0.902508223725145941607195254619}, + 
{0.333638154596370861693088727407, 0.942701215548982007774725389027}, + {-0.942701215548982007774725389027, 0.333638154596370861693088727407}, + {0.969186841502985951812831899588, 0.246326746938829027611106425866}, + {-0.246326746938829027611106425866, 0.969186841502985951812831899588}, + {0.511139274715464386744656621886, 0.859497901011601728171740433027}, + {-0.859497901011601728171740433027, 0.511139274715464386744656621886}, + {0.801146721041991360934275689942, 0.598467986916314309553399652941}, + {-0.598467986916314309553399652941, 0.801146721041991360934275689942}, + {0.143315507302571504277821645701, 0.989677051045747213642300721403}, + {-0.989677051045747213642300721403, 0.143315507302571504277821645701}, + {0.988664185277066231982701083325, 0.150143693675208189652892087906}, + {-0.150143693675208189652892087906, 0.988664185277066231982701083325}, + {0.592923525775551296668197664985, 0.805258773675822214777042518108}, + {-0.805258773675822214777042518108, 0.592923525775551296668197664985}, + {0.855949101260826905601675207436, 0.517060089400432021378151148383}, + {-0.517060089400432021378151148383, 0.855949101260826905601675207436}, + {0.239630718356093563858877359962, 0.970864109348029469259699908434}, + {-0.970864109348029469259699908434, 0.239630718356093563858877359962}, + {0.940375698633811540894100744481, 0.340137538973531772246161608564}, + {-0.340137538973531772246161608564, 0.940375698633811540894100744481}, + {0.424432473022717415833682252924, 0.905459593711293253548433312972}, + {-0.905459593711293253548433312972, 0.424432473022717415833682252924}, + {0.73862885994817484291985465461, 0.67411231056231235569953241793}, + {-0.67411231056231235569953241793, 0.73862885994817484291985465461}, + {0.0456200895695001440444116269646, 0.99895886172938608282834138663}, + {-0.99895886172938608282834138663, 0.0456200895695001440444116269646}, + {0.999608161397882111209867161961, 0.0279914927566532467650972648698}, + {-0.0279914927566532467650972648698, 
0.999608161397882111209867161961}, + {0.687036735110095664325058351096, 0.726622683797622959112061380438}, + {-0.726622683797622959112061380438, 0.687036735110095664325058351096}, + {0.912805640321603495301872044365, 0.408394249466208003607192722484}, + {-0.408394249466208003607192722484, 0.912805640321603495301872044365}, + {0.356672714981588256932809599675, 0.934229401371880818771842314163}, + {-0.934229401371880818771842314163, 0.356672714981588256932809599675}, + {0.974940101534371827973757262953, 0.222467522169301878953717732657}, + {-0.222467522169301878953717732657, 0.974940101534371827973757262953}, + {0.532078463525973544001601567288, 0.846695050565337448134073383699}, + {-0.846695050565337448134073383699, 0.532078463525973544001601567288}, + {0.815592570258576676778261571599, 0.57862661478626142841363844127}, + {-0.57862661478626142841363844127, 0.815592570258576676778261571599}, + {0.167560234024823562215544825449, 0.985861840205586981156216097588}, + {-0.985861840205586981156216097588, 0.167560234024823562215544825449}, + {0.992051128806485715827534477285, 0.125835439498486995058001980397}, + {-0.125835439498486995058001980397, 0.992051128806485715827534477285}, + {0.612506987879865461010808758147, 0.79046517304580488083587397341}, + {-0.79046517304580488083587397341, 0.612506987879865461010808758147}, + {0.868380595208579797450454407226, 0.495898318070542243329867915236}, + {-0.495898318070542243329867915236, 0.868380595208579797450454407226}, + {0.263384744036113283005562379913, 0.964690871009481032416488233139}, + {-0.964690871009481032416488233139, 0.263384744036113283005562379913}, + {0.948439868128009622161300740117, 0.316957120988508145309481278673}, + {-0.316957120988508145309481278673, 0.948439868128009622161300740117}, + {0.446525732704651345805757500784, 0.894770791897329553776785360242}, + {-0.894770791897329553776785360242, 0.446525732704651345805757500784}, + {0.754949943008732637927948871948, 0.655782420892106143739397339232}, + 
{-0.655782420892106143739397339232, 0.754949943008732637927948871948}, + {0.0701220273621335210556537731463, 0.997538420953611337793631719251}, + {-0.997538420953611337793631719251, 0.0701220273621335210556537731463}, + {0.997030612139289451612000902969, 0.077006223496245640447455116373}, + {-0.077006223496245640447455116373, 0.997030612139289451612000902969}, + {0.650555484066503986184670793591, 0.759458729722028214048634708888}, + {-0.759458729722028214048634708888, 0.650555484066503986184670793591}, + {0.891667169921672275734181312146, 0.452691570590700920195814660474}, + {-0.452691570590700920195814660474, 0.891667169921672275734181312146}, + {0.310402623062358717920261597101, 0.950605181763705342490311522852}, + {-0.950605181763705342490311522852, 0.310402623062358717920261597101}, + {0.962849779558509033527968767885, 0.270037593686750565513676747287}, + {-0.270037593686750565513676747287, 0.962849779558509033527968767885}, + {0.489892194718595186397891438901, 0.871783022060993117996474666143}, + {-0.871783022060993117996474666143, 0.489892194718595186397891438901}, + {0.786218290997455659940840178024, 0.617948864309208256706540396408}, + {-0.617948864309208256706540396408, 0.786218290997455659940840178024}, + {0.118984452677632357442405464099, 0.992896117436765979213930677361}, + {-0.992896117436765979213930677361, 0.118984452677632357442405464099}, + {0.984681707410970941118932842073, 0.174361506905093749386637114185}, + {-0.174361506905093749386637114185, 0.984681707410970941118932842073}, + {0.57298290871014856406873150263, 0.819567316531142231461615210719}, + {-0.819567316531142231461615210719, 0.57298290871014856406873150263}, + {0.843002015580472829903158071829, 0.537910403066588882481369182642}, + {-0.537910403066588882481369182642, 0.843002015580472829903158071829}, + {0.215732348091705883330604365256, 0.976452535450054059928959304671}, + {-0.976452535450054059928959304671, 0.215732348091705883330604365256}, + {0.931745081981668721304856717325, 
0.36311307082363947218439648168}, + {-0.36311307082363947218439648168, 0.931745081981668721304856717325}, + {0.402083551089586987981050469898, 0.915602980523320231220907317038}, + {-0.915602980523320231220907317038, 0.402083551089586987981050469898}, + {0.721862854481496341030322128063, 0.692036140183318826402114609664}, + {-0.692036140183318826402114609664, 0.721862854481496341030322128063}, + {0.0210906719407551214440221798441, 0.999777567040332937331470475328}, + {-0.999777567040332937331470475328, 0.0210906719407551214440221798441}, + {0.999876391416790410993087334646, 0.015722655225416857366349532299}, + {-0.015722655225416857366349532299, 0.999876391416790410993087334646}, + {0.695901780590996832387418180588, 0.718136972847297494482177171449}, + {-0.718136972847297494482177171449, 0.695901780590996832387418180588}, + {0.917748533403661248541993700201, 0.397161968767691608839243144757}, + {-0.397161968767691608839243144757, 0.917748533403661248541993700201}, + {0.368110290048703048260136938552, 0.929782132738772193469856119918}, + {-0.929782132738772193469856119918, 0.368110290048703048260136938552}, + {0.977596709053411894174701046722, 0.210486755991769747264896750494}, + {-0.210486755991769747264896750494, 0.977596709053411894174701046722}, + {0.542428649725581357721182484966, 0.840101874749058397107148721261}, + {-0.840101874749058397107148721261, 0.542428649725581357721182484966}, + {0.822631796294514994194457813137, 0.568574469814869143391433681245}, + {-0.568574469814869143391433681245, 0.822631796294514994194457813137}, + {0.179645658363882160246660646408, 0.983731384795162089318409925909}, + {-0.983731384795162089318409925909, 0.179645658363882160246660646408}, + {0.993520623594518093035787842382, 0.113651970912781868916496819111}, + {-0.113651970912781868916496819111, 0.993520623594518093035787842382}, + {0.622161090864726817883934018028, 0.782889249520015484407053918403}, + {-0.782889249520015484407053918403, 0.622161090864726817883934018028}, + 
{0.874400642942864791962165327277, 0.485204612118541878107436104983}, + {-0.485204612118541878107436104983, 0.874400642942864791962165327277}, + {0.275203152606767309507063146157, 0.961386095590786249331927137973}, + {-0.961386095590786249331927137973, 0.275203152606767309507063146157}, + {0.952258003795399599056281658704, 0.305294438546791668809277098262}, + {-0.305294438546791668809277098262, 0.952258003795399599056281658704}, + {0.457472324167916055692728605209, 0.889223859677868211370821427408}, + {-0.889223859677868211370821427408, 0.457472324167916055692728605209}, + {0.762940555751565718800577542424, 0.646468644552457893937003063911}, + {-0.646468644552457893937003063911, 0.762940555751565718800577542424}, + {0.0823580782266465361018958901695, 0.996602803001684134365234513098}, + {-0.996602803001684134365234513098, 0.0823580782266465361018958901695}, + {0.997900522387751620634332994086, 0.0647653257403398852076747971296}, + {-0.0647653257403398852076747971296, 0.997900522387751620634332994086}, + {0.659826225313227432422991114436, 0.751418227346727363169520685915}, + {-0.751418227346727363169520685915, 0.659826225313227432422991114436}, + {0.89715525096380854819244632381, 0.44171535593418731480142014334}, + {-0.44171535593418731480142014334, 0.89715525096380854819244632381}, + {0.322044638198334509660014646215, 0.946724485268921167602229616023}, + {-0.946724485268921167602229616023, 0.322044638198334509660014646215}, + {0.96609105541043882592333602588, 0.258201612419334869397147258496}, + {-0.258201612419334869397147258496, 0.96609105541043882592333602588}, + {0.500553425469377533119086365332, 0.865705647579402381985858028202}, + {-0.865705647579402381985858028202, 0.500553425469377533119086365332}, + {0.793742273353100213917343808134, 0.60825422603731438275787013481}, + {-0.60825422603731438275787013481, 0.793742273353100213917343808134}, + {0.131159856086043274947527947916, 0.991361231918763463610844155482}, + {-0.991361231918763463610844155482, 
0.131159856086043274947527947916}, + {0.986747246596916482985761831515, 0.162264818532558030561574469175}, + {-0.162264818532558030561574469175, 0.986747246596916482985761831515}, + {0.58299711585345770359367634228, 0.812474222918210475796740865917}, + {-0.812474222918210475796740865917, 0.58299711585345770359367634228}, + {0.849539527184620890665200931835, 0.527524967893398200047272439406}, + {-0.527524967893398200047272439406, 0.849539527184620890665200931835}, + {0.227698678515621172335769983874, 0.973731642600896396544385424932}, + {-0.973731642600896396544385424932, 0.227698678515621172335769983874}, + {0.936130879241267033208373504749, 0.351651783631154624121251117685}, + {-0.351651783631154624121251117685, 0.936130879241267033208373504749}, + {0.413289131967690903657342005317, 0.910599853611558929245006765996}, + {-0.910599853611558929245006765996, 0.413289131967690903657342005317}, + {0.730300847525525487213826636435, 0.683125663478908684567159070866}, + {-0.683125663478908684567159070866, 0.730300847525525487213826636435}, + {0.0333578925430861455980746654859, 0.999443470640077769040487964958}, + {-0.999443470640077769040487964958, 0.0333578925430861455980746654859}, + {0.999189394066714919873106737214, 0.0402561148720412820267711140332}, + {-0.0402561148720412820267711140332, 0.999189394066714919873106737214}, + {0.678068224424006604778014661861, 0.73499896804449671439130042927}, + {-0.73499896804449671439130042927, 0.678068224424006604778014661861}, + {0.907725282067676442210313325631, 0.41956502749294688481285220405}, + {-0.41956502749294688481285220405, 0.907725282067676442210313325631}, + {0.345181426315542605465225278749, 0.93853597849350856030525847018}, + {-0.93853597849350856030525847018, 0.345181426315542605465225278749}, + {0.972136671622152226390767282282, 0.234414785556295163226891986596}, + {-0.234414785556295163226891986596, 0.972136671622152226390767282282}, + {0.521648148266897093705551924359, 0.85316071722139041888510746503}, + 
{-0.85316071722139041888510746503, 0.521648148266897093705551924359}, + {0.808430518981542722833921743586, 0.58859162071782289427090972822}, + {-0.58859162071782289427090972822, 0.808430518981542722833921743586}, + {0.155449575730855826805054675788, 0.987843828449161742710771250131}, + {-0.987843828449161742710771250131, 0.155449575730855826805054675788}, + {0.990432234767505970118861569063, 0.137999957729862787747521224446}, + {-0.137999957729862787747521224446, 0.990432234767505970118861569063}, + {0.602760643595607215061704664549, 0.79792205542409300189632403999}, + {-0.79792205542409300189632403999, 0.602760643595607215061704664549}, + {0.862229772550811235376500007987, 0.506517343559898525207074726495}, + {-0.506517343559898525207074726495, 0.862229772550811235376500007987}, + {0.251526670691812614943927428612, 0.967850367531413624533342954237}, + {-0.967850367531413624533342954237, 0.251526670691812614943927428612}, + {0.944478900905115548169987960136, 0.328572070853663689149470883422}, + {-0.328572070853663689149470883422, 0.944478900905115548169987960136}, + {0.435511896108492002621659366923, 0.900182974926756807043659591727}, + {-0.900182974926756807043659591727, 0.435511896108492002621659366923}, + {0.746845637581406540661532744707, 0.664997438811325336516233619477}, + {-0.664997438811325336516233619477, 0.746845637581406540661532744707}, + {0.057875416378228863867327902426, 0.998323813288577555091762860684}, + {-0.998323813288577555091762860684, 0.057875416378228863867327902426}, + {0.996010552748005872913950042857, 0.0892355243981440143796746156113}, + {-0.0892355243981440143796746156113, 0.996010552748005872913950042857}, + {0.641186771556811252459340266796, 0.767384860406141733335516619263}, + {-0.767384860406141733335516619263, 0.641186771556811252459340266796}, + {0.886044807083555596705082280096, 0.46359961156181400676601356281}, + {-0.46359961156181400676601356281, 0.886044807083555596705082280096}, + {0.298713862433100385551654198935, 
0.954342720614716477633976410289}, + {-0.954342720614716477633976410289, 0.298713862433100385551654198935}, + {0.959463502071417506655848228547, 0.281832908285833350081617254546}, + {-0.281832908285833350081617254546, 0.959463502071417506655848228547}, + {0.479157188005253309448505660839, 0.87772910922613156525784461337}, + {-0.87772910922613156525784461337, 0.479157188005253309448505660839}, + {0.778575907059125049691772346705, 0.62755044175513152726608723242}, + {-0.62755044175513152726608723242, 0.778575907059125049691772346705}, + {0.106791130648307391881601802197, 0.994281476451641554881177853531}, + {-0.994281476451641554881177853531, 0.106791130648307391881601802197}, + {0.982467878781833170442894243024, 0.186431937076041609469001514299}, + {-0.186431937076041609469001514299, 0.982467878781833170442894243024}, + {0.562882412448384439329629458371, 0.826536986320809963224576222274}, + {-0.826536986320809963224576222274, 0.562882412448384439329629458371}, + {0.836337550973583532254451711196, 0.548214830911667783119867181085}, + {-0.548214830911667783119867181085, 0.836337550973583532254451711196}, + {0.203733529169693894367298980796, 0.979026378139047581683485077519}, + {-0.979026378139047581683485077519, 0.203733529169693894367298980796}, + {0.927218967339951793960040049569, 0.374519674523293211176877548496}, + {-0.374519674523293211176877548496, 0.927218967339951793960040049569}, + {0.390817417907668551713129545533, 0.920468220994067110041214618832}, + {-0.920468220994067110041214618832, 0.390817417907668551713129545533}, + {0.713316151546802612593012327125, 0.700842398790526233121056520758}, + {-0.700842398790526233121056520758, 0.713316151546802612593012327125}, + {0.00882027516080741147419530534535, 0.999961100616462816859097983979}, + {-0.999961100616462816859097983979, 0.00882027516080741147419530534535}, + {0.999954041425129780407132784603, 0.00958723304972922477085184311818}, + {-0.00958723304972922477085184311818, 0.999954041425129780407132784603}, + 
{0.700295086064323779595497398986, 0.713853481068882467219793852564}, + {-0.713853481068882467219793852564, 0.700295086064323779595497398986}, + {0.9201681970742663363438396118, 0.391523293167972408213017843082}, + {-0.391523293167972408213017843082, 0.9201681970742663363438396118}, + {0.373808396391851205375900235595, 0.927505947574975175839995245042}, + {-0.927505947574975175839995245042, 0.373808396391851205375900235595}, + {0.978869828526574115024061484291, 0.20448437299792723842450925531}, + {-0.20448437299792723842450925531, 0.978869828526574115024061484291}, + {0.547573206856539762554803019157, 0.836757780443567189543330187007}, + {-0.836757780443567189543330187007, 0.547573206856539762554803019157}, + {0.826105017844664613058114355226, 0.563516192750364797170448127872}, + {-0.563516192750364797170448127872, 0.826105017844664613058114355226}, + {0.185678338887987626204534308272, 0.982610581292404750008984137821}, + {-0.982610581292404750008984137821, 0.185678338887987626204534308272}, + {0.994199276233218909304412136407, 0.107553703503615635805878980591}, + {-0.107553703503615635805878980591, 0.994199276233218909304412136407}, + {0.626953096986132663026580758014, 0.779057003164400629913188822684}, + {-0.779057003164400629913188822684, 0.626953096986132663026580758014}, + {0.877361342129065135964083310682, 0.479830256796594190049631833972}, + {-0.479830256796594190049631833972, 0.877361342129065135964083310682}, + {0.281096926171038263841950310962, 0.959679382969746752607420603454}, + {-0.959679382969746752607420603454, 0.281096926171038263841950310962}, + {0.954113329266538801043395778834, 0.299445746197739892657807558862}, + {-0.299445746197739892657807558862, 0.954113329266538801043395778834}, + {0.462919887410955133155709972925, 0.886400122878730600817220874887}, + {-0.886400122878730600817220874887, 0.462919887410955133155709972925}, + {0.766892850643480672445662094106, 0.641775159718663501529078985186}, + {-0.641775159718663501529078985186, 
0.766892850643480672445662094106}, + {0.0884715676993407668105007246595, 0.996078702567633977871253136982}, + {-0.996078702567633977871253136982, 0.0884715676993407668105007246595}, + {0.998279129760433203699676596443, 0.0586411040546833337017140763692}, + {-0.0586411040546833337017140763692, 0.998279129760433203699676596443}, + {0.66442441983727518195479433416, 0.747355464503940303266915634595}, + {-0.747355464503940303266915634595, 0.66442441983727518195479433416}, + {0.899848676741518582744561172149, 0.436202199635143950118276734429}, + {-0.436202199635143950118276734429, 0.899848676741518582744561172149}, + {0.327847568035170844336079198911, 0.944730634696167803632249615475}, + {-0.944730634696167803632249615475, 0.327847568035170844336079198911}, + {0.967657164329369878785769287788, 0.252268928570370809527645405979}, + {-0.252268928570370809527645405979, 0.967657164329369878785769287788}, + {0.505855872686268859261815578066, 0.862618012835816738714811435784}, + {-0.862618012835816738714811435784, 0.505855872686268859261815578066}, + {0.797459509147442457965837547818, 0.60337246479295036927226192347}, + {-0.60337246479295036927226192347, 0.797459509147442457965837547818}, + {0.137240265203515593439576036872, 0.990537788076188752128814485332}, + {-0.990537788076188752128814485332, 0.137240265203515593439576036872}, + {0.987724309567986957780760803871, 0.156207196660215902328516790476}, + {-0.156207196660215902328516790476, 0.987724309567986957780760803871}, + {0.587971389209745010084873229061, 0.808881725266903606197388398868}, + {-0.808881725266903606197388398868, 0.587971389209745010084873229061}, + {0.852760367195645296867212437064, 0.522302360841254698087254837446}, + {-0.522302360841254698087254837446, 0.852760367195645296867212437064}, + {0.233669097190576824374375064508, 0.972316179551765302768728815863}, + {-0.972316179551765302768728815863, 0.233669097190576824374375064508}, + {0.938270951623047189116277877474, 0.34590117279416898732335994282}, + 
{-0.34590117279416898732335994282, 0.938270951623047189116277877474}, + {0.418868687579875109694427237628, 0.908046817386148341633145264495}, + {-0.908046817386148341633145264495, 0.418868687579875109694427237628}, + {0.73447868009043837389526743209, 0.678631762071749466969095010427}, + {-0.678631762071749466969095010427, 0.73447868009043837389526743209}, + {0.0394897344393841248644250185862, 0.999219976218403527212785775191}, + {-0.999219976218403527212785775191, 0.0394897344393841248644250185862}, + {0.999417591486021716917775847833, 0.0341244461974033255757809968145}, + {-0.0341244461974033255757809968145, 0.999417591486021716917775847833}, + {0.682565328866473253199842474714, 0.730824583487312051666151546669}, + {-0.730824583487312051666151546669, 0.682565328866473253199842474714}, + {0.91028259700728175740636061164, 0.413987431675985451118293667605}, + {-0.413987431675985451118293667605, 0.91028259700728175740636061164}, + {0.350933676875858358013005044995, 0.936400317404042059621360749588}, + {-0.936400317404042059621360749588, 0.350933676875858358013005044995}, + {0.973556713508265558765231162397, 0.228445454283916465909598514372}, + {-0.228445454283916465909598514372, 0.973556713508265558765231162397}, + {0.526873224135984696836487728433, 0.8499438838467822110445126782}, + {-0.8499438838467822110445126782, 0.526873224135984696836487728433}, + {0.812026830795669729567975991813, 0.583620104235572645379193090776}, + {-0.583620104235572645379193090776, 0.812026830795669729567975991813}, + {0.161507945219266119130097081324, 0.986871411902812467609180657746}, + {-0.986871411902812467609180657746, 0.161507945219266119130097081324}, + {0.99126034198280243980860859665, 0.131920181974319761231129177759}, + {-0.131920181974319761231129177759, 0.99126034198280243980860859665}, + {0.607645254487930830400443937833, 0.79420856498674063939091638531}, + {-0.79420856498674063939091638531, 0.607645254487930830400443937833}, + {0.865321473311889799440166370914, 
0.501217266088609947338738948019}, + {-0.501217266088609947338738948019, 0.865321473311889799440166370914}, + {0.257460553986133100501376702596, 0.96628880938420969037849772576}, + {-0.96628880938420969037849772576, 0.257460553986133100501376702596}, + {0.946477201682408675331714675849, 0.322770671987770763067260304524}, + {-0.322770671987770763067260304524, 0.946477201682408675331714675849}, + {0.441027116617407233256642484776, 0.897493778478790305008772065776}, + {-0.897493778478790305008772065776, 0.441027116617407233256642484776}, + {0.75091192599986800182421120553, 0.660402361739545029628573047376}, + {-0.660402361739545029628573047376, 0.75091192599986800182421120553}, + {0.0639999266507139397130998759167, 0.997949903246001190915137613047}, + {-0.997949903246001190915137613047, 0.0639999266507139397130998759167}, + {0.996539342015137941110936026234, 0.0831224387036129247485760629388}, + {-0.0831224387036129247485760629388, 0.996539342015137941110936026234}, + {0.645883286381996324365672990098, 0.763436166534172011566283799766}, + {-0.763436166534172011566283799766, 0.645883286381996324365672990098}, + {0.888872721280395627907466860051, 0.458154215699893119229102467216}, + {-0.458154215699893119229102467216, 0.888872721280395627907466860051}, + {0.304563976078509102141111952733, 0.952491881579706323179834726034}, + {-0.952491881579706323179834726034, 0.304563976078509102141111952733}, + {0.961174734657714080476864637603, 0.275940445487197150153946267892}, + {-0.275940445487197150153946267892, 0.961174734657714080476864637603}, + {0.4845338125740161761001445484, 0.874772532989284146154318477784}, + {-0.874772532989284146154318477784, 0.4845338125740161761001445484}, + {0.782411827709836416033795103431, 0.622761376339086347719842251536}, + {-0.622761376339086347719842251536, 0.782411827709836416033795103431}, + {0.112889916783750501871885774108, 0.993607501324621611438203672151}, + {-0.993607501324621611438203672151, 0.112889916783750501871885774108}, + 
{0.983593308962478651835681375815, 0.180400117971807216754598357511}, + {-0.180400117971807216754598357511, 0.983593308962478651835681375815}, + {0.567943351952365560464386362582, 0.823067645441801665207037785876}, + {-0.823067645441801665207037785876, 0.567943351952365560464386362582}, + {0.839685590120966107718913917779, 0.54307284018187174101655045888}, + {-0.54307284018187174101655045888, 0.839685590120966107718913917779}, + {0.209736886868323313404260943571, 0.97775786281000276467523235624}, + {-0.97775786281000276467523235624, 0.209736886868323313404260943571}, + {0.929499522226638563715539476107, 0.368823315668153905999560038254}, + {-0.368823315668153905999560038254, 0.929499522226638563715539476107}, + {0.396457947707453906005525823275, 0.918052882844770379300314289139}, + {-0.918052882844770379300314289139, 0.396457947707453906005525823275}, + {0.717603011688049075011974764493, 0.696452380006157834024804742512}, + {-0.696452380006157834024804742512, 0.717603011688049075011974764493}, + {0.0149557550886442961607381008093, 0.999888156440373321309778020805}, + {-0.999888156440373321309778020805, 0.0149557550886442961607381008093}, + {0.999761096627446610440870244929, 0.0218574854520217354281186317166}, + {-0.0218574854520217354281186317166, 0.999761096627446610440870244929}, + {0.691482274808955854616954184166, 0.722393427174577551497236527211}, + {-0.722393427174577551497236527211, 0.691482274808955854616954184166}, + {0.915294317019487047026871096023, 0.402785691443763527175292438187}, + {-0.402785691443763527175292438187, 0.915294317019487047026871096023}, + {0.362398324561191365056345148332, 0.932023312130786485418809661496}, + {-0.932023312130786485418809661496, 0.362398324561191365056345148332}, + {0.976286783616693631948635356821, 0.216481214278216760327211432013}, + {-0.216481214278216760327211432013, 0.976286783616693631948635356821}, + {0.537263670462542530792404704698, 0.843414339693792758367862916202}, + {-0.843414339693792758367862916202, 
0.537263670462542530792404704698}, + {0.819127603122188241435708278004, 0.573611340371944611327137408807}, + {-0.573611340371944611327137408807, 0.819127603122188241435708278004}, + {0.173606214282275406191047295579, 0.984815151367289143280459029484}, + {-0.984815151367289143280459029484, 0.173606214282275406191047295579}, + {0.992804565465879140795379953488, 0.119745959389479600387673485784}, + {-0.119745959389479600387673485784, 0.992804565465879140795379953488}, + {0.617345660729896827945140103111, 0.786692020537876790520215308788}, + {-0.786692020537876790520215308788, 0.617345660729896827945140103111}, + {0.871407023066670949340561946883, 0.490560699761082019687563615662}, + {-0.490560699761082019687563615662, 0.871407023066670949340561946883}, + {0.269299017799346174228958261665, 0.96305661256870433994947688916}, + {-0.96305661256870433994947688916, 0.269299017799346174228958261665}, + {0.950366826348635895804761730687, 0.311131636732785266108436417198}, + {-0.311131636732785266108436417198, 0.950366826348635895804761730687}, + {0.452007537350436472411985278086, 0.89201411770128047340477905891}, + {-0.89201411770128047340477905891, 0.452007537350436472411985278086}, + {0.758959536578942439710715461842, 0.651137790207170330525343615591}, + {-0.651137790207170330525343615591, 0.758959536578942439710715461842}, + {0.0762414880188560517559892559802, 0.997089381903483396030196672655}, + {-0.997089381903483396030196672655, 0.0762414880188560517559892559802}, + {0.997484344624417929026094498113, 0.0708871090480878152106924972031}, + {-0.0708871090480878152106924972031, 0.997484344624417929026094498113}, + {0.655203188704731931402136524412, 0.755452699717958253700089699123}, + {-0.755452699717958253700089699123, 0.655203188704731931402136524412}, + {0.894428047797973801991133768752, 0.447211881899738317169834544984}, + {-0.447211881899738317169834544984, 0.894428047797973801991133768752}, + {0.316229583562890326220440329053, 0.9486826921998950945535966639}, + 
{-0.9486826921998950945535966639, 0.316229583562890326220440329053}, + {0.964488573709308405490503446345, 0.26412457512352754962492440427}, + {-0.26412457512352754962492440427, 0.964488573709308405490503446345}, + {0.49523213269893123777976029487, 0.868760688994655305705805403704}, + {-0.868760688994655305705805403704, 0.49523213269893123777976029487}, + {0.789995153610791089882070536987, 0.613113086853854905022842558537}, + {-0.613113086853854905022842558537, 0.789995153610791089882070536987}, + {0.12507450887412116524721739097, 0.992147351571276092663254075887}, + {-0.992147351571276092663254075887, 0.12507450887412116524721739097}, + {0.985733033149723492094551602349, 0.168316331226194826342634769389}, + {-0.168316331226194826342634769389, 0.985733033149723492094551602349}, + {0.578000892985269909551959699456, 0.816036131374236695101842542499}, + {-0.816036131374236695101842542499, 0.578000892985269909551959699456}, + {0.846286702490559705758244035678, 0.532727713928658808129057433689}, + {-0.532727713928658808129057433689, 0.846286702490559705758244035678}, + {0.221719687114115215909038170139, 0.975110445204038889244202437112}, + {-0.975110445204038889244202437112, 0.221719687114115215909038170139}, + {0.933955562060986732575429414283, 0.357389154977240885990852348186}, + {-0.357389154977240885990852348186, 0.933955562060986732575429414283}, + {0.407694016253280167028094638226, 0.913118606267154242495109883748}, + {-0.913118606267154242495109883748, 0.407694016253280167028094638226}, + {0.726095519546470891114609003125, 0.687593845590942165379999551078}, + {-0.687593845590942165379999551078, 0.726095519546470891114609003125}, + {0.0272247947409878718327824742573, 0.999629336579970106946291252825}, + {-0.999629336579970106946291252825, 0.0272247947409878718327824742573}, + {0.998923577731465783813291636761, 0.0463862679267071573163683240182}, + {-0.0463862679267071573163683240182, 0.998923577731465783813291636761}, + {0.673545591096136098130386926641, 
0.739145680305957508693381896592}, + {-0.739145680305957508693381896592, 0.673545591096136098130386926641}, + {0.905133791784249686074304008798, 0.425126826923762357601788153261}, + {-0.425126826923762357601788153261, 0.905133791784249686074304008798}, + {0.339416179869623357845398459176, 0.9406363042338475866088742805}, + {-0.9406363042338475866088742805, 0.339416179869623357845398459176}, + {0.970680029339806127453016415529, 0.240375291244489447395338288516}, + {-0.240375291244489447395338288516, 0.970680029339806127453016415529}, + {0.516403432639863990694095718936, 0.856345429577203609561308894627}, + {-0.856345429577203609561308894627, 0.516403432639863990694095718936}, + {0.804803770215302805013379838783, 0.593540977058226393303641543753}, + {-0.593540977058226393303641543753, 0.804803770215302805013379838783}, + {0.149385353653779723304140247819, 0.98877905323370152146367217938}, + {-0.98877905323370152146367217938, 0.149385353653779723304140247819}, + {0.989566838338365117877515331202, 0.144074537864995161351444608044}, + {-0.144074537864995161351444608044, 0.989566838338365117877515331202}, + {0.597853339105733905256556681707, 0.801605504547046154861789091228}, + {-0.801605504547046154861789091228, 0.597853339105733905256556681707}, + {0.859105609326130448266667372081, 0.511798350939486890176510769379}, + {-0.511798350939486890176510769379, 0.859105609326130448266667372081}, + {0.245583317560504055254710920053, 0.969375486659311280668305244035}, + {-0.969375486659311280668305244035, 0.245583317560504055254710920053}, + {0.942445041031024888233957881312, 0.334361099166798736437300476609}, + {-0.334361099166798736437300476609, 0.942445041031024888233957881312}, + {0.429980278822840622510170760506, 0.902838279994502834746583630476}, + {-0.902838279994502834746583630476, 0.429980278822840622510170760506}, + {0.742751230846809051833190551406, 0.669567479105392493465842562728}, + {-0.669567479105392493465842562728, 0.742751230846809051833190551406}, + 
{0.0517487271290284558955541172054, 0.998660137003838488389817484858}, + {-0.998660137003838488389817484858, 0.0517487271290284558955541172054}, + {0.995444264246510335247819512006, 0.0953452504256176308627956927921}, + {-0.0953452504256176308627956927921, 0.995444264246510335247819512006}, + {0.636466116412077176356376639887, 0.77130466267184472073381584778}, + {-0.77130466267184472073381584778, 0.636466116412077176356376639887}, + {0.883183533800523390411285618029, 0.469027553160387133956987781858}, + {-0.469027553160387133956987781858, 0.883183533800523390411285618029}, + {0.292852502379604806570512209873, 0.956157629185692137241403543158}, + {-0.956157629185692137241403543158, 0.292852502379604806570512209873}, + {0.957716146226558873166823104839, 0.28771476023476516559185256483}, + {-0.28771476023476516559185256483, 0.957716146226558873166823104839}, + {0.473762523439182847706518941777, 0.880652639458111008430307720118}, + {-0.880652639458111008430307720118, 0.473762523439182847706518941777}, + {0.774710673465565657913600716711, 0.632315880251737572059766989696}, + {-0.632315880251737572059766989696, 0.774710673465565657913600716711}, + {0.100688323887153957647555557742, 0.994918017443043201097907513031}, + {-0.994918017443043201097907513031, 0.100688323887153957647555557742}, + {0.981305459240844668578063192399, 0.192456737123216842233475176727}, + {-0.192456737123216842233475176727, 0.981305459240844668578063192399}, + {0.557800280739717102562735817628, 0.829975208549443843608628412767}, + {-0.829975208549443843608628412767, 0.557800280739717102562735817628}, + {0.832958024190106671724720399652, 0.553336181662932302671720208309}, + {-0.553336181662932302671720208309, 0.832958024190106671724720399652}, + {0.197722501018841922970636915124, 0.980258033678303553060118247231}, + {-0.980258033678303553060118247231, 0.197722501018841922970636915124}, + {0.92490350318321090661299876956, 0.380201932924366048371922488514}, + {-0.380201932924366048371922488514, 
0.92490350318321090661299876956}, + {0.385162174052989858541451440033, 0.922848904035094119713278359995}, + {-0.922848904035094119713278359995, 0.385162174052989858541451440033}, + {0.709002435455618362780683128221, 0.705206031254697829346866910782}, + {-0.705206031254697829346866910782, 0.709002435455618362780683128221}, + {0.00268446315459596168001565885675, 0.999996396822294353334825700585}, + {-0.999996396822294353334825700585, 0.00268446315459596168001565885675}, + {0.999998161643486982441686450329, 0.00191747480985541901218893734438}, + {-0.00191747480985541901218893734438, 0.999998161643486982441686450329}, + {0.705749621831387785597655692982, 0.708461340712994047663642049883}, + {-0.708461340712994047663642049883, 0.705749621831387785597655692982}, + {0.923144048249621818058585631661, 0.384454244587440874258987832945}, + {-0.384454244587440874258987832945, 0.923144048249621818058585631661}, + {0.380911213125578074212285173417, 0.924611619933039974306154817896}, + {-0.924611619933039974306154817896, 0.380911213125578074212285173417}, + {0.980409396592109905199663444364, 0.196970594439614343773570226404}, + {-0.196970594439614343773570226404, 0.980409396592109905199663444364}, + {0.553974889646695500822204394353, 0.832533375691888677394558726519}, + {-0.832533375691888677394558726519, 0.553974889646695500822204394353}, + {0.830402791838047549255463763984, 0.557163533720196335607965920644}, + {-0.557163533720196335607965920644, 0.830402791838047549255463763984}, + {0.193209332301513991847485840481, 0.981157558148334829617454033723}, + {-0.981157558148334829617454033723, 0.193209332301513991847485840481}, + {0.994994951770357016762602597737, 0.0999252017836590733512736051125}, + {-0.0999252017836590733512736051125, 0.994994951770357016762602597737}, + {0.632909889850541751421530989319, 0.774225465435860682461566284474}, + {-0.774225465435860682461566284474, 0.632909889850541751421530989319}, + {0.881015751694342874600351933623, 
0.473086932039400109850646458654}, + {-0.473086932039400109850646458654, 0.881015751694342874600351933623}, + {0.288449234619434224935474730955, 0.957495190091032566392925673426}, + {-0.957495190091032566392925673426, 0.288449234619434224935474730955}, + {0.956381962978387734075624848629, 0.29211905259603637707854772998}, + {-0.29211905259603637707854772998, 0.956381962978387734075624848629}, + {0.469704808422072461748797422842, 0.882823534430966616604052887851}, + {-0.882823534430966616604052887851, 0.469704808422072461748797422842}, + {0.771792599152010150298508506239, 0.635874345994697720563237908209}, + {-0.635874345994697720563237908209, 0.771792599152010150298508506239}, + {0.0961087184945654954271176961811, 0.995370842565388991296515541762}, + {-0.995370842565388991296515541762, 0.0961087184945654954271176961811}, + {0.99869953403353928234054137647, 0.0509827492510108032375271136516}, + {-0.0509827492510108032375271136516, 0.99869953403353928234054137647}, + {0.670136965164037756714776605804, 0.742237460601884002642236737302}, + {-0.742237460601884002642236737302, 0.670136965164037756714776605804}, + {0.903167805147360724937755094288, 0.429287684129534607979650218113}, + {-0.429287684129534607979650218113, 0.903167805147360724937755094288}, + {0.335083847041206583927674955703, 0.9421883120969317682025234717}, + {-0.9421883120969317682025234717, 0.335083847041206583927674955703}, + {0.969563561557013287028894410469, 0.244839743711840640560950532745}, + {-0.244839743711840640560950532745, 0.969563561557013287028894410469}, + {0.512457126085725800379577776766, 0.858712812250963519744573204662}, + {-0.858712812250963519744573204662, 0.512457126085725800379577776766}, + {0.802063816488235548085583559441, 0.597238339593437417285315405024}, + {-0.597238339593437417285315405024, 0.802063816488235548085583559441}, + {0.144833483672080209903043623854, 0.9894560434943077131819677561}, + {-0.9894560434943077131819677561, 0.144833483672080209903043623854}, + 
{0.988893339517095126822709971748, 0.148626925752796540391997837105}, + {-0.148626925752796540391997837105, 0.988893339517095126822709971748}, + {0.594158079176036801882787585782, 0.804348293309460782296582692652}, + {-0.804348293309460782296582692652, 0.594158079176036801882787585782}, + {0.856741254127627471781636359083, 0.515746472092461383240902250691}, + {-0.515746472092461383240902250691, 0.856741254127627471781636359083}, + {0.241119722726294616332864961805, 0.970495378305530564944092475343}, + {-0.970495378305530564944092475343, 0.241119722726294616332864961805}, + {0.940896356481780826719329979824, 0.338694621095921188924648959073}, + {-0.338694621095921188924648959073, 0.940896356481780826719329979824}, + {0.42582093073364818414106025557, 0.904807457390316538514696276252}, + {-0.904807457390316538514696276252, 0.42582093073364818414106025557}, + {0.739662065843380012175600768387, 0.672978475400442088805164075893}, + {-0.672978475400442088805164075893, 0.739662065843380012175600768387}, + {0.0471524189960678685729078551958, 0.998887706092541294289333109191}, + {-0.998887706092541294289333109191, 0.0471524189960678685729078551958}, + {0.999649923705874243751168251038, 0.0264580807096771869479923111612}, + {-0.0264580807096771869479923111612, 0.999649923705874243751168251038}, + {0.688150551578044833433978055837, 0.725567928152032304289775765938}, + {-0.725567928152032304289775765938, 0.688150551578044833433978055837}, + {0.913431035048554718080993097828, 0.406993543204466512452910365027}, + {-0.406993543204466512452910365027, 0.913431035048554718080993097828}, + {0.358105384730061593323569013592, 0.933681173328098412689257656893}, + {-0.933681173328098412689257656893, 0.358105384730061593323569013592}, + {0.975280215241354220268021890661, 0.220971721626949085814928253058}, + {-0.220971721626949085814928253058, 0.975280215241354220268021890661}, + {0.533376650941355445745273300417, 0.845877856567119001951482459845}, + {-0.845877856567119001951482459845, 
0.533376650941355445745273300417}, + {0.816479212436865386948170453252, 0.577374831161244883581673548179}, + {-0.577374831161244883581673548179, 0.816479212436865386948170453252}, + {0.169072329411405014587543860216, 0.985603646212513395674648108979}, + {-0.985603646212513395674648108979, 0.169072329411405014587543860216}, + {0.992242990681341696657113971014, 0.124313504671644231569516136915}, + {-0.124313504671644231569516136915, 0.992242990681341696657113971014}, + {0.613718825149211721914355166518, 0.789524669441982185347228551109}, + {-0.789524669441982185347228551109, 0.613718825149211721914355166518}, + {0.869140271711200562698707017262, 0.494565655995015951429394363004}, + {-0.494565655995015951429394363004, 0.869140271711200562698707017262}, + {0.264864250833259262662977562286, 0.964285709025357484769358507037}, + {-0.964285709025357484769358507037, 0.264864250833259262662977562286}, + {0.9489249581861951554628831218, 0.31550186010755598697841151079}, + {-0.31550186010755598697841151079, 0.9489249581861951554628831218}, + {0.447897768011597308124294158915, 0.894084777529641994142650673894}, + {-0.894084777529641994142650673894, 0.447897768011597308124294158915}, + {0.755955012013824312333554189536, 0.654623571078202681761126768833}, + {-0.654623571078202681761126768833, 0.755955012013824312333554189536}, + {0.0716521490329822124953906836708, 0.997429681500884179889965253096}, + {-0.997429681500884179889965253096, 0.0716521490329822124953906836708}, + {0.997147565105683475472631016601, 0.0754767076905634021199986705142}, + {-0.0754767076905634021199986705142, 0.997147565105683475472631016601}, + {0.651719713300250913512456918397, 0.758459896959515433856324762019}, + {-0.758459896959515433856324762019, 0.651719713300250913512456918397}, + {0.892360540731965357075239353435, 0.451323238205783516807656496894}, + {-0.451323238205783516807656496894, 0.892360540731965357075239353435}, + {0.311860467372486016568444711083, 0.950127911857248097504680117709}, + 
{-0.950127911857248097504680117709, 0.311860467372486016568444711083}, + {0.963262879037507069313051033532, 0.268560283490267892592129328477}, + {-0.268560283490267892592129328477, 0.963262879037507069313051033532}, + {0.491228916219348277216738551942, 0.871030511446048372192763054045}, + {-0.871030511446048372192763054045, 0.491228916219348277216738551942}, + {0.787165287287650894398893797188, 0.616742093982038830368708204333}, + {-0.616742093982038830368708204333, 0.787165287287650894398893797188}, + {0.120507395657864127547220789438, 0.992712429453645461840949337784}, + {-0.992712429453645461840949337784, 0.120507395657864127547220789438}, + {0.984948015982227031983597953513, 0.172850819531394084282993617308}, + {-0.172850819531394084282993617308, 0.984948015982227031983597953513}, + {0.574239434592967890047532364406, 0.81868740784156968093299155953}, + {-0.81868740784156968093299155953, 0.574239434592967890047532364406}, + {0.843826167648186742376026359125, 0.536616621800121151864004787058}, + {-0.536616621800121151864004787058, 0.843826167648186742376026359125}, + {0.217229953114406793002189033359, 0.97612045745897191295625816565}, + {-0.97612045745897191295625816565, 0.217229953114406793002189033359}, + {0.932300993994602755776668345788, 0.361683365109145837923421140658}, + {-0.361683365109145837923421140658, 0.932300993994602755776668345788}, + {0.403487594849495312399056956565, 0.914985115071589305557608895469}, + {-0.914985115071589305557608895469, 0.403487594849495312399056956565}, + {0.722923574902217702664586340688, 0.690928002653386275078162270802}, + {-0.690928002653386275078162270802, 0.722923574902217702664586340688}, + {0.0226242861050928029120754558789, 0.999744038080865426998400380398}, + {-0.999744038080865426998400380398, 0.0226242861050928029120754558789}, + {0.999899333255515387897105483717, 0.0141888461537863445199869616431}, + {-0.0141888461537863445199869616431, 0.999899333255515387897105483717}, + {0.69700256971632745806033426561, 
0.717068628381437478402915530751}, + {-0.717068628381437478402915530751, 0.69700256971632745806033426561}, + {0.918356692219021719658655911189, 0.395753693421220076320565794958}, + {-0.395753693421220076320565794958, 0.918356692219021719658655911189}, + {0.369536124318350645001629573017, 0.929216364913884040888092386012}, + {-0.929216364913884040888092386012, 0.369536124318350645001629573017}, + {0.977918441376834368661263852118, 0.20898689436207007474877173081}, + {-0.20898689436207007474877173081, 0.977918441376834368661263852118}, + {0.543716711162402388524128582503, 0.839268811527475233624784323183}, + {-0.839268811527475233624784323183, 0.543716711162402388524128582503}, + {0.823503010399598500690387936629, 0.567311899983420797610733643523}, + {-0.567311899983420797610733643523, 0.823503010399598500690387936629}, + {0.181154471454990806389417912214, 0.983454654507193271051335159427}, + {-0.983454654507193271051335159427, 0.181154471454990806389417912214}, + {0.993693794541031794054219972168, 0.112127796244489638666230746367}, + {-0.112127796244489638666230746367, 0.993693794541031794054219972168}, + {0.623361295458973341254704791936, 0.781933945626937521033994471509}, + {-0.781933945626937521033994471509, 0.623361295458973341254704791936}, + {0.87514390842956035765354272371, 0.483862727990732210958668702006}, + {-0.483862727990732210958668702006, 0.87514390842956035765354272371}, + {0.276677576038972417027395067635, 0.960962808290309777881077479833}, + {-0.960962808290309777881077479833, 0.276677576038972417027395067635}, + {0.952725199037579573158041057468, 0.303833334443086355847185586754}, + {-0.303833334443086355847185586754, 0.952725199037579573158041057468}, + {0.458835837711549121564758024761, 0.888521059982002259225453144609}, + {-0.888521059982002259225453144609, 0.458835837711549121564758024761}, + {0.763931328206951087977927272732, 0.645297548255038377895687062846}, + {-0.645297548255038377895687062846, 0.763931328206951087977927272732}, + 
{0.0838867502817902122025373046199, 0.996475294790172161363273062307}, + {-0.996475294790172161363273062307, 0.0838867502817902122025373046199}, + {0.99799869703603438786387869186, 0.0632344899115800801459030822116}, + {-0.0632344899115800801459030822116, 0.99799869703603438786387869186}, + {0.660978109668168056778370100801, 0.750405182910869328338776540477}, + {-0.750405182910869328338776540477, 0.660978109668168056778370100801}, + {0.897831778021305648707084401394, 0.440338617855737246564729048259}, + {-0.440338617855737246564729048259, 0.897831778021305648707084401394}, + {0.323496515899536651872381298745, 0.946229361307743821463134281657}, + {-0.946229361307743821463134281657, 0.323496515899536651872381298745}, + {0.966485994915169843721969300532, 0.256719344095520662918374910078}, + {-0.256719344095520662918374910078, 0.966485994915169843721969300532}, + {0.501880811854638286817476000579, 0.864936789998049015970593700331}, + {-0.864936789998049015970593700331, 0.501880811854638286817476000579}, + {0.794674389407944548047169064375, 0.607035925476499649278139258968}, + {-0.607035925476499649278139258968, 0.794674389407944548047169064375}, + {0.13268043025735207218751554592, 0.991158868913921353716034445824}, + {-0.991158868913921353716034445824, 0.13268043025735207218751554592}, + {0.986994996657682976959335974243, 0.160750976895011221667886047726}, + {-0.160750976895011221667886047726, 0.986994996657682976959335974243}, + {0.584242749289016982672251288022, 0.811578960978665886472072088509}, + {-0.811578960978665886472072088509, 0.584242749289016982672251288022}, + {0.85034774050885508778208077274, 0.526221170432628060353863475029}, + {-0.526221170432628060353863475029, 0.85034774050885508778208077274}, + {0.229192095663636768998472348358, 0.973381211697303294627658942773}, + {-0.973381211697303294627658942773, 0.229192095663636768998472348358}, + {0.936669204706636171486877628922, 0.350215363675321578185162252339}, + {-0.350215363675321578185162252339, 
0.936669204706636171486877628922}, + {0.414685487846140010681494914024, 0.909964804907205659922908580484}, + {-0.909964804907205659922908580484, 0.414685487846140010681494914024}, + {0.731347889523825456237204889476, 0.682004592718440827425752104318}, + {-0.682004592718440827425752104318, 0.731347889523825456237204889476}, + {0.0348909797771880039740644008361, 0.999391124400346053668897639}, + {-0.999391124400346053668897639, 0.0348909797771880039740644008361}, + {0.999249970554724420424008712871, 0.03872333077593362316015657143}, + {-0.03872333077593362316015657143, 0.999249970554724420424008712871}, + {0.679194900497911202563727783854, 0.733957960061496050840901261836}, + {-0.733957960061496050840901261836, 0.679194900497911202563727783854}, + {0.90836781852407288972983678832, 0.418172101257146322517144199082}, + {-0.418172101257146322517144199082, 0.90836781852407288972983678832}, + {0.346620715788047317751363607385, 0.938005372791958835776426894881}, + {-0.938005372791958835776426894881, 0.346620715788047317751363607385}, + {0.97249511549282119382553446485, 0.232923271363348977081031421221}, + {-0.232923271363348977081031421221, 0.97249511549282119382553446485}, + {0.522956266158590143966478080984, 0.852359515512947085724704265886}, + {-0.852359515512947085724704265886, 0.522956266158590143966478080984}, + {0.809332455707985842785490149254, 0.587350811813247664083803556423}, + {-0.587350811813247664083803556423, 0.809332455707985842785490149254}, + {0.156964725696906781671202679718, 0.987604209634049157173762978346}, + {-0.987604209634049157173762978346, 0.156964725696906781671202679718}, + {0.99064275867701157007871870519, 0.136480491942256282333900685444}, + {-0.136480491942256282333900685444, 0.99064275867701157007871870519}, + {0.603983931041818022933398424357, 0.796996493745908751726858554321}, + {-0.796996493745908751726858554321, 0.603983931041818022933398424357}, + {0.863005745664870316247174741875, 0.505194104230662244248151182546}, + 
{-0.505194104230662244248151182546, 0.863005745664870316247174741875}, + {0.253011038045617919767238390705, 0.967463391879547440765918508987}, + {-0.967463391879547440765918508987, 0.253011038045617919767238390705}, + {0.944981812726528147550197900273, 0.327122872352400506645864197708}, + {-0.327122872352400506645864197708, 0.944981812726528147550197900273}, + {0.436892246555280361341999650904, 0.899513849198487980274308029038}, + {-0.899513849198487980274308029038, 0.436892246555280361341999650904}, + {0.747864851776509409475579559512, 0.6638510099994573421255950052}, + {-0.6638510099994573421255950052, 0.747864851776509409475579559512}, + {0.0594067572340871499769754393583, 0.99823385897039684788722979647}, + {-0.99823385897039684788722979647, 0.0594067572340871499769754393583}, + {0.996146266419824621962675337272, 0.0877075589549936723843259755995}, + {-0.0877075589549936723843259755995, 0.996146266419824621962675337272}, + {0.642363170340724320972469740809, 0.766400389737514120547245966009}, + {-0.766400389737514120547245966009, 0.642363170340724320972469740809}, + {0.886754917227550842895311689063, 0.462239890936253339237538284578}, + {-0.462239890936253339237538284578, 0.886754917227550842895311689063}, + {0.300177453806162009009028679429, 0.953883376638071767139592793683}, + {-0.953883376638071767139592793683, 0.300177453806162009009028679429}, + {0.959894699313420529662721492059, 0.280360778694163814694917391535}, + {-0.280360778694163814694917391535, 0.959894699313420529662721492059}, + {0.480503043316157507636177115273, 0.876993058902925892716950784234}, + {-0.876993058902925892716950784234, 0.480503043316157507636177115273}, + {0.779537640970513256988283501414, 0.626355383396779985538671553513}, + {-0.626355383396779985538671553513, 0.779537640970513256988283501414}, + {0.108316213087851151430918150709, 0.994116491152977066469986766606}, + {-0.994116491152977066469986766606, 0.108316213087851151430918150709}, + {0.982752705758487832277126017289, 
0.18492463147015078539503463162}, + {-0.18492463147015078539503463162, 0.982752705758487832277126017289}, + {0.564149641550287683777753500181, 0.825672563392221392497560827906}, + {-0.825672563392221392497560827906, 0.564149641550287683777753500181}, + {0.837177517670507298497284409677, 0.546931260678202080249832306436}, + {-0.546931260678202080249832306436, 0.837177517670507298497284409677}, + {0.205235096533272320940000099654, 0.978712703070200418231650019152}, + {-0.978712703070200418231650019152, 0.205235096533272320940000099654}, + {0.927792382182146324609561816033, 0.37309689835856063577779195839}, + {-0.37309689835856063577779195839, 0.927792382182146324609561816033}, + {0.392228938105210311881876350526, 0.919867631843222954834971005766}, + {-0.919867631843222954834971005766, 0.392228938105210311881876350526}, + {0.714390390649351503249420147768, 0.699747361372564991022215963312}, + {-0.699747361372564991022215963312, 0.714390390649351503249420147768}, + {0.0103541852987288438697754955342, 0.999946393986597459324627834576}, + {-0.999946393986597459324627834576, 0.0103541852987288438697754955342}, + {0.999967571556443779456913034664, 0.00805331208314497178080149097923}, + {-0.00805331208314497178080149097923, 0.999967571556443779456913034664}, + {0.701389299229202234364777268638, 0.71277840239920897680292455334}, + {-0.71277840239920897680292455334, 0.701389299229202234364777268638}, + {0.920767703426128791832638853521, 0.390111312739546856054317913731}, + {-0.390111312739546856054317913731, 0.920767703426128791832638853521}, + {0.37523073233445991547796438681, 0.926931441645899134584851708496}, + {-0.926931441645899134584851708496, 0.37523073233445991547796438681}, + {0.979182351815526930138844363682, 0.202982565490274463604336574463}, + {-0.202982565490274463604336574463, 0.979182351815526930138844363682}, + {0.548856132466135293590525634499, 0.835916829507766356854858713632}, + {-0.835916829507766356854858713632, 0.548856132466135293590525634499}, + 
{0.826968468566541492492660836433, 0.562248301017183149674849573785}, + {-0.562248301017183149674849573785, 0.826968468566541492492660836433}, + {0.187185425590990328625906613524, 0.982324598310721275318257994513}, + {-0.982324598310721275318257994513, 0.187185425590990328625906613524}, + {0.994363091759888573228920449765, 0.106028494970528408547494336744}, + {-0.106028494970528408547494336744, 0.994363091759888573228920449765}, + {0.628147417352374004551052166789, 0.778094352937702793404639578512}, + {-0.778094352937702793404639578512, 0.628147417352374004551052166789}, + {0.878096359977777130012555062422, 0.478483837338083972667845955584}, + {-0.478483837338083972667845955584, 0.878096359977777130012555062422}, + {0.282568724605589793874571569177, 0.959247056745430093371851398842}, + {-0.959247056745430093371851398842, 0.282568724605589793874571569177}, + {0.954571550547659741781103548419, 0.297981802942791751398488031555}, + {-0.297981802942791751398488031555, 0.954571550547659741781103548419}, + {0.4642790629889657627593635425, 0.885688970051048962695006139256}, + {-0.885688970051048962695006139256, 0.4642790629889657627593635425}, + {0.767876418736060606384796756174, 0.640598006201301028994521402637}, + {-0.640598006201301028994521402637, 0.767876418736060606384796756174}, + {0.0899994286019873551696335312045, 0.995941817001031348688400157698}, + {-0.995941817001031348688400157698, 0.0899994286019873551696335312045}, + {0.998367909528543817643253532879, 0.0571096946551580553208538049148}, + {-0.0571096946551580553208538049148, 0.998367909528543817643253532879}, + {0.665570066584515562801982468955, 0.746335371308826323044627315539}, + {-0.746335371308826323044627315539, 0.665570066584515562801982468955}, + {0.900516743557543630771533571533, 0.434821336381412293370374300139}, + {-0.434821336381412293370374300139, 0.900516743557543630771533571533}, + {0.329296380381672804116277575304, 0.944226611501459811570668989589}, + {-0.944226611501459811570668989589, 
0.329296380381672804116277575304}, + {0.968043001372022260397898207884, 0.250784264846594495690368376017}, + {-0.250784264846594495690368376017, 0.968043001372022260397898207884}, + {0.507178516462425177380168861418, 0.86184102503824533414444886148}, + {-0.86184102503824533414444886148, 0.507178516462425177380168861418}, + {0.798384132303756377524450726924, 0.602148467809707210740555183293}, + {-0.602148467809707210740555183293, 0.798384132303756377524450726924}, + {0.138759569074390354259662672121, 0.990326098813057331682330186595}, + {-0.990326098813057331682330186595, 0.138759569074390354259662672121}, + {0.987962766207263420881190540968, 0.154691863354515429795910108624}, + {-0.154691863354515429795910108624, 0.987962766207263420881190540968}, + {0.589211505972614957293842508079, 0.807978837117336312623194771732}, + {-0.807978837117336312623194771732, 0.589211505972614957293842508079}, + {0.8535605653546668447617662423, 0.520993628820373921861630606145}, + {-0.520993628820373921861630606145, 0.8535605653546668447617662423}, + {0.235160336021834726061285891774, 0.971956591809581715857291328575}, + {-0.971956591809581715857291328575, 0.235160336021834726061285891774}, + {0.93880045324743477408446779009, 0.344461476775576480591212202853}, + {-0.344461476775576480591212202853, 0.93880045324743477408446779009}, + {0.42026112058672288052108001466, 0.907403212757808108612778141833}, + {-0.907403212757808108612778141833, 0.42026112058672288052108001466}, + {0.735518823617598904718306584982, 0.677504287886197431589607731439}, + {-0.677504287886197431589607731439, 0.735518823617598904718306584982}, + {0.0410224716230632377444997871407, 0.999158224117649429452114873129}, + {-0.999158224117649429452114873129, 0.0410224716230632377444997871407}, + {0.999468761847290054767256606283, 0.0325913192651802255395132590365}, + {-0.0325913192651802255395132590365, 0.999468761847290054767256606283}, + {0.683685596226116687468277177686, 0.729776681946566085912309063133}, + 
{-0.729776681946566085912309063133, 0.683685596226116687468277177686}, + {0.910916574533403355928840028355, 0.412590589132048268794505929691}, + {-0.412590589132048268794505929691, 0.910916574533403355928840028355}, + {0.352369683518766629859442218731, 0.935860890376814635871483005758}, + {-0.935860890376814635871483005758, 0.352369683518766629859442218731}, + {0.973905998872289568879523358191, 0.226951768798059838605496452146}, + {-0.226951768798059838605496452146, 0.973905998872289568879523358191}, + {0.528176401321464372706770973309, 0.849134670760243626119745385949}, + {-0.849134670760243626119745385949, 0.528176401321464372706770973309}, + {0.812921137083098765963029563864, 0.582373784509160108413539091998}, + {-0.582373784509160108413539091998, 0.812921137083098765963029563864}, + {0.163021596389637840607278462812, 0.986622500813038483258310407109}, + {-0.986622500813038483258310407109, 0.163021596389637840607278462812}, + {0.991461538662453789605422116438, 0.130399453039802715181849634973}, + {-0.130399453039802715181849634973, 0.991461538662453789605422116438}, + {0.608862839766408203701075763092, 0.793275514781330626234989722434}, + {-0.793275514781330626234989722434, 0.608862839766408203701075763092}, + {0.866089312574586767112805318902, 0.499889290387461382447753521774}, + {-0.499889290387461382447753521774, 0.866089312574586767112805318902}, + {0.258942518959180523197005641123, 0.965892733110190859768806603824}, + {-0.965892733110190859768806603824, 0.258942518959180523197005641123}, + {0.946971211921810884781791628484, 0.321318414958334908071435620514}, + {-0.321318414958334908071435620514, 0.946971211921810884781791628484}, + {0.442403335401204078625170268424, 0.89681619567550729943405940503}, + {-0.89681619567550729943405940503, 0.442403335401204078625170268424}, + {0.75192408665360355168871819842, 0.659249700728141485583932990266}, + {-0.659249700728141485583932990266, 0.75192408665360355168871819842}, + {0.0655306867301933271274805292705, 
0.997850554490335106549991905922}, + {-0.997850554490335106549991905922, 0.0655306867301933271274805292705}, + {0.996665677712478159655518084037, 0.0815936693005446522786172636188}, + {-0.0815936693005446522786172636188, 0.996665677712478159655518084037}, + {0.647053622422071650355235306051, 0.762444496150687101021503622178}, + {-0.762444496150687101021503622178, 0.647053622422071650355235306051}, + {0.889574474967854578189019321144, 0.456790163516757219408503942759}, + {-0.456790163516757219408503942759, 0.889574474967854578189019321144}, + {0.30602472141822173457015310305, 0.952023565822243567069449454721}, + {-0.952023565822243567069449454721, 0.30602472141822173457015310305}, + {0.961596890965187856004092736839, 0.274465697831413224516694526756}, + {-0.274465697831413224516694526756, 0.961596890965187856004092736839}, + {0.485875126229695308843758994044, 0.874028238509075627860056556528}, + {-0.874028238509075627860056556528, 0.485875126229695308843758994044}, + {0.783366210776619720235203203629, 0.621560439389027163947787357756}, + {-0.621560439389027163947787357756, 0.783366210776619720235203203629}, + {0.114413958183286923464550000062, 0.993433161401829356584869401559}, + {-0.993433161401829356584869401559, 0.114413958183286923464550000062}, + {0.983868881924017224527290181868, 0.178891093075044749216573336525}, + {-0.178891093075044749216573336525, 0.983868881924017224527290181868}, + {0.569205253199661198593162225734, 0.822195463214137167717865395389}, + {-0.822195463214137167717865395389, 0.569205253199661198593162225734}, + {0.840517665166862548709048041928, 0.541784140172491546216804181313}, + {-0.541784140172491546216804181313, 0.840517665166862548709048041928}, + {0.21123650129128068719808197784, 0.977434980201864256343924353132}, + {-0.977434980201864256343924353132, 0.21123650129128068719808197784}, + {0.930064196284032362882499000989, 0.367397047879452764984620216637}, + {-0.367397047879452764984620216637, 0.930064196284032362882499000989}, + 
{0.397865756187775754249003057339, 0.917443644074735220605987251474}, + {-0.917443644074735220605987251474, 0.397865756187775754249003057339}, + {0.71867051154506722543402474912, 0.695350771794747690712767962395}, + {-0.695350771794747690712767962395, 0.71867051154506722543402474912}, + {0.0164895461129564366298261290922, 0.999864038191687676260244188597}, + {-0.999864038191687676260244188597, 0.0164895461129564366298261290922}, + {0.999793449309835269289692405437, 0.0203238460223895932299509325958}, + {-0.0203238460223895932299509325958, 0.999793449309835269289692405437}, + {0.692589598450650378858028943796, 0.721331857135096177202626677172}, + {-0.721331857135096177202626677172, 0.692589598450650378858028943796}, + {0.915911105401509884060828881047, 0.401381174200016843656158016529}, + {-0.401381174200016843656158016529, 0.915911105401509884060828881047}, + {0.363827603476023497819369367789, 0.931466303710925092040895378886}, + {-0.931466303710925092040895378886, 0.363827603476023497819369367789}, + {0.976617712861545639313476385723, 0.214983354995412823118527967381}, + {-0.214983354995412823118527967381, 0.976617712861545639313476385723}, + {0.538556819231804095871041226928, 0.842589195550786707045176626707}, + {-0.842589195550786707045176626707, 0.538556819231804095871041226928}, + {0.820006547809759678280272510165, 0.572354139977269915640079034347}, + {-0.572354139977269915640079034347, 0.820006547809759678280272510165}, + {0.175116696955529921320149355779, 0.984547684191773964101912497426}, + {-0.984547684191773964101912497426, 0.175116696955529921320149355779}, + {0.992987085312448392038220390532, 0.118222875970297167103062463411}, + {-0.118222875970297167103062463411, 0.992987085312448392038220390532}, + {0.618551704365123855566821475804, 0.78574409894507035723165699892}, + {-0.78574409894507035723165699892, 0.618551704365123855566821475804}, + {0.872158508207824478830616499181, 0.489223401485151976331167134049}, + {-0.489223401485151976331167134049, 
0.872158508207824478830616499181}, + {0.2707760107179960074041957796, 0.962642380128595709365413313208}, + {-0.962642380128595709365413313208, 0.2707760107179960074041957796}, + {0.950842977962238156131036248553, 0.309673426790066430580594669664}, + {-0.309673426790066430580594669664, 0.950842977962238156131036248553}, + {0.453375337524177746129083743654, 0.891319697597241389352973328641}, + {-0.891319697597241389352973328641, 0.453375337524177746129083743654}, + {0.759957476095110440184043909539, 0.649972795220807531002549239929}, + {-0.649972795220807531002549239929, 0.759957476095110440184043909539}, + {0.0777709136728579469854949479668, 0.99697125584767432027177846976}, + {-0.99697125584767432027177846976, 0.0777709136728579469854949479668}, + {0.997591910456652630756479993579, 0.0693569044251971938974676845646}, + {-0.0693569044251971938974676845646, 0.997591910456652630756479993579}, + {0.656361267299577999523307880736, 0.754446742181906437885174909752}, + {-0.754446742181906437885174909752, 0.656361267299577999523307880736}, + {0.895113009626081757019733231573, 0.445839320829980290117333652233}, + {-0.445839320829980290117333652233, 0.895113009626081757019733231573}, + {0.317684471956417913141024200741, 0.948196486113385583749391116726}, + {-0.948196486113385583749391116726, 0.317684471956417913141024200741}, + {0.964892600806868894203205400117, 0.262644758006240042202961149087}, + {-0.262644758006240042202961149087, 0.964892600806868894203205400117}, + {0.496564211717949233193536429098, 0.867999990576573510203672867647}, + {-0.867999990576573510203672867647, 0.496564211717949233193536429098}, + {0.790934727470523291081860861595, 0.611900528583796066151023751445}, + {-0.611900528583796066151023751445, 0.790934727470523291081860861595}, + {0.126596296097105875944066610828, 0.991954322443575953194283556513}, + {-0.991954322443575953194283556513, 0.126596296097105875944066610828}, + {0.985990067304330142228252498171, 0.166804038252083730586861065603}, + 
{-0.166804038252083730586861065603, 0.985990067304330142228252498171}, + {0.57925199619612355306230710994, 0.815148529350820938432775619731}, + {-0.815148529350820938432775619731, 0.57925199619612355306230710994}, + {0.847102900551231496883985983004, 0.531428900115236801937612654001}, + {-0.531428900115236801937612654001, 0.847102900551231496883985983004}, + {0.223215226352576984192310760591, 0.974769184332561766659352997522}, + {-0.974769184332561766659352997522, 0.223215226352576984192310760591}, + {0.934502691099687865516898455098, 0.355956065164566848313398850223}, + {-0.355956065164566848313398850223, 0.934502691099687865516898455098}, + {0.409094242431320975761366298684, 0.912492137396012648054011151544}, + {-0.912492137396012648054011151544, 0.409094242431320975761366298684}, + {0.727149420595371021036612546595, 0.686479220463238948291007091029}, + {-0.686479220463238948291007091029, 0.727149420595371021036612546595}, + {0.0287581743056446147155380543836, 0.999586398172067069900492697343}, + {-0.999586398172067069900492697343, 0.0287581743056446147155380543836}, + {0.998993558065545683710695357149, 0.0448538843751698154682649999359}, + {-0.0448538843751698154682649999359, 0.998993558065545683710695357149}, + {0.674678633465584542960868930095, 0.738111605074064258680266448209}, + {-0.738111605074064258680266448209, 0.674678633465584542960868930095}, + {0.905784862979786553616179389792, 0.42373786943898383849926858602}, + {-0.42373786943898383849926858602, 0.905784862979786553616179389792}, + {0.340858697983289438138143623291, 0.940114539834980278421028287994}, + {-0.940114539834980278421028287994, 0.340858697983289438138143623291}, + {0.97104761822191110187674212284, 0.238886004499120063737649388713}, + {-0.238886004499120063737649388713, 0.97104761822191110187674212284}, + {0.517716441987871145435917696886, 0.85555226941164697063157973389}, + {-0.85555226941164697063157973389, 0.517716441987871145435917696886}, + {0.805713303423352122933920327341, 
0.592305725691242401786951177201}, + {-0.592305725691242401786951177201, 0.805713303423352122933920327341}, + {0.150901945370970042015201784125, 0.988548735714763204818211761449}, + {-0.988548735714763204818211761449, 0.150901945370970042015201784125}, + {0.989786681551618641172751722479, 0.142556392431327338954361039214}, + {-0.142556392431327338954361039214, 0.989786681551618641172751722479}, + {0.599082282663597198713034686079, 0.800687466242961609630413022387}, + {-0.800687466242961609630413022387, 0.599082282663597198713034686079}, + {0.859889687076602293736016235925, 0.510479897801375703814130702085}, + {-0.510479897801375703814130702085, 0.859889687076602293736016235925}, + {0.247070031409475254768892682478, 0.968997626199012418446443462017}, + {-0.968997626199012418446443462017, 0.247070031409475254768892682478}, + {0.942956835500102119596022021142, 0.332915013755212652046822086049}, + {-0.332915013755212652046822086049, 0.942956835500102119596022021142}, + {0.431364708963206389125133455309, 0.902177636533453619627209718601}, + {-0.902177636533453619627209718601, 0.431364708963206389125133455309}, + {0.743777460210440777288454228255, 0.668427325655456816555499699462}, + {-0.668427325655456816555499699462, 0.743777460210440777288454228255}, + {0.0532805911071479454399479891435, 0.998579580509872499760604114272}, + {-0.998579580509872499760604114272, 0.0532805911071479454399479891435}, + {0.995589350783264603883537802176, 0.0938181464694205491383627304458}, + {-0.0938181464694205491383627304458, 0.995589350783264603883537802176}, + {0.63764853364907880806100592963, 0.770327428782838774523611391487}, + {-0.770327428782838774523611391487, 0.63764853364907880806100592963}, + {0.883901973665809470581677942391, 0.46767221528511476735801011273}, + {-0.46767221528511476735801011273, 0.883901973665809470581677942391}, + {0.294318884682627401883081574852, 0.95570727428390656044143725012}, + {-0.95570727428390656044143725012, 0.294318884682627401883081574852}, + 
{0.958156368168758820758057481726, 0.286245304132057176715875357331}, + {-0.286245304132057176715875357331, 0.958156368168758820758057481726}, + {0.475112869734620302253347290389, 0.879924861003786862312381344964}, + {-0.879924861003786862312381344964, 0.475112869734620302253347290389}, + {0.775679722012820649368336489715, 0.631126745478365336339265923016}, + {-0.631126745478365336339265923016, 0.775679722012820649368336489715}, + {0.102214389948213205117077961859, 0.994762392980109932949517315137}, + {-0.994762392980109932949517315137, 0.102214389948213205117077961859}, + {0.981599529509040724661872445722, 0.19095120755740180307213904598}, + {-0.19095120755740180307213904598, 0.981599529509040724661872445722}, + {0.559072789985768481280103969766, 0.829118577464965977874555846938}, + {-0.829118577464965977874555846938, 0.559072789985768481280103969766}, + {0.833805850913786339440036954329, 0.552057789531074982747327339894}, + {-0.552057789531074982747327339894, 0.833805850913786339440036954329}, + {0.199225964789878834393377360357, 0.979953577958436738981617963873}, + {-0.979953577958436738981617963873, 0.199225964789878834393377360357}, + {0.925485637221461487911255971994, 0.378782701950320543904382475375}, + {-0.378782701950320543904382475375, 0.925485637221461487911255971994}, + {0.386577352824813924581093260713, 0.922256987115283033418222657929}, + {-0.922256987115283033418222657929, 0.386577352824813924581093260713}, + {0.710083373359202796315514660819, 0.704117605857725425977378108655}, + {-0.704117605857725425977378108655, 0.710083373359202796315514660819}, + {0.00421843465527696297179272022504, 0.999991102364945594338507817156}, + {-0.999991102364945594338507817156, 0.00421843465527696297179272022504}, + {0.999987572731904084122334097628, 0.00498541690882151056207760575489}, + {-0.00498541690882151056207760575489, 0.999987572731904084122334097628}, + {0.703572771677735575579504256893, 0.71062321588427501684037679297}, + {-0.71062321588427501684037679297, 
0.703572771677735575579504256893}, + {0.921960214758209217755791087257, 0.387284601298575781314070809458}, + {-0.387284601298575781314070809458, 0.921960214758209217755791087257}, + {0.378072752012384050157578485596, 0.925775887667086738730404249509}, + {-0.925775887667086738730404249509, 0.378072752012384050157578485596}, + {0.979800485331479786843544843578, 0.199977521097239180347671094751}, + {-0.199977521097239180347671094751, 0.979800485331479786843544843578}, + {0.551418106135026064862358907703, 0.834229028640493419644030836935}, + {-0.834229028640493419644030836935, 0.551418106135026064862358907703}, + {0.828689530173025712400658449042, 0.559708551463714787388425975223}, + {-0.559708551463714787388425975223, 0.828689530173025712400658449042}, + {0.190198274055548149918948297454, 0.981745698511732989288702810882}, + {-0.981745698511732989288702810882, 0.190198274055548149918948297454}, + {0.994683702936040248232529847883, 0.102977333008032218009120128954}, + {-0.102977333008032218009120128954, 0.994683702936040248232529847883}, + {0.630531621003334596942124790075, 0.776163561960304337894456239155}, + {-0.776163561960304337894456239155, 0.630531621003334596942124790075}, + {0.879560195213827888949253974715, 0.475787623835901118951596799889}, + {-0.475787623835901118951596799889, 0.879560195213827888949253974715}, + {0.285510323278461319862486789134, 0.958375633716461172717515637487}, + {-0.958375633716461172717515637487, 0.285510323278461319862486789134}, + {0.955481253439748767775085980247, 0.295051816339446659753775747959}, + {-0.295051816339446659753775747959, 0.955481253439748767775085980247}, + {0.466994133468838057421379517109, 0.884260413738899075930532944767}, + {-0.884260413738899075930532944767, 0.466994133468838057421379517109}, + {0.769838131948879844657085413928, 0.638239179773115394844751335768}, + {-0.638239179773115394844751335768, 0.769838131948879844657085413928}, + {0.0930545114805272494118426607201, 0.995661015553546913103843962745}, + 
{-0.995661015553546913103843962745, 0.0930545114805272494118426607201}, + {0.998538421092996730799029592163, 0.0540464763060936673189083023772}, + {-0.0540464763060936673189083023772, 0.998538421092996730799029592163}, + {0.667856658934889435563775350602, 0.744289918725443144786879656749}, + {-0.744289918725443144786879656749, 0.667856658934889435563775350602}, + {0.901846518613901748651073830843, 0.432056543595841502369836462094}, + {-0.432056543595841502369836462094, 0.901846518613901748651073830843}, + {0.332191677068729207533692715515, 0.943211900734010622038283599977}, + {-0.943211900734010622038283599977, 0.332191677068729207533692715515}, + {0.968807840858700974884243350971, 0.247813170535187643483965302948}, + {-0.247813170535187643483965302948, 0.968807840858700974884243350971}, + {0.509820220585115446709778552759, 0.860280967290654507984015708644}, + {-0.860280967290654507984015708644, 0.509820220585115446709778552759}, + {0.800227740420124789011424581986, 0.599696225986208308889047202683}, + {-0.599696225986208308889047202683, 0.800227740420124789011424581986}, + {0.141797193697830392622449835471, 0.989895729791486655990695453511}, + {-0.989895729791486655990695453511, 0.141797193697830392622449835471}, + {0.988432704614708335100203839829, 0.15166010829500531364999460493}, + {-0.15166010829500531364999460493, 0.988432704614708335100203839829}, + {0.591687577168735434618440649501, 0.806167359190504417831846240006}, + {-0.806167359190504417831846240006, 0.591687577168735434618440649501}, + {0.855154934263109622882836902136, 0.518372490016066111984116560052}, + {-0.518372490016066111984116560052, 0.855154934263109622882836902136}, + {0.238141150111664839927883008386, 0.971230555853497379281691337383}, + {-0.971230555853497379281691337383, 0.238141150111664839927883008386}, + {0.939852827990986683381890998135, 0.341579656474657156195462448522}, + {-0.341579656474657156195462448522, 0.939852827990986683381890998135}, + {0.423043016581179043278382323479, 
0.906109599398381981849581734423}, + {-0.906109599398381981849581734423, 0.423043016581179043278382323479}, + {0.73759391598791357314013339419, 0.675244559472799266153231201315}, + {-0.675244559472799266153231201315, 0.73759391598791357314013339419}, + {0.0440876527944549442827870677775, 0.999027666719533691264132357901}, + {-0.999027666719533691264132357901, 0.0440876527944549442827870677775}, + {0.99956404691532774187123777665, 0.0295248389369429757589369245352}, + {-0.0295248389369429757589369245352, 0.99956404691532774187123777665}, + {0.685921301978343556093875577062, 0.727675729629849610269332060852}, + {-0.727675729629849610269332060852, 0.685921301978343556093875577062}, + {0.912178097674807175643252321606, 0.409793994736831146408206905107}, + {-0.409793994736831146408206905107, 0.912178097674807175643252321606}, + {0.35523920594776331460806773066, 0.934775431083638697060678168782}, + {-0.934775431083638697060678168782, 0.35523920594776331460806773066}, + {0.974597693699155054325444780261, 0.223962799224085462590494444157}, + {-0.223962799224085462590494444157, 0.974597693699155054325444780261}, + {0.530779024078570138556187885115, 0.847510252208314329713800816535}, + {-0.847510252208314329713800816535, 0.530779024078570138556187885115}, + {0.814704008912187083168987555837, 0.579877036846960347560298032477}, + {-0.579877036846960347560298032477, 0.814704008912187083168987555837}, + {0.166047744352825793479055960233, 0.986117714370520093147831630631}, + {-0.986117714370520093147831630631, 0.166047744352825793479055960233}, + {0.991856932539495472767043793283, 0.127357078222385400323446447146}, + {-0.127357078222385400323446447146, 0.991856932539495472767043793283}, + {0.611293709322410894202448616852, 0.791403816608719501424218378816}, + {-0.791403816608719501424218378816, 0.611293709322410894202448616852}, + {0.86761887532253623156464072963, 0.497229813249424223986494553174}, + {-0.497229813249424223986494553174, 0.86761887532253623156464072963}, + 
{0.261904617469222611436663328277, 0.965093762982799585614657189581}, + {-0.965093762982799585614657189581, 0.261904617469222611436663328277}, + {0.947952546299198672841157531366, 0.318411636038737788645391901809}, + {-0.318411636038737788645391901809, 0.947952546299198672841157531366}, + {0.445152646679523644746012678297, 0.895454700782912449419370659598}, + {-0.895454700782912449419370659598, 0.445152646679523644746012678297}, + {0.753943097533499750895202851098, 0.656939727586627109090500198363}, + {-0.656939727586627109090500198363, 0.753943097533499750895202851098}, + {0.0685917406873809420986276563781, 0.997644813102075422861503284366}, + {-0.997644813102075422861503284366, 0.0685917406873809420986276563781}, + {0.996911313063555737379317633895, 0.0785355580988454787805252976796}, + {-0.0785355580988454787805252976796, 0.996911313063555737379317633895}, + {0.649389724012861768720483723882, 0.760455775404789258153925857187}, + {-0.760455775404789258153925857187, 0.649389724012861768720483723882}, + {0.890971700932396859506923192384, 0.454058837748624433139355005551}, + {-0.454058837748624433139355005551, 0.890971700932396859506923192384}, + {0.308944048344875710210288843882, 0.951080214804345014378839096025}, + {-0.951080214804345014378839096025, 0.308944048344875710210288843882}, + {0.962434414400972104708387178107, 0.27151426845869069959249486601}, + {-0.27151426845869069959249486601, 0.962434414400972104708387178107}, + {0.488554320454186230548998537415, 0.872533481286276058064288463356}, + {-0.872533481286276058064288463356, 0.488554320454186230548998537415}, + {0.78526944465967585262688999137, 0.619154180543008414439043463062}, + {-0.619154180543008414439043463062, 0.78526944465967585262688999137}, + {0.11746122971548998703550381606, 0.993077469039412297213686997566}, + {-0.993077469039412297213686997566, 0.11746122971548998703550381606}, + {0.984413081788540700323153487261, 0.175871783989325042307783064643}, + {-0.175871783989325042307783064643, 
0.984413081788540700323153487261}, + {0.571725034543197119241142445389, 0.820445296699652049099427131296}, + {-0.820445296699652049099427131296, 0.571725034543197119241142445389}, + {0.842175879847585573401147485129, 0.539202918577918244302793482348}, + {-0.539202918577918244302793482348, 0.842175879847585573401147485129}, + {0.214234235429950986562275261349, 0.976782315753998653384826411639}, + {-0.976782315753998653384826411639, 0.214234235429950986562275261349}, + {0.93118697748255374602166511977, 0.36454192209800218016368944518}, + {-0.36454192209800218016368944518, 0.93118697748255374602166511977}, + {0.40067856118824324296312511251, 0.916218691472794222185882517806}, + {-0.916218691472794222185882517806, 0.40067856118824324296312511251}, + {0.720800435447749299200381756236, 0.693142649285365397560099154362}, + {-0.693142649285365397560099154362, 0.720800435447749299200381756236}, + {0.0195570081480290862419035846642, 0.999808743426610524451803030388}, + {-0.999808743426610524451803030388, 0.0195570081480290862419035846642}, + {0.999851096772332192941235007311, 0.0172564273001208773328229995059}, + {-0.0172564273001208773328229995059, 0.999851096772332192941235007311}, + {0.69479935394155489802869851701, 0.719203627467491224400930605043}, + {-0.719203627467491224400930605043, 0.69479935394155489802869851701}, + {0.917138215037350712499630844832, 0.398569309553686301761388222076}, + {-0.398569309553686301761388222076, 0.917138215037350712499630844832}, + {0.366683589579984925421740626916, 0.930345712696488469362066098256}, + {-0.930345712696488469362066098256, 0.366683589579984925421740626916}, + {0.977272676350500857367364915262, 0.211986122325800330212786093398}, + {-0.211986122325800330212786093398, 0.977272676350500857367364915262}, + {0.541139311901750907907171495026, 0.840932961129779665832018054061}, + {-0.840932961129779665832018054061, 0.541139311901750907907171495026}, + {0.821758646457351749070596724778, 0.569835701735667998768519737496}, + 
{-0.569835701735667998768519737496, 0.821758646457351749070596724778}, + {0.17813642254918629626025960988, 0.984005800268157870824836663814}, + {-0.984005800268157870824836663814, 0.17813642254918629626025960988}, + {0.993345114798006911804861829296, 0.115175878147008192708966589635}, + {-0.115175878147008192708966589635, 0.993345114798006911804861829296}, + {0.620959422265335181378986817435, 0.783842711199065234062288709538}, + {-0.783842711199065234062288709538, 0.620959422265335181378986817435}, + {0.873655319906992633427478267549, 0.486545354513030270382500930282}, + {-0.486545354513030270382500930282, 0.873655319906992633427478267549}, + {0.273728081594960537259453303705, 0.961807120656913538958576737059}, + {-0.961807120656913538958576737059, 0.273728081594960537259453303705}, + {0.951788567798152129739719384816, 0.306754824263192782396458824223}, + {-0.306754824263192782396458824223, 0.951788567798152129739719384816}, + {0.456107734147714105610305068694, 0.889924566944096717691081721568}, + {-0.889924566944096717691081721568, 0.456107734147714105610305068694}, + {0.761947988023355504694222872786, 0.647638219646710311394599557389}, + {-0.647638219646710311394599557389, 0.761947988023355504694222872786}, + {0.0808292123749893287598666802296, 0.996727966110532492827189798845}, + {-0.996727966110532492827189798845, 0.0808292123749893287598666802296}, + {0.997799999583146468573602305696, 0.0662960091700321302310427995508}, + {-0.0662960091700321302310427995508, 0.997799999583146468573602305696}, + {0.658672788323441893432175220369, 0.752429503622912387861276783951}, + {-0.752429503622912387861276783951, 0.658672788323441893432175220369}, + {0.896476612813344120311853657768, 0.44309105461373687884218952604}, + {-0.44309105461373687884218952604, 0.896476612813344120311853657768}, + {0.320592002694990330091684427316, 0.947217381495934818147475198202}, + {-0.947217381495934818147475198202, 0.320592002694990330091684427316}, + {0.965693842600133689479946497158, 
0.259683273169813766401148313889}, + {-0.259683273169813766401148313889, 0.965693842600133689479946497158}, + {0.499224861233555083916968442281, 0.866472468071743051609701069538}, + {-0.866472468071743051609701069538, 0.499224861233555083916968442281}, + {0.792808289546014122173289706552, 0.609471095317180244776977815491}, + {-0.609471095317180244776977815491, 0.792808289546014122173289706552}, + {0.129638973282923591900939186417, 0.991561262154865286078120334423}, + {-0.991561262154865286078120334423, 0.129638973282923591900939186417}, + {0.986497174624562878086919681664, 0.16377827834531266670836657795}, + {-0.16377827834531266670836657795, 0.986497174624562878086919681664}, + {0.581750110569369649482496242854, 0.813367573027426682408247415879}, + {-0.813367573027426682408247415879, 0.581750110569369649482496242854}, + {0.848729314811817014962969096814, 0.528827524036961871267692458787}, + {-0.528827524036961871267692458787, 0.848729314811817014962969096814}, + {0.226204725570620185370884769327, 0.974079782219875678350717862486}, + {-0.974079782219875678350717862486, 0.226204725570620185370884769327}, + {0.935590350969512374312841984647, 0.353087376116372475554072707382}, + {-0.353087376116372475554072707382, 0.935590350969512374312841984647}, + {0.411891803579992166195467007128, 0.911232759586496188219939540431}, + {-0.911232759586496188219939540431, 0.411891803579992166195467007128}, + {0.72925208705878696857638487927, 0.684245126778703083303412313398}, + {-0.684245126778703083303412313398, 0.72925208705878696857638487927}, + {0.031824726814640887095908539095, 0.999493465092780586367382511526}, + {-0.999493465092780586367382511526, 0.031824726814640887095908539095}, + {0.999126466389543388402216805844, 0.041788804241622061474981109086}, + {-0.041788804241622061474981109086, 0.999126466389543388402216805844}, + {0.676939952790071131083493582992, 0.736038246503927462427441241744}, + {-0.736038246503927462427441241744, 0.676939952790071131083493582992}, + 
{0.907080609646008451107945802505, 0.420956966451709435617090093729}, + {-0.420956966451709435617090093729, 0.907080609646008451107945802505}, + {0.343741324597798492135325432173, 0.939064375729241951340497962519}, + {-0.939064375729241951340497962519, 0.343741324597798492135325432173}, + {0.971775940219990141955008766672, 0.235905748148607374847784967642}, + {-0.235905748148607374847784967642, 0.971775940219990141955008766672}, + {0.520338802886721962792648810137, 0.853959911360254175782813490514}, + {-0.853959911360254175782813490514, 0.520338802886721962792648810137}, + {0.807526679939997160673215148563, 0.58983104460945889879752712659}, + {-0.58983104460945889879752712659, 0.807526679939997160673215148563}, + {0.153934059976937376301719950789, 0.988081122772324071945604373468}, + {-0.988081122772324071945604373468, 0.153934059976937376301719950789}, + {0.990219380275280003367299741512, 0.13951909879023849381241006995}, + {-0.13951909879023849381241006995, 0.990219380275280003367299741512}, + {0.601535937795377617653969082312, 0.79884573951460458030027211862}, + {-0.79884573951460458030027211862, 0.601535937795377617653969082312}, + {0.861451770526809323946793028881, 0.507839391004897833248321603605}, + {-0.507839391004897833248321603605, 0.861451770526809323946793028881}, + {0.250041711471454652926382777878, 0.968235065737874323055223158008}, + {-0.968235065737874323055223158008, 0.250041711471454652926382777878}, + {0.943973766633615984922300867765, 0.330020496193105417059854289619}, + {-0.330020496193105417059854289619, 0.943973766633615984922300867765}, + {0.434130520860143309658241150828, 0.900849982437531449086520751734}, + {-0.900849982437531449086520751734, 0.434130520860143309658241150828}, + {0.745824665986375978654621121677, 0.666142302819983544992510360316}, + {-0.666142302819983544992510360316, 0.745824665986375978654621121677}, + {0.0563439393359252901904987709258, 0.998411418454391297316874442913}, + {-0.998411418454391297316874442913, 
0.0563439393359252901904987709258}, + {0.995872495367145726952173845348, 0.0907632798614856212138235491693}, + {-0.0907632798614856212138235491693, 0.995872495367145726952173845348}, + {0.640008863998488441993117703532, 0.768367525344066271131282519491}, + {-0.768367525344066271131282519491, 0.640008863998488441993117703532}, + {0.88533261199054058554480661769, 0.464958241292706686564883966639}, + {-0.464958241292706686564883966639, 0.88533261199054058554480661769}, + {0.297249568157465837714426015737, 0.954799818930753718682069575152}, + {-0.954799818930753718682069575152, 0.297249568157465837714426015737}, + {0.959030047119113659981337605132, 0.283304374697445737751877459232}, + {-0.283304374697445737751877459232, 0.959030047119113659981337605132}, + {0.47781020519120098732912538253, 0.878463094167957869728979858337}, + {-0.878463094167957869728979858337, 0.47781020519120098732912538253}, + {0.777612341083420033349682398693, 0.62874402342667479182125589432}, + {-0.62874402342667479182125589432, 0.777612341083420033349682398693}, + {0.105265796918917589608533091905, 0.994444122109948036580817642971}, + {-0.994444122109948036580817642971, 0.105265796918917589608533091905}, + {0.982180739963357085642314814322, 0.187938803989575881070450691368}, + {-0.187938803989575881070450691368, 0.982180739963357085642314814322}, + {0.561613858829792422788784733712, 0.82739946432802935483152850793}, + {-0.82739946432802935483152850793, 0.561613858829792422788784733712}, + {0.835495616293615350755885629042, 0.549497111142680960682582735899}, + {-0.549497111142680960682582735899, 0.835495616293615350755885629042}, + {0.202231482401441453600199338325, 0.979337749464256779319271117856}, + {-0.979337749464256779319271117856, 0.202231482401441453600199338325}, + {0.926643370661961229828307295975, 0.375941569407054421159131152308}, + {-0.375941569407054421159131152308, 0.926643370661961229828307295975}, + {0.389404978078990937628844903884, 0.921066644194273642654025024967}, + 
{-0.921066644194273642654025024967, 0.389404978078990937628844903884}, + {0.712240233942445510884056147916, 0.701935787058624360845726641855}, + {-0.701935787058624360845726641855, 0.712240233942445510884056147916}, + {0.00728634426792652140081330003341, 0.999973454241265935493743199913}, + {-0.999973454241265935493743199913, 0.00728634426792652140081330003341}, + {0.999938158305364588329666730715, 0.0111211314566280212307036734387}, + {-0.0111211314566280212307036734387, 0.999938158305364588329666730715}, + {0.699199225037462124276999020367, 0.714926879972359485826416403143}, + {-0.714926879972359485826416403143, 0.699199225037462124276999020367}, + {0.919566525477751528505621081422, 0.392934352304269485234300418597}, + {-0.392934352304269485234300418597, 0.919566525477751528505621081422}, + {0.372385180841977359023076132871, 0.9280782709929631391077009539}, + {-0.9280782709929631391077009539, 0.372385180841977359023076132871}, + {0.978555001862359552511350102577, 0.205985699334097910329077762981}, + {-0.205985699334097910329077762981, 0.978555001862359552511350102577}, + {0.546288992754295210652060177381, 0.8375967624074830419900195011}, + {-0.8375967624074830419900195011, 0.546288992754295210652060177381}, + {0.825239623217882134476042210736, 0.56478275847551140387281520816}, + {-0.56478275847551140387281520816, 0.825239623217882134476042210736}, + {0.184170815265917720005006685824, 0.982894252096474074775755980227}, + {-0.982894252096474074775755980227, 0.184170815265917720005006685824}, + {0.994033121259616403442294085835, 0.109078658952449225605363380964}, + {-0.109078658952449225605363380964, 0.994033121259616403442294085835}, + {0.625757301338692895065207721927, 0.780017820194715993942224940838}, + {-0.780017820194715993942224940838, 0.625757301338692895065207721927}, + {0.876624259764365310054756719182, 0.481175547168160355759880530968}, + {-0.481175547168160355759880530968, 0.876624259764365310054756719182}, + {0.279624466288266593227973544344, 
0.960109450975773937031476634729}, + {-0.960109450975773937031476634729, 0.279624466288266593227973544344}, + {0.953652862864590500358019653504, 0.300908984827921888172141962059}, + {-0.300908984827921888172141962059, 0.953652862864590500358019653504}, + {0.46155962253773308301418865085, 0.887109189921300167469553343835}, + {-0.887109189921300167469553343835, 0.46155962253773308301418865085}, + {0.765907477977944228797468895209, 0.642950803077082078118564822944}, + {-0.642950803077082078118564822944, 0.765907477977944228797468895209}, + {0.0869434986145493776676929087444, 0.996213244264832042951240964612}, + {-0.996213244264832042951240964612, 0.0869434986145493776676929087444}, + {0.998188000945100295524525790825, 0.0601723754660262663551684170216}, + {-0.0601723754660262663551684170216, 0.998188000945100295524525790825}, + {0.663277209635194098247268357227, 0.748373799099454561911670680274}, + {-0.748373799099454561911670680274, 0.663277209635194098247268357227}, + {0.899178492494635328569074772531, 0.437582036462964396594799154627}, + {-0.437582036462964396594799154627, 0.899178492494635328569074772531}, + {0.326397984231672488864006709264, 0.945232434848434999175026405283}, + {-0.945232434848434999175026405283, 0.326397984231672488864006709264}, + {0.967269050295937793393363790528, 0.253752998680989994007006771426}, + {-0.253752998680989994007006771426, 0.967269050295937793393363790528}, + {0.504532038582380271130034543603, 0.86339297080987842392119091528}, + {-0.86339297080987842392119091528, 0.504532038582380271130034543603}, + {0.796533009491872001639478639845, 0.604595041982500358557217623456}, + {-0.604595041982500358557217623456, 0.796533009491872001639478639845}, + {0.13572063839303991072959831854, 0.990747146508222709115898396703}, + {-0.990747146508222709115898396703, 0.13572063839303991072959831854}, + {0.987483528717999714530151322833, 0.157722162395293630243742200037}, + {-0.157722162395293630243742200037, 0.987483528717999714530151322833}, + 
{0.586729888893400497451580122288, 0.809782710039636421939235333411}, + {-0.809782710039636421939235333411, 0.586729888893400497451580122288}, + {0.851958162409106378731848963071, 0.523609863834227917678276753577}, + {-0.523609863834227917678276753577, 0.851958162409106378731848963071}, + {0.232177308513361713160350063845, 0.97267347934005643494970172469}, + {-0.97267347934005643494970172469, 0.232177308513361713160350063845}, + {0.937739242156476970890821576177, 0.347340054873889136910491970411}, + {-0.347340054873889136910491970411, 0.937739242156476970890821576177}, + {0.417475268934544285759358217547, 0.9086882852926133624649196463}, + {-0.9086882852926133624649196463, 0.417475268934544285759358217547}, + {0.733436808263995709822324897686, 0.679757639371212030177105134499}, + {-0.679757639371212030177105134499, 0.733436808263995709822324897686}, + {0.0379569043325453103765454443419, 0.999279377058032713954105474841}, + {-0.999279377058032713954105474841, 0.0379569043325453103765454443419}, + {0.999364069398620546991196533781, 0.0356574928315082223528875715601}, + {-0.0356574928315082223528875715601, 0.999364069398620546991196533781}, + {0.681443455364677985741650445561, 0.731870765327218286699917371152}, + {-0.731870765327218286699917371152, 0.681443455364677985741650445561}, + {0.90964647749827953759904630715, 0.415383300067506289199315006044}, + {-0.415383300067506289199315006044, 0.90964647749827953759904630715}, + {0.349496844452109489687074983522, 0.936937540990869899282245114591}, + {-0.936937540990869899282245114591, 0.349496844452109489687074983522}, + {0.973205137271252795905240873253, 0.229938602215552206908810717323}, + {-0.229938602215552206908810717323, 0.973205137271252795905240873253}, + {0.525568807166914675477187302022, 0.850751096933260786414621179574}, + {-0.850751096933260786414621179574, 0.525568807166914675477187302022}, + {0.811130613730669192129596467566, 0.5848650506475044918985872755}, + {-0.5848650506475044918985872755, 
0.811130613730669192129596467566}, + {0.159993914005098269992544146589, 0.987118000788826277513976492628}, + {-0.987118000788826277513976492628, 0.159993914005098269992544146589}, + {0.991056812771814343854259732325, 0.133440600487905652871134520865}, + {-0.133440600487905652871134520865, 0.991056812771814343854259732325}, + {0.606426239361473551348069577216, 0.795139746342679587520763107023}, + {-0.795139746342679587520763107023, 0.606426239361473551348069577216}, + {0.864551597864179344199442311947, 0.502544062377115685613659934461}, + {-0.502544062377115685613659934461, 0.864551597864179344199442311947}, + {0.2559779831835323760103051427, 0.966682611887320186738747906929}, + {-0.966682611887320186738747906929, 0.2559779831835323760103051427}, + {0.945980964290724757326245253353, 0.324222169506637014624317316702}, + {-0.324222169506637014624317316702, 0.945980964290724757326245253353}, + {0.439649860054203422965457548344, 0.898169249392518076469116294902}, + {-0.898169249392518076469116294902, 0.439649860054203422965457548344}, + {0.749897998377835328653873148141, 0.661553468760398999393146368675}, + {-0.661553468760398999393146368675, 0.749897998377835328653873148141}, + {0.0624690159732249963853156771165, 0.998046903729146839268082658236}, + {-0.998046903729146839268082658236, 0.0624690159732249963853156771165}, + {0.99641066136446410084204217128, 0.0846510125115536166129359685328}, + {-0.0846510125115536166129359685328, 0.99641066136446410084204217128}, + {0.644711430516158423564831991825, 0.764426040478612067730068702076}, + {-0.764426040478612067730068702076, 0.644711430516158423564831991825}, + {0.888168875989561734662913750071, 0.45951718980190348062819793995}, + {-0.45951718980190348062819793995, 0.888168875989561734662913750071}, + {0.303102514070341000351760385456, 0.952957956031764696902541800227}, + {-0.952957956031764696902541800227, 0.303102514070341000351760385456}, + {0.960750316613243948715705755603, 0.277414543828458093344124790747}, + 
{-0.277414543828458093344124790747, 0.960750316613243948715705755603}, + {0.483191358763471912052267498439, 0.875514769045222740473377598391}, + {-0.875514769045222740473377598391, 0.483191358763471912052267498439}, + {0.781455603552444588721925811114, 0.623960847871470658532189190737}, + {-0.623960847871470658532189190737, 0.781455603552444588721925811114}, + {0.111365609743335161607724614896, 0.993779503192984581261271159747}, + {-0.993779503192984581261271159747, 0.111365609743335161607724614896}, + {0.983315421510872811339254440099, 0.181908718369666155822983455437}, + {-0.181908718369666155822983455437, 0.983315421510872811339254440099}, + {0.566680114279501601082245088037, 0.823937890911791370740502316039}, + {-0.823937890911791370740502316039, 0.566680114279501601082245088037}, + {0.838851539213765762248442570126, 0.544360262288400287289391599188}, + {-0.544360262288400287289391599188, 0.838851539213765762248442570126}, + {0.208236778914211329016836771189, 0.978078444659442380881841927476}, + {-0.978078444659442380881841927476, 0.208236778914211329016836771189}, + {0.928932660967082823866292073944, 0.370248715579966358735219955634}, + {-0.370248715579966358735219955634, 0.928932660967082823866292073944}, + {0.395049206323284773922921431222, 0.91865996134769190017976825402}, + {-0.91865996134769190017976825402, 0.395049206323284773922921431222}, + {0.716533823241826683947408582753, 0.697552349397843163281152101263}, + {-0.697552349397843163281152101263, 0.716533823241826683947408582753}, + {0.0134219288719957668964966046588, 0.999909921855641536936332158803}, + {-0.999909921855641536936332158803, 0.0134219288719957668964966046588}, + {0.999726391410624470879042746674, 0.023391073448879258489530030829}, + {-0.023391073448879258489530030829, 0.999726391410624470879042746674}, + {0.690373324042674041400857731787, 0.72345329735254437775182623227}, + {-0.72345329735254437775182623227, 0.690373324042674041400857731787}, + {0.91467537486152239445402756246, 
0.404189260893870694335561211119}, + {-0.404189260893870694335561211119, 0.91467537486152239445402756246}, + {0.360968192888095229520217799291, 0.932578127409764423561000512564}, + {-0.932578127409764423561000512564, 0.360968192888095229520217799291}, + {0.975953557074734301401974789769, 0.217978564159812204792388001806}, + {-0.217978564159812204792388001806, 0.975953557074734301401974789769}, + {0.535969257459966708090348674887, 0.844237499201387020519860016066}, + {-0.844237499201387020519860016066, 0.535969257459966708090348674887}, + {0.818246730948242073644394167786, 0.574867191003726740738954958942}, + {-0.574867191003726740738954958942, 0.818246730948242073644394167786}, + {0.172095323096829011522146402058, 0.985080301177623796071713968558}, + {-0.985080301177623796071713968558, 0.172095323096829011522146402058}, + {0.992619709454266141435141435068, 0.121268761034852595726007962185}, + {-0.121268761034852595726007962185, 0.992619709454266141435141435068}, + {0.616138164420696909928665263578, 0.787638090968367454713927600096}, + {-0.787638090968367454713927600096, 0.616138164420696909928665263578}, + {0.870653487420617544501055817818, 0.491896843700299291057831396756}, + {-0.491896843700299291057831396756, 0.870653487420617544501055817818}, + {0.267821391194094149401649929132, 0.963468578843575951253797029494}, + {-0.963468578843575951253797029494, 0.267821391194094149401649929132}, + {0.949888438430089299124858825962, 0.312589114552708713379303162583}, + {-0.312589114552708713379303162583, 0.949888438430089299124858825962}, + {0.45063867355929759783705890186, 0.892706438809935276523788161285}, + {-0.892706438809935276523788161285, 0.45063867355929759783705890186}, + {0.757959811157672302961429977586, 0.652301253003415459907898821257}, + {-0.652301253003415459907898821257, 0.757959811157672302961429977586}, + {0.0747118829612682111562804720961, 0.997205161711661847157017746213}, + {-0.997205161711661847157017746213, 0.0747118829612682111562804720961}, + 
{0.997374431615167145181999330816, 0.0724171468667633988491871832593}, + {-0.0724171468667633988491871832593, 0.997374431615167145181999330816}, + {0.654043568353492643652202787052, 0.756456879600833742571808215871}, + {-0.756456879600833742571808215871, 0.654043568353492643652202787052}, + {0.893740981294271041868171323586, 0.448583390636739187673498463482}, + {-0.448583390636739187673498463482, 0.893740981294271041868171323586}, + {0.314773951050606071166981791976, 0.949166665944390697262633693754}, + {-0.949166665944390697262633693754, 0.314773951050606071166981791976}, + {0.964082277076968141571455817029, 0.265603770730176325098881306985}, + {-0.265603770730176325098881306985, 0.964082277076968141571455817029}, + {0.493898888350867426577650576291, 0.869519343134916966597813825501}, + {-0.869519343134916966597813825501, 0.493898888350867426577650576291}, + {0.789053720816151882289091190614, 0.614324202409595954144094775984}, + {-0.614324202409595954144094775984, 0.789053720816151882289091190614}, + {0.123552427338735379414025317146, 0.99233804608042042172399987976}, + {-0.99233804608042042172399987976, 0.123552427338735379414025317146}, + {0.985473679470071806107966949639, 0.169828228135719849767326650181}, + {-0.169828228135719849767326650181, 0.985473679470071806107966949639}, + {0.576748429682482410818522566842, 0.816921813185809475577059401985}, + {-0.816921813185809475577059401985, 0.576748429682482410818522566842}, + {0.845468513035528834720366830879, 0.534025274182310383253025065642}, + {-0.534025274182310383253025065642, 0.845468513035528834720366830879}, + {0.220223626147812379105772606636, 0.975449411546446376597430116817}, + {-0.975449411546446376597430116817, 0.220223626147812379105772606636}, + {0.933406235334631517730485938955, 0.358821403818710860100793524907}, + {-0.358821403818710860100793524907, 0.933406235334631517730485938955}, + {0.406292830731837417701513004431, 0.913742926482011386113413209387}, + {-0.913742926482011386113413209387, 
0.406292830731837417701513004431}, + {0.725039909924675374242042380502, 0.688706852743907749214713476249}, + {-0.688706852743907749214713476249, 0.725039909924675374242042380502}, + {0.0256913511137592948108121504447, 0.999669922763483764782677098992}, + {-0.999669922763483764782677098992, 0.0256913511137592948108121504447}, + {0.998851246833715178397028466861, 0.0479185423268753338255088181086}, + {-0.0479185423268753338255088181086, 0.998851246833715178397028466861}, + {0.672410963808849904133069230738, 0.740178016256666237993044887844}, + {-0.740178016256666237993044887844, 0.672410963808849904133069230738}, + {0.904480590721468247217273983551, 0.426514784044051520339735361631}, + {-0.426514784044051520339735361631, 0.904480590721468247217273983551}, + {0.337972863076899721068713233763, 0.941155855224629189770269022119}, + {-0.941155855224629189770269022119, 0.337972863076899721068713233763}, + {0.970310156353828112685278028948, 0.241864012363579211628561438374}, + {-0.241864012363579211628561438374, 0.970310156353828112685278028948}, + {0.515089208144697274782686235994, 0.857136574679244866992178231158}, + {-0.857136574679244866992178231158, 0.515089208144697274782686235994}, + {0.803892343226241257170272547228, 0.594774831765957467943906067376}, + {-0.594774831765957467943906067376, 0.803892343226241257170272547228}, + {0.147868410418422219221312730042, 0.989007044060015272357588855812}, + {-0.989007044060015272357588855812, 0.147868410418422219221312730042}, + {0.989344666578752640617722136085, 0.145592344277358370918662444637}, + {-0.145592344277358370918662444637, 0.989344666578752640617722136085}, + {0.596622988741213333696578047238, 0.802521656595946319967538329365}, + {-0.802521656595946319967538329365, 0.596622988741213333696578047238}, + {0.858319510017173437077531161776, 0.513115599766640562151565063687}, + {-0.513115599766640562151565063687, 0.858319510017173437077531161776}, + {0.244096025830264184985907149894, 0.969751066085452140264067111275}, + 
{-0.969751066085452140264067111275, 0.244096025830264184985907149894}, + {0.94193102889772950536695361734, 0.335806397794420452562746959302}, + {-0.335806397794420452562746959302, 0.94193102889772950536695361734}, + {0.42859483689734440003960003196, 0.90349679898986845483221941322}, + {-0.90349679898986845483221941322, 0.42859483689734440003960003196}, + {0.741723253717784136718194076821, 0.67070605699837204660695988423}, + {-0.67070605699837204660695988423, 0.741723253717784136718194076821}, + {0.0502167413811553109415086737499, 0.998738343554035234994614711468}, + {-0.998738343554035234994614711468, 0.0502167413811553109415086737499}, + {0.995296835333246088239889104443, 0.096872130025230471228603335021}, + {-0.096872130025230471228603335021, 0.995296835333246088239889104443}, + {0.635282201508823418656390913384, 0.772280081606474322342137384112}, + {-0.772280081606474322342137384112, 0.635282201508823418656390913384}, + {0.882463015719070154041503428743, 0.470381787368520709602393026216}, + {-0.470381787368520709602393026216, 0.882463015719070154041503428743}, + {0.291385430966355662985023400324, 0.95660573415621508175377130101}, + {-0.95660573415621508175377130101, 0.291385430966355662985023400324}, + {0.95727367068575519581941080105, 0.289183539316850257705482363235}, + {-0.289183539316850257705482363235, 0.95727367068575519581941080105}, + {0.472411062334764098036288260118, 0.881378345651706918140178004251}, + {-0.881378345651706918140178004251, 0.472411062334764098036288260118}, + {0.77373980194926184061898766231, 0.633503527124764320710426090955}, + {-0.633503527124764320710426090955, 0.77373980194926184061898766231}, + {0.0991620208967425031953979441823, 0.995071300767776167894851369056}, + {-0.995071300767776167894851369056, 0.0991620208967425031953979441823}, + {0.981009079866112632117847169866, 0.193961813819738870812159348134}, + {-0.193961813819738870812159348134, 0.981009079866112632117847169866}, + {0.556526458935723722376565092418, 
0.830829886622083568781249596213}, + {-0.830829886622083568781249596213, 0.556526458935723722376565092418}, + {0.832108237435735587084195685748, 0.55461327174130403694363167233}, + {-0.55461327174130403694363167233, 0.832108237435735587084195685748}, + {0.19621857198766087804209234946, 0.980560182756327836095522343385}, + {-0.980560182756327836095522343385, 0.19621857198766087804209234946}, + {0.924319192757675156535412952508, 0.381620269246537358043269705377}, + {-0.381620269246537358043269705377, 0.924319192757675156535412952508}, + {0.383746088957365005800426160931, 0.923438649402290367973478169006}, + {-0.923438649402290367973478169006, 0.383746088957365005800426160931}, + {0.70791982920081630847874976098, 0.706292797233758484765075991163}, + {-0.706292797233758484765075991163, 0.70791982920081630847874976098}, + {0.00115048533711384847431913325266, 0.99999933819152553304832053982}, + {-0.99999933819152553304832053982, 0.00115048533711384847431913325266}}; + +__device__ double2 negtwiddles13[4096] = { + {0.999999981616429334252416083473, 0.000191747597310703291528452552051}, + {-0.000191747597310703291528452552051, 0.999999981616429334252416083473}, + {0.706971182161065359039753275283, 0.707242354213734603085583785287}, + {-0.707242354213734603085583785287, 0.706971182161065359039753275283}, + {0.923806136898395413581397406233, 0.38286057701056541713313663422}, + {-0.38286057701056541713313663422, 0.923806136898395413581397406233}, + {0.382506273649438233164232769923, 0.923952894155768644779413989454}, + {-0.923952894155768644779413989454, 0.382506273649438233164232769923}, + {0.980747854272389751884020370198, 0.195278381650666549296246898848}, + {-0.195278381650666549296246898848, 0.980747854272389751884020370198}, + {0.555410790505841633901695786335, 0.831576126274483740097309691919}, + {-0.831576126274483740097309691919, 0.555410790505841633901695786335}, + {0.831363067759845919546535242262, 0.555729655106633413375050167815}, + 
{-0.555729655106633413375050167815, 0.831363067759845919546535242262}, + {0.194902255208676516984311888336, 0.980822670473400104462768922531}, + {-0.980822670473400104462768922531, 0.194902255208676516984311888336}, + {0.995165913825994619124060136528, 0.0982079628078752764341885495014}, + {-0.0982079628078752764341885495014, 0.995165913825994619124060136528}, + {0.634245049604103328100279668433, 0.77313208254003307207113948607}, + {-0.77313208254003307207113948607, 0.634245049604103328100279668433}, + {0.881830858943826623175255008391, 0.471565834443498421713059087779}, + {-0.471565834443498421713059087779, 0.881830858943826623175255008391}, + {0.290101180907847144840872033456, 0.95699597952962822677847043451}, + {-0.95699597952962822677847043451, 0.290101180907847144840872033456}, + {0.95688465675082889916325257218, 0.290468162928139816969519415579}, + {-0.290468162928139816969519415579, 0.95688465675082889916325257218}, + {0.471227621876586399984887520986, 0.882011637327159592203429383517}, + {-0.882011637327159592203429383517, 0.471227621876586399984887520986}, + {0.77288879576405622451318322419, 0.634541495398360022583972295251}, + {-0.634541495398360022583972295251, 0.77288879576405622451318322419}, + {0.0978263142474358610156670579272, 0.99520350292830150884526574373}, + {-0.99520350292830150884526574373, 0.0978263142474358610156670579272}, + {0.998786029235087635669287919882, 0.0492591900543111402277496324587}, + {-0.0492591900543111402277496324587, 0.998786029235087635669287919882}, + {0.671416866903355447071533035341, 0.74107988154967607830059250773}, + {-0.74107988154967607830059250773, 0.671416866903355447071533035341}, + {0.903907293843009052025649907591, 0.427728423345243802256021581343}, + {-0.427728423345243802256021581343, 0.903907293843009052025649907591}, + {0.336709308386720584937989997343, 0.941608645694025248040759379364}, + {-0.941608645694025248040759379364, 0.336709308386720584937989997343}, + {0.969984644496215242526204747264, 
0.243166176598536898367797220999}, + {-0.243166176598536898367797220999, 0.969984644496215242526204747264}, + {0.513938267342065380205440305872, 0.857827172198127430036151963577}, + {-0.857827172198127430036151963577, 0.513938267342065380205440305872}, + {0.803093292804466396539453398873, 0.595853306655656389523301186273}, + {-0.595853306655656389523301186273, 0.803093292804466396539453398873}, + {0.146540799538829757953095622725, 0.989204626996113778325536713965}, + {-0.989204626996113778325536713965, 0.146540799538829757953095622725}, + {0.989148356564255593070811301004, 0.14692014397703362371672142217}, + {-0.14692014397703362371672142217, 0.989148356564255593070811301004}, + {0.59554528042704979462484971009, 0.803321740625178581929333176959}, + {-0.803321740625178581929333176959, 0.59554528042704979462484971009}, + {0.857630016266187622520078548405, 0.514267202142289825417265092256}, + {-0.514267202142289825417265092256, 0.857630016266187622520078548405}, + {0.242794174274304219718345621004, 0.970077826227596418107168574352}, + {-0.970077826227596418107168574352, 0.242794174274304219718345621004}, + {0.941479450054132582792476569011, 0.337070386011242617563254952984}, + {-0.337070386011242617563254952984, 0.941479450054132582792476569011}, + {0.427381747795341770945753978594, 0.90407125916677544275046329858}, + {-0.90407125916677544275046329858, 0.427381747795341770945753978594}, + {0.740822341917587334769734752626, 0.671701018099378321934977975616}, + {-0.671701018099378321934977975616, 0.740822341917587334769734752626}, + {0.0488761567964467597002808929574, 0.99880484645240341645688886274}, + {-0.99880484645240341645688886274, 0.0488761567964467597002808929574}, + {0.999694094596565996369008644251, 0.0247329179182762225186476001682}, + {-0.0247329179182762225186476001682, 0.999694094596565996369008644251}, + {0.689401659422834378609934447013, 0.72437928737992107741661129694}, + {-0.72437928737992107741661129694, 0.689401659422834378609934447013}, + 
{0.914132034848799457371626431268, 0.4054166040793016900778411582}, + {-0.4054166040793016900778411582, 0.914132034848799457371626431268}, + {0.359716130791347510342603754907, 0.933061790691539383324482059834}, + {-0.933061790691539383324482059834, 0.359716130791347510342603754907}, + {0.975660099965271587763027127949, 0.219288324668132472705650570788}, + {-0.219288324668132472705650570788, 0.975660099965271587763027127949}, + {0.534835611410714673041866262793, 0.844956134226462096670218215877}, + {-0.844956134226462096670218215877, 0.534835611410714673041866262793}, + {0.817474388284239239510498009622, 0.575964950755954219197008114861}, + {-0.575964950755954219197008114861, 0.817474388284239239510498009622}, + {0.17077296099679922791736430554, 0.985310405807421574309046263807}, + {-0.985310405807421574309046263807, 0.17077296099679922791736430554}, + {0.992456044400537695793218517792, 0.122600978515010242131388906728}, + {-0.122600978515010242131388906728, 0.992456044400537695793218517792}, + {0.615080415737127572661790964048, 0.788464382313267431179326649726}, + {-0.788464382313267431179326649726, 0.615080415737127572661790964048}, + {0.869992463069326871227815445309, 0.493065020258551645060407508936}, + {-0.493065020258551645060407508936, 0.869992463069326871227815445309}, + {0.266527950826803694539535172225, 0.96382718960821234244207289521}, + {-0.96382718960821234244207289521, 0.266527950826803694539535172225}, + {0.949468015417276545164781964559, 0.313863804379508504194262741294}, + {-0.313863804379508504194262741294, 0.949468015417276545164781964559}, + {0.449440047775531148488425969845, 0.893310496667048203889294200053}, + {-0.893310496667048203889294200053, 0.449440047775531148488425969845}, + {0.757083588263017248642938739067, 0.653318023923107671357968229131}, + {-0.653318023923107671357968229131, 0.757083588263017248642938739067}, + {0.0733733341983990317025643435045, 0.997304544173247986549313282012}, + {-0.997304544173247986549313282012, 
0.0733733341983990317025643435045}, + {0.997276332516613184253628787701, 0.0737557902961770983640832355377}, + {-0.0737557902961770983640832355377, 0.997276332516613184253628787701}, + {0.653027637969147645158329851256, 0.757334076909547126987831688893}, + {-0.757334076909547126987831688893, 0.653027637969147645158329851256}, + {0.893138072882678324404537306691, 0.449782595002758689428645766384}, + {-0.449782595002758689428645766384, 0.893138072882678324404537306691}, + {0.313499664885093509703040126624, 0.949588310857359951810963138996}, + {-0.949588310857359951810963138996, 0.313499664885093509703040126624}, + {0.963724906547376525267623037507, 0.266897554316727403378450844684}, + {-0.266897554316727403378450844684, 0.963724906547376525267623037507}, + {0.49273134607855884059546269782, 0.870181487157484556682618404011}, + {-0.870181487157484556682618404011, 0.49273134607855884059546269782}, + {0.788228443954700486173692297598, 0.615382742803819327015446560836}, + {-0.615382742803819327015446560836, 0.788228443954700486173692297598}, + {0.122220367382731537331252980039, 0.992502988306246947125544011215}, + {-0.992502988306246947125544011215, 0.122220367382731537331252980039}, + {0.985244842744618543051160486357, 0.171150810238023282083830167721}, + {-0.171150810238023282083830167721, 0.985244842744618543051160486357}, + {0.575651410908915139863495369354, 0.81769520795867167706205691502}, + {-0.81769520795867167706205691502, 0.575651410908915139863495369354}, + {0.844750965210101512070650642272, 0.535159608693146604529999876831}, + {-0.535159608693146604529999876831, 0.844750965210101512070650642272}, + {0.218914147589880814237517370202, 0.975744124238007271010530985222}, + {-0.975744124238007271010530985222, 0.218914147589880814237517370202}, + {0.932923772674460138887297944166, 0.360073929046317020574718981152}, + {-0.360073929046317020574718981152, 0.932923772674460138887297944166}, + {0.405066009031113338778595789336, 0.914287442945382444747792760609}, + 
{-0.914287442945382444747792760609, 0.405066009031113338778595789336}, + {0.724114851894517852493038390094, 0.689679404698864795264512395079}, + {-0.689679404698864795264512395079, 0.724114851894517852493038390094}, + {0.0243495382252375304477087780697, 0.999703506039774647540241403476}, + {-0.999703506039774647540241403476, 0.0243495382252375304477087780697}, + {0.999922330418976490484794794611, 0.012463271219194511224559462903}, + {-0.012463271219194511224559462903, 0.999922330418976490484794794611}, + {0.698238996904254283215607301827, 0.715864724093973392449186121667}, + {-0.715864724093973392449186121667, 0.698238996904254283215607301827}, + {0.919038287766422046232150933065, 0.394168270690784139365092642038}, + {-0.394168270690784139365092642038, 0.919038287766422046232150933065}, + {0.371139148315682509959856361093, 0.928577262583738849421877148416}, + {-0.928577262583738849421877148416, 0.371139148315682509959856361093}, + {0.978277639625900530262470056186, 0.207298962390014718248920644328}, + {-0.207298962390014718248920644328, 0.978277639625900530262470056186}, + {0.545164250823729323869315521733, 0.83832925490155829617577865065}, + {-0.83832925490155829617577865065, 0.545164250823729323869315521733}, + {0.824480809910689504960146223311, 0.565889913401019573058192690951}, + {-0.565889913401019573058192690951, 0.824480809910689504960146223311}, + {0.182851376475096333296121997591, 0.983140566816954497575409277488}, + {-0.983140566816954497575409277488, 0.182851376475096333296121997591}, + {0.993885816887378092943094998191, 0.110412784541053632381135685137}, + {-0.110412784541053632381135685137, 0.993885816887378092943094998191}, + {0.624709772167528099018340981274, 0.780857029524864576508491609275}, + {-0.780857029524864576508491609275, 0.624709772167528099018340981274}, + {0.875977620510351662197479072347, 0.48235174755050103101439162856}, + {-0.48235174755050103101439162856, 0.875977620510351662197479072347}, + {0.278335524020384861287880085001, 
0.960483907240666789917327150761}, + {-0.960483907240666789917327150761, 0.278335524020384861287880085001}, + {0.953248113913869321578431481612, 0.302188737910020044363790248099}, + {-0.302188737910020044363790248099, 0.953248113913869321578431481612}, + {0.46036849972740401248927355482, 0.887727911276163017539886368468}, + {-0.887727911276163017539886368468, 0.46036849972740401248927355482}, + {0.765043798404530406287449295633, 0.643978250038592658910374666448}, + {-0.643978250038592658910374666448, 0.765043798404530406287449295633}, + {0.0856062702176225293060340959528, 0.996329045295492377043444776064}, + {-0.996329045295492377043444776064, 0.0856062702176225293060340959528}, + {0.998106336447323050364843766147, 0.061512121924895378499709153175}, + {-0.061512121924895378499709153175, 0.998106336447323050364843766147}, + {0.662272120308896594664815893339, 0.749263397385431129116284409974}, + {-0.749263397385431129116284409974, 0.662272120308896594664815893339}, + {0.898590345563227033132136511995, 0.438788549144756290232294304587}, + {-0.438788549144756290232294304587, 0.898590345563227033132136511995}, + {0.325128968249257077616221067728, 0.945669685463784714940516096249}, + {-0.945669685463784714940516096249, 0.325128968249257077616221067728}, + {0.966927583390505662741531978099, 0.255051070334152529817117738276}, + {-0.255051070334152529817117738276, 0.966927583390505662741531978099}, + {0.503372709749581037641519287718, 0.864069392513913792086555076821}, + {-0.864069392513913792086555076821, 0.503372709749581037641519287718}, + {0.795720784691225202500675095507, 0.605663630087180382410849688313}, + {-0.605663630087180382410849688313, 0.795720784691225202500675095507}, + {0.134390702833540098826148323496, 0.990928422738951986126210158545}, + {-0.990928422738951986126210158545, 0.134390702833540098826148323496}, + {0.98727093934043541612055605583, 0.159047453088234758133978630212}, + {-0.159047453088234758133978630212, 0.98727093934043541612055605583}, + 
{0.585642443466894424553004228073, 0.810569508685174633377812369872}, + {-0.810569508685174633377812369872, 0.585642443466894424553004228073}, + {0.851254588643089227240068339597, 0.524752918347373253915577606676}, + {-0.524752918347373253915577606676, 0.851254588643089227240068339597}, + {0.230871545134835043366905438234, 0.972984239156551744542866799748}, + {-0.972984239156551744542866799748, 0.230871545134835043366905438234}, + {0.937272186236140947279693591554, 0.348598406347654932169177754986}, + {-0.348598406347654932169177754986, 0.937272186236140947279693591554}, + {0.41625522166586548244993082335, 0.909247815744366305601431577088}, + {-0.909247815744366305601431577088, 0.41625522166586548244993082335}, + {0.732523754597556697731874919555, 0.680741469979829094683054790949}, + {-0.680741469979829094683054790949, 0.732523754597556697731874919555}, + {0.0366156045985270295339830681769, 0.999329423913798420286980217497}, + {-0.999329423913798420286980217497, 0.0366156045985270295339830681769}, + {0.999315308520673073466866753733, 0.0369988399308942630949736951607}, + {-0.0369988399308942630949736951607, 0.999315308520673073466866753733}, + {0.680460500587323879884138477792, 0.732784761809665785570189200371}, + {-0.732784761809665785570189200371, 0.680460500587323879884138477792}, + {0.909088117009170582072385968786, 0.416603883218484349892918316982}, + {-0.416603883218484349892918316982, 0.909088117009170582072385968786}, + {0.348238941340855312489566131262, 0.93740580312573296151157364875}, + {-0.93740580312573296151157364875, 0.348238941340855312489566131262}, + {0.97289562948234775685563135994, 0.231244662931161054642004160087}, + {-0.231244662931161054642004160087, 0.97289562948234775685563135994}, + {0.524426427721901400325066333608, 0.851455766265544311544033462269}, + {-0.851455766265544311544033462269, 0.524426427721901400325066333608}, + {0.810344858021820546589708555985, 0.585953249907870676871368686989}, + {-0.585953249907870676871368686989, 
0.810344858021820546589708555985}, + {0.158668827738728313025973193362, 0.98733186067503042604442953234}, + {-0.98733186067503042604442953234, 0.158668827738728313025973193362}, + {0.990876811683950697862144352257, 0.134770709232564345203542188756}, + {-0.134770709232564345203542188756, 0.990876811683950697862144352257}, + {0.605358430458560525799782681133, 0.795952995265893914478283477365}, + {-0.795952995265893914478283477365, 0.605358430458560525799782681133}, + {0.863876287963447508921888129407, 0.503704039188187069342461654742}, + {-0.503704039188187069342461654742, 0.863876287963447508921888129407}, + {0.254680239504194882993459714271, 0.967025323146237902882660364412}, + {-0.967025323146237902882660364412, 0.254680239504194882993459714271}, + {0.945544930529979676947505140561, 0.325491604114539256897842278704}, + {-0.325491604114539256897842278704, 0.945544930529979676947505140561}, + {0.4384439118056336903705982877, 0.898758552782989439400296305394}, + {-0.898758552782989439400296305394, 0.4384439118056336903705982877}, + {0.749009364117883769118577674817, 0.662559410516312397021465585567}, + {-0.662559410516312397021465585567, 0.749009364117883769118577674817}, + {0.0611293484249335883351506026884, 0.998129852655025628926921399398}, + {-0.998129852655025628926921399398, 0.0611293484249335883351506026884}, + {0.996296142438496845628037590359, 0.0859883513167353230599587732286}, + {-0.0859883513167353230599587732286, 0.996296142438496845628037590359}, + {0.643684812069144851598423429095, 0.765290704707374369597516761132}, + {-0.765290704707374369597516761132, 0.643684812069144851598423429095}, + {0.887551296893573371526997561887, 0.460708905256384082527176815347}, + {-0.460708905256384082527176815347, 0.887551296893573371526997561887}, + {0.30182314962454065421226800936, 0.95336393174418032625538899083}, + {-0.95336393174418032625538899083, 0.30182314962454065421226800936}, + {0.960377096278180131250223894313, 0.278703844509348486191413485358}, + 
{-0.278703844509348486191413485358, 0.960377096278180131250223894313}, + {0.482015778879225531561303341732, 0.87616253566986845857655907821}, + {-0.87616253566986845857655907821, 0.482015778879225531561303341732}, + {0.780617398913848292707484688435, 0.625009181142947567444423384586}, + {-0.625009181142947567444423384586, 0.780617398913848292707484688435}, + {0.110031625994157000736350937586, 0.993928086574215829962497537053}, + {-0.993928086574215829962497537053, 0.110031625994157000736350937586}, + {0.983070371899499639845032561425, 0.183228392705332143730601046627}, + {-0.183228392705332143730601046627, 0.983070371899499639845032561425}, + {0.565573687365865329645941983472, 0.824697765341569466457372072909}, + {-0.824697765341569466457372072909, 0.565573687365865329645941983472}, + {0.838120125388991499271185148245, 0.545485705970322531932481524564}, + {-0.545485705970322531932481524564, 0.838120125388991499271185148245}, + {0.206923782379529103137016932124, 0.97835706584342163605327868936}, + {-0.97835706584342163605327868936, 0.206923782379529103137016932124}, + {0.928434864224177980318586378417, 0.371495225935720763121850040989}, + {-0.371495225935720763121850040989, 0.928434864224177980318586378417}, + {0.393815794945351016487222750584, 0.919189381820504469722266094323}, + {-0.919189381820504469722266094323, 0.393815794945351016487222750584}, + {0.7155969001582873634248471717, 0.698513476236393038298899682559}, + {-0.698513476236393038298899682559, 0.7155969001582873634248471717}, + {0.0120798049010559568200440239139, 0.999927036494939636135370619741}, + {-0.999927036494939636135370619741, 0.0120798049010559568200440239139}, + {0.999979980358237652282582530461, 0.0063276285240713775123722051319}, + {-0.0063276285240713775123722051319, 0.999979980358237652282582530461}, + {0.702618316123900132907920124126, 0.711566934200300815760442674218}, + {-0.711566934200300815760442674218, 0.702618316123900132907920124126}, + {0.921439558171691430743521777913, 
0.388521737662589572437354945578}, + {-0.388521737662589572437354945578, 0.921439558171691430743521777913}, + {0.376829804697141279579142292278, 0.926282515376337212131829801365}, + {-0.926282515376337212131829801365, 0.376829804697141279579142292278}, + {0.979531186346911497686562597664, 0.201292461294039021213464479843}, + {-0.201292461294039021213464479843, 0.979531186346911497686562597664}, + {0.550297879866859185327143677569, 0.834968408632350445053305065812}, + {-0.834968408632350445053305065812, 0.550297879866859185327143677569}, + {0.827937524525190871038660134218, 0.560820341538267541636741952971}, + {-0.560820341538267541636741952971, 0.827937524525190871038660134218}, + {0.188880371461501384011683057906, 0.982000104519630490074177942006}, + {-0.982000104519630490074177942006, 0.188880371461501384011683057906}, + {0.994544587377484301526919807657, 0.104312337324735798782526785544}, + {-0.104312337324735798782526785544, 0.994544587377484301526919807657}, + {0.629489260843256737132378475508, 0.777009183010735293706261472835}, + {-0.777009183010735293706261472835, 0.629489260843256737132378475508}, + {0.878920785162485840302792894363, 0.476967769780474171170681074727}, + {-0.476967769780474171170681074727, 0.878920785162485840302792894363}, + {0.28422370289500104023616700033, 0.958757991733395709132992124069}, + {-0.958757991733395709132992124069, 0.28422370289500104023616700033}, + {0.955084364525603413120791174151, 0.296334028823428186072419521224}, + {-0.296334028823428186072419521224, 0.955084364525603413120791174151}, + {0.465806829483922713031063267408, 0.884886432038674564104496766959}, + {-0.884886432038674564104496766959, 0.465806829483922713031063267408}, + {0.768980772930028866340990134631, 0.639271906831463510023638718849}, + {-0.639271906831463510023638718849, 0.768980772930028866340990134631}, + {0.0917180187983134553864061899731, 0.995785019483478750501603826706}, + {-0.995785019483478750501603826706, 0.0917180187983134553864061899731}, + 
{0.998464978662261248842924032942, 0.0553866986285525969568865889414}, + {-0.0553866986285525969568865889414, 0.998464978662261248842924032942}, + {0.66685704700158121571718083942, 0.745185667377151639279020400863}, + {-0.745185667377151639279020400863, 0.66685704700158121571718083942}, + {0.901265785776836581000281967135, 0.43326664236710094035842644189}, + {-0.43326664236710094035842644189, 0.901265785776836581000281967135}, + {0.330925367894519539824216280977, 0.943656929653927223355935893778}, + {-0.943656929653927223355935893778, 0.330925367894519539824216280977}, + {0.968474345199216823942833798355, 0.249113312954061361104152183543}, + {-0.249113312954061361104152183543, 0.968474345199216823942833798355}, + {0.50866506402190414526387485239, 0.860964489769230900684249263577}, + {-0.860964489769230900684249263577, 0.50866506402190414526387485239}, + {0.799422087642728040890460761148, 0.600769777692705231331160575792}, + {-0.600769777692705231331160575792, 0.799422087642728040890460761148}, + {0.140468395464033002362214119785, 0.990085162940925966879035513557}, + {-0.990085162940925966879035513557, 0.140468395464033002362214119785}, + {0.988228251069897423164434258069, 0.152986678463622044477077110969}, + {-0.152986678463622044477077110969, 0.988228251069897423164434258069}, + {0.590604979918813444150771374552, 0.806960815464479619407711652457}, + {-0.806960815464479619407711652457, 0.590604979918813444150771374552}, + {0.85445838739231017022746073053, 0.519519840059003756849165256426}, + {-0.519519840059003756849165256426, 0.85445838739231017022746073053}, + {0.236837318100152377509104439923, 0.971549321833496626332760115474}, + {-0.971549321833496626332760115474, 0.236837318100152377509104439923}, + {0.939393501962337507116274082364, 0.342840850061563950923471111309}, + {-0.342840850061563950923471111309, 0.939393501962337507116274082364}, + {0.421826425493854906267898741135, 0.906676605386439460332326234493}, + {-0.906676605386439460332326234493, 
0.421826425493854906267898741135}, + {0.73668691618058057546392092263, 0.676233973953058953298977939994}, + {-0.676233973953058953298977939994, 0.73668691618058057546392092263}, + {0.0427466853917591316225355058123, 0.999085942693629269761856903642}, + {-0.999085942693629269761856903642, 0.0427466853917591316225355058123}, + {0.99952351730636634918880645273, 0.0308664599769714123600472532871}, + {-0.0308664599769714123600472532871, 0.99952351730636634918880645273}, + {0.684943973881821488447485535289, 0.728595740203700770187822399748}, + {-0.728595740203700770187822399748, 0.684943973881821488447485535289}, + {0.911627237054095651558327517705, 0.411017980946230154781773080686}, + {-0.411017980946230154781773080686, 0.911627237054095651558327517705}, + {0.353984199718624770181207850328, 0.935251402751989924233555484534}, + {-0.935251402751989924233555484534, 0.353984199718624770181207850328}, + {0.974296205574542439897811618721, 0.225270734457561155883453807292}, + {-0.225270734457561155883453807292, 0.974296205574542439897811618721}, + {0.529640989908265913399532109906, 0.84822191778389699479134833382}, + {-0.84822191778389699479134833382, 0.529640989908265913399532109906}, + {0.813924945060104598759664895624, 0.580970036928674771203873206105}, + {-0.580970036928674771203873206105, 0.813924945060104598759664895624}, + {0.16472399525042316636991301948, 0.986339700807353003675359559566}, + {-0.986339700807353003675359559566, 0.16472399525042316636991301948}, + {0.991685096233929419540231720021, 0.128688266394034689366066004368}, + {-0.128688266394034689366066004368, 0.991685096233929419540231720021}, + {0.610230910522282621322176510148, 0.792223602175008312364923312998}, + {-0.792223602175008312364923312998, 0.610230910522282621322176510148}, + {0.866950695618230904315737461729, 0.498393911847913206880633651963}, + {-0.498393911847913206880633651963, 0.866950695618230904315737461729}, + {0.260609001056295752629665685163, 0.965444430595795433092121129448}, + 
{-0.965444430595795433092121129448, 0.260609001056295752629665685163}, + {0.947524309850989565617851440038, 0.319683722202751374741325207651}, + {-0.319683722202751374741325207651, 0.947524309850989565617851440038}, + {0.443950337030216135580928948912, 0.896051392639260146921742489212}, + {-0.896051392639260146921742489212, 0.443950337030216135580928948912}, + {0.753060652344415104586516918062, 0.657951102963285516800340246846}, + {-0.657951102963285516800340246846, 0.753060652344415104586516918062}, + {0.067252607322993498972785175738, 0.997735980512008624465636330569}, + {-0.997735980512008624465636330569, 0.067252607322993498972785175738}, + {0.996805002050020427795118393988, 0.0798735744039220096901487977448}, + {-0.0798735744039220096901487977448, 0.996805002050020427795118393988}, + {0.64836843037161828728898171903, 0.761326722568861358197978006501}, + {-0.761326722568861358197978006501, 0.64836843037161828728898171903}, + {0.890361445690723840584723802749, 0.455254320163493098050366825191}, + {-0.455254320163493098050366825191, 0.890361445690723840584723802749}, + {0.30766719900289118960756695742, 0.951494032907046372038450954278}, + {-0.951494032907046372038450954278, 0.30766719900289118960756695742}, + {0.96206911209193157752395109128, 0.272805834905784805322070951661}, + {-0.272805834905784805322070951661, 0.96206911209193157752395109128}, + {0.48738273732118736480956044943, 0.873188448939463790487991445843}, + {-0.873188448939463790487991445843, 0.48738273732118736480956044943}, + {0.784437688252072717887131148018, 0.620207637206882433922316977259}, + {-0.620207637206882433922316977259, 0.784437688252072717887131148018}, + {0.116128182768666932123124979626, 0.993234234794012293967568894004}, + {-0.993234234794012293967568894004, 0.116128182768666932123124979626}, + {0.984176134159655324751270200068, 0.177192937078643281134660014686}, + {-0.177192937078643281134660014686, 0.984176134159655324751270200068}, + {0.570623290959583751025263609336, 
0.821211945732923553897819601843}, + {-0.821211945732923553897819601843, 0.570623290959583751025263609336}, + {0.841451385384081262941435852554, 0.540332828944540821680675435346}, + {-0.540332828944540821680675435346, 0.841451385384081262941435852554}, + {0.21292297319950317935344230591, 0.977068988088294454463778038189}, + {-0.977068988088294454463778038189, 0.21292297319950317935344230591}, + {0.930696838554288863321062308387, 0.365791463411944517414298161384}, + {-0.365791463411944517414298161384, 0.930696838554288863321062308387}, + {0.399448421491882199596545888198, 0.916755670049355986250816386018}, + {-0.916755670049355986250816386018, 0.399448421491882199596545888198}, + {0.719869427364936864677247285726, 0.694109506882939930605402878427}, + {-0.694109506882939930605402878427, 0.719869427364936864677247285726}, + {0.018215014455646286184453686019, 0.999834092861600964852186734788}, + {-0.999834092861600964852186734788, 0.018215014455646286184453686019}, + {0.999827033969133416313468387671, 0.0185984446792005105042466794885}, + {-0.0185984446792005105042466794885, 0.999827033969133416313468387671}, + {0.693833389381021348185640817974, 0.720135562085392422204677131958}, + {-0.720135562085392422204677131958, 0.693833389381021348185640817974}, + {0.916602416089060789694542563666, 0.399799963506371980326292714381}, + {-0.399799963506371980326292714381, 0.916602416089060789694542563666}, + {0.365434518755058390659939959733, 0.93083704938203815082431447081}, + {-0.93083704938203815082431447081, 0.365434518755058390659939959733}, + {0.976987261304682386153785955685, 0.213297658797222289273065598536}, + {-0.213297658797222289273065598536, 0.976987261304682386153785955685}, + {0.540010096654684024208847858972, 0.841658538548144763424829761789}, + {-0.841658538548144763424829761789, 0.540010096654684024208847858972}, + {0.820993054059693472268577352224, 0.570938179828374359203735366464}, + {-0.570938179828374359203735366464, 0.820993054059693472268577352224}, + 
{0.176815497237715002487590254532, 0.984244014427611113582372581732}, + {-0.984244014427611113582372581732, 0.176815497237715002487590254532}, + {0.993189627158012622309968264744, 0.116509074778439039588029402239}, + {-0.116509074778439039588029402239, 0.993189627158012622309968264744}, + {0.619906763521964720986545671622, 0.784675477213174321633459840086}, + {-0.784675477213174321633459840086, 0.619906763521964720986545671622}, + {0.873001475795909920307735774259, 0.487717565049858858827747098985}, + {-0.487717565049858858827747098985, 0.873001475795909920307735774259}, + {0.272436865970605235354184969765, 0.962173661071697883251374605607}, + {-0.962173661071697883251374605607, 0.272436865970605235354184969765}, + {0.951375974049424422140930346359, 0.308032069761427329535052876963}, + {-0.308032069761427329535052876963, 0.951375974049424422140930346359}, + {0.454912837357071941912778356709, 0.89053596805953783022147263182}, + {-0.89053596805953783022147263182, 0.454912837357071941912778356709}, + {0.761078020412426559104801526701, 0.648660347828585837071102560003}, + {-0.648660347828585837071102560003, 0.761078020412426559104801526701}, + {0.0794912986092427692463857624716, 0.996835559882078170446106923919}, + {-0.996835559882078170446106923919, 0.0794912986092427692463857624716}, + {0.997710116092949572674797309446, 0.067635229324614479229360597401}, + {-0.067635229324614479229360597401, 0.997710116092949572674797309446}, + {0.657662259445200070295811656251, 0.753312918050284330284682710044}, + {-0.753312918050284330284682710044, 0.657662259445200070295811656251}, + {0.895881073930992366527448211855, 0.444293935781540583196402849353}, + {-0.444293935781540583196402849353, 0.895881073930992366527448211855}, + {0.319320327682103610733577170322, 0.947646837344479298081978413393}, + {-0.947646837344479298081978413393, 0.319320327682103610733577170322}, + {0.965344417304789370604112264118, 0.260979225185601071412833107388}, + {-0.260979225185601071412833107388, 
0.965344417304789370604112264118}, + {0.49806140377924340567616923181, 0.867141763534342469021964916465}, + {-0.867141763534342469021964916465, 0.49806140377924340567616923181}, + {0.791989523301921849451900925487, 0.610534679588305317921026471595}, + {-0.610534679588305317921026471595, 0.791989523301921849451900925487}, + {0.128307950469043419161607744172, 0.99173437464193681112334388672}, + {-0.99173437464193681112334388672, 0.128307950469043419161607744172}, + {0.986276457418115093389587855199, 0.165102239666132660778075091912}, + {-0.165102239666132660778075091912, 0.986276457418115093389587855199}, + {0.580657857907985297529762647173, 0.814147684422003359294706115179}, + {-0.814147684422003359294706115179, 0.580657857907985297529762647173}, + {0.848018740639736812170212942874, 0.529966239984958620290456110524}, + {-0.529966239984958620290456110524, 0.848018740639736812170212942874}, + {0.224897079986332515399993781102, 0.974382524172935471362677617435}, + {-0.974382524172935471362677617435, 0.224897079986332515399993781102}, + {0.935115582741880890793595426658, 0.354342838100775547882648197628}, + {-0.354342838100775547882648197628, 0.935115582741880890793595426658}, + {0.410668346064048728205619909204, 0.911784793435939433514647589618}, + {-0.911784793435939433514647589618, 0.410668346064048728205619909204}, + {0.72833301390919635842635670997, 0.685223336475011213231312012795}, + {-0.685223336475011213231312012795, 0.72833301390919635842635670997}, + {0.0304831452484770094868427037227, 0.999535280945980542099960075575}, + {-0.999535280945980542099960075575, 0.0304831452484770094868427037227}, + {0.999069476078429330456742718525, 0.0431298268994055461478254187568}, + {-0.0431298268994055461478254187568, 0.999069476078429330456742718525}, + {0.675951408339577008987930639705, 0.736946194483520278240007428394}, + {-0.736946194483520278240007428394, 0.675951408339577008987930639705}, + {0.906514770310458795066210768709, 0.422174100590000822563752080896}, + 
{-0.422174100590000822563752080896, 0.906514770310458795066210768709}, + {0.342480571963769742094285675194, 0.939524910700812232455803041375}, + {-0.939524910700812232455803041375, 0.342480571963769742094285675194}, + {0.971458424419585964137979772204, 0.237209885173901596777668032701}, + {-0.237209885173901596777668032701, 0.971458424419585964137979772204}, + {0.519192121176940246662923073018, 0.854657557918836463528577951365}, + {-0.854657557918836463528577951365, 0.519192121176940246662923073018}, + {0.806734261957640752527254335291, 0.590914402078389522365853281372}, + {-0.590914402078389522365853281372, 0.806734261957640752527254335291}, + {0.152607686435349082509915774608, 0.988286848056195710299220991146}, + {-0.988286848056195710299220991146, 0.152607686435349082509915774608}, + {0.990031221182057996621495021827, 0.140848078030064077781347009477}, + {-0.140848078030064077781347009477, 0.990031221182057996621495021827}, + {0.600463158992081580400679285958, 0.799652421176382244283331601764}, + {-0.799652421176382244283331601764, 0.600463158992081580400679285958}, + {0.860769355854687168161376575881, 0.508995202356080089778345154627}, + {-0.508995202356080089778345154627, 0.860769355854687168161376575881}, + {0.248741889385022479919129523296, 0.96856980773982892518603193821}, + {-0.96856980773982892518603193821, 0.248741889385022479919129523296}, + {0.943529951976759484466583671747, 0.331287231451400876469648437705}, + {-0.331287231451400876469648437705, 0.943529951976759484466583671747}, + {0.432920979415581275162594465655, 0.901431875175186969606500042573}, + {-0.901431875175186969606500042573, 0.432920979415581275162594465655}, + {0.744929876112135347376863592217, 0.667142773082013307295312642964}, + {-0.667142773082013307295312642964, 0.744929876112135347376863592217}, + {0.0550037880414559199326873795144, 0.998486145773235356237762516685}, + {-0.998486145773235356237762516685, 0.0550037880414559199326873795144}, + {0.995749772840319513633744463732, 
0.092099890816722387865134180629}, + {-0.092099890816722387865134180629, 0.995749772840319513633744463732}, + {0.63897695939730914105325609853, 0.769225874083399263625437924929}, + {-0.769225874083399263625437924929, 0.63897695939730914105325609853}, + {0.884707732291741044683419659123, 0.466146144919385940763589815106}, + {-0.466146144919385940763589815106, 0.884707732291741044683419659123}, + {0.295967736775197831633477107971, 0.955197936968127714330023536604}, + {-0.955197936968127714330023536604, 0.295967736775197831633477107971}, + {0.958648922809561154778634772811, 0.284591361070690496148927195463}, + {-0.284591361070690496148927195463, 0.958648922809561154778634772811}, + {0.476630672815625322513710671046, 0.879103635376014325153448680794}, + {-0.879103635376014325153448680794, 0.476630672815625322513710671046}, + {0.776767719771761511182717185875, 0.629787193836599201723913665774}, + {-0.629787193836599201723913665774, 0.776767719771761511182717185875}, + {0.103930926591118508173217094281, 0.994584517523730338162124553492}, + {-0.994584517523730338162124553492, 0.103930926591118508173217094281}, + {0.981927597595475543101883886266, 0.189256949886596720356024547982}, + {-0.189256949886596720356024547982, 0.981927597595475543101883886266}, + {0.5605027902424810637427299298, 0.828152535545471413236384705669}, + {-0.828152535545471413236384705669, 0.5605027902424810637427299298}, + {0.834757310644888228523541329196, 0.550618045767584329830413025775}, + {-0.550618045767584329830413025775, 0.834757310644888228523541329196}, + {0.200916800996102234977414013883, 0.979608309007989452865672319604}, + {-0.979608309007989452865672319604, 0.200916800996102234977414013883}, + {0.926137934846182564463390463061, 0.377185001874210446004553887178}, + {-0.377185001874210446004553887178, 0.926137934846182564463390463061}, + {0.388168341456725740634681187657, 0.921588486630955494582906339929}, + {-0.921588486630955494582906339929, 0.388168341456725740634681187657}, + 
{0.711297431132804081066467460914, 0.702891146952267398262392816832}, + {-0.702891146952267398262392816832, 0.711297431132804081066467460914}, + {0.00594414054863863419025937773199, 0.999982333440515347433574788738}, + {-0.999982333440515347433574788738, 0.00594414054863863419025937773199}, + {0.999994687152754080372574208013, 0.00325970340147597320568095824456}, + {-0.00325970340147597320568095824456, 0.999994687152754080372574208013}, + {0.704798066056391947498127592553, 0.709407982816073090504005449475}, + {-0.709407982816073090504005449475, 0.704798066056391947498127592553}, + {0.922627189594287910878733782738, 0.385692972481151141739275090004}, + {-0.385692972481151141739275090004, 0.922627189594287910878733782738}, + {0.379669825971788943430595963946, 0.925122058566625882214395915071}, + {-0.925122058566625882214395915071, 0.379669825971788943430595963946}, + {0.980144133054583588382513426041, 0.198286354645696188958581274164}, + {-0.198286354645696188958581274164, 0.980144133054583588382513426041}, + {0.552856937036373285998536175612, 0.833276189009838241794625446346}, + {-0.833276189009838241794625446346, 0.552856937036373285998536175612}, + {0.82965420065317263542681303079, 0.55827762568326633196846842111}, + {-0.55827762568326633196846842111, 0.82965420065317263542681303079}, + {0.191892216416400251954854638825, 0.98141600622712554802262729936}, + {-0.98141600622712554802262729936, 0.191892216416400251954854638825}, + {0.994859932602027319248350067937, 0.101260626618096843953864549803}, + {-0.101260626618096843953864549803, 0.994859932602027319248350067937}, + {0.631870128924829854710765175696, 0.775074280422540451418456086685}, + {-0.775074280422540451418456086685, 0.631870128924829854710765175696}, + {0.880379965288918153731856364175, 0.474269034112372978029270598199}, + {-0.474269034112372978029270598199, 0.880379965288918153731856364175}, + {0.287163793348918394698898737261, 0.957881493604224365512322947325}, + {-0.957881493604224365512322947325, 
0.287163793348918394698898737261}, + {0.955989009704538927891803723469, 0.293402476683710111693415001355}, + {-0.293402476683710111693415001355, 0.955989009704538927891803723469}, + {0.468519430621912313927879267794, 0.883453192381870922034181603522}, + {-0.883453192381870922034181603522, 0.468519430621912313927879267794}, + {0.770938412530016936585752773681, 0.636909698533235868467272666749}, + {-0.636909698533235868467272666749, 0.770938412530016936585752773681}, + {0.0947726125408362429780595448392, 0.995498946213497770685307841632}, + {-0.995498946213497770685307841632, 0.0947726125408362429780595448392}, + {0.998630203692576046137219236698, 0.052323190584330339647500807132}, + {-0.052323190584330339647500807132, 0.998630203692576046137219236698}, + {0.669140106053227601279331793194, 0.743136271804219816772274498362}, + {-0.743136271804219816772274498362, 0.669140106053227601279331793194}, + {0.902590787574043873497942058748, 0.430499558869073817302108864169}, + {-0.430499558869073817302108864169, 0.902590787574043873497942058748}, + {0.333818909155973619906632166021, 0.942637223904252530282121824712}, + {-0.942637223904252530282121824712, 0.333818909155973619906632166021}, + {0.969234056247750053181277962722, 0.246140903162260504810632255612}, + {-0.246140903162260504810632255612, 0.969234056247750053181277962722}, + {0.511304071976311891312150237354, 0.859399875483143449450551543123}, + {-0.859399875483143449450551543123, 0.511304071976311891312150237354}, + {0.801261461112612538570942888327, 0.598314357955482600814889337926}, + {-0.598314357955482600814889337926, 0.801261461112612538570942888327}, + {0.143505272864572319679865586295, 0.989649552447766533980200165388}, + {-0.989649552447766533980200165388, 0.143505272864572319679865586295}, + {0.988692956794401944797812120669, 0.149954116932956960095424392421}, + {-0.149954116932956960095424392421, 0.988692956794401944797812120669}, + {0.593077921310565470136566545989, 0.80514506721083423101958942425}, + 
{-0.80514506721083423101958942425, 0.593077921310565470136566545989}, + {0.856048230555233935490377916722, 0.516895953711434263944113354228}, + {-0.516895953711434263944113354228, 0.856048230555233935490377916722}, + {0.239816874811107999532211465521, 0.970818142885593871049820791086}, + {-0.970818142885593871049820791086, 0.239816874811107999532211465521}, + {0.940440901902201753514987103699, 0.339957217939806877193831269324}, + {-0.339957217939806877193831269324, 0.940440901902201753514987103699}, + {0.42460608492168910865771636054, 0.905378193158790089967169478768}, + {-0.905378193158790089967169478768, 0.42460608492168910865771636054}, + {0.738758105785406904431056318572, 0.673970667860521621683744797338}, + {-0.673970667860521621683744797338, 0.738758105785406904431056318572}, + {0.0458116366923888504447681668807, 0.998950095822391248567839738826}, + {-0.998950095822391248567839738826, 0.0458116366923888504447681668807}, + {0.999613510322995946211221962585, 0.0277998197788694445531376686631}, + {-0.0277998197788694445531376686631, 0.999613510322995946211221962585}, + {0.687176050633676926970849763165, 0.726490932796481914124342438299}, + {-0.726490932796481914124342438299, 0.687176050633676926970849763165}, + {0.912883932157067201629274677543, 0.408219213670120095738269583308}, + {-0.408219213670120095738269583308, 0.912883932157067201629274677543}, + {0.356851844667720241588426688395, 0.93416099306128452806063933167}, + {-0.93416099306128452806063933167, 0.356851844667720241588426688395}, + {0.974982741224347138064842965832, 0.222280575657563372882208341252}, + {-0.222280575657563372882208341252, 0.974982741224347138064842965832}, + {0.532240805486072221519577851723, 0.846593010233097187366979596845}, + {-0.846593010233097187366979596845, 0.532240805486072221519577851723}, + {0.815703505528198369312065096892, 0.578470216233306633313304701005}, + {-0.578470216233306633313304701005, 0.815703505528198369312065096892}, + {0.167749267583607891696217961908, 
0.985829692809647051277011087222}, + {-0.985829692809647051277011087222, 0.167749267583607891696217961908}, + {0.992075239212224069262902048649, 0.125645213764824292823618634429}, + {-0.125645213764824292823618634429, 0.992075239212224069262902048649}, + {0.612658546417489291435742870817, 0.790347711770970517974888025492}, + {-0.790347711770970517974888025492, 0.612658546417489291435742870817}, + {0.868475666555644232325050779764, 0.495731799061477962364818949936}, + {-0.495731799061477962364818949936, 0.868475666555644232325050779764}, + {0.263569716350824823081211434328, 0.964640349883180925338876932074}, + {-0.964640349883180925338876932074, 0.263569716350824823081211434328}, + {0.948500626458698259391155716003, 0.316775254095797265119216490348}, + {-0.316775254095797265119216490348, 0.948500626458698259391155716003}, + {0.446697294645404030521973481882, 0.894685155211863980539988006058}, + {-0.894685155211863980539988006058, 0.446697294645404030521973481882}, + {0.75507567383362161894666542139, 0.655637648998821820356397438445}, + {-0.655637648998821820356397438445, 0.75507567383362161894666542139}, + {0.0703133016684832495846180222543, 0.997524956885027958009004578344}, + {-0.997524956885027958009004578344, 0.0703133016684832495846180222543}, + {0.997045359568640043868015254702, 0.0768150438562733572922525127069}, + {-0.0768150438562733572922525127069, 0.997045359568640043868015254702}, + {0.650701096493652042518363032286, 0.759333973309477938684608488984}, + {-0.759333973309477938684608488984, 0.650701096493652042518363032286}, + {0.891753956050629459362255602173, 0.452520587231180104748062831277}, + {-0.452520587231180104748062831277, 0.891753956050629459362255602173}, + {0.310584893615644452857793567091, 0.950545645331016597623374764225}, + {-0.950545645331016597623374764225, 0.310584893615644452857793567091}, + {0.962901540917664999774672196509, 0.269852964590693911173246988255}, + {-0.269852964590693911173246988255, 0.962901540917664999774672196509}, + 
{0.490059348012483853018750323827, 0.87168907038322973601651710851}, + {-0.87168907038322973601651710851, 0.490059348012483853018750323827}, + {0.786336766753948257502315755119, 0.617798097480841024164988084522}, + {-0.617798097480841024164988084522, 0.786336766753948257502315755119}, + {0.119174835935170878031819086118, 0.992873284200871730753590327367}, + {-0.992873284200871730753590327367, 0.119174835935170878031819086118}, + {0.984715122709017620650229218882, 0.174172693348194823625973981507}, + {-0.174172693348194823625973981507, 0.984715122709017620650229218882}, + {0.573140048240456057015990154468, 0.819457433368523280314832391014}, + {-0.819457433368523280314832391014, 0.573140048240456057015990154468}, + {0.843105143110442156739736674353, 0.537748749566859363113735525985}, + {-0.537748749566859363113735525985, 0.843105143110442156739736674353}, + {0.215919576553335490487484094047, 0.976411151339961036477177458437}, + {-0.976411151339961036477177458437, 0.215919576553335490487484094047}, + {0.931814690911749621271553678525, 0.362934404267548638145512995834}, + {-0.362934404267548638145512995834, 0.931814690911749621271553678525}, + {0.402259108369461493293073317545, 0.915525865136428529389434061159}, + {-0.915525865136428529389434061159, 0.402259108369461493293073317545}, + {0.721995537478211879545142437564, 0.691897711993288755394360123319}, + {-0.691897711993288755394360123319, 0.721995537478211879545142437564}, + {0.021282376499358386656712838203, 0.999773504575180993469984969124}, + {-0.999773504575180993469984969124, 0.021282376499358386656712838203}, + {0.99987938781685492539708093318, 0.0155309310407164488654352396679}, + {-0.0155309310407164488654352396679, 0.99987938781685492539708093318}, + {0.696039468836920693917136304663, 0.718003522150983175542648950795}, + {-0.718003522150983175542648950795, 0.696039468836920693917136304663}, + {0.91782467138542056517280798289, 0.396985985390220896817936591106}, + {-0.396985985390220896817936591106, 
0.91782467138542056517280798289}, + {0.368288566771496572727073726128, 0.929711531382394373856925540167}, + {-0.929711531382394373856925540167, 0.368288566771496572727073726128}, + {0.977637051411420765489879158849, 0.210299300302171726650968253125}, + {-0.210299300302171726650968253125, 0.977637051411420765489879158849}, + {0.542589727269785271346336230636, 0.839997849914688843320220712485}, + {-0.839997849914688843320220712485, 0.542589727269785271346336230636}, + {0.822740803960084421397880305449, 0.56841672169202939191023915555}, + {-0.56841672169202939191023915555, 0.822740803960084421397880305449}, + {0.179834283190787092010509695683, 0.983696920087308135371984008088}, + {-0.983696920087308135371984008088, 0.179834283190787092010509695683}, + {0.99354239782241360323666867771, 0.113461463630999945251254246159}, + {-0.113461463630999945251254246159, 0.99354239782241360323666867771}, + {0.622311196559740320566334048635, 0.782769937233402157694683864975}, + {-0.782769937233402157694683864975, 0.622311196559740320566334048635}, + {0.874493663686836564252757852955, 0.485036938976377340893009204592}, + {-0.485036938976377340893009204592, 0.874493663686836564252757852955}, + {0.27538749102146814484726178307, 0.961333308373792272405466974305}, + {-0.961333308373792272405466974305, 0.27538749102146814484726178307}, + {0.952316525764560939393277294585, 0.305111839750142110894159941381}, + {-0.305111839750142110894159941381, 0.952316525764560939393277294585}, + {0.457642822296505824564150088918, 0.889136124111763237465311249252}, + {-0.889136124111763237465311249252, 0.457642822296505824564150088918}, + {0.763064500535323708696466837864, 0.646322340649599591522189712123}, + {-0.646322340649599591522189712123, 0.763064500535323708696466837864}, + {0.0825491729055596729081045737075, 0.996586992716946951098577756056}, + {-0.996586992716946951098577756056, 0.0825491729055596729081045737075}, + {0.997912922638376609363319857948, 0.0645739795221989820728225595303}, + 
{-0.0645739795221989820728225595303, 0.997912922638376609363319857948}, + {0.659970295822934538421122852014, 0.751291693439630869910672572587}, + {-0.751291693439630869910672572587, 0.659970295822934538421122852014}, + {0.897239932329087164220027261763, 0.441543320450094867180013125108}, + {-0.441543320450094867180013125108, 0.897239932329087164220027261763}, + {0.322226164423369654254969418616, 0.946662716579143359574288751901}, + {-0.946662716579143359574288751901, 0.322226164423369654254969418616}, + {0.96614054718903874530155917455, 0.258016362034008961412467897389}, + {-0.258016362034008961412467897389, 0.96614054718903874530155917455}, + {0.50071941324531987760337869986, 0.865609651747981989622360288195}, + {-0.865609651747981989622360288195, 0.50071941324531987760337869986}, + {0.793858890047679732759888793225, 0.608102016681630441752304250258}, + {-0.608102016681630441752304250258, 0.793858890047679732759888793225}, + {0.131349944809144192348071555898, 0.991336064106736136736230946553}, + {-0.991336064106736136736230946553, 0.131349944809144192348071555898}, + {0.986778342346060433065702000022, 0.162075609135863357712636911856}, + {-0.162075609135863357712636911856, 0.986778342346060433065702000022}, + {0.583152895116010538956174968916, 0.812362419685829117987907466159}, + {-0.812362419685829117987907466159, 0.583152895116010538956174968916}, + {0.849640663212165914153217727289, 0.52736206103244753773395814278}, + {-0.52736206103244753773395814278, 0.849640663212165914153217727289}, + {0.227885385032600529253699050969, 0.973687964025715668725524665206}, + {-0.973687964025715668725524665206, 0.227885385032600529253699050969}, + {0.93619829041644009315348284872, 0.351472276319676313338646878037}, + {-0.351472276319676313338646878037, 0.93619829041644009315348284872}, + {0.413463729704002469489410032111, 0.910520589673432745314585190499}, + {-0.910520589673432745314585190499, 0.413463729704002469489410032111}, + {0.73043182180462162822465188583, 
0.682985617487792739943586184381}, + {-0.682985617487792739943586184381, 0.73043182180462162822465188583}, + {0.0335495328139920750976088470452, 0.999437055970991528042191021086}, + {-0.999437055970991528042191021086, 0.0335495328139920750976088470452}, + {0.999197094711349875773009898694, 0.0400645219664195195719003095292}, + {-0.0400645219664195195719003095292, 0.999197094711349875773009898694}, + {0.67820914624483985555514209409, 0.734868936579745168913291308854}, + {-0.734868936579745168913291308854, 0.67820914624483985555514209409}, + {0.907805715966381931991691089934, 0.419390965637988888392584385656}, + {-0.419390965637988888392584385656, 0.907805715966381931991691089934}, + {0.345361381988741222848204870388, 0.938469773530733797350933400594}, + {-0.938469773530733797350933400594, 0.345361381988741222848204870388}, + {0.972181602222713436844969692174, 0.234228376375873209580547040787}, + {-0.234228376375873209580547040787, 0.972181602222713436844969692174}, + {0.521811730194788547798623312701, 0.853060676758178315637337618682}, + {-0.853060676758178315637337618682, 0.521811730194788547798623312701}, + {0.808543365148773007788918221195, 0.588436595287799790732208293775}, + {-0.588436595287799790732208293775, 0.808543365148773007788918221195}, + {0.155638989553760903605805765437, 0.987814003206415547353458350699}, + {-0.987814003206415547353458350699, 0.155638989553760903605805765437}, + {0.990458677720148616963058429974, 0.137810042191615078621680368087}, + {-0.137810042191615078621680368087, 0.990458677720148616963058429974}, + {0.6029136321516831431566174615, 0.797806462850273567788406126056}, + {-0.797806462850273567788406126056, 0.6029136321516831431566174615}, + {0.862326880183573063476387687842, 0.506352003761084801247704945126}, + {-0.506352003761084801247704945126, 0.862326880183573063476387687842}, + {0.25171224905038469543683277152, 0.967802120104103269504491890984}, + {-0.967802120104103269504491890984, 0.25171224905038469543683277152}, + 
{0.944541886447350487188145962136, 0.328390963253376633179669852325}, + {-0.328390963253376633179669852325, 0.944541886447350487188145962136}, + {0.435684496024810463676857352766, 0.900099450018500446546454440977}, + {-0.900099450018500446546454440977, 0.435684496024810463676857352766}, + {0.746973135512826735826763524528, 0.664854220729729661343299085274}, + {-0.664854220729729661343299085274, 0.746973135512826735826763524528}, + {0.0580668415068081936181165758626, 0.998312697463787257490253068681}, + {-0.998312697463787257490253068681, 0.0580668415068081936181165758626}, + {0.996027645135173611556922423915, 0.0890445401272909053957960168191}, + {-0.0890445401272909053957960168191, 0.996027645135173611556922423915}, + {0.641333903972704399976123568194, 0.767261900275994501718912488286}, + {-0.767261900275994501718912488286, 0.641333903972704399976123568194}, + {0.886133684906519336621499860485, 0.463429706076329883668307729749}, + {-0.463429706076329883668307729749, 0.886133684906519336621499860485}, + {0.29889684986536180222316261279, 0.954285425405084652972220737865}, + {-0.954285425405084652972220737865, 0.29889684986536180222316261279}, + {0.959517525216059263826196001901, 0.281648928283508681591484901219}, + {-0.281648928283508681591484901219, 0.959517525216059263826196001901}, + {0.479325481644417072946140478962, 0.877637215850802232530725177639}, + {-0.877637215850802232530725177639, 0.479325481644417072946140478962}, + {0.77869622403551752576333910838, 0.627401140159011050378978779918}, + {-0.627401140159011050378978779918, 0.77869622403551752576333910838}, + {0.10698177976926523391476564484, 0.994260981230481788450958902104}, + {-0.994260981230481788450958902104, 0.10698177976926523391476564484}, + {0.982503608596561828214532852144, 0.186243547793565555892314478115}, + {-0.186243547793565555892314478115, 0.982503608596561828214532852144}, + {0.563040888581811227453499668627, 0.826429039775953389046492247871}, + {-0.826429039775953389046492247871, 
0.563040888581811227453499668627}, + {0.836442654475350488851859154238, 0.548054455117581773571089343022}, + {-0.548054455117581773571089343022, 0.836442654475350488851859154238}, + {0.20392125138005612328129245725, 0.978987294726337053951681355102}, + {-0.978987294726337053951681355102, 0.20392125138005612328129245725}, + {0.927290763542091722726468105975, 0.374341875629115916623845805589}, + {-0.374341875629115916623845805589, 0.927290763542091722726468105975}, + {0.390993908292825376182832997074, 0.920393265771703550370830271277}, + {-0.920393265771703550370830271277, 0.390993908292825376182832997074}, + {0.713450523279566262147000088589, 0.700705609248358451957017223322}, + {-0.700705609248358451957017223322, 0.713450523279566262147000088589}, + {0.00901201513710663329459471526661, 0.999959390967037453634702615091}, + {-0.999959390967037453634702615091, 0.00901201513710663329459471526661}, + {0.999955861371306098384081906261, 0.00939549408861725145192256292148}, + {-0.00939549408861725145192256292148, 0.999955861371306098384081906261}, + {0.700431952880226416091602459346, 0.713719188045545127607738322695}, + {-0.713719188045545127607738322695, 0.700431952880226416091602459346}, + {0.920243253809045369528973878914, 0.391346845929465558810989023186}, + {-0.391346845929465558810989023186, 0.920243253809045369528973878914}, + {0.373986236556857032375944527303, 0.927434253662241303572955075651}, + {-0.927434253662241303572955075651, 0.373986236556857032375944527303}, + {0.978909019918661305048601661838, 0.20429667330107437184238960981}, + {-0.20429667330107437184238960981, 0.978909019918661305048601661838}, + {0.547733643084120092048294736742, 0.836652769214204949932423005521}, + {-0.836652769214204949932423005521, 0.547733643084120092048294736742}, + {0.826213055533910223360294367012, 0.563357778738627024139873356035}, + {-0.563357778738627024139873356035, 0.826213055533910223360294367012}, + {0.185866748692611660764839598414, 0.982574959853159235478869959479}, + 
{-0.982574959853159235478869959479, 0.185866748692611660764839598414}, + {0.994219881120514958894318624516, 0.107363066203928755881769063762}, + {-0.107363066203928755881769063762, 0.994219881120514958894318624516}, + {0.627102467769021010290941831045, 0.778936772092577500714583038643}, + {-0.778936772092577500714583038643, 0.627102467769021010290941831045}, + {0.877453332298888555307314618403, 0.479662016046274175540276019092}, + {-0.479662016046274175540276019092, 0.877453332298888555307314618403}, + {0.281280937219346105493400500563, 0.959625465667208188413894731639}, + {-0.959625465667208188413894731639, 0.281280937219346105493400500563}, + {0.954170729728887279108562324836, 0.299262791754408896682804197553}, + {-0.299262791754408896682804197553, 0.954170729728887279108562324836}, + {0.463089843994652528014199788231, 0.886311342807372892771411443391}, + {-0.886311342807372892771411443391, 0.463089843994652528014199788231}, + {0.76701589539014147778317465054, 0.64162809805903886495315191496}, + {-0.64162809805903886495315191496, 0.76701589539014147778317465054}, + {0.0886625617708671487360305718539, 0.99606172004561399635491625304}, + {-0.99606172004561399635491625304, 0.0886625617708671487360305718539}, + {0.998290355699304354786249859899, 0.0584496853520734688247628696445}, + {-0.0584496853520734688247628696445, 0.998290355699304354786249859899}, + {0.664567711237437519500304006215, 0.747228048978779813005246523971}, + {-0.747228048978779813005246523971, 0.664567711237437519500304006215}, + {0.899932300922808514620498954173, 0.43602964779448155629992811555}, + {-0.43602964779448155629992811555, 0.899932300922808514620498954173}, + {0.328028711837470676737638086706, 0.944667753345190486236049309809}, + {-0.944667753345190486236049309809, 0.328028711837470676737638086706}, + {0.967705518501305483880514657358, 0.252083377996486501793071965949}, + {-0.252083377996486501793071965949, 0.967705518501305483880514657358}, + {0.506021268318189831880715701118, 
0.862521000329644516035898504924}, + {-0.862521000329644516035898504924, 0.506021268318189831880715701118}, + {0.797575189707696696750360842998, 0.6032195427559784439708323589}, + {-0.6032195427559784439708323589, 0.797575189707696696750360842998}, + {0.137430195921458553343086350651, 0.990511454375460287380406043667}, + {-0.990511454375460287380406043667, 0.137430195921458553343086350651}, + {0.987754243764729533694435303914, 0.156017800025404829167285924996}, + {-0.156017800025404829167285924996, 0.987754243764729533694435303914}, + {0.588126479528059853407739865361, 0.808768968295600854112592514866}, + {-0.808768968295600854112592514866, 0.588126479528059853407739865361}, + {0.852860501741625864902118792088, 0.522138836487980761980054467131}, + {-0.522138836487980761980054467131, 0.852860501741625864902118792088}, + {0.233855532186159842389372442994, 0.972271356189170044004299597873}, + {-0.972271356189170044004299597873, 0.233855532186159842389372442994}, + {0.938337260093066949373508123244, 0.345721255234670121758711047733}, + {-0.345721255234670121758711047733, 0.938337260093066949373508123244}, + {0.419042795675052370807378565587, 0.907966483628573350905810457334}, + {-0.907966483628573350905810457334, 0.419042795675052370807378565587}, + {0.734608792597933657475550717209, 0.678490915073891143194373398728}, + {-0.678490915073891143194373398728, 0.734608792597933657475550717209}, + {0.0396813317430465342661172201133, 0.999212385787475287912684507319}, + {-0.999212385787475287912684507319, 0.0396813317430465342661172201133}, + {0.999424116393725636697809022735, 0.0339328096481966573483290972035}, + {-0.0339328096481966573483290972035, 0.999424116393725636697809022735}, + {0.682705450176424588626389322599, 0.730693689790328892996740250965}, + {-0.730693689790328892996740250965, 0.682705450176424588626389322599}, + {0.910361961368377992087630445894, 0.413812879564568303347726896391}, + {-0.413812879564568303347726896391, 0.910361961368377992087630445894}, + 
{0.351113222935427515825068667255, 0.936333009500304180861007807835}, + {-0.936333009500304180861007807835, 0.351113222935427515825068667255}, + {0.973600499477792369340534150979, 0.228258772923612379690538887189}, + {-0.228258772923612379690538887189, 0.973600499477792369340534150979}, + {0.52703618914775007819883967386, 0.84984284154696332347356246828}, + {-0.84984284154696332347356246828, 0.52703618914775007819883967386}, + {0.812138723620446478790313449281, 0.583464389312794318165344975569}, + {-0.583464389312794318165344975569, 0.812138723620446478790313449281}, + {0.161697172472260397579901791687, 0.986840425000149679135574842803}, + {-0.986840425000149679135574842803, 0.161697172472260397579901791687}, + {0.991285619137828200031492542621, 0.13173010776027119228537287654}, + {-0.13173010776027119228537287654, 0.991285619137828200031492542621}, + {0.607797530901341143128036037524, 0.794092035868785961483240498637}, + {-0.794092035868785961483240498637, 0.607797530901341143128036037524}, + {0.865417564610694411975089224143, 0.501051333561037925967696082807}, + {-0.501051333561037925967696082807, 0.865417564610694411975089224143}, + {0.25764583281059644237487304963, 0.966239424177741890176207562035}, + {-0.966239424177741890176207562035, 0.25764583281059644237487304963}, + {0.946539074783614098329564967571, 0.322589181324761331559614063735}, + {-0.322589181324761331559614063735, 0.946539074783614098329564967571}, + {0.441199200785378664591007691342, 0.897409196089689720210458290239}, + {-0.897409196089689720210458290239, 0.441199200785378664591007691342}, + {0.751038542761547356363394101209, 0.660258364041389045340224583924}, + {-0.660258364041389045340224583924, 0.751038542761547356363394101209}, + {0.0641912799703506370407879444429, 0.997937613067955253676188931422}, + {-0.997937613067955253676188931422, 0.0641912799703506370407879444429}, + {0.996555262223090543294290455378, 0.0829313531510686985370739421342}, + {-0.0829313531510686985370739421342, 
0.996555262223090543294290455378}, + {0.646029661558988332892283779074, 0.763312305931182377705113140109}, + {-0.763312305931182377705113140109, 0.646029661558988332892283779074}, + {0.88896055490979930535644371048, 0.45798376806876212041785834117}, + {-0.45798376806876212041785834117, 0.88896055490979930535644371048}, + {0.304746608509286587285203040665, 0.952433464658864026297635518858}, + {-0.952433464658864026297635518858, 0.304746608509286587285203040665}, + {0.961227627905313464218295393948, 0.275756137468460116313195840121}, + {-0.275756137468460116313195840121, 0.961227627905313464218295393948}, + {0.484701539197948616521927078793, 0.874679608713464618041655285197}, + {-0.874679608713464618041655285197, 0.484701539197948616521927078793}, + {0.782531226323924244958618601231, 0.622611339302437727383221499622}, + {-0.622611339302437727383221499622, 0.782531226323924244958618601231}, + {0.113080436559479621805301974291, 0.993585836688263945859489467693}, + {-0.993585836688263945859489467693, 0.113080436559479621805301974291}, + {0.983627882169697209668868254084, 0.180211513001684475732133705606}, + {-0.180211513001684475732133705606, 0.983627882169697209668868254084}, + {0.568101162754976463098444128264, 0.822958728537733996688530169195}, + {-0.822958728537733996688530169195, 0.568101162754976463098444128264}, + {0.83978970759681625946058147747, 0.542911822503851726473556027486}, + {-0.542911822503851726473556027486, 0.83978970759681625946058147747}, + {0.209924365733555884361294374685, 0.977717628291197460299599697464}, + {-0.977717628291197460299599697464, 0.209924365733555884361294374685}, + {0.929570226123729859146749276988, 0.368645079587775981178054962584}, + {-0.368645079587775981178054962584, 0.929570226123729859146749276988}, + {0.396633974853630832946294049179, 0.917976846108772726040569978068}, + {-0.917976846108772726040569978068, 0.396633974853630832946294049179}, + {0.717736541566450836349133624026, 0.696314768549562201904734592972}, + 
{-0.696314768549562201904734592972, 0.717736541566450836349133624026}, + {0.0151474809652809867116518560692, 0.999885270328754516278024766507}, + {-0.999885270328754516278024766507, 0.0151474809652809867116518560692}, + {0.99976526936858645111527721383, 0.0216657832620400780943015206503}, + {-0.0216657832620400780943015206503, 0.99976526936858645111527721383}, + {0.691620779301016286311210023996, 0.722260823829629416970021793531}, + {-0.722260823829629416970021793531, 0.691620779301016286311210023996}, + {0.915371533381674762352986363112, 0.402610178553003683621369646062}, + {-0.402610178553003683621369646062, 0.915371533381674762352986363112}, + {0.362577031129754756566541118445, 0.931953805988866013088056661218}, + {-0.931953805988866013088056661218, 0.362577031129754756566541118445}, + {0.976328275421757263430322382192, 0.216294009653474339494039213605}, + {-0.216294009653474339494039213605, 0.976328275421757263430322382192}, + {0.537425383258891553595049117575, 0.84331130517089214482240322468}, + {-0.84331130517089214482240322468, 0.537425383258891553595049117575}, + {0.819237576660004518380731042271, 0.573454264077130404508864103263}, + {-0.573454264077130404508864103263, 0.819237576660004518380731042271}, + {0.173795047029843163777229619882, 0.98478184468840335163974941679}, + {-0.98478184468840335163974941679, 0.173795047029843163777229619882}, + {0.992827508214586762491649096773, 0.119555589298094108730019513587}, + {-0.119555589298094108730019513587, 0.992827508214586762491649096773}, + {0.617496495685640911510461137368, 0.786573631528513228516885646968}, + {-0.786573631528513228516885646968, 0.617496495685640911510461137368}, + {0.87150107088261252741290263657, 0.490393600539872021037979266112}, + {-0.490393600539872021037979266112, 0.87150107088261252741290263657}, + {0.269483676640202840513182991344, 0.963004957424663854403945606464}, + {-0.963004957424663854403945606464, 0.269483676640202840513182991344}, + {0.950426467621290904652653352969, 
0.310949400457558644550459803213}, + {-0.310949400457558644550459803213, 0.950426467621290904652653352969}, + {0.452178570604760410311939722305, 0.891927429943622507302336543944}, + {-0.891927429943622507302336543944, 0.452178570604760410311939722305}, + {0.75908437673334649975487309348, 0.650992249569337655401568554225}, + {-0.650992249569337655401568554225, 0.75908437673334649975487309348}, + {0.076432676110549283010620058576, 0.997074744451277306112046971975}, + {-0.997074744451277306112046971975, 0.076432676110549283010620058576}, + {0.997497918719934206777111285191, 0.0706958425184928546114448977278}, + {-0.0706958425184928546114448977278, 0.997497918719934206777111285191}, + {0.655348032899810584162025861588, 0.75532705219285567466158681782}, + {-0.75532705219285567466158681782, 0.655348032899810584162025861588}, + {0.894513783159035624770183403598, 0.447040369249254498917878208886}, + {-0.447040369249254498917878208886, 0.894513783159035624770183403598}, + {0.316411485376301038208168847632, 0.948622038496872987956010092603}, + {-0.948622038496872987956010092603, 0.316411485376301038208168847632}, + {0.964539201231235154843091095245, 0.263939631901332349972477686606}, + {-0.263939631901332349972477686606, 0.964539201231235154843091095245}, + {0.495398706369549024497445088855, 0.868665713452175691777767951862}, + {-0.868665713452175691777767951862, 0.495398706369549024497445088855}, + {0.790112702049143300619959973119, 0.61296159591005516809048003779}, + {-0.61296159591005516809048003779, 0.790112702049143300619959973119}, + {0.125264748445647056129814700398, 0.992123350595503716853329478909}, + {-0.992123350595503716853329478909, 0.125264748445647056129814700398}, + {0.985765289280531309401567341411, 0.168127316191243408871613951305}, + {-0.168127316191243408871613951305, 0.985765289280531309401567341411}, + {0.57815735532705936350339470664, 0.815925286090105505998337775964}, + {-0.815925286090105505998337775964, 0.57815735532705936350339470664}, + 
{0.846388836191954929688563424861, 0.532565430693382579896422157617}, + {-0.532565430693382579896422157617, 0.846388836191954929688563424861}, + {0.221906658123096095502901903274, 0.975067913060746471742845642439}, + {-0.975067913060746471742845642439, 0.221906658123096095502901903274}, + {0.934024073403320387321002726821, 0.357210064672131955010314641186}, + {-0.357210064672131955010314641186, 0.934024073403320387321002726821}, + {0.40786909705721979824133427428, 0.913040415132719163082697377831}, + {-0.913040415132719163082697377831, 0.40786909705721979824133427428}, + {0.72622735066606036724579098518, 0.687454605879221025688252666441}, + {-0.687454605879221025688252666441, 0.72622735066606036724579098518}, + {0.0274164707639894360269305906286, 0.999624097914234566175650797959}, + {-0.999624097914234566175650797959, 0.0274164707639894360269305906286}, + {0.99893245382310669189251939315, 0.0461947258780349007123433580091}, + {-0.0461947258780349007123433580091, 0.99893245382310669189251939315}, + {0.673687308122224326467630817206, 0.739016515969048715106737290625}, + {-0.739016515969048715106737290625, 0.673687308122224326467630817206}, + {0.90521529219227359064348092943, 0.424953261878593946399007563741}, + {-0.424953261878593946399007563741, 0.90521529219227359064348092943}, + {0.339596538381222057267905256595, 0.940571204704615193570305109461}, + {-0.940571204704615193570305109461, 0.339596538381222057267905256595}, + {0.97072610287979010923464784355, 0.240189161262149897257955899477}, + {-0.240189161262149897257955899477, 0.97072610287979010923464784355}, + {0.516567625325114354595257282199, 0.856246394717065206769746055215}, + {-0.856246394717065206769746055215, 0.516567625325114354595257282199}, + {0.804917565476392260492843888642, 0.593386646957578478556172285607}, + {-0.593386646957578478556172285607, 0.804917565476392260492843888642}, + {0.149574946915272233294302850481, 0.988750390773775356123564961308}, + {-0.988750390773775356123564961308, 
0.149574946915272233294302850481}, + {0.989594446093062463098988246202, 0.143884788152760983992450860569}, + {-0.143884788152760983992450860569, 0.989594446093062463098988246202}, + {0.598007034044542695738755355706, 0.801490852869356951160284552316}, + {-0.801490852869356951160284552316, 0.598007034044542695738755355706}, + {0.859203729636801916136334966723, 0.511633610094381241495398171537}, + {-0.511633610094381241495398171537, 0.859203729636801916136334966723}, + {0.245769188466264582038078856385, 0.96932837882764666392887420443}, + {-0.96932837882764666392887420443, 0.245769188466264582038078856385}, + {0.94250913664291924298765934509, 0.334180381447832741681480683837}, + {-0.334180381447832741681480683837, 0.94250913664291924298765934509}, + {0.430153387989216873243947247829, 0.902755815711756226527029411955}, + {-0.902755815711756226527029411955, 0.430153387989216873243947247829}, + {0.742879605147745092352806750569, 0.669425046032436910081742098555}, + {-0.669425046032436910081742098555, 0.742879605147745092352806750569}, + {0.0519402168595025359731920389095, 0.99865019595080828374022985372}, + {-0.99865019595080828374022985372, 0.0519402168595025359731920389095}, + {0.99546252816937441654232543442, 0.0951543746269054857567226690662}, + {-0.0951543746269054857567226690662, 0.99546252816937441654232543442}, + {0.636614000527419232611237021047, 0.771182607643919326712023121217}, + {-0.771182607643919326712023121217, 0.636614000527419232611237021047}, + {0.883273452470847431072797917295, 0.468858196217395328808663634845}, + {-0.468858196217395328808663634845, 0.883273452470847431072797917295}, + {0.29303583792397674967489251685, 0.956101457844403035402081059146}, + {-0.956101457844403035402081059146, 0.29303583792397674967489251685}, + {0.957771297234302321221832698939, 0.287531115175595930466556637839}, + {-0.287531115175595930466556637839, 0.957771297234302321221832698939}, + {0.473931377757417393592476173581, 0.880561780443005703844505660527}, + 
{-0.880561780443005703844505660527, 0.473931377757417393592476173581}, + {0.774831904274396854326312222838, 0.632167319717265918654902634444}, + {-0.632167319717265918654902634444, 0.774831904274396854326312222838}, + {0.100879095175508873283298783008, 0.994898692408714868662400476751}, + {-0.994898692408714868662400476751, 0.100879095175508873283298783008}, + {0.981342344317876036363657021866, 0.192268570621137502296704724358}, + {-0.192268570621137502296704724358, 0.981342344317876036363657021866}, + {0.557959416237422956008629171265, 0.829868236427924843567893731233}, + {-0.829868236427924843567893731233, 0.557959416237422956008629171265}, + {0.833064109760702886653405130346, 0.553176453790838351665115624201}, + {-0.553176453790838351665115624201, 0.833064109760702886653405130346}, + {0.197910459506698693799009447503, 0.980220102843156082528253136843}, + {-0.980220102843156082528253136843, 0.197910459506698693799009447503}, + {0.924976388987313158374092836311, 0.380024577910417327153425048891}, + {-0.380024577910417327153425048891, 0.924976388987313158374092836311}, + {0.385339121032363340901838455466, 0.92277503314838638104333767842}, + {-0.92277503314838638104333767842, 0.385339121032363340901838455466}, + {0.709137643983723919838269011962, 0.705070068777006841997945230105}, + {-0.705070068777006841997945230105, 0.709137643983723919838269011962}, + {0.00287621001165597883764135644924, 0.999995863699429943238783380366}, + {-0.999995863699429943238783380366, 0.00287621001165597883764135644924}, + {0.999998510931137785107125637296, 0.00172572752979512642271253675119}, + {-0.00172572752979512642271253675119, 0.999998510931137785107125637296}, + {0.705885454617058982762500818353, 0.708326001894655887802798588382}, + {-0.708326001894655887802798588382, 0.705885454617058982762500818353}, + {0.923217749456613501202184579597, 0.38427722686657550532629556983}, + {-0.38427722686657550532629556983, 0.923217749456613501202184579597}, + {0.381088498179637580332013158113, 
0.924538564125471418186918981519}, + {-0.924538564125471418186918981519, 0.381088498179637580332013158113}, + {0.980447147206909064287572164176, 0.196782599672414126867892036898}, + {-0.196782599672414126867892036898, 0.980447147206909064287572164176}, + {0.554134515737128907630903995596, 0.832427137032892283663443322439}, + {-0.832427137032892283663443322439, 0.554134515737128907630903995596}, + {0.830509611341179065924222868489, 0.557004295737406063260266364523}, + {-0.557004295737406063260266364523, 0.830509611341179065924222868489}, + {0.193397463353994741863672857107, 0.981120492685908729413313267287}, + {-0.981120492685908729413313267287, 0.193397463353994741863672857107}, + {0.995014093896149698359465674002, 0.099734412055338825253691936723}, + {-0.099734412055338825253691936723, 0.995014093896149698359465674002}, + {0.633058334088172136588923422096, 0.774104092252139053265125312464}, + {-0.774104092252139053265125312464, 0.633058334088172136588923422096}, + {0.881106448780665130193767708988, 0.472917990688792755360481123716}, + {-0.472917990688792755360481123716, 0.881106448780665130193767708988}, + {0.28863282671884382679650116188, 0.95743986304116768248917423989}, + {-0.95743986304116768248917423989, 0.28863282671884382679650116188}, + {0.956437958523136178001777807367, 0.291935663282332724666190415519}, + {-0.291935663282332724666190415519, 0.956437958523136178001777807367}, + {0.469874079078797413089318979473, 0.882733453433057535519878911145}, + {-0.882733453433057535519878911145, 0.469874079078797413089318979473}, + {0.771914512341742353740414728236, 0.6357263449285471823557713833}, + {-0.6357263449285471823557713833, 0.771914512341742353740414728236}, + {0.0962995766952391279991374517522, 0.995352395651066812476415179844}, + {-0.995352395651066812476415179844, 0.0962995766952391279991374517522}, + {0.998709291493549033980059448368, 0.0507912500776795811097130695089}, + {-0.0507912500776795811097130695089, 0.998709291493549033980059448368}, + 
{0.670279275094231907772268641565, 0.742108949803969908387557552487}, + {-0.742108949803969908387557552487, 0.670279275094231907772268641565}, + {0.903250103425898398512572384789, 0.429114495981088750475862525491}, + {-0.429114495981088750475862525491, 0.903250103425898398512572384789}, + {0.335264503226227805754433575203, 0.942124043253578569157014044322}, + {-0.942124043253578569157014044322, 0.335264503226227805754433575203}, + {0.969610491165555865578085104062, 0.244653827727443345940727681409}, + {-0.244653827727443345940727681409, 0.969610491165555865578085104062}, + {0.512621772783462992251202194893, 0.858614534042104193822808611003}, + {-0.858614534042104193822808611003, 0.512621772783462992251202194893}, + {0.802178320760077445683577934687, 0.59708453480436274318776668224}, + {-0.59708453480436274318776668224, 0.802178320760077445683577934687}, + {0.145023206828508222088203183375, 0.989428253832068227424656470248}, + {-0.989428253832068227424656470248, 0.145023206828508222088203183375}, + {0.988921820193613188010317571752, 0.148437305098653993251645033524}, + {-0.148437305098653993251645033524, 0.988921820193613188010317571752}, + {0.594312300105932833638178180991, 0.804234350138562259502350570983}, + {-0.804234350138562259502350570983, 0.594312300105932833638178180991}, + {0.856840131524509218685636824375, 0.515582184534203791237416680815}, + {-0.515582184534203791237416680815, 0.856840131524509218685636824375}, + {0.241305808450644365681370118182, 0.970449126336863088226891704835}, + {-0.970449126336863088226891704835, 0.241305808450644365681370118182}, + {0.940961283064563280831293923256, 0.338514200253830888076578276014}, + {-0.338514200253830888076578276014, 0.940961283064563280831293923256}, + {0.425994417561522398241180553669, 0.904725790616371927654881801573}, + {-0.904725790616371927654881801573, 0.425994417561522398241180553669}, + {0.739791094251449954377619633306, 0.672836634604747296783955334831}, + {-0.672836634604747296783955334831, 
0.739791094251449954377619633306}, + {0.0473439524468644845622833372545, 0.998878646366368694309301190515}, + {-0.998878646366368694309301190515, 0.0473439524468644845622833372545}, + {0.999654978602144694832531968132, 0.0262663997522607599310884296528}, + {-0.0262663997522607599310884296528, 0.999654978602144694832531968132}, + {0.688289664834289327544070147269, 0.725435963598649813377505779499}, + {-0.725435963598649813377505779499, 0.688289664834289327544070147269}, + {0.913509058290461140039440124383, 0.406818387516192370156886681798}, + {-0.406818387516192370156886681798, 0.913509058290461140039440124383}, + {0.358284409268445847018824679253, 0.933612490316598542250403625076}, + {-0.933612490316598542250403625076, 0.358284409268445847018824679253}, + {0.975322568108916931883811685111, 0.220784709926722638728691094911}, + {-0.220784709926722638728691094911, 0.975322568108916931883811685111}, + {0.533538836182603115609879296244, 0.845775567325584010092143216752}, + {-0.845775567325584010092143216752, 0.533538836182603115609879296244}, + {0.816589907663684888561306252086, 0.577218262619794919565663349204}, + {-0.577218262619794919565663349204, 0.816589907663684888561306252086}, + {0.169261313434313803716335655736, 0.985571208880662741336209364817}, + {-0.985571208880662741336209364817, 0.169261313434313803716335655736}, + {0.992266809256206583356174633082, 0.124123242176906595224394891375}, + {-0.124123242176906595224394891375, 0.992266809256206583356174633082}, + {0.613870203325251440062970686995, 0.789406975817552925001052699372}, + {-0.789406975817552925001052699372, 0.613870203325251440062970686995}, + {0.86923508750954836532542913119, 0.494398991344306648620943178685}, + {-0.494398991344306648620943178685, 0.86923508750954836532542913119}, + {0.265049145431935251338018133538, 0.964234904214632204677570825879}, + {-0.964234904214632204677570825879, 0.265049145431935251338018133538}, + {0.948985437465188708117125315766, 0.315319900226744886939655998503}, + 
{-0.315319900226744886939655998503, 0.948985437465188708117125315766}, + {0.448069198385520395255809944501, 0.893998877772314237688533467008}, + {-0.893998877772314237688533467008, 0.448069198385520395255809944501}, + {0.756080520613569118104635435884, 0.654478606486655345442216002994}, + {-0.654478606486655345442216002994, 0.756080520613569118104635435884}, + {0.0718434024606740273544147612483, 0.99741592403714596226649291566}, + {-0.99741592403714596226649291566, 0.0718434024606740273544147612483}, + {0.997162019251903286587435104593, 0.0752855056532587690876212604962}, + {-0.0752855056532587690876212604962, 0.997162019251903286587435104593}, + {0.651865134182214034375135724986, 0.758334917327168955836214081501}, + {-0.758334917327168955836214081501, 0.651865134182214034375135724986}, + {0.892447064473728679523389928363, 0.451152121919230597590910747385}, + {-0.451152121919230597590910747385, 0.892447064473728679523389928363}, + {0.312042646383613508920262802349, 0.950068095895189590471829887974}, + {-0.950068095895189590471829887974, 0.312042646383613508920262802349}, + {0.963314357118388198131242461386, 0.268375575210536898573820963065}, + {-0.268375575210536898573820963065, 0.963314357118388198131242461386}, + {0.491395925196560834358194824745, 0.870936303468982764286465680925}, + {-0.870936303468982764286465680925, 0.491395925196560834358194824745}, + {0.787283531631423616481413318979, 0.616591145591593114971828981652}, + {-0.616591145591593114971828981652, 0.787283531631423616481413318979}, + {0.120697743665676107682749318428, 0.992689304200470745698225982778}, + {-0.992689304200470745698225982778, 0.120697743665676107682749318428}, + {0.984981141604703958059019441862, 0.172661954938238271228811981928}, + {-0.172661954938238271228811981928, 0.984981141604703958059019441862}, + {0.574396405379798746793085229001, 0.818577283759307494648282954586}, + {-0.818577283759307494648282954586, 0.574396405379798746793085229001}, + {0.843929047083555872177385026589, 
0.536454810294997086472790215339}, + {-0.536454810294997086472790215339, 0.843929047083555872177385026589}, + {0.217417117873348164636126966798, 0.976078786192818848022056954505}, + {-0.976078786192818848022056954505, 0.217417117873348164636126966798}, + {0.932370328771828460467929744482, 0.36150459198454526488220039937}, + {-0.36150459198454526488220039937, 0.932370328771828460467929744482}, + {0.403663033629342637986781028303, 0.91490773047403872730853890971}, + {-0.91490773047403872730853890971, 0.403663033629342637986781028303}, + {0.723056045396724411666866672022, 0.690789371093135651236138983222}, + {-0.690789371093135651236138983222, 0.723056045396724411666866672022}, + {0.0228159842064053451637928304763, 0.999739681549498659940411471325}, + {-0.999739681549498659940411471325, 0.0228159842064053451637928304763}, + {0.999902035550953915965521900944, 0.0139971175982403685938981752201}, + {-0.0139971175982403685938981752201, 0.999902035550953915965521900944}, + {0.697140053089530531416073699802, 0.716934966631093129052487711306}, + {-0.716934966631093129052487711306, 0.697140053089530531416073699802}, + {0.918432560156186905686581667396, 0.395577593456646836589385429761}, + {-0.395577593456646836589385429761, 0.918432560156186905686581667396}, + {0.369714292530311239293183689369, 0.929145490167611720622176108009}, + {-0.929145490167611720622176108009, 0.369714292530311239293183689369}, + {0.977958496134064825255904906953, 0.208799377008644898578992865623}, + {-0.208799377008644898578992865623, 0.977958496134064825255904906953}, + {0.543877628945055979770017984265, 0.83916453972573457065209368011}, + {-0.83916453972573457065209368011, 0.543877628945055979770017984265}, + {0.823611775954420255452248511574, 0.567153984830580104592456791579}, + {-0.567153984830580104592456791579, 0.823611775954420255452248511574}, + {0.181343043191790537216334655568, 0.983419900493141541097941171756}, + {-0.983419900493141541097941171756, 0.181343043191790537216334655568}, + 
{0.993715276508913225583796702267, 0.111937255785614572789476994785}, + {-0.111937255785614572789476994785, 0.993715276508913225583796702267}, + {0.623511217954696550513915553893, 0.781814403221538833932413581351}, + {-0.781814403221538833932413581351, 0.623511217954696550513915553893}, + {0.875236671856810866110265578754, 0.483694912353865080767434392328}, + {-0.483694912353865080767434392328, 0.875236671856810866110265578754}, + {0.276861833262245227160747162998, 0.960909738363946774875046230591}, + {-0.960909738363946774875046230591, 0.276861833262245227160747162998}, + {0.95278344083495092320390540408, 0.303650646089731912624642973242}, + {-0.303650646089731912624642973242, 0.95278344083495092320390540408}, + {0.459006201054919571458157179222, 0.888433062978371324014403853653}, + {-0.888433062978371324014403853653, 0.459006201054919571458157179222}, + {0.76405504841759397383071927834, 0.645151054395471157398844752606}, + {-0.645151054395471157398844752606, 0.76405504841759397383071927834}, + {0.0840778204832076936359186447589, 0.996459191388585407977984687022}, + {-0.996459191388585407977984687022, 0.0840778204832076936359186447589}, + {0.998010803750762454100708964688, 0.063043124896828492409461830448}, + {-0.063043124896828492409461830448, 0.998010803750762454100708964688}, + {0.661121985907862974585214033141, 0.750278428151338716567408937408}, + {-0.750278428151338716567408937408, 0.661121985907862974585214033141}, + {0.897916195387928550708522834611, 0.4401664526745163175114328169}, + {-0.4401664526745163175114328169, 0.897916195387928550708522834611}, + {0.32367794715905118252408101398, 0.946167314233007372692441094841}, + {-0.946167314233007372692441094841, 0.32367794715905118252408101398}, + {0.9665352024651197027083071589, 0.256534018008743036620700195272}, + {-0.256534018008743036620700195272, 0.9665352024651197027083071589}, + {0.50204665217958466261194416802, 0.86484053965761287230407106108}, + {-0.86484053965761287230407106108, 
0.50204665217958466261194416802}, + {0.794790772479183171839167698636, 0.606883537412198470484270274028}, + {-0.606883537412198470484270274028, 0.794790772479183171839167698636}, + {0.13287048014987942745612770068, 0.991133409539170173729871748947}, + {-0.991133409539170173729871748947, 0.13287048014987942745612770068}, + {0.987025802126775597677976747946, 0.160561720020667486608090257505}, + {-0.160561720020667486608090257505, 0.987025802126775597677976747946}, + {0.584398356864344603600613936578, 0.8114669189155243600453104591}, + {-0.8114669189155243600453104591, 0.584398356864344603600613936578}, + {0.850448626521511652498475086759, 0.526058108622682762955946600414}, + {-0.526058108622682762955946600414, 0.850448626521511652498475086759}, + {0.229378734958878033323159684187, 0.973337246769414798919228815066}, + {-0.973337246769414798919228815066, 0.229378734958878033323159684187}, + {0.936736340441837622705634203157, 0.350035753167635299565318973691}, + {-0.350035753167635299565318973691, 0.936736340441837622705634203157}, + {0.414859963787718277750116158131, 0.909885273232869162818303720996}, + {-0.909885273232869162818303720996, 0.414859963787718277750116158131}, + {0.731478648821048516026621655328, 0.681864345980146779879760288168}, + {-0.681864345980146779879760288168, 0.731478648821048516026621655328}, + {0.0350826099826446191620732406591, 0.999384415766428557681422262249}, + {-0.999384415766428557681422262249, 0.0350826099826446191620732406591}, + {0.999257377290578063799841856962, 0.0385317262830938700446381517395}, + {-0.0385317262830938700446381517395, 0.999257377290578063799841856962}, + {0.679335622687252560503168297146, 0.733827712578451807345913948666}, + {-0.733827712578451807345913948666, 0.679335622687252560503168297146}, + {0.908447985320707251943872506672, 0.41799791622297355342396940614}, + {-0.41799791622297355342396940614, 0.908447985320707251943872506672}, + {0.346800569692418292344626706836, 0.937938891858640322318763082876}, + 
{-0.937938891858640322318763082876, 0.346800569692418292344626706836}, + {0.972539760092530181090353380569, 0.232736793479595388545178025197}, + {-0.232736793479595388545178025197, 0.972539760092530181090353380569}, + {0.52311969443393113721185727627, 0.852259224236001089458625301631}, + {-0.852259224236001089458625301631, 0.52311969443393113721185727627}, + {0.809445063936509168200927888392, 0.587195613461834908797243315348}, + {-0.587195613461834908797243315348, 0.809445063936509168200927888392}, + {0.157154093545625900274487207753, 0.987574093869342362062013762625}, + {-0.987574093869342362062013762625, 0.157154093545625900274487207753}, + {0.990668910271870095662904986966, 0.136290536064487960432600743843}, + {-0.136290536064487960432600743843, 0.990668910271870095662904986966}, + {0.604136742101177515174015297816, 0.796880666626675893127185190679}, + {-0.796880666626675893127185190679, 0.604136742101177515174015297816}, + {0.863102599555404914610790001461, 0.505028615665194191564069114975}, + {-0.505028615665194191564069114975, 0.863102599555404914610790001461}, + {0.253196542175250560724464321538, 0.967414859835477480842769182345}, + {-0.967414859835477480842769182345, 0.253196542175250560724464321538}, + {0.945044520379187069636373053072, 0.326941668346621416851860431052}, + {-0.326941668346621416851860431052, 0.945044520379187069636373053072}, + {0.437064718142972374881338737396, 0.899430059623650857147936221736}, + {-0.899430059623650857147936221736, 0.437064718142972374881338737396}, + {0.7479921298642226989983328167, 0.663707596507064123692032353574}, + {-0.663707596507064123692032353574, 0.7479921298642226989983328167}, + {0.0595981650859905912054337306927, 0.998222449516330545549180897069}, + {-0.998222449516330545549180897069, 0.0595981650859905912054337306927}, + {0.996163065820794946247929146921, 0.0875165486894595306965882741679}, + {-0.0875165486894595306965882741679, 0.996163065820794946247929146921}, + {0.642510113965105711386627262982, 
0.766277204053824712026710130885}, + {-0.766277204053824712026710130885, 0.642510113965105711386627262982}, + {0.886843534314297299303575528029, 0.462069849313851810368447559085}, + {-0.462069849313851810368447559085, 0.886843534314297299303575528029}, + {0.30036035313341352681604234931, 0.953825800796755052246567174734}, + {-0.953825800796755052246567174734, 0.30036035313341352681604234931}, + {0.959948440172823214133757119271, 0.280176716037866979647219523031}, + {-0.280176716037866979647219523031, 0.959948440172823214133757119271}, + {0.480671195794698635772590478155, 0.876900907476605651069689884025}, + {-0.876900907476605651069689884025, 0.480671195794698635772590478155}, + {0.779657728779656888917770629632, 0.626205897412462131335075810057}, + {-0.626205897412462131335075810057, 0.779657728779656888917770629632}, + {0.108506830545237922813761599627, 0.994095703503956928948070981278}, + {-0.994095703503956928948070981278, 0.108506830545237922813761599627}, + {0.98278814654575197273800313269, 0.184736187600495954663770703519}, + {-0.184736187600495954663770703519, 0.98278814654575197273800313269}, + {0.564307951909398752832203172147, 0.825564373875120494084001165902}, + {-0.825564373875120494084001165902, 0.564307951909398752832203172147}, + {0.837282375035324322176677469542, 0.546770723846116801780681271339}, + {-0.546770723846116801780681271339, 0.837282375035324322176677469542}, + {0.205422758569589608557137694334, 0.978673331741322205523658794846}, + {-0.978673331741322205523658794846, 0.205422758569589608557137694334}, + {0.927863905559833779257417063491, 0.37291898953962082652324738774}, + {-0.37291898953962082652324738774, 0.927863905559833779257417063491}, + {0.392405313302891745053102567908, 0.919792405976293858671510861313}, + {-0.919792405976293858671510861313, 0.392405313302891745053102567908}, + {0.714524552391572864706859036232, 0.699610365867761041513972486428}, + {-0.699610365867761041513972486428, 0.714524552391572864706859036232}, + 
{0.0105459224268683784270228542823, 0.999944390213859057858769574523}, + {-0.999944390213859057858769574523, 0.0105459224268683784270228542823}, + {0.999969097376711579805430574197, 0.00786157055586177329342678632429}, + {-0.00786157055586177329342678632429, 0.999969097376711579805430574197}, + {0.701525959881237448634294651129, 0.712643899582890205657292881369}, + {-0.712643899582890205657292881369, 0.701525959881237448634294651129}, + {0.9208424894060320831101762451, 0.389934750573094734971135721935}, + {-0.389934750573094734971135721935, 0.9208424894060320831101762451}, + {0.37540846231318658610121019592, 0.926859475014227163214286520088}, + {-0.926859475014227163214286520088, 0.37540846231318658610121019592}, + {0.979221255233887699631623036112, 0.202794805895440438670007665678}, + {-0.202794805895440438670007665678, 0.979221255233887699631623036112}, + {0.549016407419809393530840679887, 0.835811572295960591993946309231}, + {-0.835811572295960591993946309231, 0.549016407419809393530840679887}, + {0.82707626312472026786082324179, 0.562089721464152369811984044645}, + {-0.562089721464152369811984044645, 0.82707626312472026786082324179}, + {0.187373780531359113155076556723, 0.982288687896478829308932745334}, + {-0.982288687896478829308932745334, 0.187373780531359113155076556723}, + {0.994383404189101427483876705082, 0.105837826287646666334474332416}, + {-0.105837826287646666334474332416, 0.994383404189101427483876705082}, + {0.628296603527438435321528231725, 0.777973892875515993239332601661}, + {-0.777973892875515993239332601661, 0.628296603527438435321528231725}, + {0.878188091961392247597473215137, 0.478315455674609535385854997003}, + {-0.478315455674609535385854997003, 0.878188091961392247597473215137}, + {0.282752652729325926195258489315, 0.959192857237025742200842159946}, + {-0.959192857237025742200842159946, 0.282752652729325926195258489315}, + {0.954628670293982684036393493443, 0.297798760663543549132015186842}, + {-0.297798760663543549132015186842, 
0.954628670293982684036393493443}, + {0.464448883185830663222759540076, 0.885599929374113359337172823871}, + {-0.885599929374113359337172823871, 0.464448883185830663222759540076}, + {0.767999237748281271542794002016, 0.640450755966498141980025593512}, + {-0.640450755966498141980025593512, 0.767999237748281271542794002016}, + {0.0901903963979476946954960681069, 0.995924541517870798301714785339}, + {-0.995924541517870798301714785339, 0.0901903963979476946954960681069}, + {0.998378841821709994697187084967, 0.0569182589573937400273528908201}, + {-0.0569182589573937400273528908201, 0.998378841821709994697187084967}, + {0.665713162363197552551241642504, 0.74620773612740765035056256238}, + {-0.74620773612740765035056256238, 0.665713162363197552551241642504}, + {0.90060010294934089714047331654, 0.434648656465928318048952405661}, + {-0.434648656465928318048952405661, 0.90060010294934089714047331654}, + {0.329477427512101739282002199616, 0.944163452353461774002596484934}, + {-0.944163452353461774002596484934, 0.329477427512101739282002199616}, + {0.968091070856162971836056385655, 0.250598640316677667705391741038}, + {-0.250598640316677667705391741038, 0.968091070856162971836056385655}, + {0.507343763084487919812204381742, 0.861743758932590697696696224739}, + {-0.861743758932590697696696224739, 0.507343763084487919812204381742}, + {0.798499578148532118682112468377, 0.601995368500968019986885337858}, + {-0.601995368500968019986885337858, 0.798499578148532118682112468377}, + {0.138949459173495493136130107814, 0.990299473793353590700405675307}, + {-0.990299473793353590700405675307, 0.138949459173495493136130107814}, + {0.987992409838101881547345328727, 0.154502421024073938182752385728}, + {-0.154502421024073938182752385728, 0.987992409838101881547345328727}, + {0.589366423141498785298608709127, 0.80786584237322223156496647789}, + {-0.80786584237322223156496647789, 0.589366423141498785298608709127}, + {0.853660448939716376415276499756, 0.520829951055084783284598870523}, + 
{-0.520829951055084783284598870523, 0.853660448939716376415276499756}, + {0.235346702039917837767646346947, 0.971911482512133995115277684818}, + {-0.971911482512133995115277684818, 0.235346702039917837767646346947}, + {0.938866485649468063279243779107, 0.344281457711880178873542490692}, + {-0.344281457711880178873542490692, 0.938866485649468063279243779107}, + {0.420435105246661167122113056394, 0.907322612016381424560051982553}, + {-0.907322612016381424560051982553, 0.420435105246661167122113056394}, + {0.735648719915506510425018404931, 0.677363241464043919215498590347}, + {-0.677363241464043919215498590347, 0.735648719915506510425018404931}, + {0.0412140570577315193401268800244, 0.999150339789184105399044710794}, + {-0.999150339789184105399044710794, 0.0412140570577315193401268800244}, + {0.999474992780647775170166369207, 0.03239967293236408613088528341}, + {-0.03239967293236408613088528341, 0.999474992780647775170166369207}, + {0.683825516582870718274023147387, 0.729645573460272589549902022554}, + {-0.729645573460272589549902022554, 0.683825516582870718274023147387}, + {0.910995671041643140242172194121, 0.412415915482642725375939107835}, + {-0.412415915482642725375939107835, 0.910995671041643140242172194121}, + {0.352549126118100464299942586877, 0.935793307132169904782870162308}, + {-0.935793307132169904782870162308, 0.352549126118100464299942586877}, + {0.973949498424792170503394572734, 0.226765020490585694767915470038}, + {-0.226765020490585694767915470038, 0.973949498424792170503394572734}, + {0.528339211144607689973895503499, 0.8490333785942068001162397195}, + {-0.8490333785942068001162397195, 0.528339211144607689973895503499}, + {0.81303279091262192768141403576, 0.582217898128211786001884320285}, + {-0.582217898128211786001884320285, 0.81303279091262192768141403576}, + {0.163210775886702380566006809204, 0.986591223675976514151386709273}, + {-0.986591223675976514151386709273, 0.163210775886702380566006809204}, + {0.99148652421766148012949315671, 
0.130209340274730633879585184332}, + {-0.130209340274730633879585184332, 0.99148652421766148012949315671}, + {0.609014937247299825706647879997, 0.793158752211477136029316170607}, + {-0.793158752211477136029316170607, 0.609014937247299825706647879997}, + {0.866185149223125838702230794297, 0.499723210652968596878764628855}, + {-0.499723210652968596878764628855, 0.866185149223125838702230794297}, + {0.259127721809726152546460298254, 0.965843063747781505057332651631}, + {-0.965843063747781505057332651631, 0.259127721809726152546460298254}, + {0.947032806547138616970471503009, 0.321136829596746664616802036107}, + {-0.321136829596746664616802036107, 0.947032806547138616970471503009}, + {0.442575289619001166485645626381, 0.896731349412217881855724499474}, + {-0.896731349412217881855724499474, 0.442575289619001166485645626381}, + {0.752050482376696360908852057037, 0.659105508971802200868239651754}, + {-0.659105508971802200868239651754, 0.752050482376696360908852057037}, + {0.0657220209718039904966957465149, 0.997837970794548279052094130748}, + {-0.997837970794548279052094130748, 0.0657220209718039904966957465149}, + {0.996681304780248300545508755022, 0.0814025595515382588418518139406}, + {-0.0814025595515382588418518139406, 0.996681304780248300545508755022}, + {0.647199807427135231918668978324, 0.762320411156804156327382315794}, + {-0.762320411156804156327382315794, 0.647199807427135231918668978324}, + {0.889662047030628899690896105312, 0.456619581351118908330022350128}, + {-0.456619581351118908330022350128, 0.889662047030628899690896105312}, + {0.306207264023724223811484534963, 0.951964868815601383822411207802}, + {-0.951964868815601383822411207802, 0.306207264023724223811484534963}, + {0.961649501425706820789685025375, 0.274281308892329656057285092174}, + {-0.274281308892329656057285092174, 0.961649501425706820789685025375}, + {0.486042710112291387325456071267, 0.873935057053268127269518572575}, + {-0.873935057053268127269518572575, 0.486042710112291387325456071267}, + 
{0.783485379096387823949498852016, 0.621410219373796146591359956801}, + {-0.621410219373796146591359956801, 0.783485379096387823949498852016}, + {0.114604444501737415262887509471, 0.993411204537400061020946395729}, + {-0.993411204537400061020946395729, 0.114604444501737415262887509471}, + {0.983903165774271504240289232257, 0.178702435292209971251509159629}, + {-0.178702435292209971251509159629, 0.983903165774271504240289232257}, + {0.569362896740227220071517422184, 0.822086304359571085065283568838}, + {-0.822086304359571085065283568838, 0.569362896740227220071517422184}, + {0.840621535522285689978616574081, 0.541622962969771526431372876687}, + {-0.541622962969771526431372876687, 0.840621535522285689978616574081}, + {0.211423918216980671891391807549, 0.977394458141532251893579541502}, + {-0.977394458141532251893579541502, 0.211423918216980671891391807549}, + {0.930134626687321386384610377718, 0.367218703550400982216217471432}, + {-0.367218703550400982216217471432, 0.930134626687321386384610377718}, + {0.398041666488001766310844686814, 0.917367337406043925618348566786}, + {-0.917367337406043925618348566786, 0.398041666488001766310844686814}, + {0.71880383017311688664818802863, 0.695212955667870891041104641772}, + {-0.695212955667870891041104641772, 0.71880383017311688664818802863}, + {0.0166812673367803324109281959409, 0.999860857979768535841458287905}, + {-0.999860857979768535841458287905, 0.0166812673367803324109281959409}, + {0.999797327978704686302080517635, 0.0201321376570525942262257501625}, + {-0.0201321376570525942262257501625, 0.999797327978704686302080517635}, + {0.692727899368849819872195894277, 0.721199041483015723663640983432}, + {-0.721199041483015723663640983432, 0.692727899368849819872195894277}, + {0.915988052439551947436768841726, 0.401205543067386705136101454627}, + {-0.401205543067386705136101454627, 0.915988052439551947436768841726}, + {0.364006203213285473019311666576, 0.931396523518446595168995827407}, + {-0.931396523518446595168995827407, 
0.364006203213285473019311666576}, + {0.976658917449606978067322415882, 0.214796086943318864648233557091}, + {-0.214796086943318864648233557091, 0.976658917449606978067322415882}, + {0.538718373784973558748845334776, 0.842485913084885629231735038047}, + {-0.842485913084885629231735038047, 0.538718373784973558748845334776}, + {0.820116280266262820397571431386, 0.572196895170035579880618570314}, + {-0.572196895170035579880618570314, 0.820116280266262820397571431386}, + {0.17530547838914131708598631576, 0.984514087886381838998772764171}, + {-0.984514087886381838998772764171, 0.17530547838914131708598631576}, + {0.99300973601021458314619394514, 0.118032470909169337480726369449}, + {-0.118032470909169337480726369449, 0.99300973601021458314619394514}, + {0.618702357537008640342435228376, 0.785625478697163703856176653062}, + {-0.785625478697163703856176653062, 0.618702357537008640342435228376}, + {0.872252299586219748128712581092, 0.489056158193056034200196791062}, + {-0.489056158193056034200196791062, 0.872252299586219748128712581092}, + {0.270960590103625165170342370402, 0.962590441782326888819909527228}, + {-0.962590441782326888819909527228, 0.270960590103625165170342370402}, + {0.950902339617887060185807968082, 0.309491099240719103047325688749}, + {-0.309491099240719103047325688749, 0.950902339617887060185807968082}, + {0.453546237599970147869044012623, 0.891232747579952522265500647336}, + {-0.891232747579952522265500647336, 0.453546237599970147869044012623}, + {0.760082092846179335765555151738, 0.649827063251887104833315333963}, + {-0.649827063251887104833315333963, 0.760082092846179335765555151738}, + {0.0779620790860474921490919086864, 0.996956325133945275851488077024}, + {-0.996956325133945275851488077024, 0.0779620790860474921490919086864}, + {0.997605191137131641632151968224, 0.0691656172982429845452756467239}, + {-0.0691656172982429845452756467239, 0.997605191137131641632151968224}, + {0.656505918583426550938497712195, 0.754320872616508819596958801412}, + 
{-0.754320872616508819596958801412, 0.656505918583426550938497712195}, + {0.895198481789264199903755070409, 0.445667676864944295456183454007}, + {-0.445667676864944295456183454007, 0.895198481789264199903755070409}, + {0.317866280514233656706579722595, 0.948135553447947976302145889349}, + {-0.948135553447947976302145889349, 0.317866280514233656706579722595}, + {0.964942944569991412073761694046, 0.26245973734002397970144215833}, + {-0.26245973734002397970144215833, 0.964942944569991412073761694046}, + {0.496730639501984705397319430631, 0.867904759625126920319360124267}, + {-0.867904759625126920319360124267, 0.496730639501984705397319430631}, + {0.791052043386467951258111952484, 0.61174885750125740369043114697}, + {-0.61174885750125740369043114697, 0.791052043386467951258111952484}, + {0.126786498627784433024245913657, 0.991930029672308477550757288554}, + {-0.991930029672308477550757288554, 0.126786498627784433024245913657}, + {0.986022033451868562536901663407, 0.16661497395925206621569714116}, + {-0.16661497395925206621569714116, 0.986022033451868562536901663407}, + {0.57940828831935786702445057017, 0.815037444186972215298681021522}, + {-0.815037444186972215298681021522, 0.57940828831935786702445057017}, + {0.847204785193193976589043359127, 0.531266460399820394933101397328}, + {-0.531266460399820394933101397328, 0.847204785193193976589043359127}, + {0.223402131898112366048536614471, 0.974726365429487318081669400271}, + {-0.974726365429487318081669400271, 0.223402131898112366048536614471}, + {0.934570927640435034078336684615, 0.355776869975124587774928386352}, + {-0.355776869975124587774928386352, 0.934570927640435034078336684615}, + {0.409269203085618593629391170907, 0.912413677783089016593010001088}, + {-0.912413677783089016593010001088, 0.409269203085618593629391170907}, + {0.727281037968895760492671342945, 0.686339778689014634593945629604}, + {-0.686339778689014634593945629604, 0.727281037968895760492671342945}, + {0.0289498420671206353749216333426, 
0.999580865485273695192347531702}, + {-0.999580865485273695192347531702, 0.0289498420671206353749216333426}, + {0.999002140325035981049950351007, 0.0446623289361073247394351426465}, + {-0.0446623289361073247394351426465, 0.999002140325035981049950351007}, + {0.674820152189402278075647245714, 0.737982223498013567564157710876}, + {-0.737982223498013567564157710876, 0.674820152189402278075647245714}, + {0.90586609704658094255336209244, 0.423564179578011956728289533203}, + {-0.423564179578011956728289533203, 0.90586609704658094255336209244}, + {0.341038956421299666033064568182, 0.940049163715957369902298523812}, + {-0.940049163715957369902298523812, 0.341038956421299666033064568182}, + {0.971093406187982455257667879778, 0.238699804059873954553694375136}, + {-0.238699804059873954553694375136, 0.971093406187982455257667879778}, + {0.517880482562427690851336592459, 0.855452982799701833194205846667}, + {-0.855452982799701833194205846667, 0.517880482562427690851336592459}, + {0.805826861811239303357012886408, 0.592151221212495526557972880255}, + {-0.592151221212495526557972880255, 0.805826861811239303357012886408}, + {0.151091494441751295063625093462, 0.988519782456253270908064223477}, + {-0.988519782456253270908064223477, 0.151091494441751295063625093462}, + {0.98981399820153526114552278159, 0.142366600592594150231917637939}, + {-0.142366600592594150231917637939, 0.98981399820153526114552278159}, + {0.599235801548174573660787700646, 0.800572578935174750114356356789}, + {-0.800572578935174750114356356789, 0.599235801548174573660787700646}, + {0.859987554562638201183233377378, 0.510315006635483237040773474291}, + {-0.510315006635483237040773474291, 0.859987554562638201183233377378}, + {0.247255829834069296158105544237, 0.968950233300485797016676769999}, + {-0.968950233300485797016676769999, 0.247255829834069296158105544237}, + {0.943020653819184651034390753921, 0.332734197927471109235142421312}, + {-0.332734197927471109235142421312, 0.943020653819184651034390753921}, + 
{0.431537691427335445837343286257, 0.902094906801698903642261484492}, + {-0.902094906801698903642261484492, 0.431537691427335445837343286257}, + {0.743905615870826597557652348769, 0.668284695826446673017073862866}, + {-0.668284695826446673017073862866, 0.743905615870826597557652348769}, + {0.0534720653629467274070563576061, 0.998569345727086110642289895623}, + {-0.998569345727086110642289895623, 0.0534720653629467274070563576061}, + {0.995607321884947049639436045254, 0.0936272428787871952016175214339}, + {-0.0936272428787871952016175214339, 0.995607321884947049639436045254}, + {0.63779623036043353589974458373, 0.770205147047214211397658800706}, + {-0.770205147047214211397658800706, 0.63779623036043353589974458373}, + {0.883991632440144892157718459202, 0.467502720607920918549638145123}, + {-0.467502720607920918549638145123, 0.883991632440144892157718459202}, + {0.294502133845571667514207092609, 0.955650821775613334985166602564}, + {-0.955650821775613334985166602564, 0.294502133845571667514207092609}, + {0.95821123740373226329580802485, 0.286061574688402042720980489321}, + {-0.286061574688402042720980489321, 0.95821123740373226329580802485}, + {0.475281584478260687998130151755, 0.879833743076403052008060967637}, + {-0.879833743076403052008060967637, 0.475281584478260687998130151755}, + {0.775800724790101647876383594848, 0.630977999153023549894214738742}, + {-0.630977999153023549894214738742, 0.775800724790101647876383594848}, + {0.102405131367896723437738160101, 0.994742775329142014939520777261}, + {-0.994742775329142014939520777261, 0.102405131367896723437738160101}, + {0.981636125898989075899692124949, 0.19076298469573210736527357767}, + {-0.19076298469573210736527357767, 0.981636125898989075899692124949}, + {0.559231761203128896475789133547, 0.829011361358604426108342977386}, + {-0.829011361358604426108342977386, 0.559231761203128896475789133547}, + {0.833911691340176730591338127851, 0.551897899113745205035286289785}, + {-0.551897899113745205035286289785, 
0.833911691340176730591338127851}, + {0.1994138648714438022402362094, 0.979915358843320483117622643476}, + {-0.979915358843320483117622643476, 0.1994138648714438022402362094}, + {0.925558250880732624743529868283, 0.378605235339659174087501014583}, + {-0.378605235339659174087501014583, 0.925558250880732624743529868283}, + {0.38675418627952418137994072822, 0.922182844882327601432336905418}, + {-0.922182844882327601432336905418, 0.38675418627952418137994072822}, + {0.710218373164482219550563968369, 0.703981436132797733229438108538}, + {-0.703981436132797733229438108538, 0.710218373164482219550563968369}, + {0.00441018046893763143723177222455, 0.999990275106828918616486134852}, + {-0.999990275106828918616486134852, 0.00441018046893763143723177222455}, + {0.999988510290275689484928989259, 0.00479367160275984138806393275445}, + {-0.00479367160275984138806393275445, 0.999988510290275689484928989259}, + {0.703709019037794814011022026534, 0.710488294431980471621557171602}, + {-0.710488294431980471621557171602, 0.703709019037794814011022026534}, + {0.922034458701062820118465879204, 0.387107810522905992911546491086}, + {-0.387107810522905992911546491086, 0.922034458701062820118465879204}, + {0.378250260364165202808806043322, 0.925703376106213227103580720723}, + {-0.925703376106213227103580720723, 0.378250260364165202808806043322}, + {0.979838812528434743853722466156, 0.199789643032032088187932572509}, + {-0.199789643032032088187932572509, 0.979838812528434743853722466156}, + {0.551578057409840893932084782136, 0.834123280207420103593562998867}, + {-0.834123280207420103593562998867, 0.551578057409840893932084782136}, + {0.828796837708690614654472028633, 0.559549641947945874953518341499}, + {-0.559549641947945874953518341499, 0.828796837708690614654472028633}, + {0.190386517937884469642284557267, 0.981709210401678800117508671974}, + {-0.981709210401678800117508671974, 0.190386517937884469642284557267}, + {0.994703430306383862280483754148, 0.102786602904819029946636987916}, + 
{-0.102786602904819029946636987916, 0.994703430306383862280483754148}, + {0.630680436910037944819862332224, 0.776042644768290879042638152896}, + {-0.776042644768290879042638152896, 0.630680436910037944819862332224}, + {0.879651410178071579970549009886, 0.47561896153510330176317211226}, + {-0.47561896153510330176317211226, 0.879651410178071579970549009886}, + {0.285694084254848323922715280787, 0.958320870179598882643290380656}, + {-0.958320870179598882643290380656, 0.285694084254848323922715280787}, + {0.955537811351456878306009912194, 0.294868599680718268540857707194}, + {-0.294868599680718268540857707194, 0.955537811351456878306009912194}, + {0.467163679693549771521077218495, 0.88417085247998439268002357494}, + {-0.88417085247998439268002357494, 0.467163679693549771521077218495}, + {0.769960498625737121969336840266, 0.638091553427880930904336764797}, + {-0.638091553427880930904336764797, 0.769960498625737121969336840266}, + {0.0932454253773213892486637632828, 0.995643154270746899570099230914}, + {-0.995643154270746899570099230914, 0.0932454253773213892486637632828}, + {0.998548766018269917665861612477, 0.0538550079694594396872986408198}, + {-0.0538550079694594396872986408198, 0.998548766018269917665861612477}, + {0.667999362460917511796765211329, 0.744161845133038180044593445928}, + {-0.744161845133038180044593445928, 0.667999362460917511796765211329}, + {0.901929347838879458265637367731, 0.431883608750012248922445223798}, + {-0.431883608750012248922445223798, 0.901929347838879458265637367731}, + {0.3323725295775806221065806767, 0.943148186438483415727773717663}, + {-0.943148186438483415727773717663, 0.3323725295775806221065806767}, + {0.968855340628585581796983206004, 0.247627399403756276452526208232}, + {-0.247627399403756276452526208232, 0.968855340628585581796983206004}, + {0.509985168021289569928455875925, 0.860183194673260986817808770866}, + {-0.860183194673260986817808770866, 0.509985168021289569928455875925}, + {0.800342716019530664439685097022, 
0.599542773215123392382963629643}, + {-0.599542773215123392382963629643, 0.800342716019530664439685097022}, + {0.141987001218867286977953767746, 0.98986852232247157790112623843}, + {-0.98986852232247157790112623843, 0.141987001218867286977953767746}, + {0.988461766905159300300454106036, 0.151470575910737814684381419283}, + {-0.151470575910737814684381419283, 0.988461766905159300300454106036}, + {0.591842146945560143578290990263, 0.806053889698989056711297962465}, + {-0.806053889698989056711297962465, 0.591842146945560143578290990263}, + {0.855254315221780969658027515834, 0.518208506582555461505990024307}, + {-0.518208506582555461505990024307, 0.855254315221780969658027515834}, + {0.238327376859299805422764961804, 0.971184875005457026730937286629}, + {-0.971184875005457026730937286629, 0.238327376859299805422764961804}, + {0.939918307791555052155274552206, 0.341399435673610418540135924559}, + {-0.341399435673610418540135924559, 0.939918307791555052155274552206}, + {0.423216753142722668101782801386, 0.90602846525886360229407046063}, + {-0.90602846525886360229407046063, 0.423216753142722668101782801386}, + {0.737723378950179697888245300419, 0.675103115198211534142558321037}, + {-0.675103115198211534142558321037, 0.737723378950179697888245300419}, + {0.0442792131387068493841852045989, 0.999019194652343456297671764332}, + {-0.999019194652343456297671764332, 0.0442792131387068493841852045989}, + {0.999569689856698584407013186137, 0.0293331743898168351147859311823}, + {-0.0293331743898168351147859311823, 0.999569689856698584407013186137}, + {0.686060819441438707322333812044, 0.727544192490972796427683988441}, + {-0.727544192490972796427683988441, 0.686060819441438707322333812044}, + {0.912256657919599756034756410372, 0.409619079244805672779250471649}, + {-0.409619079244805672779250471649, 0.912256657919599756034756410372}, + {0.355418440360133647271112522503, 0.934707297634917444462132607441}, + {-0.934707297634917444462132607441, 0.355418440360133647271112522503}, + 
{0.974640620111207556242050031869, 0.223775918340738150247304361073}, + {-0.223775918340738150247304361073, 0.974640620111207556242050031869}, + {0.530941522375513619280695820635, 0.84740846102547973117680157884}, + {-0.84740846102547973117680157884, 0.530941522375513619280695820635}, + {0.814815183963569444891561488475, 0.57972080865052155740357875402}, + {-0.57972080865052155740357875402, 0.814815183963569444891561488475}, + {0.166236827002671422315316362983, 0.986085856986136821689115095069}, + {-0.986085856986136821689115095069, 0.166236827002671422315316362983}, + {0.991881334719373009001230911963, 0.127166889697417156979142305318}, + {-0.127166889697417156979142305318, 0.991881334719373009001230911963}, + {0.611445447864987001729275561956, 0.79128658795987782959002743155}, + {-0.79128658795987782959002743155, 0.611445447864987001729275561956}, + {0.867714201994605138779093067569, 0.497063440273840251304449111558}, + {-0.497063440273840251304449111558, 0.867714201994605138779093067569}, + {0.262089667064712039667995213676, 0.965043525659805889738152018253}, + {-0.965043525659805889738152018253, 0.262089667064712039667995213676}, + {0.948013583538612203405193668004, 0.318229862562077525645065634308}, + {-0.318229862562077525645065634308, 0.948013583538612203405193668004}, + {0.445324339783404188342075258333, 0.895369327370820311529087121016}, + {-0.895369327370820311529087121016, 0.445324339783404188342075258333}, + {0.754069050287676123645042025601, 0.656795148732268185831628670712}, + {-0.656795148732268185831628670712, 0.754069050287676123645042025601}, + {0.0687830354223016304793247854832, 0.997631642460329315369449432183}, + {-0.997631642460329315369449432183, 0.0687830354223016304793247854832}, + {0.996926353741335091740438656416, 0.0783444013060697053152381386099}, + {-0.0783444013060697053152381386099, 0.996926353741335091740438656416}, + {0.649535527642554733063207095256, 0.760331242505599025349738440127}, + {-0.760331242505599025349738440127, 
0.649535527642554733063207095256}, + {0.891058749244331593786228040699, 0.453887987718476049181504095031}, + {-0.453887987718476049181504095031, 0.891058749244331593786228040699}, + {0.309126410011419439083368843058, 0.95102095804112107835237566178}, + {-0.95102095804112107835237566178, 0.309126410011419439083368843058}, + {0.962486458916603448976445633889, 0.271329718980758416435605795414}, + {-0.271329718980758416435605795414, 0.962486458916603448976445633889}, + {0.488721617671423136375352669347, 0.872439786128892280636648592917}, + {-0.872439786128892280636648592917, 0.488721617671423136375352669347}, + {0.785388151550103552622772440373, 0.619003595631488656714225271571}, + {-0.619003595631488656714225271571, 0.785388151550103552622772440373}, + {0.117651647774764861464902310217, 0.993054927874527315623254253296}, + {-0.993054927874527315623254253296, 0.117651647774764861464902310217}, + {0.984446786683527919059599753382, 0.175683021912979492329398567563}, + {-0.175683021912979492329398567563, 0.984446786683527919059599753382}, + {0.571882342447216585057390147995, 0.820335654715241835432948391826}, + {-0.820335654715241835432948391826, 0.571882342447216585057390147995}, + {0.842279255229485990241755644092, 0.539041423463969437968046349852}, + {-0.539041423463969437968046349852, 0.842279255229485990241755644092}, + {0.214421527153702190515716097252, 0.976741218897346552552107823431}, + {-0.976741218897346552552107823431, 0.214421527153702190515716097252}, + {0.931256860401693420392632560834, 0.364363362530840673159104881051}, + {-0.364363362530840673159104881051, 0.931256860401693420392632560834}, + {0.400854236555041654188613620136, 0.916141845478021354942654852493}, + {-0.916141845478021354942654852493, 0.400854236555041654188613620136}, + {0.720933330634457525398772759218, 0.693004424791290873919535897585}, + {-0.693004424791290873919535897585, 0.720933330634457525398772759218}, + {0.0197487187128237290378862667239, 0.999804975037232868118053374928}, + 
{-0.999804975037232868118053374928, 0.0197487187128237290378862667239}, + {0.999854387269971889296016342996, 0.0170647079374115599736949633325}, + {-0.0170647079374115599736949633325, 0.999854387269971889296016342996}, + {0.694937246736205938013597460667, 0.71907038813922918851773147253}, + {-0.71907038813922918851773147253, 0.694937246736205938013597460667}, + {0.917214622884544250602800730121, 0.398393443177423978163176343514}, + {-0.398393443177423978163176343514, 0.917214622884544250602800730121}, + {0.366861974394109113806194955032, 0.930275384896137147450190241216}, + {-0.930275384896137147450190241216, 0.366861974394109113806194955032}, + {0.977313306214358745727110999724, 0.211798728741130842800899358735}, + {-0.211798728741130842800899358735, 0.977313306214358745727110999724}, + {0.541300548828474115126141441579, 0.840829183507561639387972718396}, + {-0.840829183507561639387972718396, 0.541300548828474115126141441579}, + {0.821867895977163254173092354904, 0.569678121014025595769680876401}, + {-0.569678121014025595769680876401, 0.821867895977163254173092354904}, + {0.178325100022344001926555279169, 0.983971624947600265187475088169}, + {-0.983971624947600265187475088169, 0.178325100022344001926555279169}, + {0.993367181234679597068293332995, 0.114985404490601447746733754229}, + {-0.114985404490601447746733754229, 0.993367181234679597068293332995}, + {0.621109710806425741047576138953, 0.783723629311990466206339078781}, + {-0.783723629311990466206339078781, 0.621109710806425741047576138953}, + {0.873748597748798871975850488525, 0.486377824260119440147320801771}, + {-0.486377824260119440147320801771, 0.873748597748798871975850488525}, + {0.273912500767323263684005496543, 0.961754616273502005441287110443}, + {-0.961754616273502005441287110443, 0.273912500767323263684005496543}, + {0.9518473698013957262631379308, 0.30657231545292068508601346366}, + {-0.30657231545292068508601346366, 0.9518473698013957262631379308}, + {0.456278366660324619097366394271, 
0.889837093021967895367652090499}, + {-0.889837093021967895367652090499, 0.456278366660324619097366394271}, + {0.762072157088574564376415310107, 0.647492106044828097388688092906}, + {-0.647492106044828097388688092906, 0.762072157088574564376415310107}, + {0.0810203310817338706595336361715, 0.996712448979848009145143805654}, + {-0.996712448979848009145143805654, 0.0810203310817338706595336361715}, + {0.997812693340489276216942471365, 0.0661046821987580773827275493204}, + {-0.0661046821987580773827275493204, 0.997812693340489276216942471365}, + {0.658817052764149480026389937848, 0.752303190865996507241675317346}, + {-0.752303190865996507241675317346, 0.658817052764149480026389937848}, + {0.896561557978014955772039229487, 0.442919149231588926696190355869}, + {-0.442919149231588926696190355869, 0.896561557978014955772039229487}, + {0.320773623458397383334528285559, 0.947155891336463273688650588156}, + {-0.947155891336463273688650588156, 0.320773623458397383334528285559}, + {0.965743618490924826680554815539, 0.259498098921851660936255257184}, + {-0.259498098921851660936255257184, 0.965743618490924826680554815539}, + {0.499390996069908166266060334237, 0.866376726975225830429394591192}, + {-0.866376726975225830429394591192, 0.499390996069908166266060334237}, + {0.792925139589524263783459900878, 0.609319065028276818374308732018}, + {-0.609319065028276818374308732018, 0.792925139589524263783459900878}, + {0.129829100389300899909272857258, 0.9915363859647838840061240262}, + {-0.9915363859647838840061240262, 0.129829100389300899909272857258}, + {0.986528560580586688644189052866, 0.163589116871495049432283508395}, + {-0.163589116871495049432283508395, 0.986528560580586688644189052866}, + {0.581906061152583808926408437401, 0.813256008888889381225340002857}, + {-0.813256008888889381225340002857, 0.581906061152583808926408437401}, + {0.848830700616267530556058318325, 0.528664772508341429535505540116}, + {-0.528664772508341429535505540116, 0.848830700616267530556058318325}, + 
{0.226391498869999208132242074498, 0.974036390100182614126822500111}, + {-0.974036390100182614126822500111, 0.226391498869999208132242074498}, + {0.935658037426032040961842994875, 0.352907972423500249092853664479}, + {-0.352907972423500249092853664479, 0.935658037426032040961842994875}, + {0.412066522700191562034177650276, 0.911153763571095898221585684951}, + {-0.911153763571095898221585684951, 0.412066522700191562034177650276}, + {0.729383276011561054730236719479, 0.684105281864307079864317984175}, + {-0.684105281864307079864317984175, 0.729383276011561054730236719479}, + {0.0320163767000480603552858838157, 0.999487344403620081578765166341}, + {-0.999487344403620081578765166341, 0.0320163767000480603552858838157}, + {0.999134460924839151729770492238, 0.041597223374054900779661636534}, + {-0.041597223374054900779661636534, 0.999134460924839151729770492238}, + {0.677081073910793640457939090993, 0.735908431363445192374683756498}, + {-0.735908431363445192374683756498, 0.677081073910793640457939090993}, + {0.907161310457516245620013251028, 0.420783028185550522781710469644}, + {-0.420783028185550522781710469644, 0.907161310457516245620013251028}, + {0.343921381616371701905876534511, 0.93899844689279754472721606362}, + {-0.93899844689279754472721606362, 0.343921381616371701905876534511}, + {0.971821156715677703097355788486, 0.235719408110155848046218807212}, + {-0.235719408110155848046218807212, 0.971821156715677703097355788486}, + {0.520502538082239785133253917593, 0.853860121946180661289815816417}, + {-0.853860121946180661289815816417, 0.520502538082239785133253917593}, + {0.807639763780396480186141161539, 0.589676192465615311988358371309}, + {-0.589676192465615311988358371309, 0.807639763780396480186141161539}, + {0.154123519328319358301371266862, 0.988051588121720114621382435871}, + {-0.988051588121720114621382435871, 0.154123519328319358301371266862}, + {0.990246114523483988989482895704, 0.139329224038400978358254178602}, + {-0.139329224038400978358254178602, 
0.990246114523483988989482895704}, + {0.601689103488173060441113193519, 0.798730381758199214914384356234}, + {-0.798730381758199214914384356234, 0.601689103488173060441113193519}, + {0.861549131673294721345257585199, 0.507674200361798888536668528104}, + {-0.507674200361798888536668528104, 0.861549131673294721345257585199}, + {0.250227363622282372723759635846, 0.968187103040854424307326553389}, + {-0.968187103040854424307326553389, 0.250227363622282372723759635846}, + {0.944037029917215830820964583836, 0.329839485424473943808720832749}, + {-0.329839485424473943808720832749, 0.944037029917215830820964583836}, + {0.434303248698943988870269095059, 0.900766722392397856111756482278}, + {-0.900766722392397856111756482278, 0.434303248698943988870269095059}, + {0.745952383461488288673990609823, 0.665999280486191502781423423585}, + {-0.665999280486191502781423423585, 0.745952383461488288673990609823}, + {0.056535381290738699611164719272, 0.99840059628503363686746752137}, + {-0.99840059628503363686746752137, 0.056535381290738699611164719272}, + {0.995889880700290719417466789309, 0.0905723220347179891476230295666}, + {-0.0905723220347179891476230295666, 0.995889880700290719417466789309}, + {0.640156184859676513632109617902, 0.76824479105679821522301153891}, + {-0.76824479105679821522301153891, 0.640156184859676513632109617902}, + {0.885421750340583679772521463747, 0.464788472343943992459713854259}, + {-0.464788472343943992459713854259, 0.885421750340583679772521463747}, + {0.29743264326415003173664786118, 0.954742804487627938492266821413}, + {-0.954742804487627938492266821413, 0.29743264326415003173664786118}, + {0.959084352421872732286090013076, 0.283120477782015877998844644026}, + {-0.283120477782015877998844644026, 0.959084352421872732286090013076}, + {0.477978639594976162374706518676, 0.878371459059853476603052513383}, + {-0.878371459059853476603052513383, 0.477978639594976162374706518676}, + {0.777732886943944046187482399546, 0.628594906570072664564463593706}, + 
{-0.628594906570072664564463593706, 0.777732886943944046187482399546}, + {0.105456477254830705003740831671, 0.994423919364875952275895087951}, + {-0.994423919364875952275895087951, 0.105456477254830705003740831671}, + {0.982216758721474514359783825057, 0.187750469737576780593357739235}, + {-0.187750469737576780593357739235, 0.982216758721474514359783825057}, + {0.561772500364625337887503064849, 0.827291761009425807849027023622}, + {-0.827291761009425807849027023622, 0.561772500364625337887503064849}, + {0.835600965685013408190684458532, 0.549336896763974014135101242573}, + {-0.549336896763974014135101242573, 0.835600965685013408190684458532}, + {0.202419264344120136689397781993, 0.979298954059681037165319139604}, + {-0.979298954059681037165319139604, 0.202419264344120136689397781993}, + {0.926715439519610328922283315478, 0.375763880856017695730741934312}, + {-0.375763880856017695730741934312, 0.926715439519610328922283315478}, + {0.389581583236324247021542532821, 0.920991959792852310506816593261}, + {-0.920991959792852310506816593261, 0.389581583236324247021542532821}, + {0.712374815349561707478187599918, 0.701799203800971715949685858504}, + {-0.701799203800971715949685858504, 0.712374815349561707478187599918}, + {0.00747808664120274378672714732375, 0.999972038719176725685144901945}, + {-0.999972038719176725685144901945, 0.00747808664120274378672714732375}, + {0.999940272373166960662160818174, 0.010929395512867571063631011441}, + {-0.010929395512867571063631011441, 0.999940272373166960662160818174}, + {0.699336297695171249522161360801, 0.714792797058008244626137184241}, + {-0.714792797058008244626137184241, 0.699336297695171249522161360801}, + {0.919641852790790470528747846402, 0.392758020408905339504457288058}, + {-0.392758020408905339504457288058, 0.919641852790790470528747846402}, + {0.372563130774787198973285740067, 0.92800684996786997338347191544}, + {-0.92800684996786997338347191544, 0.372563130774787198973285740067}, + {0.978594481135952265482558232179, 
0.205798059976901759338119290987}, + {-0.205798059976901759338119290987, 0.978594481135952265482558232179}, + {0.546449589878259645026048474392, 0.837491997407665889063821396121}, + {-0.837491997407665889063821396121, 0.546449589878259645026048474392}, + {0.825347903783971381663775446214, 0.564624510377830013396760477917}, + {-0.564624510377830013396760477917, 0.825347903783971381663775446214}, + {0.184359279491450506194283320838, 0.982858919716046108838725103851}, + {-0.982858919716046108838725103851, 0.184359279491450506194283320838}, + {0.994054018556510210480325895332, 0.108888053484545208049460995881}, + {-0.108888053484545208049460995881, 0.994054018556510210480325895332}, + {0.625906856377921205769609969138, 0.779897818396172004540289890429}, + {-0.779897818396172004540289890429, 0.625906856377921205769609969138}, + {0.876716507903935404755202398519, 0.481007447726881587346525748217}, + {-0.481007447726881587346525748217, 0.876716507903935404755202398519}, + {0.27980855982815033833333018265, 0.960055816005973894355918218935}, + {-0.960055816005973894355918218935, 0.27980855982815033833333018265}, + {0.953710543907895669235585955903, 0.300726118651017559901816866841}, + {-0.300726118651017559901816866841, 0.953710543907895669235585955903}, + {0.461729715108338767404205782441, 0.887020670664428356744224402064}, + {-0.887020670664428356744224402064, 0.461729715108338767404205782441}, + {0.766030748169509001499477562902, 0.642803930338685991507929884392}, + {-0.642803930338685991507929884392, 0.766030748169509001499477562902}, + {0.087134518512214306773522309868, 0.996196554743914219898215378635}, + {-0.996196554743914219898215378635, 0.087134518512214306773522309868}, + {0.99819952050326066217422749105, 0.059980974208997547836297314916}, + {-0.059980974208997547836297314916, 0.99819952050326066217422749105}, + {0.663420696319658276252084760927, 0.748246603530373421975241399196}, + {-0.748246603530373421975241399196, 0.663420696319658276252084760927}, + 
{0.899262381268641997067447846348, 0.437409613103154792934645911373}, + {-0.437409613103154792934645911373, 0.899262381268641997067447846348}, + {0.326579224279594404478643809853, 0.94516983144244415271373327414}, + {-0.94516983144244415271373327414, 0.326579224279594404478643809853}, + {0.967317689041886308309869946243, 0.253567522499756559106742770382}, + {-0.253567522499756559106742770382, 0.967317689041886308309869946243}, + {0.504697582834967573006679231185, 0.863296212131468232442443877517}, + {-0.863296212131468232442443877517, 0.504697582834967573006679231185}, + {0.796648924495397259271101120248, 0.604442297577135856379015876882}, + {-0.604442297577135856379015876882, 0.796648924495397259271101120248}, + {0.135910609282895361582532700595, 0.990721104188435175430527124263}, + {-0.990721104188435175430527124263, 0.135910609282895361582532700595}, + {0.987513753410208416916304940969, 0.157532811901781533148181324577}, + {-0.157532811901781533148181324577, 0.987513753410208416916304940969}, + {0.586885151996203946467289824795, 0.809670191106473091657846907765}, + {-0.809670191106473091657846907765, 0.586885151996203946467289824795}, + {0.852058547680391686895973180071, 0.523446493277757829432061953412}, + {-0.523446493277757829432061953412, 0.852058547680391686895973180071}, + {0.23236381204774503306254018753, 0.972628942017787267992900979152}, + {-0.972628942017787267992900979152, 0.23236381204774503306254018753}, + {0.937805826538453124463501353603, 0.347160239241951162902921623754}, + {-0.347160239241951162902921623754, 0.937805826538453124463501353603}, + {0.41764950005526740861583334663, 0.908608218707923187551500632253}, + {-0.908608218707923187551500632253, 0.41764950005526740861583334663}, + {0.733567136674911357552275603666, 0.679616992129075558892736808048}, + {-0.679616992129075558892736808048, 0.733567136674911357552275603666}, + {0.0381485130543548905945883120694, 0.999272080542502605382537694823}, + {-0.999272080542502605382537694823, 
0.0381485130543548905945883120694}, + {0.999370888265317169896206905833, 0.035465866516850352851975003432}, + {-0.035465866516850352851975003432, 0.999370888265317169896206905833}, + {0.681583777298107373887603444018, 0.73174008672755108673158019883}, + {-0.73174008672755108673158019883, 0.681583777298107373887603444018}, + {0.909726109525480164386124215525, 0.415208869904815536244768736651}, + {-0.415208869904815536244768736651, 0.909726109525480164386124215525}, + {0.349676493549424760587385208055, 0.936870508586420958074825193762}, + {-0.936870508586420958074825193762, 0.349676493549424760587385208055}, + {0.973249209554771232966174920875, 0.229751988241697457926449033039}, + {-0.229751988241697457926449033039, 0.973249209554771232966174920875}, + {0.52573192698382964493930558092, 0.850650304737422091605481000443}, + {-0.850650304737422091605481000443, 0.52573192698382964493930558092}, + {0.811242745287404809495512836293, 0.584709507549308504792406893102}, + {-0.584709507549308504792406893102, 0.811242745287404809495512836293}, + {0.160183188568752243874371288257, 0.987087304193477899083575266559}, + {-0.987087304193477899083575266559, 0.160183188568752243874371288257}, + {0.991082381467178641365478597436, 0.133250565272143545181293688984}, + {-0.133250565272143545181293688984, 0.991082381467178641365478597436}, + {0.606578694349081293246683799225, 0.795023450950828158845240523078}, + {-0.795023450950828158845240523078, 0.606578694349081293246683799225}, + {0.864647943587137479681814511423, 0.502378277446919874194009025814}, + {-0.502378277446919874194009025814, 0.864647943587137479681814511423}, + {0.256163337545934455263818563253, 0.966633510953002095256181291916}, + {-0.966633510953002095256181291916, 0.256163337545934455263818563253}, + {0.946043115722214555951552483748, 0.324040773969271445054118885309}, + {-0.324040773969271445054118885309, 0.946043115722214555951552483748}, + {0.439822073767418553202901421173, 0.898084931076636783231492699997}, + 
{-0.898084931076636783231492699997, 0.439822073767418553202901421173}, + {0.750024835880159779222253746411, 0.661409665459266937759252869}, + {-0.661409665459266937759252869, 0.750024835880159779222253746411}, + {0.0626603879206148739466897268358, 0.998034907097761769811938847852}, + {-0.998034907097761769811938847852, 0.0626603879206148739466897268358}, + {0.996426874675137241865741088986, 0.0844599516051143250905397508177}, + {-0.0844599516051143250905397508177, 0.996426874675137241865741088986}, + {0.644857995520643711984121182468, 0.76430240455797171517104970917}, + {-0.76430240455797171517104970917, 0.644857995520643711984121182468}, + {0.888256970978913873437932124943, 0.459346877106359630094090107377}, + {-0.459346877106359630094090107377, 0.888256970978913873437932124943}, + {0.303285235896641747199709016058, 0.952899819334182884844608452113}, + {-0.952899819334182884844608452113, 0.303285235896641747199709016058}, + {0.96080349252346075594743979309, 0.277230317163762118859438032814}, + {-0.277230317163762118859438032814, 0.96080349252346075594743979309}, + {0.48335922773406386765415732043, 0.875422102168050941806143327995}, + {-0.875422102168050941806143327995, 0.48335922773406386765415732043}, + {0.781575232179895551531956243707, 0.623810994166456134202292105329}, + {-0.623810994166456134202292105329, 0.781575232179895551531956243707}, + {0.111556162528031480030143995918, 0.993758130835677433445596307138}, + {-0.993758130835677433445596307138, 0.111556162528031480030143995918}, + {0.98335028399370150165026416289, 0.181720166656061110632691679712}, + {-0.181720166656061110632691679712, 0.98335028399370150165026416289}, + {0.566838091972813318264456938778, 0.823829216214513992966317346145}, + {-0.823829216214513992966317346145, 0.566838091972813318264456938778}, + {0.838955903565044458503052737797, 0.54419940451397030845015478917}, + {-0.54419940451397030845015478917, 0.838955903565044458503052737797}, + {0.208424319277820624396468929262, 
0.978038497776839599318066120759}, + {-0.978038497776839599318066120759, 0.208424319277820624396468929262}, + {0.929003638191603364759885153035, 0.370070588167669078938359916719}, + {-0.370070588167669078938359916719, 0.929003638191603364759885153035}, + {0.395225349901203726332710175484, 0.918584194723309543917366681853}, + {-0.918584194723309543917366681853, 0.395225349901203726332710175484}, + {0.716667564056371886316298969177, 0.697414942935341786167668942653}, + {-0.697414942935341786167668942653, 0.716667564056371886316298969177}, + {0.0136136589502957403713390149846, 0.999907329851114301533243633457}, + {-0.999907329851114301533243633457, 0.0136136589502957403713390149846}, + {0.99973085821421603380798615035, 0.0231993778853467197409443656397}, + {-0.0231993778853467197409443656397, 0.99973085821421603380798615035}, + {0.69051203178268116822380306985, 0.723320906626756965351887629367}, + {-0.723320906626756965351887629367, 0.69051203178268116822380306985}, + {0.914752860366158215832399491774, 0.404013866657979892060836846213}, + {-0.404013866657979892060836846213, 0.914752860366158215832399491774}, + {0.361147005867446246885066329924, 0.932508895481956590067795787036}, + {-0.932508895481956590067795787036, 0.361147005867446246885066329924}, + {0.975995335999165991580639456515, 0.217791423402931949571126324372}, + {-0.217791423402931949571126324372, 0.975995335999165991580639456515}, + {0.536131128118969457752029939002, 0.844134712863936931270814056916}, + {-0.844134712863936931270814056916, 0.536131128118969457752029939002}, + {0.818356945308593153320941837592, 0.574710283590948334264680852357}, + {-0.574710283590948334264680852357, 0.818356945308593153320941837592}, + {0.172284206714011373096084867029, 0.98504728420361820084849568957}, + {-0.98504728420361820084849568957, 0.172284206714011373096084867029}, + {0.992642944199928822790468530002, 0.121078426361168639546761482961}, + {-0.121078426361168639546761482961, 0.992642944199928822790468530002}, + 
{0.616289180805370984117530497315, 0.787519933476127809157674164453}, + {-0.787519933476127809157674164453, 0.616289180805370984117530497315}, + {0.870747791452801789269244636671, 0.491729888943175819093767131562}, + {-0.491729888943175819093767131562, 0.870747791452801789269244636671}, + {0.268006129055658293136588099514, 0.963417207023313348912552100956}, + {-0.963417207023313348912552100956, 0.268006129055658293136588099514}, + {0.949948359179409007602146175486, 0.312406969980422444255196978702}, + {-0.312406969980422444255196978702, 0.949948359179409007602146175486}, + {0.450809839589695282580095181402, 0.892620013515893151456737086846}, + {-0.892620013515893151456737086846, 0.450809839589695282580095181402}, + {0.75808487442165073399991115366, 0.652155904039141698369519417611}, + {-0.652155904039141698369519417611, 0.75808487442165073399991115366}, + {0.074903093281581081819275880207, 0.997190817555421937790072206553}, + {-0.997190817555421937790072206553, 0.074903093281581081819275880207}, + {0.997388299093779462367592714145, 0.0722259013845963221411849985998}, + {-0.0722259013845963221411849985998, 0.997388299093779462367592714145}, + {0.654188605118969035423504010396, 0.756331454411686920913382436993}, + {-0.756331454411686920913382436993, 0.654188605118969035423504010396}, + {0.893826979651468622734000746277, 0.448412009704393432762969951}, + {-0.448412009704393432762969951, 0.893826979651468622734000746277}, + {0.314955945691579142220462017576, 0.949106291346508257333880465012}, + {-0.949106291346508257333880465012, 0.314955945691579142220462017576}, + {0.964133188238567639949394560972, 0.265418905387191261802115604951}, + {-0.265418905387191261802115604951, 0.964133188238567639949394560972}, + {0.494065607516103622032233033678, 0.869424623224890891215466126596}, + {-0.869424623224890891215466126596, 0.494065607516103622032233033678}, + {0.789171501500308902876668071258, 0.614172891961007993977261776308}, + {-0.614172891961007993977261776308, 
0.789171501500308902876668071258}, + {0.123742703503456508151714388077, 0.992314336956619635898846354394}, + {-0.992314336956619635898846354394, 0.123742703503456508151714388077}, + {0.985506225508247291067220885452, 0.169639262803419288427875244452}, + {-0.169639262803419288427875244452, 0.985506225508247291067220885452}, + {0.576905061874655955556079334201, 0.816811208042225289638338381337}, + {-0.816811208042225289638338381337, 0.576905061874655955556079334201}, + {0.845570895556026269623828284239, 0.533863147809042648539445963252}, + {-0.533863147809042648539445963252, 0.845570895556026269623828284239}, + {0.220410662180277938659855863079, 0.975407166263018265972561948729}, + {-0.975407166263018265972561948729, 0.220410662180277938659855863079}, + {0.933475021317337949611214753531, 0.358642418819351993519717325398}, + {-0.358642418819351993519717325398, 0.933475021317337949611214753531}, + {0.406468031273437002681703233975, 0.913665004010056347460988490639}, + {-0.913665004010056347460988490639, 0.406468031273437002681703233975}, + {0.725171954480117952890338983707, 0.688567815422334250641256403469}, + {-0.688567815422334250641256403469, 0.725171954480117952890338983707}, + {0.0258830349472542013689047024627, 0.999664978131133308281164318032}, + {-0.999664978131133308281164318032, 0.0258830349472542013689047024627}, + {0.998860416736620515543165765848, 0.0477270141193102542809256760847}, + {-0.0477270141193102542809256760847, 0.998860416736620515543165765848}, + {0.672552878803734821566706614249, 0.74004906946283555146948174297}, + {-0.74004906946283555146948174297, 0.672552878803734821566706614249}, + {0.904562357278943296456930056593, 0.426341344223101825772204165332}, + {-0.426341344223101825772204165332, 0.904562357278943296456930056593}, + {0.338153321237685933198235943564, 0.941091032438372776525170593231}, + {-0.941091032438372776525170593231, 0.338153321237685933198235943564}, + {0.970356515359309446111524266598, 0.241677953276128010262624457027}, + 
{-0.241677953276128010262624457027, 0.970356515359309446111524266598}, + {0.515253552554280180864054727863, 0.857037791803951676250505897769}, + {-0.857037791803951676250505897769, 0.515253552554280180864054727863}, + {0.804006375092761627065840457362, 0.594620676406562242988229627372}, + {-0.594620676406562242988229627372, 0.804006375092761627065840457362}, + {0.148058047424494715338383343806, 0.988978672466118480244290367409}, + {-0.988978672466118480244290367409, 0.148058047424494715338383343806}, + {0.989372565373267010002678034652, 0.145402637138122570181764103836}, + {-0.145402637138122570181764103836, 0.989372565373267010002678034652}, + {0.596776859372594503660991449578, 0.802407240818141298532850669289}, + {-0.802407240818141298532850669289, 0.596776859372594503660991449578}, + {0.858417882921593933254200692318, 0.512951009629972864978242341749}, + {-0.512951009629972864978242341749, 0.858417882921593933254200692318}, + {0.244281968779819030901379051102, 0.969704243431498857930250778736}, + {-0.969704243431498857930250778736, 0.244281968779819030901379051102}, + {0.941995401651612551674475071195, 0.335625778609476288494306572829}, + {-0.335625778609476288494306572829, 0.941995401651612551674475071195}, + {0.428768072358625129680831378209, 0.903414600350176288934278545639}, + {-0.903414600350176288934278545639, 0.428768072358625129680831378209}, + {0.741851846357193478453950774565, 0.670563821016630035032335399592}, + {-0.670563821016630035032335399592, 0.741851846357193478453950774565}, + {0.0504082461357108491117884341293, 0.998728696254153724964908178663}, + {-0.998728696254153724964908178663, 0.0504082461357108491117884341293}, + {0.995315392034315071789762896515, 0.0966812824675887250158723418281}, + {-0.0966812824675887250158723418281, 0.995315392034315071789762896515}, + {0.635430272680167163024123055948, 0.772158253573455244378465067712}, + {-0.772158253573455244378465067712, 0.635430272680167163024123055948}, + {0.88255319407379551055470301435, 
0.470212568558244170535687089796}, + {-0.470212568558244170535687089796, 0.88255319407379551055470301435}, + {0.291568852460749095012459974896, 0.956549844114106706527422829822}, + {-0.956549844114106706527422829822, 0.291568852460749095012459974896}, + {0.957329103336492792664103035349, 0.288999979074301416304138001578}, + {-0.288999979074301416304138001578, 0.957329103336492792664103035349}, + {0.472580055830262302229982651625, 0.881287745762680096994756695494}, + {-0.881287745762680096994756695494, 0.472580055830262302229982651625}, + {0.773861260504375536584120709449, 0.633355152730740056377101154794}, + {-0.633355152730740056377101154794, 0.773861260504375536584120709449}, + {0.0993528216048655404835088233995, 0.995052268395561045899455621111}, + {-0.995052268395561045899455621111, 0.0993528216048655404835088233995}, + {0.981046253543432777988186899165, 0.193773704120023815278273104923}, + {-0.193773704120023815278273104923, 0.981046253543432777988186899165}, + {0.556685758339313885301180562237, 0.830723158737122879813341569388}, + {-0.830723158737122879813341569388, 0.556685758339313885301180562237}, + {0.832214567900907975506186176062, 0.554453706790301037443668974447}, + {-0.554453706790301037443668974447, 0.832214567900907975506186176062}, + {0.196406588439524965039595372218, 0.980522540290404087670594890369}, + {-0.980522540290404087670594890369, 0.196406588439524965039595372218}, + {0.924392350535101048336628082325, 0.381443026246634675491264943048}, + {-0.381443026246634675491264943048, 0.924392350535101048336628082325}, + {0.383923149045028389014788672284, 0.923365050035655721138994067587}, + {-0.923365050035655721138994067587, 0.383923149045028389014788672284}, + {0.708055246133589499812899248354, 0.706157042323237060799101527664}, + {-0.706157042323237060799101527664, 0.708055246133589499812899248354}, + {0.00134223278637433831965430908184, 0.99999909920516782957378154606}, + {-0.99999909920516782957378154606, 0.00134223278637433831965430908184}, + 
{0.999999540410766107889628528937, 0.000958737845553301400343404470306}, + {-0.000958737845553301400343404470306, 0.999999540410766107889628528937}, + {0.706428526175912785411981076322, 0.707784386239854623745770823007}, + {-0.707784386239854623745770823007, 0.706428526175912785411981076322}, + {0.923512214816725629518145979091, 0.38356901476045490717581287754}, + {-0.38356901476045490717581287754, 0.923512214816725629518145979091}, + {0.381797498215353581230147028691, 0.924246000995674887867892266513}, + {-0.924246000995674887867892266513, 0.381797498215353581230147028691}, + {0.980597789169856848268125304457, 0.196030548321400821532023428517}, + {-0.196030548321400821532023428517, 0.980597789169856848268125304457}, + {0.554772816300762472430108118715, 0.832001876376321947148539948103}, + {-0.832001876376321947148539948103, 0.554772816300762472430108118715}, + {0.830936583959804409005300840363, 0.556367139070246485843540540372}, + {-0.556367139070246485843540540372, 0.830936583959804409005300840363}, + {0.194149916388032445224709476861, 0.980971870119892841444197983947}, + {-0.980971870119892841444197983947, 0.194149916388032445224709476861}, + {0.995090296554064002165773672459, 0.0989712165427154288988020880424}, + {-0.0989712165427154288988020880424, 0.995090296554064002165773672459}, + {0.633651878226674902805370948045, 0.773618314945947460614661395084}, + {-0.773618314945947460614661395084, 0.633651878226674902805370948045}, + {0.881468913134971443490428555378, 0.472242051470061485751728014293}, + {-0.472242051470061485751728014293, 0.881468913134971443490428555378}, + {0.289367088926947013671764352694, 0.957218202838801213161445957667}, + {-0.957218202838801213161445957667, 0.289367088926947013671764352694}, + {0.956661589026665093449253163271, 0.291201998758552904611462963658}, + {-0.291201998758552904611462963658, 0.956661589026665093449253163271}, + {0.470550988884203547080176122108, 0.88237280491870218046557283742}, + {-0.88237280491870218046557283742, 
0.470550988884203547080176122108}, + {0.772401881244962340566928560293, 0.635134106979969192963153545861}, + {-0.635134106979969192963153545861, 0.772401881244962340566928560293}, + {0.0970629740211609165490003192645, 0.995278242037957672216919036146}, + {-0.995278242037957672216919036146, 0.0970629740211609165490003192645}, + {0.998747954133162862966344164306, 0.0500252347802737362680858268504}, + {-0.0500252347802737362680858268504, 0.998747954133162862966344164306}, + {0.670848268320169749401316039439, 0.741594633807331149455421837047}, + {-0.741594633807331149455421837047, 0.670848268320169749401316039439}, + {0.903578964410565954601395333157, 0.428421585677856708862520918046}, + {-0.428421585677856708862520918046, 0.903578964410565954601395333157}, + {0.335987004632723351704726155731, 0.941866621511735280236621292715}, + {-0.941866621511735280236621292715, 0.335987004632723351704726155731}, + {0.969797853084430894199385875254, 0.24391007390599625526128590991}, + {-0.24391007390599625526128590991, 0.969797853084430894199385875254}, + {0.513280171037514332255113913561, 0.858221105554798247005976463697}, + {-0.858221105554798247005976463697, 0.513280171037514332255113913561}, + {0.802636042867324261962380660407, 0.596469096173710355834884921933}, + {-0.596469096173710355834884921933, 0.802636042867324261962380660407}, + {0.145782046063579834305201643474, 0.989316731408863003771614330617}, + {-0.989316731408863003771614330617, 0.145782046063579834305201643474}, + {0.989035379290950311670371775108, 0.147678767975650965249556634262}, + {-0.147678767975650965249556634262, 0.989035379290950311670371775108}, + {0.594928965257182418291392878018, 0.803778281802897565100352039735}, + {-0.803778281802897565100352039735, 0.594928965257182418291392878018}, + {0.857235326040076461850958366995, 0.514924844796756486786648565612}, + {-0.514924844796756486786648565612, 0.857235326040076461850958366995}, + {0.242050062558382045319049780119, 0.970263761672816138492692061845}, + 
{-0.970263761672816138492692061845, 0.242050062558382045319049780119}, + {0.941220643407275181502313898818, 0.337792392489817516754158077674}, + {-0.337792392489817516754158077674, 0.941220643407275181502313898818}, + {0.426688208183271855400420236037, 0.904398790908827354684262900264}, + {-0.904398790908827354684262900264, 0.426688208183271855400420236037}, + {0.740306935836266943873340551363, 0.672269024091335931636592704308}, + {-0.672269024091335931636592704308, 0.740306935836266943873340551363}, + {0.0481100687726125839094315495004, 0.998842040205904835303840627603}, + {-0.998842040205904835303840627603, 0.0481100687726125839094315495004}, + {0.999674830640828737671199633041, 0.0254996663356668526456694223725}, + {-0.0254996663356668526456694223725, 0.999674830640828737671199633041}, + {0.68884586474369913222659533858, 0.724907838711587815261339073913}, + {-0.724907838711587815261339073913, 0.68884586474369913222659533858}, + {0.913820815358251103965869788226, 0.406117615252011843196555673785}, + {-0.406117615252011843196555673785, 0.913820815358251103965869788226}, + {0.359000375625232459597668821516, 0.933337415033246187157089934772}, + {-0.933337415033246187157089934772, 0.359000375625232459597668821516}, + {0.975491620965388106512250487867, 0.220036582018353604750515728483}, + {-0.220036582018353604750515728483, 0.975491620965388106512250487867}, + {0.534187380920995491528913134971, 0.845366099429570971857117456238}, + {-0.845366099429570971857117456238, 0.534187380920995491528913134971}, + {0.817032388293513878174678666255, 0.576591776284917756356662721373}, + {-0.576591776284917756356662721373, 0.817032388293513878174678666255}, + {0.170017187223921950600669106279, 0.985441097198846205351685512142}, + {-0.985441097198846205351685512142, 0.170017187223921950600669106279}, + {0.992361718718787866322372792638, 0.12336214663134467683924810899}, + {-0.12336214663134467683924810899, 0.992361718718787866322372792638}, + {0.614475490271239155326554737258, 
0.788935911120745236857487725501}, + {-0.788935911120745236857487725501, 0.614475490271239155326554737258}, + {0.869614031075202298382009757916, 0.493732151026381016478694618854}, + {-0.493732151026381016478694618854, 0.869614031075202298382009757916}, + {0.265788626307669972881342346227, 0.964031330468819280810066629783}, + {-0.964031330468819280810066629783, 0.265788626307669972881342346227}, + {0.949227005644128207784149253712, 0.314591944836294656884234655081}, + {-0.314591944836294656884234655081, 0.949227005644128207784149253712}, + {0.448754755075956024423788903732, 0.893654950076772536249336553738}, + {-0.893654950076772536249336553738, 0.448754755075956024423788903732}, + {0.756582276977223466829514109122, 0.653898507540703777252133477305}, + {-0.653898507540703777252133477305, 0.756582276977223466829514109122}, + {0.0726083896863590066983462634198, 0.997360527465947943603907788201}, + {-0.997360527465947943603907788201, 0.0726083896863590066983462634198}, + {0.997219469203518671385211291636, 0.0745206698940129863562020773315}, + {-0.0745206698940129863562020773315, 0.997219469203518671385211291636}, + {0.652446577984436726360684133397, 0.757834720025678310051375774492}, + {-0.757834720025678310051375774492, 0.652446577984436726360684133397}, + {0.892792831281713605839911451767, 0.450467490960204108851883120224}, + {-0.450467490960204108851883120224, 0.892792831281713605839911451767}, + {0.312771247631986826220895636652, 0.949828482756087111305021153385}, + {-0.949828482756087111305021153385, 0.312771247631986826220895636652}, + {0.963519915239853141919468271226, 0.267636643485503089578969593276}, + {-0.267636643485503089578969593276, 0.963519915239853141919468271226}, + {0.492063780371781944733555747007, 0.870559151376993356485911590426}, + {-0.870559151376993356485911590426, 0.492063780371781944733555747007}, + {0.787756219501405952065908877557, 0.615987125382383871041724887618}, + {-0.615987125382383871041724887618, 0.787756219501405952065908877557}, + 
{0.12145909124983085558557860395, 0.992596438212814291546237654984}, + {-0.992596438212814291546237654984, 0.12145909124983085558557860395}, + {0.985113281933042705951208972692, 0.171906433152193560687237550155}, + {-0.171906433152193560687237550155, 0.985113281933042705951208972692}, + {0.575024077280281709079190477496, 0.818136486503297732930661823048}, + {-0.818136486503297732930661823048, 0.575024077280281709079190477496}, + {0.84434025449863758794322166068, 0.535807367094906505933238349826}, + {-0.535807367094906505933238349826, 0.84434025449863758794322166068}, + {0.218165696902243771981844133734, 0.975911742267280168938725637418}, + {-0.975911742267280168938725637418, 0.218165696902243771981844133734}, + {0.932647325049340447655765728996, 0.360789366636975583624291630258}, + {-0.360789366636975583624291630258, 0.932647325049340447655765728996}, + {0.404364640268877806317249223866, 0.914597855726887787319867584301}, + {-0.914597855726887787319867584301, 0.404364640268877806317249223866}, + {0.723585661479022035358354969503, 0.69023459091961336664411419406}, + {-0.69023459091961336664411419406, 0.723585661479022035358354969503}, + {0.0235827681523888936510058300655, 0.999721887849951307281060053356}, + {-0.999721887849951307281060053356, 0.0235827681523888936510058300655}, + {0.999912477096339236126709693053, 0.0132301983002098347275143908064}, + {-0.0132301983002098347275143908064, 0.999912477096339236126709693053}, + {0.697689730213338799025279968191, 0.716400056082380998212499889632}, + {-0.716400056082380998212499889632, 0.697689730213338799025279968191}, + {0.918735694195573548981315070705, 0.394873048220535760588489893053}, + {-0.394873048220535760588489893053, 0.918735694195573548981315070705}, + {0.370426829379276789655506263443, 0.92886164958836370253436598432}, + {-0.92886164958836370253436598432, 0.370426829379276789655506263443}, + {0.978118355580896658985068370384, 0.20804923089433094429701043282}, + {-0.20804923089433094429701043282, 
0.978118355580896658985068370384}, + {0.544521100048259598658262348181, 0.838747144020313917245346146956}, + {-0.838747144020313917245346146956, 0.544521100048259598658262348181}, + {0.824046535315227757223510707263, 0.566522115750982102611033042194}, + {-0.566522115750982102611033042194, 0.824046535315227757223510707263}, + {0.182097263395007646913370535913, 0.983280522874346973871695354319}, + {-0.983280522874346973871695354319, 0.182097263395007646913370535913}, + {0.993800839011860115412844152161, 0.111175052864043721889686366922}, + {-0.111175052864043721889686366922, 0.993800839011860115412844152161}, + {0.62411067863522851073554420509, 0.781335946193104979862198433693}, + {-0.781335946193104979862198433693, 0.62411067863522851073554420509}, + {0.875607403732219347070042658743, 0.483023472027314937360387148146}, + {-0.483023472027314937360387148146, 0.875607403732219347070042658743}, + {0.277598760293414292821267963518, 0.960697105378984450574364473141}, + {-0.960697105378984450574364473141, 0.277598760293414292821267963518}, + {0.953016057691806528495703787485, 0.302919781099827312154104674846}, + {-0.302919781099827312154104674846, 0.953016057691806528495703787485}, + {0.459687485602313927746109811778, 0.888080748344778903025087402057}, + {-0.888080748344778903025087402057, 0.459687485602313927746109811778}, + {0.764549648293492145612049171177, 0.644564841807476751611716281332}, + {-0.644564841807476751611716281332, 0.764549648293492145612049171177}, + {0.0848420703056171482403513550707, 0.996394411418619285747411140619}, + {-0.996394411418619285747411140619, 0.0848420703056171482403513550707}, + {0.998058863665200246551023610664, 0.0622776417290279715732737031431}, + {-0.0622776417290279715732737031431, 0.998058863665200246551023610664}, + {0.661697247738101013148082074622, 0.749771133303904990796695528843}, + {-0.749771133303904990796695528843, 0.661697247738101013148082074622}, + {0.898253534685283572258640560904, 0.439477630176319800092699097149}, + 
{-0.439477630176319800092699097149, 0.898253534685283572258640560904}, + {0.324403553123280230519043243476, 0.945918778078219113503166681767}, + {-0.945918778078219113503166681767, 0.324403553123280230519043243476}, + {0.96673167727948183802766379813, 0.255792619409551613696152116972}, + {-0.255792619409551613696152116972, 0.96673167727948183802766379813}, + {0.502709828830202987148823012831, 0.864455220354130360327360449446}, + {-0.864455220354130360327360449446, 0.502709828830202987148823012831}, + {0.795256012499515496649848955713, 0.606273762077306432871637298376}, + {-0.606273762077306432871637298376, 0.795256012499515496649848955713}, + {0.133630630797438337831195553917, 0.991031207638124134184920421831}, + {-0.991031207638124134184920421831, 0.133630630797438337831195553917}, + {0.987148661090667567385992242635, 0.159804633558925435199427056432}, + {-0.159804633558925435199427056432, 0.987148661090667567385992242635}, + {0.58502057224188452888569145216, 0.811018452350979468512548464787}, + {-0.811018452350979468512548464787, 0.58502057224188452888569145216}, + {0.850851857849413528711579601804, 0.525405668026336925002794941975}, + {-0.525405668026336925002794941975, 0.850851857849413528711579601804}, + {0.230125207735221853910800859921, 0.97316102920576352808268438821}, + {-0.97316102920576352808268438821, 0.230125207735221853910800859921}, + {0.937004538946803688936881826521, 0.349317182504794376818324508349}, + {-0.349317182504794376818324508349, 0.937004538946803688936881826521}, + {0.415557714957740464978286354381, 0.909566812025978332911790857906}, + {-0.909566812025978332911790857906, 0.415557714957740464978286354381}, + {0.732001417018089628463428653049, 0.681303108376520527578179553529}, + {-0.681303108376520527578179553529, 0.732001417018089628463428653049}, + {0.0358491178351420178294084450954, 0.999357213788164000511926587933}, + {-0.999357213788164000511926587933, 0.0358491178351420178294084450954}, + {0.99928663683291674146857985761, 
0.0377652942151688597594016982839}, + {-0.0377652942151688597594016982839, 0.99928663683291674146857985761}, + {0.679898261620603294730358356901, 0.733306452886705262272926120204}, + {-0.733306452886705262272926120204, 0.679898261620603294730358356901}, + {0.908768318467432889562473974365, 0.417301022464448945203940866122}, + {-0.417301022464448945203940866122, 0.908768318467432889562473974365}, + {0.347519857735126114217649728744, 0.937672623296509466506165608735}, + {-0.937672623296509466506165608735, 0.347519857735126114217649728744}, + {0.972717980899902245894850238983, 0.231990796442482438699528302095}, + {-0.231990796442482438699528302095, 0.972717980899902245894850238983}, + {0.523773215139060166478657265543, 0.85185774581375484082457205659}, + {-0.85185774581375484082457205659, 0.523773215139060166478657265543}, + {0.80989519919940444747652463775, 0.58657460421821616680659872145}, + {-0.58657460421821616680659872145, 0.80989519919940444747652463775}, + {0.157911507089812663817696147817, 0.987453267718844562850222246198}, + {-0.987453267718844562850222246198, 0.157911507089812663817696147817}, + {0.990773152401069778782982666598, 0.13553066251312459455924397389}, + {-0.13553066251312459455924397389, 0.990773152401069778782982666598}, + {0.604747764158633405173759456375, 0.796417065202104978993702388834}, + {-0.796417065202104978993702388834, 0.604747764158633405173759456375}, + {0.863489697743797135487398008991, 0.504366475779592149386587607296}, + {-0.504366475779592149386587607296, 0.863489697743797135487398008991}, + {0.253938465532451029815774745657, 0.967220375986271418966566670861}, + {-0.967220375986271418966566670861, 0.253938465532451029815774745657}, + {0.945295003500931207973678738199, 0.32621673218302971175575066809}, + {-0.32621673218302971175575066809, 0.945295003500931207973678738199}, + {0.437754443734133413812514845631, 0.899094570660405767448253300245}, + {-0.899094570660405767448253300245, 0.437754443734133413812514845631}, + 
{0.7485009671529704311510045045, 0.663133698563923013047372023721}, + {-0.663133698563923013047372023721, 0.7485009671529704311510045045}, + {0.060363774510688743279995804869, 0.998176444686420527574455263675}, + {-0.998176444686420527574455263675, 0.060363774510688743279995804869}, + {0.99622989715783649700142632355, 0.0867524755202205427595529840801}, + {-0.0867524755202205427595529840801, 0.99622989715783649700142632355}, + {0.643097652176015111002982393984, 0.765784179626150973696496748744}, + {-0.765784179626150973696496748744, 0.643097652176015111002982393984}, + {0.887197676561702897046757243515, 0.461389512996899453511900901503}, + {-0.461389512996899453511900901503, 0.887197676561702897046757243515}, + {0.301091839941263039737151530062, 0.953595146758195677527680800267}, + {-0.953595146758195677527680800267, 0.301091839941263039737151530062}, + {0.960163050645093996138257352868, 0.2794403624673905683906127706}, + {-0.2794403624673905683906127706, 0.960163050645093996138257352868}, + {0.481343628917989763138507441909, 0.876531979393827098689939703036}, + {-0.876531979393827098689939703036, 0.481343628917989763138507441909}, + {0.780137793314234606611989875091, 0.625607723292157413830238965602}, + {-0.625607723292157413830238965602, 0.780137793314234606611989875091}, + {0.10926926040984277777035060808, 0.994012187414966219556333726359}, + {-0.994012187414966219556333726359, 0.10926926040984277777035060808}, + {0.982929548338690173281406714523, 0.183982344268950520627114997296}, + {-0.183982344268950520627114997296, 0.982929548338690173281406714523}, + {0.564940985807745321700679141941, 0.825131312310091069583961598255}, + {-0.825131312310091069583961598255, 0.564940985807745321700679141941}, + {0.837701496611261697822214955522, 0.546128375544845945377403495513}, + {-0.546128375544845945377403495513, 0.837701496611261697822214955522}, + {0.206173331117788738442442308951, 0.978515486610096907327260851162}, + {-0.978515486610096907327260851162, 
0.206173331117788738442442308951}, + {0.928149657895271151808458398591, 0.372207217217628894001535400093}, + {-0.372207217217628894001535400093, 0.928149657895271151808458398591}, + {0.393110669752560759615533925171, 0.91949116435488009724252833621}, + {-0.91949116435488009724252833621, 0.393110669752560759615533925171}, + {0.715060936600893093384456733475, 0.6990621266721961379886352006}, + {-0.6990621266721961379886352006, 0.715060936600893093384456733475}, + {0.011312866991496257906835154472, 0.999936007472694621256437130796}, + {-0.999936007472694621256437130796, 0.011312866991496257906835154472}, + {0.999974832997189810868121639942, 0.007094601626752249784457760029}, + {-0.007094601626752249784457760029, 0.999974832997189810868121639942}, + {0.702072344508104739801979121694, 0.712105626348291997906869710278}, + {-0.712105626348291997906869710278, 0.702072344508104739801979121694}, + {0.921141294730707271831704474607, 0.389228358604349677918321503967}, + {-0.389228358604349677918321503967, 0.921141294730707271831704474607}, + {0.376119244135794339456424495438, 0.92657126773428433175183727144}, + {-0.92657126773428433175183727144, 0.376119244135794339456424495438}, + {0.979376508861383165971403741423, 0.202043693023289255839003430992}, + {-0.202043693023289255839003430992, 0.979376508861383165971403741423}, + {0.549657305317949873568750263075, 0.83539023618343188903878626661}, + {-0.83539023618343188903878626661, 0.549657305317949873568750263075}, + {0.827507137225519828760411655821, 0.56145519664602328013813803409}, + {-0.56145519664602328013813803409, 0.827507137225519828760411655821}, + {0.18812713133160241518737620936, 0.982144685093261582409240872948}, + {-0.982144685093261582409240872948, 0.18812713133160241518737620936}, + {0.994464288292152387427336179826, 0.105075112712682036675282404303}, + {-0.105075112712682036675282404303, 0.994464288292152387427336179826}, + {0.628893117166156478958782827249, 0.777491766632313008322796576977}, + 
{-0.777491766632313008322796576977, 0.628893117166156478958782827249}, + {0.878554696977485449771450021217, 0.477641753219710474276382683456}, + {-0.477641753219710474276382683456, 0.878554696977485449771450021217}, + {0.283488261196583546386307261855, 0.958975706555561080435268195288}, + {-0.958975706555561080435268195288, 0.283488261196583546386307261855}, + {0.954856798268619577640947682085, 0.297066482121764730006674426477}, + {-0.297066482121764730006674426477, 0.954856798268619577640947682085}, + {0.46512799314628400226823146113, 0.885243441089348270800485352083}, + {-0.885243441089348270800485352083, 0.46512799314628400226823146113}, + {0.768490231380656751980495755561, 0.639861519606004014448785710556}, + {-0.639861519606004014448785710556, 0.768490231380656751980495755561}, + {0.0909542343511469120187484804774, 0.995855073418615788227725715842}, + {-0.995855073418615788227725715842, 0.0909542343511469120187484804774}, + {0.99842220391501501630671100429, 0.0561524953095062992480812624763}, + {-0.0561524953095062992480812624763, 0.99842220391501501630671100429}, + {0.666285300661627388763008639216, 0.745696921089422759543197116727}, + {-0.745696921089422759543197116727, 0.666285300661627388763008639216}, + {0.900933209360986197999920932489, 0.433957777059604421499017234964}, + {-0.433957777059604421499017234964, 0.900933209360986197999920932489}, + {0.33020149482782662486712865757, 0.943910468642799149741051678575}, + {-0.943910468642799149741051678575, 0.33020149482782662486712865757}, + {0.968282992835658662400533103209, 0.249856050127307965924572386029}, + {-0.249856050127307965924572386029, 0.968282992835658662400533103209}, + {0.508004562976194007362096272118, 0.861354377707204799108353654447}, + {-0.861354377707204799108353654447, 0.508004562976194007362096272118}, + {0.798961067899735755126755520905, 0.601382749985825415528495341277}, + {-0.601382749985825415528495341277, 0.798961067899735755126755520905}, + {0.139708968412357553701141910096, 
0.990192609619540031751228070789}, + {-0.990192609619540031751228070789, 0.139708968412357553701141910096}, + {0.988110621094009822229509154567, 0.153744594965840031264292520063}, + {-0.153744594965840031264292520063, 0.988110621094009822229509154567}, + {0.589985875066900922192303369229, 0.807413566409150185165799484821}, + {-0.807413566409150185165799484821, 0.589985875066900922192303369229}, + {0.854059669376662777828812522785, 0.520175048559833763661686134583}, + {-0.520175048559833763661686134583, 0.854059669376662777828812522785}, + {0.236092079513478908525669908158, 0.971730687994879160918060279073}, + {-0.971730687994879160918060279073, 0.236092079513478908525669908158}, + {0.939130270038973646862245914235, 0.343561254940839388982709579068}, + {-0.343561254940839388982709579068, 0.939130270038973646862245914235}, + {0.421130889240484029834021839633, 0.906999875483739614878686552402}, + {-0.906999875483739614878686552402, 0.421130889240484029834021839633}, + {0.736168034582387442732454019279, 0.676798806780201767097082665714}, + {-0.676798806780201767097082665714, 0.736168034582387442732454019279}, + {0.0419803835727343560568769476049, 0.99911843511922349136966658989}, + {-0.99911843511922349136966658989, 0.0419803835727343560568769476049}, + {0.999499549033423639876616562105, 0.0316330757591294781105872857552}, + {-0.0316330757591294781105872857552, 0.999499549033423639876616562105}, + {0.684384946535361748942705162335, 0.729120871293498229448459824198}, + {-0.729120871293498229448459824198, 0.684384946535361748942705162335}, + {0.911311722098472776210087431537, 0.41171706931570856147217796206}, + {-0.41171706931570856147217796206, 0.911311722098472776210087431537}, + {0.353266766827231237790840623347, 0.93552263011400993431010419954}, + {-0.93552263011400993431010419954, 0.353266766827231237790840623347}, + {0.974123138525439635770908353152, 0.226017943954340022605364879382}, + {-0.226017943954340022605364879382, 0.974123138525439635770908353152}, + 
{0.528990256122106039526897802716, 0.848627897802015862538382862112}, + {-0.848627897802015862538382862112, 0.528990256122106039526897802716}, + {0.813479107260763223408162048145, 0.581594138596866927493067578325}, + {-0.581594138596866927493067578325, 0.813479107260763223408162048145}, + {0.163967433797471168199777480368, 0.986465752397857942312953127839}, + {-0.986465752397857942312953127839, 0.163967433797471168199777480368}, + {0.991586101888073501164910794614, 0.129448841410091775028234906131}, + {-0.129448841410091775028234906131, 0.991586101888073501164910794614}, + {0.609623103197573734668424094707, 0.792691410353209446881805888552}, + {-0.792691410353209446881805888552, 0.609623103197573734668424094707}, + {0.866568177310544474067910414306, 0.499058708042130871884722864706}, + {-0.499058708042130871884722864706, 0.866568177310544474067910414306}, + {0.259868437869964274966605444206, 0.965644031203540587071643130912}, + {-0.965644031203540587071643130912, 0.259868437869964274966605444206}, + {0.947278836828930881353016957291, 0.320410370144331824171501921228}, + {-0.320410370144331824171501921228, 0.947278836828930881353016957291}, + {0.443262943704693324065146953217, 0.896391634687790817004326981987}, + {-0.896391634687790817004326981987, 0.443262943704693324065146953217}, + {0.752555788715146389833421380899, 0.658528499665218758352125405509}, + {-0.658528499665218758352125405509, 0.752555788715146389833421380899}, + {0.0664873337037914513558689577621, 0.997787269139549959362511799554}, + {-0.997787269139549959362511799554, 0.0664873337037914513558689577621}, + {0.996743446594378856850937609124, 0.0806380906963857085889557652081}, + {-0.0806380906963857085889557652081, 0.996743446594378856850937609124}, + {0.647784309436786553959564116667, 0.76182379094348684489546030818}, + {-0.76182379094348684489546030818, 0.647784309436786553959564116667}, + {0.890012008146243260853225365281, 0.45593708486532602552898651993}, + {-0.45593708486532602552898651993, 
0.890012008146243260853225365281}, + {0.306937321794966910815105620713, 0.951729730800363826759280527767}, + {-0.951729730800363826759280527767, 0.306937321794966910815105620713}, + {0.961859589677426574105822965066, 0.273543652358398725610300061817}, + {-0.273543652358398725610300061817, 0.961859589677426574105822965066}, + {0.48671286687705922480162712418, 0.873562009943377737464231813647}, + {-0.873562009943377737464231813647, 0.48671286687705922480162712418}, + {0.783961764266484117946731657867, 0.620809110893341897785546734667}, + {-0.620809110893341897785546734667, 0.783961764266484117946731657867}, + {0.115366347568727142802735841087, 0.993323011838873948242678579845}, + {-0.993323011838873948242678579845, 0.115366347568727142802735841087}, + {0.98403993940963496722673653494, 0.17794773852646156298007440455}, + {-0.17794773852646156298007440455, 0.98403993940963496722673653494}, + {0.569993261506080650669048281998, 0.821649366723823937341819600988}, + {-0.821649366723823937341819600988, 0.569993261506080650669048281998}, + {0.841036707833296648395560168865, 0.540978055078882080763946760271}, + {-0.540978055078882080763946760271, 0.841036707833296648395560168865}, + {0.212173508116346082319481070044, 0.977232010555120322869981919212}, + {-0.977232010555120322869981919212, 0.212173508116346082319481070044}, + {0.930416006290687547242157506844, 0.366505191283953368763803837282}, + {-0.366505191283953368763803837282, 0.930416006290687547242157506844}, + {0.398745161275694481872733376804, 0.917061773469606822395405743009}, + {-0.917061773469606822395405743009, 0.398745161275694481872733376804}, + {0.719336840352691742062063440244, 0.694661435601117815963334578555}, + {-0.694661435601117815963334578555, 0.719336840352691742062063440244}, + {0.0174481460283606934491551498922, 0.999847769513025896870317410503}, + {-0.999847769513025896870317410503, 0.0174481460283606934491551498922}, + {0.999812475055878779883755669289, 0.0193652968641791559112341758464}, + 
{-0.0193652968641791559112341758464, 0.999812475055878779883755669289}, + {0.693280848294566154343954167416, 0.720667513759269406925511702866}, + {-0.720667513759269406925511702866, 0.693280848294566154343954167416}, + {0.916295503780824804884730383492, 0.400502871089639500556955908905}, + {-0.400502871089639500556955908905, 0.916295503780824804884730383492}, + {0.364720468261999275494389394225, 0.931117060326330792108251444006}, + {-0.931117060326330792108251444006, 0.364720468261999275494389394225}, + {0.976823376697157241821400930348, 0.214046935829419360253211834788}, + {-0.214046935829419360253211834788, 0.976823376697157241821400930348}, + {0.539364393866917035680330627656, 0.842072473501285556629625261849}, + {-0.842072473501285556629625261849, 0.539364393866917035680330627656}, + {0.820554908518633885350368473155, 0.571567705618482579943417931645}, + {-0.571567705618482579943417931645, 0.820554908518633885350368473155}, + {0.17606053959936784902851059087, 0.984379340699498506239706330234}, + {-0.984379340699498506239706330234, 0.17606053959936784902851059087}, + {0.993099973691677573306435533596, 0.117270807337501462019879738818}, + {-0.117270807337501462019879738818, 0.993099973691677573306435533596}, + {0.619304742689998688476293864369, 0.785150708897135563013591763593}, + {-0.785150708897135563013591763593, 0.619304742689998688476293864369}, + {0.872627144363098072865625454142, 0.488387005274203533922161568626}, + {-0.488387005274203533922161568626, 0.872627144363098072865625454142}, + {0.271698807953819454485966389257, 0.962382334499378488956722321745}, + {-0.962382334499378488956722321745, 0.271698807953819454485966389257}, + {0.95113943659906818872684652888, 0.308761675319342454582027812648}, + {-0.308761675319342454582027812648, 0.95113943659906818872684652888}, + {0.454229671084327324415141902136, 0.890884619861979532018381178204}, + {-0.890884619861979532018381178204, 0.454229671084327324415141902136}, + {0.760580280344194448183259282814, 
0.649243896506964901504943554755}, + {-0.649243896506964901504943554755, 0.760580280344194448183259282814}, + {0.0787267120040932855129511835912, 0.99689623573219721119897940298}, + {-0.99689623573219721119897940298, 0.0787267120040932855129511835912}, + {0.997657947063273708998565325601, 0.068400443430538013234176730748}, + {-0.068400443430538013234176730748, 0.997657947063273708998565325601}, + {0.657084282287190180049663013051, 0.753817117058990793232453597739}, + {-0.753817117058990793232453597739, 0.657084282287190180049663013051}, + {0.895540041271694953195492416853, 0.444980937208652782555162730205}, + {-0.444980937208652782555162730205, 0.895540041271694953195492416853}, + {0.318593397808312417396336968523, 0.94789147420627983819940709509}, + {-0.94789147420627983819940709509, 0.318593397808312417396336968523}, + {0.965143964822054445384935661423, 0.261719558244249028700068038233}, + {-0.261719558244249028700068038233, 0.965143964822054445384935661423}, + {0.497396167943289280177765476765, 0.867523516750601464408987339993}, + {-0.867523516750601464408987339993, 0.497396167943289280177765476765}, + {0.791521016159905221876158520899, 0.611141948304312565554141656321}, + {-0.611141948304312565554141656321, 0.791521016159905221876158520899}, + {0.127547262064797967129337052938, 0.991832493891873778757428681274}, + {-0.991832493891873778757428681274, 0.127547262064797967129337052938}, + {0.986149535498173857028803013236, 0.165858655597879267595828878257}, + {-0.165858655597879267595828878257, 0.986149535498173857028803013236}, + {0.580033243722978153478209151217, 0.814592803906467266550350814214}, + {-0.814592803906467266550350814214, 0.580033243722978153478209151217}, + {0.847612012230619660257957548311, 0.530616506266399334812433608022}, + {-0.530616506266399334812433608022, 0.847612012230619660257957548311}, + {0.224149671872960865037782696163, 0.974554731453931233353671359509}, + {-0.974554731453931233353671359509, 0.224149671872960865037782696163}, + 
{0.934843530163339542227163292409, 0.35505995847426291556203636901}, + {-0.35505995847426291556203636901, 0.934843530163339542227163292409}, + {0.409968895161902879831217205719, 0.912099503891833474078509880201}, + {-0.912099503891833474078509880201, 0.409968895161902879831217205719}, + {0.727807240014169964936741052952, 0.685781759295883031057883272297}, + {-0.685781759295883031057883272297, 0.727807240014169964936741052952}, + {0.029716502398525190820066654851, 0.999558367222844301736017769144}, + {-0.999558367222844301736017769144, 0.029716502398525190820066654851}, + {0.999036102055332331239867471595, 0.0438960908292260754137714684475}, + {-0.0438960908292260754137714684475, 0.999036102055332331239867471595}, + {0.675385978920574836514845173951, 0.737464425906427578638613340445}, + {-0.737464425906427578638613340445, 0.675385978920574836514845173951}, + {0.906190700222840650468469902989, 0.422869264465553063025993196788}, + {-0.422869264465553063025993196788, 0.906190700222840650468469902989}, + {0.341759864716796313288682540588, 0.939787313634716570120986034453}, + {-0.939787313634716570120986034453, 0.341759864716796313288682540588}, + {0.97127620099216649407480872469, 0.237954914608260514885529346429}, + {-0.237954914608260514885529346429, 0.97127620099216649407480872469}, + {0.51853645439050211063403139633, 0.855055521862835954394199688977}, + {-0.855055521862835954394199688977, 0.51853645439050211063403139633}, + {0.806280799041550366723640763666, 0.591532985637249986154984071618}, + {-0.591532985637249986154984071618, 0.806280799041550366723640763666}, + {0.151849635103164182226009870647, 0.988403605982412392094715869462}, + {-0.988403605982412392094715869462, 0.151849635103164182226009870647}, + {0.989922900864865451531215967407, 0.141607380963316015209940701425}, + {-0.141607380963316015209940701425, 0.989922900864865451531215967407}, + {0.599849656708177247921298658184, 0.800112735398632368699622929853}, + {-0.800112735398632368699622929853, 
0.599849656708177247921298658184}, + {0.860378708277976134155551335425, 0.50965525440430925474544210374}, + {-0.50965525440430925474544210374, 0.860378708277976134155551335425}, + {0.247998932555237111552415285587, 0.968760305468521432104012092168}, + {-0.968760305468521432104012092168, 0.247998932555237111552415285587}, + {0.943275580350332543133617946296, 0.332010812346139383421927959716}, + {-0.332010812346139383421927959716, 0.943275580350332543133617946296}, + {0.432229462556186716515327361776, 0.90176365623060572573876925162}, + {-0.90176365623060572573876925162, 0.432229462556186716515327361776}, + {0.744417964952435506909012019605, 0.667713930853681136845523269585}, + {-0.667713930853681136845523269585, 0.744417964952435506909012019605}, + {0.0542379426555934518461299376213, 0.998528039454320226475658728305}, + {-0.998528039454320226475658728305, 0.0542379426555934518461299376213}, + {0.995678840228737538353698255378, 0.0928635941623847244175138371247}, + {-0.0928635941623847244175138371247, 0.995678840228737538353698255378}, + {0.638386782652119566527915139886, 0.769715736967275021562784331763}, + {-0.769715736967275021562784331763, 0.638386782652119566527915139886}, + {0.884349942486086115778221028449, 0.466824570074086953042069580988}, + {-0.466824570074086953042069580988, 0.884349942486086115778221028449}, + {0.2952350221499632221267006571, 0.955424660397726333016521493846}, + {-0.955424660397726333016521493846, 0.2952350221499632221267006571}, + {0.958430362016590930984705209994, 0.285326551804675809798084173963}, + {-0.285326551804675809798084173963, 0.958430362016590930984705209994}, + {0.47595626864334805716794107866, 0.879468947910670206802308257465}, + {-0.879468947910670206802308257465, 0.47595626864334805716794107866}, + {0.776284450615002397100283815234, 0.630382781913785938954220000596}, + {-0.630382781913785938954220000596, 0.776284450615002397100283815234}, + {0.10316805932506323273400994367, 0.99466393899402039124879593146}, + 
{-0.99466393899402039124879593146, 0.10316805932506323273400994367}, + {0.98178215052580430910467157446, 0.190010023180164994149876633855}, + {-0.190010023180164994149876633855, 0.98178215052580430910467157446}, + {0.559867440400600213656900905335, 0.828582192168815789834468432673}, + {-0.828582192168815789834468432673, 0.559867440400600213656900905335}, + {0.834334746401350080269310183212, 0.55125813458614358886222817091}, + {-0.55125813458614358886222817091, 0.834334746401350080269310183212}, + {0.200165391809844439574561647532, 0.979762122110061750568377192394}, + {-0.979762122110061750568377192394, 0.200165391809844439574561647532}, + {0.925848365189827271493072657904, 0.377895229759948492187504598405}, + {-0.377895229759948492187504598405, 0.925848365189827271493072657904}, + {0.387461377834897868766006467922, 0.921885936917513970634274755866}, + {-0.921885936917513970634274755866, 0.387461377834897868766006467922}, + {0.710758111208985354778633336537, 0.703436498449316771619521659886}, + {-0.703436498449316771619521659886, 0.710758111208985354778633336537}, + {0.00517716203158365053521583476481, 0.99998659840684800403920462486}, + {-0.99998659840684800403920462486, 0.00517716203158365053521583476481}, + {0.999991892856248010268416237523, 0.0040266886865165116629605712717}, + {-0.0040266886865165116629605712717, 0.999991892856248010268416237523}, + {0.7042537496942614660966341944, 0.709948347446187399967243436549}, + {-0.709948347446187399967243436549, 0.7042537496942614660966341944}, + {0.922331095439485437736948370002, 0.386400505156759443980973856014}, + {-0.386400505156759443980973856014, 0.922331095439485437736948370002}, + {0.378960154634224721803548163734, 0.925412989534729057972128885012}, + {-0.925412989534729057972128885012, 0.378960154634224721803548163734}, + {0.97999176104346119586807617452, 0.199038057383344679784897834907}, + {-0.199038057383344679784897834907, 0.97999176104346119586807617452}, + {0.552217659650817926930699286459, 
0.833699979830738291397551620321}, + {-0.833699979830738291397551620321, 0.552217659650817926930699286459}, + {0.829225763087007572416098355461, 0.558913798212899770589956460753}, + {-0.558913798212899770589956460753, 0.829225763087007572416098355461}, + {0.191139423398341445903270141571, 0.981562897028483649641827923915}, + {-0.981562897028483649641827923915, 0.191139423398341445903270141571}, + {0.994781974056508255799258222396, 0.102023644770398755343698837805}, + {-0.102023644770398755343698837805, 0.994781974056508255799258222396}, + {0.63127546859898076014872003725, 0.77555869071601357678247268268}, + {-0.77555869071601357678247268268, 0.63127546859898076014872003725}, + {0.880015946578848962111862874735, 0.474944137522437859200152843187}, + {-0.474944137522437859200152843187, 0.880015946578848962111862874735}, + {0.286429023051290643220312404083, 0.958101463705114730906586828496}, + {-0.958101463705114730906586828496, 0.286429023051290643220312404083}, + {0.955763691653575442330748046516, 0.294135624698419084754874575083}, + {-0.294135624698419084754874575083, 0.955763691653575442330748046516}, + {0.467841692767338113156938561588, 0.883812282392925085972024135117}, + {-0.883812282392925085972024135117, 0.467841692767338113156938561588}, + {0.770449682195725960021093214891, 0.637500813493210194415894420672}, + {-0.637500813493210194415894420672, 0.770449682195725960021093214891}, + {0.0940090466106288241698862861995, 0.995571343076607773170394466433}, + {-0.995571343076607773170394466433, 0.0940090466106288241698862861995}, + {0.998589778577742226417512938497, 0.0530891148923741326703940046627}, + {-0.0530891148923741326703940046627, 0.998589778577742226417512938497}, + {0.668569930908305076755482332374, 0.743649277203483949350015791424}, + {-0.743649277203483949350015791424, 0.668569930908305076755482332374}, + {0.902260333094715538670982368785, 0.431191710639030001406268866049}, + {-0.431191710639030001406268866049, 0.902260333094715538670982368785}, + 
{0.333095817342620781342787950052, 0.942892982511192245809183987149}, + {-0.942892982511192245809183987149, 0.333095817342620781342787950052}, + {0.969044983470266241276647178893, 0.246884223900822430408652508049}, + {-0.246884223900822430408652508049, 0.969044983470266241276647178893}, + {0.510644770198381614534355321666, 0.859791787974880539202615636896}, + {-0.859791787974880539202615636896, 0.510644770198381614534355321666}, + {0.800802324111759111779917930107, 0.598928741752476900828128236753}, + {-0.598928741752476900828128236753, 0.800802324111759111779917930107}, + {0.142746179028669484267766165431, 0.989759328510075198082063252514}, + {-0.989759328510075198082063252514, 0.142746179028669484267766165431}, + {0.988577652627162017928696968738, 0.150712390751955610523893369646}, + {-0.150712390751955610523893369646, 0.988577652627162017928696968738}, + {0.592460208392600939930616732454, 0.805599715411689953903362493293}, + {-0.805599715411689953903362493293, 0.592460208392600939930616732454}, + {0.855651524567380694641371974285, 0.517552382378360875669898177875}, + {-0.517552382378360875669898177875, 0.855651524567380694641371974285}, + {0.239072196155210636714372185452, 0.971001794553194685910568750842}, + {-0.971001794553194685910568750842, 0.239072196155210636714372185452}, + {0.940179881388678917453205485799, 0.340678427012879259105915252803}, + {-0.340678427012879259105915252803, 0.940179881388678917453205485799}, + {0.423911543720325523221248431582, 0.905703595609872014193797440385}, + {-0.905703595609872014193797440385, 0.423911543720325523221248431582}, + {0.738240959511861305486490891781, 0.674537089935762002035346540652}, + {-0.674537089935762002035346540652, 0.738240959511861305486490891781}, + {0.0450454381650831972572213146577, 0.99898493907591801033163392276}, + {-0.99898493907591801033163392276, 0.0450454381650831972572213146577}, + {0.99959189410692594890406326158, 0.0285665054868127279985579036747}, + {-0.0285665054868127279985579036747, 
0.99959189410692594890406326158}, + {0.686618636997584741443745315337, 0.727017776486640676658623760886}, + {-0.727017776486640676658623760886, 0.686618636997584741443745315337}, + {0.912570563459208727863369858824, 0.408919266735797426459697589962}, + {-0.408919266735797426459697589962, 0.912570563459208727863369858824}, + {0.356135247266522125819676602987, 0.934434420199948045926419126772}, + {-0.934434420199948045926419126772, 0.356135247266522125819676602987}, + {0.97481196739615982860271969912, 0.223028312600055816927735463651}, + {-0.223028312600055816927735463651, 0.97481196739615982860271969912}, + {0.531591320291531665986894950038, 0.847000984763716879832884387724}, + {-0.847000984763716879832884387724, 0.531591320291531665986894950038}, + {0.815259584543988280458393091976, 0.57909568277544909431497899277}, + {-0.57909568277544909431497899277, 0.815259584543988280458393091976}, + {0.166993096412007713258418561963, 0.985958064904755460133856104221}, + {-0.985958064904755460133856104221, 0.166993096412007713258418561963}, + {0.991978578743518579763360776269, 0.126406088911843378363997203451}, + {-0.126406088911843378363997203451, 0.991978578743518579763360776269}, + {0.612052177168501465587269194657, 0.79081738247416977216630584735}, + {-0.79081738247416977216630584735, 0.612052177168501465587269194657}, + {0.86809518961414167126378060857, 0.496397765676667213075035078873}, + {-0.496397765676667213075035078873, 0.86809518961414167126378060857}, + {0.262829769015759162620327060722, 0.964842221567403623971870274545}, + {-0.964842221567403623971870274545, 0.262829769015759162620327060722}, + {0.948257383916349061436790179869, 0.31750265171825226007129572281}, + {-0.31750265171825226007129572281, 0.948257383916349061436790179869}, + {0.446010948402778995003359341354, 0.895027504552152630701300495275}, + {-0.895027504552152630701300495275, 0.446010948402778995003359341354}, + {0.754572584008453839388153028267, 0.656216591883201916246548535128}, + 
{-0.656216591883201916246548535128, 0.754572584008453839388153028267}, + {0.0695481890020963056109337685484, 0.997578593097570798242657019728}, + {-0.997578593097570798242657019728, 0.0695481890020963056109337685484}, + {0.996986149905620178124365793337, 0.077579745400254238041348742172}, + {-0.077579745400254238041348742172, 0.996986149905620178124365793337}, + {0.650118503292086202804966887925, 0.759832831402577513202345471655}, + {-0.759832831402577513202345471655, 0.650118503292086202804966887925}, + {0.891406614843252897983916227531, 0.453204420779070193958659729105}, + {-0.453204420779070193958659729105, 0.891406614843252897983916227531}, + {0.309855742953607127265769349833, 0.950783581346811068613078532508}, + {-0.950783581346811068613078532508, 0.309855742953607127265769349833}, + {0.962694283081255930412112320482, 0.270591421376706942059087168673}, + {-0.270591421376706942059087168673, 0.962694283081255930412112320482}, + {0.489390626789901916904312884071, 0.872064684762653974381407806504}, + {-0.872064684762653974381407806504, 0.489390626789901916904312884071}, + {0.785862690303412603221033805312, 0.618401028450860978935565981374}, + {-0.618401028450860978935565981374, 0.785862690303412603221033805312}, + {0.118413276684707788333206224252, 0.992964398105385614989870646241}, + {-0.992964398105385614989870646241, 0.118413276684707788333206224252}, + {0.984581244298162183703482241981, 0.174927909083378158028310167538}, + {-0.174927909083378158028310167538, 0.984581244298162183703482241981}, + {0.572511363740678680400719713361, 0.819896785203959810317542178382}, + {-0.819896785203959810317542178382, 0.572511363740678680400719713361}, + {0.842692447037091563188937470841, 0.538395244877439949071629143873}, + {-0.538395244877439949071629143873, 0.842692447037091563188937470841}, + {0.21517061514318339132678659098, 0.976576472366042613870718014368}, + {-0.976576472366042613870718014368, 0.21517061514318339132678659098}, + {0.931536049656050302303356147604, 
0.363648990361860546816075157039}, + {-0.363648990361860546816075157039, 0.931536049656050302303356147604}, + {0.401556790575008537569345890006, 0.915834124688034711958550815325}, + {-0.915834124688034711958550815325, 0.401556790575008537569345890006}, + {0.721464646265866371699360115599, 0.692451272067911238750070879178}, + {-0.692451272067911238750070879178, 0.721464646265866371699360115599}, + {0.0205155536404768752689609101481, 0.999789533881418779337479918468}, + {-0.999789533881418779337479918468, 0.0205155536404768752689609101481}, + {0.999867181641464375374539486074, 0.0162978242828590649815545532419}, + {-0.0162978242828590649815545532419, 0.999867181641464375374539486074}, + {0.695488562355564443606681379606, 0.718537166493557366564459698566}, + {-0.718537166493557366564459698566, 0.695488562355564443606681379606}, + {0.917519917011646257698487261223, 0.397689831259163184551397307587}, + {-0.397689831259163184551397307587, 0.917519917011646257698487261223}, + {0.367575378700365329454058382908, 0.929993731684941482562578585203}, + {-0.929993731684941482562578585203, 0.367575378700365329454058382908}, + {0.977475466324706054876969574252, 0.211049076599018392563777979376}, + {-0.211049076599018392563777979376, 0.977475466324706054876969574252}, + {0.541945297455357466454017867363, 0.840413763908007482861250991846}, + {-0.840413763908007482861250991846, 0.541945297455357466454017867363}, + {0.822304591838926346447635751247, 0.569047588731045106413830581005}, + {-0.569047588731045106413830581005, 0.822304591838926346447635751247}, + {0.179079744280565389402681830688, 0.983834561899716630861689736776}, + {-0.983834561899716630861689736776, 0.179079744280565389402681830688}, + {0.993455081740560963510233705165, 0.114223467658162255999876322221}, + {-0.114223467658162255999876322221, 0.993455081740560963510233705165}, + {0.621710636551257689319527344196, 0.783247013654715384944893230568}, + {-0.783247013654715384944893230568, 0.621710636551257689319527344196}, + 
{0.874121387829363327703902086796, 0.485707524482859753778285494263}, + {-0.485707524482859753778285494263, 0.874121387829363327703902086796}, + {0.274650076679177679306320669639, 0.961544245149499987590502314561}, + {-0.961544245149499987590502314561, 0.274650076679177679306320669639}, + {0.952082227825700622148019647284, 0.305842167561065081393678610766}, + {-0.305842167561065081393678610766, 0.952082227825700622148019647284}, + {0.456960728887526979047351005647, 0.889486870197969903273360614548}, + {-0.889486870197969903273360614548, 0.456960728887526979047351005647}, + {0.762568553111665492494353202346, 0.646907413626695904085295296682}, + {-0.646907413626695904085295296682, 0.762568553111665492494353202346}, + {0.0817847760495850756301550177341, 0.996650014000160067162426003051}, + {-0.996650014000160067162426003051, 0.0817847760495850756301550177341}, + {0.997863101498009497092311903543, 0.0653393500792066456162032750399}, + {-0.0653393500792066456162032750399, 0.997863101498009497092311903543}, + {0.659393868245753855816815303115, 0.751797663284411443740395952773}, + {-0.751797663284411443740395952773, 0.659393868245753855816815303115}, + {0.896901008965428792052421158587, 0.442231364917500979760944801455}, + {-0.442231364917500979760944801455, 0.896901008965428792052421158587}, + {0.321499988505963507634533016244, 0.946909582479058764548085491697}, + {-0.946909582479058764548085491697, 0.321499988505963507634533016244}, + {0.965942366959485543276286989567, 0.258757306588058677565555854017}, + {-0.258757306588058677565555854017, 0.965942366959485543276286989567}, + {0.500055351742453857433190478332, 0.865993444082419516760751321272}, + {-0.865993444082419516760751321272, 0.500055351742453857433190478332}, + {0.793392248184711101899324603437, 0.608710719899370311658515220188}, + {-0.608710719899370311658515220188, 0.793392248184711101899324603437}, + {0.130589561010459626055890680618, 0.991436516654039423634969807608}, + {-0.991436516654039423634969807608, 
0.130589561010459626055890680618}, + {0.986653741674811346484830210102, 0.162832410898735208126808515772}, + {-0.162832410898735208126808515772, 0.986653741674811346484830210102}, + {0.582529649477889321929069410544, 0.81280945336478926943613032563}, + {-0.81280945336478926943613032563, 0.582529649477889321929069410544}, + {0.849235931706025959364581012778, 0.528013572078784632068959581375}, + {-0.528013572078784632068959581375, 0.849235931706025959364581012778}, + {0.227138508761166174609869017331, 0.973862463512047304980967510346}, + {-0.973862463512047304980967510346, 0.227138508761166174609869017331}, + {0.935928439212529661439532446821, 0.35219022796380683448802528801}, + {-0.35219022796380683448802528801, 0.935928439212529661439532446821}, + {0.41276524761167726618182882703, 0.910837444533365014898151912348}, + {-0.910837444533365014898151912348, 0.41276524761167726618182882703}, + {0.729907763601057135716132506786, 0.683545650732197529464428953361}, + {-0.683545650732197529464428953361, 0.729907763601057135716132506786}, + {0.0327829643997067238236908792715, 0.99946249416632315654140938932}, + {-0.99946249416632315654140938932, 0.0327829643997067238236908792715}, + {0.999166071709922998245190228772, 0.0408308846801159475159437306502}, + {-0.0408308846801159475159437306502, 0.999166071709922998245190228772}, + {0.67764530939845490919992698764, 0.735388900276766732666544612584}, + {-0.735388900276766732666544612584, 0.67764530939845490919992698764}, + {0.907483780136612572242427177116, 0.420087120474984532236817358353}, + {-0.420087120474984532236817358353, 0.907483780136612572242427177116}, + {0.344641483174408957967926880883, 0.9387343863283924561002891096}, + {-0.9387343863283924561002891096, 0.344641483174408957967926880883}, + {0.972001665370963885415278582514, 0.234973961357578281505809059126}, + {-0.234973961357578281505809059126, 0.972001665370963885415278582514}, + {0.521157287430216609180888553965, 0.853460650386635322206529963296}, + 
{-0.853460650386635322206529963296, 0.521157287430216609180888553965}, + {0.808091802154378369138498783286, 0.589056567140108455760127981193}, + {-0.589056567140108455760127981193, 0.808091802154378369138498783286}, + {0.154881299997379318078571941442, 0.987933086251858383342039360286}, + {-0.987933086251858383342039360286, 0.154881299997379318078571941442}, + {0.990352687421301447479038415622, 0.138569673873492504867499519605}, + {-0.138569673873492504867499519605, 0.990352687421301447479038415622}, + {0.602301544979168546056769173447, 0.798268657104678425184829393402}, + {-0.798268657104678425184829393402, 0.602301544979168546056769173447}, + {0.861938259456469180186388712173, 0.507013251192858227511806035182}, + {-0.507013251192858227511806035182, 0.861938259456469180186388712173}, + {0.250969880155890723294476174487, 0.967994896295707674127584141388}, + {-0.967994896295707674127584141388, 0.250969880155890723294476174487}, + {0.944289735932944407359457272833, 0.329115321143957251237566197233}, + {-0.329115321143957251237566197233, 0.944289735932944407359457272833}, + {0.434994000309758765965284510457, 0.900433351056319719596388040372}, + {-0.900433351056319719596388040372, 0.434994000309758765965284510457}, + {0.74646297904962677360884981681, 0.665426946334724656040293666592}, + {-0.665426946334724656040293666592, 0.74646297904962677360884981681}, + {0.0573011282531621576885605406915, 0.998356940528243419485932008683}, + {-0.998356940528243419485932008683, 0.0573011282531621576885605406915}, + {0.995959055866258324840600835159, 0.0898084574970052784692597924732}, + {-0.0898084574970052784692597924732, 0.995959055866258324840600835159}, + {0.640745232883146442759425553959, 0.767753571491219033440245311795}, + {-0.767753571491219033440245311795, 0.640745232883146442759425553959}, + {0.885777978163732937488816787663, 0.464109225721886897719770104231}, + {-0.464109225721886897719770104231, 0.885777978163732937488816787663}, + {0.298164834266100908788388323956, 
0.95451439570446949822724036494}, + {-0.95451439570446949822724036494, 0.298164834266100908788388323956}, + {0.959301220985062319535074948362, 0.282384786092609363361560781414}, + {-0.282384786092609363361560781414, 0.959301220985062319535074948362}, + {0.47865220140907555057552258404, 0.878004595709069080022857178847}, + {-0.878004595709069080022857178847, 0.47865220140907555057552258404}, + {0.778214784391584535327979210706, 0.627998208082124698137249652063}, + {-0.627998208082124698137249652063, 0.778214784391584535327979210706}, + {0.106219159755045478021884264308, 0.994342742770787380024444246374}, + {-0.994342742770787380024444246374, 0.106219159755045478021884264308}, + {0.982360472607696211255756679748, 0.186997063768348542600605810549}, + {-0.186997063768348542600605810549, 0.982360472607696211255756679748}, + {0.562406859897951139082294957916, 0.826860643603096079345959878992}, + {-0.826860643603096079345959878992, 0.562406859897951139082294957916}, + {0.836022055985299883396066888963, 0.548695837332590086177219745878}, + {-0.548695837332590086177219745878, 0.836022055985299883396066888963}, + {0.203170317622019785952502957116, 0.979143412395430234873572317156}, + {-0.979143412395430234873572317156, 0.203170317622019785952502957116}, + {0.927003374196951668295696435962, 0.375052988559571864879416125405}, + {-0.375052988559571864879416125405, 0.927003374196951668295696435962}, + {0.390287860562721244939154985332, 0.920692883592229116551663992141}, + {-0.920692883592229116551663992141, 0.390287860562721244939154985332}, + {0.712912879008703370686816924717, 0.701252612789087459432835203188}, + {-0.701252612789087459432835203188, 0.712912879008703370686816924717}, + {0.00824505331433090550563580478638, 0.999966008970226916119372617686}, + {-0.999966008970226916119372617686, 0.00824505331433090550563580478638}, + {0.999948360994165397208632839465, 0.0101624477898955133808867046241}, + {-0.0101624477898955133808867046241, 0.999948360994165397208632839465}, + 
{0.699884331149658756032749806764, 0.71425620264103750756135013944}, + {-0.71425620264103750756135013944, 0.699884331149658756032749806764}, + {0.919942823889248639801508033997, 0.392052548486392093973762484893}, + {-0.392052548486392093973762484893, 0.919942823889248639801508033997}, + {0.373274793459794029804044157572, 0.927720824692185086490781031898}, + {-0.927720824692185086490781031898, 0.373274793459794029804044157572}, + {0.978752038414610336936050316581, 0.205047426951047245813342101428}, + {-0.205047426951047245813342101428, 0.978752038414610336936050316581}, + {0.547091777401188528173747727124, 0.837072629525066003175481910148}, + {-0.837072629525066003175481910148, 0.547091777401188528173747727124}, + {0.825780722551702428546605005977, 0.563991310449007077210126226419}, + {-0.563991310449007077210126226419, 0.825780722551702428546605005977}, + {0.185113068540655539839789867074, 0.982717228838215994812799181091}, + {-0.982717228838215994812799181091, 0.185113068540655539839789867074}, + {0.994137242251175723417588869779, 0.10812559164798686561770324488}, + {-0.10812559164798686561770324488, 0.994137242251175723417588869779}, + {0.626504846351800814829857699806, 0.77941752449999890384191303383}, + {-0.77941752449999890384191303383, 0.626504846351800814829857699806}, + {0.877085178084718308788580998225, 0.480334873170893073002218898182}, + {-0.480334873170893073002218898182, 0.877085178084718308788580998225}, + {0.280544831042396247688941457454, 0.959840923161433767774042280507}, + {-0.959840923161433767774042280507, 0.280544831042396247688941457454}, + {0.95394091740782349653215987928, 0.299994543442243577491268524682}, + {-0.299994543442243577491268524682, 0.95394091740782349653215987928}, + {0.462409915563415485628695478226, 0.886666267537360996975337457116}, + {-0.886666267537360996975337457116, 0.462409915563415485628695478226}, + {0.766523547242852099081744654541, 0.642216203098485372002812709979}, + {-0.642216203098485372002812709979, 
0.766523547242852099081744654541}, + {0.0878985659957715881329676221867, 0.996129430393403736410107285337}, + {-0.996129430393403736410107285337, 0.0878985659957715881329676221867}, + {0.99824523172225787526201656874, 0.0592153471979670609215418153326}, + {-0.0592153471979670609215418153326, 0.99824523172225787526201656874}, + {0.66399439908394664033863818986, 0.747737546191943325268880471413}, + {-0.747737546191943325268880471413, 0.66399439908394664033863818986}, + {0.899597605700772184889046911849, 0.436719758904309363067852700624}, + {-0.436719758904309363067852700624, 0.899597605700772184889046911849}, + {0.327304064330806665505946284611, 0.944919070329589216861165823502}, + {-0.944919070329589216861165823502, 0.327304064330806665505946284611}, + {0.967511888352754145792289364181, 0.252825524613492613745791004476}, + {-0.252825524613492613745791004476, 0.967511888352754145792289364181}, + {0.505359574221587282139012131665, 0.862908860044081404971905158163}, + {-0.862908860044081404971905158163, 0.505359574221587282139012131665}, + {0.7971122915618589210851041571, 0.603831097775695879725788017822}, + {-0.603831097775695879725788017822, 0.7971122915618589210851041571}, + {0.136670442802027086726113225268, 0.990616570659050621472374587029}, + {-0.990616570659050621472374587029, 0.136670442802027086726113225268}, + {0.987634289087372163606914909906, 0.156775352077043378384857419405}, + {-0.156775352077043378384857419405, 0.987634289087372163606914909906}, + {0.587505988569450021685725005227, 0.809219817722621748146138997981}, + {-0.809219817722621748146138997981, 0.587505988569450021685725005227}, + {0.852459775451070100871220347472, 0.522792818655642088643276110815}, + {-0.522792818655642088643276110815, 0.852459775451070100871220347472}, + {0.233109740683179689213488927635, 0.972450435137246826933221655054}, + {-0.972450435137246826933221655054, 0.233109740683179689213488927635}, + {0.938071819237501269839185624733, 0.346440849139423523439518248779}, + 
{-0.346440849139423523439518248779, 0.938071819237501269839185624733}, + {0.4183462709163262571721020322, 0.908287618329350454615678245318}, + {-0.908287618329350454615678245318, 0.4183462709163262571721020322}, + {0.734088180559004044312132464256, 0.679054153336514865202389046317}, + {-0.679054153336514865202389046317, 0.734088180559004044312132464256}, + {0.038914933845027192826915296564, 0.99924252707930583117246214897}, + {-0.99924252707930583117246214897, 0.038914933845027192826915296564}, + {0.999397796289508644207444376661, 0.0346993482888897988858367682496}, + {-0.0346993482888897988858367682496, 0.999397796289508644207444376661}, + {0.682144814381375641509919205419, 0.731217103337031271159673906368}, + {-0.731217103337031271159673906368, 0.682144814381375641509919205419}, + {0.910044303124737385957132573822, 0.414510996657761809913722572674}, + {-0.414510996657761809913722572674, 0.910044303124737385957132573822}, + {0.350394961306590146943307217953, 0.936602034532785565623669299384}, + {-0.936602034532785565623669299384, 0.350394961306590146943307217953}, + {0.973425140836747027073272420239, 0.229005447941657336308196590835}, + {-0.229005447941657336308196590835, 0.973425140836747027073272420239}, + {0.526384212894925096470899461565, 0.850246823231342707671842617856}, + {-0.850246823231342707671842617856, 0.526384212894925096470899461565}, + {0.811690973202369048244975147099, 0.584087120232753442827799972292}, + {-0.584087120232753442827799972292, 0.811690973202369048244975147099}, + {0.160940227859001055987775430367, 0.98696415489960565015081783713}, + {-0.98696415489960565015081783713, 0.160940227859001055987775430367}, + {0.991184291846594178743146130728, 0.132490375486544548211398364401}, + {-0.132490375486544548211398364401, 0.991184291846594178743146130728}, + {0.607188291221825160448588576401, 0.794557977118800384808139369852}, + {-0.794557977118800384808139369852, 0.607188291221825160448588576401}, + {0.865033008537231862256078329665, 
0.501714953076969116807504178723}, + {-0.501714953076969116807504178723, 0.865033008537231862256078329665}, + {0.25690466074346191005517425765, 0.966436751830292650744524962647}, + {-0.966436751830292650744524962647, 0.25690466074346191005517425765}, + {0.946291373592331619413187127066, 0.323315072745979981139186065775}, + {-0.323315072745979981139186065775, 0.946291373592331619413187127066}, + {0.440510766846965884369069499371, 0.897747327643974690758454926254}, + {-0.897747327643974690758454926254, 0.440510766846965884369069499371}, + {0.750531910080146413832835605717, 0.660834209126197613670683495002}, + {-0.660834209126197613670683495002, 0.750531910080146413832835605717}, + {0.0634258526013802281795506132767, 0.997986553627747019490357160976}, + {-0.997986553627747019490357160976, 0.0634258526013802281795506132767}, + {0.996491361554210919315721639578, 0.0836956769960967156274378453418}, + {-0.0836956769960967156274378453418, 0.996491361554210919315721639578}, + {0.645444018388859341861518714722, 0.763807579908737155349740532984}, + {-0.763807579908737155349740532984, 0.645444018388859341861518714722}, + {0.888609024317253859415188799176, 0.458665457498096562627409866764}, + {-0.458665457498096562627409866764, 0.888609024317253859415188799176}, + {0.304016011625357573944228306573, 0.952666922211226174788123444159}, + {-0.952666922211226174788123444159, 0.304016011625357573944228306573}, + {0.961015842884817228508609332494, 0.276493308643056046047803420151}, + {-0.276493308643056046047803420151, 0.961015842884817228508609332494}, + {0.484030525837350011908455371668, 0.875051112825769972225486981188}, + {-0.875051112825769972225486981188, 0.484030525837350011908455371668}, + {0.782053459282860297108186387049, 0.623211350044037271089791829581}, + {-0.623211350044037271089791829581, 0.782053459282860297108186387049}, + {0.11231833258074616632704589847, 0.993672276037870005893637426198}, + {-0.993672276037870005893637426198, 0.11231833258074616632704589847}, + 
{0.983489372362428726503935649816, 0.180965893057658977483015405596}, + {-0.180965893057658977483015405596, 0.983489372362428726503935649816}, + {0.567469794277824624018080612586, 0.823394214566925075615699824993}, + {-0.823394214566925075615699824993, 0.567469794277824624018080612586}, + {0.839373052471700797916298597556, 0.543555773388839535975591843453}, + {-0.543555773388839535975591843453, 0.839373052471700797916298597556}, + {0.209174404031644556312130589504, 0.977878350664338147346654750436}, + {-0.977878350664338147346654750436, 0.209174404031644556312130589504}, + {0.929287205495526791310112457722, 0.369357942519603188458177100983}, + {-0.369357942519603188458177100983, 0.929287205495526791310112457722}, + {0.39592977883506125236223738284, 0.918280790516506129073093234183}, + {-0.918280790516506129073093234183, 0.39592977883506125236223738284}, + {0.717202263767218073375886433496, 0.696865060716332473944589764869}, + {-0.696865060716332473944589764869, 0.717202263767218073375886433496}, + {0.0143805741876490058434034935431, 0.999896594196636678830714117794}, + {-0.999896594196636678830714117794, 0.0143805741876490058434034935431}, + {0.999748357854501779051759058348, 0.0224325871719499338186043502219}, + {-0.0224325871719499338186043502219, 0.999748357854501779051759058348}, + {0.691066608810189220157838008163, 0.722791077827877548322987877327}, + {-0.722791077827877548322987877327, 0.691066608810189220157838008163}, + {0.915062466027752763331193364138, 0.403312141234562548941511295197}, + {-0.403312141234562548941511295197, 0.915062466027752763331193364138}, + {0.361862124935682982762585879755, 0.932231624939334535540069737181}, + {-0.932231624939334535540069737181, 0.361862124935682982762585879755}, + {0.976162092835966110904166725959, 0.21704278036854099398489381656}, + {-0.21704278036854099398489381656, 0.976162092835966110904166725959}, + {0.536778413575385915379456491792, 0.843723257187741548790427259519}, + {-0.843723257187741548790427259519, 
0.536778413575385915379456491792}, + {0.81879750182303601135913595499, 0.57408244269299446571608314116}, + {-0.57408244269299446571608314116, 0.81879750182303601135913595499}, + {0.173039677769319361066990836662, 0.984914854146027196613033538597}, + {-0.984914854146027196613033538597, 0.173039677769319361066990836662}, + {0.992735518207621847608379539452, 0.120317043219339683823854159073}, + {-0.120317043219339683823854159073, 0.992735518207621847608379539452}, + {0.616893019696640787508101766434, 0.787047014002060785209380355809}, + {-0.787047014002060785209380355809, 0.616893019696640787508101766434}, + {0.871124687397811903188937776576, 0.491061889181052646513592208066}, + {-0.491061889181052646513592208066, 0.871124687397811903188937776576}, + {0.268744981895804979643571641645, 0.96321136554020347908533494774}, + {-0.96321136554020347908533494774, 0.268744981895804979643571641645}, + {0.950187692885819279453585295414, 0.311678276895140604629830249905}, + {-0.311678276895140604629830249905, 0.950187692885819279453585295414}, + {0.451494337898471098657893207928, 0.892273984180655843623242162721}, + {-0.892273984180655843623242162721, 0.451494337898471098657893207928}, + {0.758584848705459613427137810504, 0.651574268456416971773137447599}, + {-0.651574268456416971773137447599, 0.758584848705459613427137810504}, + {0.0756679069528052444804089304853, 0.997133074297198107593942495441}, + {-0.997133074297198107593942495441, 0.0756679069528052444804089304853}, + {0.997443402291984360097387707356, 0.0714608929708456797058602205652}, + {-0.0714608929708456797058602205652, 0.997443402291984360097387707356}, + {0.654768511601112601638874366472, 0.755829475619774759920233009325}, + {-0.755829475619774759920233009325, 0.654768511601112601638874366472}, + {0.894170644414028270574590351316, 0.447726321169753638251620486699}, + {-0.447726321169753638251620486699, 0.894170644414028270574590351316}, + {0.315683808388265596533983625704, 0.948864444017943342579712862062}, + 
{-0.948864444017943342579712862062, 0.315683808388265596533983625704}, + {0.964336478382053718938493602764, 0.264679346496281941991668418268}, + {-0.264679346496281941991668418268, 0.964336478382053718938493602764}, + {0.494732302461959871919816578156, 0.86904542395704953428037242702}, + {-0.86904542395704953428037242702, 0.494732302461959871919816578156}, + {0.789642334037846338468114026909, 0.613567424408485329045959133509}, + {-0.613567424408485329045959133509, 0.789642334037846338468114026909}, + {0.124503762595729663975596679393, 0.992219135624538450812792689248}, + {-0.992219135624538450812792689248, 0.124503762595729663975596679393}, + {0.985636047306535534318072677706, 0.168883339172189977928084658743}, + {-0.168883339172189977928084658743, 0.985636047306535534318072677706}, + {0.577531378474272716339044109191, 0.816368487190439195977376130031}, + {-0.816368487190439195977376130031, 0.577531378474272716339044109191}, + {0.845980114708143271506912697077, 0.53321444608937296205652955905}, + {-0.53321444608937296205652955905, 0.845980114708143271506912697077}, + {0.221158725202677014509689001898, 0.975237826515525818571461513784}, + {-0.975237826515525818571461513784, 0.221158725202677014509689001898}, + {0.933749822010810581751627523772, 0.357926347025166069482082775721}, + {-0.357926347025166069482082775721, 0.933749822010810581751627523772}, + {0.40716868392875155135968157083, 0.91335297822240024689932624824}, + {-0.91335297822240024689932624824, 0.40716868392875155135968157083}, + {0.72569986602835612199413617418, 0.688011413020471640500375087868}, + {-0.688011413020471640500375087868, 0.72569986602835612199413617418}, + {0.0266497606943056179751305734271, 0.999644832055333609766023528209}, + {-0.999644832055333609766023528209, 0.0266497606943056179751305734271}, + {0.998896729092468405042382073589, 0.0469608838116115923089211037222}, + {-0.0469608838116115923089211037222, 0.998896729092468405042382073589}, + {0.673120291452642072194123556983, 
0.739533010240050248107479546888}, + {-0.739533010240050248107479546888, 0.673120291452642072194123556983}, + {0.904889090897077474906495808682, 0.425647428249555592749686638854}, + {-0.425647428249555592749686638854, 0.904889090897077474906495808682}, + {0.338875029485178447252735622897, 0.940831395304928874701033691963}, + {-0.940831395304928874701033691963, 0.338875029485178447252735622897}, + {0.970541594591857070817297881149, 0.2409336281366619147981111837}, + {-0.2409336281366619147981111837, 0.970541594591857070817297881149}, + {0.515910740688195645908820097247, 0.85664234523081883576622885812}, + {-0.85664234523081883576622885812, 0.515910740688195645908820097247}, + {0.804462206906771837289227278234, 0.594003836400646689774873721035}, + {-0.594003836400646689774873721035, 0.804462206906771837289227278234}, + {0.148816540942351915877139845179, 0.988864822481795635589207904559}, + {-0.988864822481795635589207904559, 0.148816540942351915877139845179}, + {0.989483796777076762829494782636, 0.144643755190539041732833425158}, + {-0.144643755190539041732833425158, 0.989483796777076762829494782636}, + {0.5973921224237657146005631148, 0.801949282726799772547110478627}, + {-0.801949282726799772547110478627, 0.5973921224237657146005631148}, + {0.858811058887407496875709966844, 0.512292460546404870846970425191}, + {-0.512292460546404870846970425191, 0.858811058887407496875709966844}, + {0.245025650694180474609140674147, 0.969516596300389998930313595338}, + {-0.969516596300389998930313595338, 0.245025650694180474609140674147}, + {0.94225254629871402212160091949, 0.334903178536110179841500666953}, + {-0.334903178536110179841500666953, 0.94225254629871402212160091949}, + {0.429460856494299492158717157508, 0.903085473661924598154371324199}, + {-0.903085473661924598154371324199, 0.429460856494299492158717157508}, + {0.74236594410984846348355858936, 0.669994630594823004088311790838}, + {-0.669994630594823004088311790838, 0.74236594410984846348355858936}, + 
{0.0511742465498520801570059290952, 0.998689739854202618296596938308}, + {-0.998689739854202618296596938308, 0.0511742465498520801570059290952}, + {0.995389252882770692032465831289, 0.0959178567602490400600956377275}, + {-0.0959178567602490400600956377275, 0.995389252882770692032465831289}, + {0.636022323681566303932299888402, 0.771670657585670327627269671211}, + {-0.771670657585670327627269671211, 0.636022323681566303932299888402}, + {0.882913582969978016912193652388, 0.469535520495644453919936722741}, + {-0.469535520495644453919936722741, 0.882913582969978016912193652388}, + {0.292302431169357557205756847907, 0.956325932270208234342590003507}, + {-0.956325932270208234342590003507, 0.292302431169357557205756847907}, + {0.957550481936536468374754349497, 0.288265631914570774618766790809}, + {-0.288265631914570774618766790809, 0.957550481936536468374754349497}, + {0.473255855995953322867819679232, 0.880925022215589881291464280366}, + {-0.880925022215589881291464280366, 0.473255855995953322867819679232}, + {0.774346810153525133557650406146, 0.632761422342624002546074279962}, + {-0.632761422342624002546074279962, 0.774346810153525133557650406146}, + {0.100115987838015313426964780774, 0.994975773061444135336728322727}, + {-0.994975773061444135336728322727, 0.100115987838015313426964780774}, + {0.981194587536402318406203448831, 0.193021194145278379705743532213}, + {-0.193021194145278379705743532213, 0.981194587536402318406203448831}, + {0.557322751217676159996017304366, 0.830295941803379067280843628396}, + {-0.830295941803379067280843628396, 0.557322751217676159996017304366}, + {0.832639583741012767603706379305, 0.553815243188189088030526363582}, + {-0.553815243188189088030526363582, 0.832639583741012767603706379305}, + {0.197158581964768875316451612889, 0.980371609930459797510593489278}, + {-0.980371609930459797510593489278, 0.197158581964768875316451612889}, + {0.924684641745282420721707694611, 0.380733914066502143747072750557}, + {-0.380733914066502143747072750557, 
0.924684641745282420721707694611}, + {0.384631248173022577407920152837, 0.923070313101262418520320807147}, + {-0.923070313101262418520320807147, 0.384631248173022577407920152837}, + {0.708596653483234084625053128548, 0.705613763097320489947605892667}, + {-0.705613763097320489947605892667, 0.708596653483234084625053128548}, + {0.00210922201941564401231343772736, 0.999997775588762349840976639825}, + {-0.999997775588762349840976639825, 0.00210922201941564401231343772736}, + {0.999996893178149881542537968926, 0.00249271619883590809726991999185}, + {-0.00249271619883590809726991999185, 0.999996893178149881542537968926}, + {0.705341967803978953099885984557, 0.708867200859519819289289443986}, + {-0.708867200859519819289289443986, 0.705341967803978953099885984557}, + {0.92292274099128568387584437005, 0.384985212912304253229933692637}, + {-0.384985212912304253229933692637, 0.92292274099128568387584437005}, + {0.380379273959376595914960716982, 0.924830583373050796325287592481}, + {-0.924830583373050796325287592481, 0.380379273959376595914960716982}, + {0.980295928472165289946360644535, 0.197534535261294025287881481745}, + {-0.197534535261294025287881481745, 0.980295928472165289946360644535}, + {0.553495889190436574089915211516, 0.83285190799402497585646187872}, + {-0.83285190799402497585646187872, 0.553495889190436574089915211516}, + {0.830082150155146969439101667376, 0.557641124733289306192318690591}, + {-0.557641124733289306192318690591, 0.830082150155146969439101667376}, + {0.192644896549212130132389120263, 0.981268538084016705980161532352}, + {-0.981268538084016705980161532352, 0.192644896549212130132389120263}, + {0.994937305897080070948845786916, 0.100497548896777200155838727369}, + {-0.100497548896777200155838727369, 0.994937305897080070948845786916}, + {0.632464417537761947052388222801, 0.774589414172837553707040569861}, + {-0.774589414172837553707040569861, 0.632464417537761947052388222801}, + {0.880743466094136340238662796764, 0.473593651702054585506118655758}, + 
{-0.473593651702054585506118655758, 0.880743466094136340238662796764}, + {0.287898394715485173023239440226, 0.957660960006330608251801095321}, + {-0.957660960006330608251801095321, 0.287898394715485173023239440226}, + {0.956213765371798474212994278787, 0.292669156067883462490897272801}, + {-0.292669156067883462490897272801, 0.956213765371798474212994278787}, + {0.469196892858576575235929340124, 0.883093582658065368029554065288}, + {-0.883093582658065368029554065288, 0.469196892858576575235929340124}, + {0.771426689341102700403496328363, 0.636318208895695458515717746195}, + {-0.636318208895695458515717746195, 0.771426689341102700403496328363}, + {0.0955361227187574574282180606133, 0.995425963724006157562484986556}, + {-0.995425963724006157562484986556, 0.0955361227187574574282180606133}, + {0.998670041338990066570602266438, 0.0515572354959016113107317380582}, + {-0.0515572354959016113107317380582, 0.998670041338990066570602266438}, + {0.669709887560265837436190849985, 0.742622829237033377225429831014}, + {-0.742622829237033377225429831014, 0.669709887560265837436190849985}, + {0.90292071108246674260300324022, 0.42980715384731871253620738571}, + {-0.42980715384731871253620738571, 0.90292071108246674260300324022}, + {0.3345418045922629035615614157, 0.942380910768120472198461357038}, + {-0.942380910768120472198461357038, 0.3345418045922629035615614157}, + {0.96942255884981032298952641213, 0.245397437625346936940218256495}, + {-0.245397437625346936940218256495, 0.96942255884981032298952641213}, + {0.511963072967230203857980086468, 0.859007457428601517435140522139}, + {-0.859007457428601517435140522139, 0.511963072967230203857980086468}, + {0.801720126751992334668273088027, 0.59769962218556682564951643144}, + {-0.59769962218556682564951643144, 0.801720126751992334668273088027}, + {0.144264282280020444959589553946, 0.98953919420012392826890845754}, + {-0.98953919420012392826890845754, 0.144264282280020444959589553946}, + {0.988807679339048450906091147772, 
0.149195754899814819749437333485}, + {-0.149195754899814819749437333485, 0.988807679339048450906091147772}, + {0.593695285336069189519037081482, 0.804689945363879499673487316613}, + {-0.804689945363879499673487316613, 0.593695285336069189519037081482}, + {0.856444432951968592782066025393, 0.516239220967935508177504289051}, + {-0.516239220967935508177504289051, 0.856444432951968592782066025393}, + {0.240561412388916678883887811935, 0.970633920110692161031806790561}, + {-0.970633920110692161031806790561, 0.240561412388916678883887811935}, + {0.94070136917857194447378788027, 0.339235808878661948551069826863}, + {-0.339235808878661948551069826863, 0.94070136917857194447378788027}, + {0.425300376338232644091164047495, 0.905052258097043593743080691638}, + {-0.905052258097043593743080691638, 0.425300376338232644091164047495}, + {0.739274817466592515380341410491, 0.673403849305701851513106248603}, + {-0.673403849305701851513106248603, 0.739274817466592515380341410491}, + {0.0465778082698889428492350361921, 0.998914664912260441553826240124}, + {-0.998914664912260441553826240124, 0.0465778082698889428492350361921}, + {0.999634538492192303849037671171, 0.0270331177170084341332412236625}, + {-0.0270331177170084341332412236625, 0.999634538492192303849037671171}, + {0.687733060021803233041737257736, 0.725963661730424925089266707801}, + {-0.725963661730424925089266707801, 0.687733060021803233041737257736}, + {0.913196763828828195208586748777, 0.407518920459596922789557993383}, + {-0.407518920459596922789557993383, 0.913196763828828195208586748777}, + {0.357568232142172259813150958507, 0.933887016379776890850905601837}, + {-0.933887016379776890850905601837, 0.357568232142172259813150958507}, + {0.975152941495307623576138666976, 0.221532707953135205025674281387}, + {-0.221532707953135205025674281387, 0.975152941495307623576138666976}, + {0.532889977577059803515169278398, 0.846184537673621672837498408626}, + {-0.846184537673621672837498408626, 0.532889977577059803515169278398}, + 
{0.81614694665505216342182848166, 0.577844409392039848327726758725}, + {-0.577844409392039848327726758725, 0.81614694665505216342182848166}, + {0.168505340072635900749986603842, 0.985700740776329853432002892077}, + {-0.985700740776329853432002892077, 0.168505340072635900749986603842}, + {0.992171316068626518713813311479, 0.124884264703963118736673720832}, + {-0.124884264703963118736673720832, 0.992171316068626518713813311479}, + {0.613264555255239041109405206953, 0.789877576126575386972206160863}, + {-0.789877576126575386972206160863, 0.613264555255239041109405206953}, + {0.868855632595287863395583372039, 0.495065540820043614722578695364}, + {-0.495065540820043614722578695364, 0.868855632595287863395583372039}, + {0.264309508634617107425412996236, 0.964437910725893909891226485342}, + {-0.964437910725893909891226485342, 0.264309508634617107425412996236}, + {0.948743311022566482293427725381, 0.316047670122621859878364602991}, + {-0.316047670122621859878364602991, 0.948743311022566482293427725381}, + {0.447383378107519602551178650174, 0.894342279551349483845967824891}, + {-0.894342279551349483845967824891, 0.447383378107519602551178650174}, + {0.755578319467224535088689663098, 0.655058320419704909198799214209}, + {-0.655058320419704909198799214209, 0.755578319467224535088689663098}, + {0.0710783729713664047533683287838, 0.997470733854253666095246444456}, + {-0.997470733854253666095246444456, 0.0710783729713664047533683287838}, + {0.997103982695563328952914616821, 0.0760502971239812586290440776793}, + {-0.0760502971239812586290440776793, 0.997103982695563328952914616821}, + {0.6512833069045277367692392545, 0.758834668519765664917997582961}, + {-0.758834668519765664917997582961, 0.6512833069045277367692392545}, + {0.892100772662129060108782141469, 0.451836487477087489494920191646}, + {-0.451836487477087489494920191646, 0.892100772662129060108782141469}, + {0.311313861568590977135784214624, 0.950307150133709255257485892798}, + {-0.950307150133709255257485892798, 
0.311313861568590977135784214624}, + {0.96310823230390618654439549573, 0.269114349057134383258471643785}, + {-0.269114349057134383258471643785, 0.96310823230390618654439549573}, + {0.490727780945777458665446602026, 0.871312943211584034486349992221}, + {-0.871312943211584034486349992221, 0.490727780945777458665446602026}, + {0.786810380622823490881501129479, 0.617194803076117626261520854314}, + {-0.617194803076117626261520854314, 0.786810380622823490881501129479}, + {0.119936325078148470213434961806, 0.992781586214585565208778916713}, + {-0.992781586214585565208778916713, 0.119936325078148470213434961806}, + {0.984848421837337006934376404388, 0.173417375151703440083750251688}, + {-0.173417375151703440083750251688, 0.984848421837337006934376404388}, + {0.573768395576709555605532386835, 0.819017599467391499423740697239}, + {-0.819017599467391499423740697239, 0.573768395576709555605532386835}, + {0.843517343206759195872734835575, 0.537101937912544125097724645457}, + {-0.537101937912544125097724645457, 0.843517343206759195872734835575}, + {0.216668410943563732873329286122, 0.976245255916355803016415393358}, + {-0.976245255916355803016415393358, 0.216668410943563732873329286122}, + {0.932092784004874053138678391406, 0.362219604668277461723135957072}, + {-0.362219604668277461723135957072, 0.932092784004874053138678391406}, + {0.402961189525244900533351710692, 0.915217067004543860520016096416}, + {-0.915217067004543860520016096416, 0.402961189525244900533351710692}, + {0.722526003959184426328477002244, 0.691343744893068712364936345693}, + {-0.691343744893068712364936345693, 0.722526003959184426328477002244}, + {0.0220491868383661318575583720758, 0.999756887127949078752919831459}, + {-0.999756887127949078752919831459, 0.0220491868383661318575583720758}, + {0.999891005788962949907272559358, 0.0147640286621272456141218398784}, + {-0.0147640286621272456141218398784, 0.999891005788962949907272559358}, + {0.696589965856190374005052490247, 0.717469455425435831408265130449}, + 
{-0.717469455425435831408265130449, 0.696589965856190374005052490247}, + {0.918128885826588025942385229428, 0.39628190598465151683882368161}, + {-0.39628190598465151683882368161, 0.918128885826588025942385229428}, + {0.369001538187952782266165741021, 0.929428784154506804071615988505}, + {-0.929428784154506804071615988505, 0.369001538187952782266165741021}, + {0.977798061379446359353551088134, 0.209549400291664938977831411648}, + {-0.209549400291664938977831411648, 0.977798061379446359353551088134}, + {0.543233837892656001855584690929, 0.839581441772277115020983728755}, + {-0.839581441772277115020983728755, 0.543233837892656001855584690929}, + {0.823176532084024858981763372867, 0.567785520268101140395344827994}, + {-0.567785520268101140395344827994, 0.823176532084024858981763372867}, + {0.180588716309133340276460444329, 0.983558699591345897417227206461}, + {-0.983558699591345897417227206461, 0.180588716309133340276460444329}, + {0.993629129428871715745685833099, 0.11269939285738185541507760945}, + {-0.11269939285738185541507760945, 0.993629129428871715745685833099}, + {0.622911390478579463092501100618, 0.78229240032870239751616736612}, + {-0.78229240032870239751616736612, 0.622911390478579463092501100618}, + {0.874865425102218319253211120667, 0.48436606813516047687073751149}, + {-0.48436606813516047687073751149, 0.874865425102218319253211120667}, + {0.27612474336039283251409415243, 0.961121806070467377125510211044}, + {-0.961121806070467377125510211044, 0.27612474336039283251409415243}, + {0.952550263480144931982351863553, 0.304381332449784880456178370878}, + {-0.304381332449784880456178370878, 0.952550263480144931982351863553}, + {0.458324646486003239864714942087, 0.888784854969682847070089337649}, + {-0.888784854969682847070089337649, 0.458324646486003239864714942087}, + {0.763559999067796257854467967263, 0.645736887457722286143280143733}, + {-0.645736887457722286143280143733, 0.763559999067796257854467967263}, + {0.0833135211999826846840377925218, 
0.996523385167282449437209379539}, + {-0.996523385167282449437209379539, 0.0833135211999826846840377925218}, + {0.997962156732281946425189289585, 0.0638085709779828980092730716933}, + {-0.0638085709779828980092730716933, 0.997962156732281946425189289585}, + {0.660546335156593888982001772092, 0.750785281629303580253065319994}, + {-0.750785281629303580253065319994, 0.660546335156593888982001772092}, + {0.89757832786961022630833895164, 0.440855016234129426511145766199}, + {-0.440855016234129426511145766199, 0.89757832786961022630833895164}, + {0.322952150783425262492443152951, 0.946415293781942112971705682867}, + {-0.946415293781942112971705682867, 0.322952150783425262492443152951}, + {0.966338159063000134985088607209, 0.257275265695581123459589889535}, + {-0.257275265695581123459589889535, 0.966338159063000134985088607209}, + {0.501383180187855770348903661215, 0.865225350197688203301993326022}, + {-0.865225350197688203301993326022, 0.501383180187855770348903661215}, + {0.79432506490391663334094118909, 0.607492955733141548613218674291}, + {-0.607492955733141548613218674291, 0.79432506490391663334094118909}, + {0.132110251338040357582670480951, 0.99123502838196742015242080015}, + {-0.99123502838196742015242080015, 0.132110251338040357582670480951}, + {0.986902362521034470788094949967, 0.161318712028086425069872689164}, + {-0.161318712028086425069872689164, 0.986902362521034470788094949967}, + {0.583775797700308185511630654219, 0.811914908114987676768237179203}, + {-0.811914908114987676768237179203, 0.583775797700308185511630654219}, + {0.850044894896594183109073128435, 0.526710239752597009221801727108}, + {-0.526710239752597009221801727108, 0.850044894896594183109073128435}, + {0.228632127244934230114736806172, 0.973512891743841368707990113762}, + {-0.973512891743841368707990113762, 0.228632127244934230114736806172}, + {0.936467590879016875682339104969, 0.350754117913461060940250035856}, + {-0.350754117913461060940250035856, 0.936467590879016875682339104969}, + 
{0.414161968566268079428027704125, 0.910203199177696542498949838773}, + {-0.910203199177696542498949838773, 0.414161968566268079428027704125}, + {0.73095545031396436108650505048, 0.682425182460546064788786679856}, + {-0.682425182460546064788786679856, 0.73095545031396436108650505048}, + {0.0343160814919516513055164352863, 0.999411029832589781207730084134}, + {-0.999411029832589781207730084134, 0.0343160814919516513055164352863}, + {0.999227529910869605167533791246, 0.0392981356837970585704766790514}, + {-0.0392981356837970585704766790514, 0.999227529910869605167533791246}, + {0.678772584118257693575060329749, 0.73434854057826159934307952426}, + {-0.73434854057826159934307952426, 0.678772584118257693575060329749}, + {0.908127117757437596345937436126, 0.418694564084093556743937369902}, + {-0.418694564084093556743937369902, 0.908127117757437596345937436126}, + {0.346081077635870426600916971438, 0.938204608655486493873354447715}, + {-0.938204608655486493873354447715, 0.346081077635870426600916971438}, + {0.972360967165074141504987892404, 0.233482653603649087381555204956}, + {-0.233482653603649087381555204956, 0.972360967165074141504987892404}, + {0.52246586599096378478179758531, 0.852660201296103759105449171329}, + {-0.852660201296103759105449171329, 0.52246586599096378478179758531}, + {0.808994452497937666102245657385, 0.587816277273402909564481433335}, + {-0.587816277273402909564481433335, 0.808994452497937666102245657385}, + {0.156396587551734883581033841438, 0.98769433905544512519725230959}, + {-0.98769433905544512519725230959, 0.156396587551734883581033841438}, + {0.990564085357674373177871984808, 0.137050329439640405926326138797}, + {-0.137050329439640405926326138797, 0.990564085357674373177871984808}, + {0.603525364645641548833054912393, 0.797343799266881703147191728931}, + {-0.797343799266881703147191728931, 0.603525364645641548833054912393}, + {0.862714993625990689274374290108, 0.505690458455473446086614330852}, + {-0.505690458455473446086614330852, 
0.862714993625990689274374290108}, + {0.252454469869047792851546319071, 0.967608774579446495600620892219}, + {-0.967608774579446495600620892219, 0.252454469869047792851546319071}, + {0.94479348131210028061133243682, 0.327666412178853061476502261939}, + {-0.327666412178853061476502261939, 0.94479348131210028061133243682}, + {0.436374735437898397805156491813, 0.89976501947536513537073687985}, + {-0.89976501947536513537073687985, 0.436374735437898397805156491813}, + {0.747482852550976573091645605018, 0.6642811040081262330403433225}, + {-0.6642811040081262330403433225, 0.747482852550976573091645605018}, + {0.0588325206012274351530244587138, 0.998267867117692109246718246141}, + {-0.998267867117692109246718246141, 0.0588325206012274351530244587138}, + {0.996095648466687300093269641366, 0.0882805703749677400660189618975}, + {-0.0882805703749677400660189618975, 0.996095648466687300093269641366}, + {0.641922197782050174552637145098, 0.766769777700361920835803175578}, + {-0.766769777700361920835803175578, 0.641922197782050174552637145098}, + {0.88648887035968959580145565269, 0.46274991380701674392739164432}, + {-0.46274991380701674392739164432, 0.88648887035968959580145565269}, + {0.299628689631306843743629997334, 0.954055893724170656078342744877}, + {-0.954055893724170656078342744877, 0.299628689631306843743629997334}, + {0.95973326498761768466039256964, 0.280912904787600004041792089993}, + {-0.280912904787600004041792089993, 0.95973326498761768466039256964}, + {0.479998479904927277672754826199, 0.877269319701173166947683057515}, + {-0.877269319701173166947683057515, 0.479998479904927277672754826199}, + {0.779177205592524679822474809043, 0.626803703151971314255774814228}, + {-0.626803703151971314255774814228, 0.779177205592524679822474809043}, + {0.107744336848860269850725046581, 0.994178634792057591695879636973}, + {-0.994178634792057591695879636973, 0.107744336848860269850725046581}, + {0.982646166603868054245651819656, 0.185489922256501876818290952542}, + 
{-0.185489922256501876818290952542, 0.982646166603868054245651819656}, + {0.563674586043223069253826906788, 0.825996949781899081344249680114}, + {-0.825996949781899081344249680114, 0.563674586043223069253826906788}, + {0.836862760907737923687932379835, 0.547412750496257927146359634207}, + {-0.547412750496257927146359634207, 0.836862760907737923687932379835}, + {0.20467206517647421049055367348, 0.978830601144241474464990915294}, + {-0.978830601144241474464990915294, 0.20467206517647421049055367348}, + {0.927577607385966729225401650183, 0.373630542482979277618682090178}, + {-0.373630542482979277618682090178, 0.927577607385966729225401650183}, + {0.39169972601128699718486814163, 0.920093106507533176063873270323}, + {-0.920093106507533176063873270323, 0.39169972601128699718486814163}, + {0.713987747845867826157473245985, 0.700158193500572734180309453222}, + {-0.700158193500572734180309453222, 0.713987747845867826157473245985}, + {0.00977897165834604349676784096346, 0.99995218471350177935619285563}, + {-0.99995218471350177935619285563, 0.00977897165834604349676784096346}, + {0.999962773500176926866345183953, 0.00862853486021188630095135607689}, + {-0.00862853486021188630095135607689, 0.999962773500176926866345183953}, + {0.700979162564722479977774582949, 0.713181753587443179043248164817}, + {-0.713181753587443179043248164817, 0.700979162564722479977774582949}, + {0.920543142373445477666393799154, 0.390640913153272428814943850739}, + {-0.390640913153272428814943850739, 0.920543142373445477666393799154}, + {0.374697459647452602027328794065, 0.927147137046620883893410791643}, + {-0.927147137046620883893410791643, 0.374697459647452602027328794065}, + {0.979065425555756929654194209434, 0.203545799468632215845076416372}, + {-0.203545799468632215845076416372, 0.979065425555756929654194209434}, + {0.548375186549461601792643250519, 0.836232416722075599935237733007}, + {-0.836232416722075599935237733007, 0.548375186549461601792643250519}, + {0.826644902476264320512200356461, 
0.562723915619380399277815740788}, + {-0.562723915619380399277815740788, 0.826644902476264320512200356461}, + {0.18662031950394827872763414689, 0.982432112844569105369885164691}, + {-0.982432112844569105369885164691, 0.18662031950394827872763414689}, + {0.994301935115913582130531267467, 0.10660047760094494562199685106}, + {-0.10660047760094494562199685106, 0.994301935115913582130531267467}, + {0.627699720278016237706708579935, 0.778455561456721900626121168898}, + {-0.778455561456721900626121168898, 0.627699720278016237706708579935}, + {0.877820970329870498005675472086, 0.478988876748849490283532759349}, + {-0.478988876748849490283532759349, 0.877820970329870498005675472086}, + {0.282016877925967690554642786083, 0.959409443650045545837201643735}, + {-0.959409443650045545837201643735, 0.282016877925967690554642786083}, + {0.954399980735894493122373205551, 0.298530864017984121083770787664}, + {-0.298530864017984121083770787664, 0.954399980735894493122373205551}, + {0.4637695000020656821426712213, 0.885955896683257027035551800509}, + {-0.885955896683257027035551800509, 0.4637695000020656821426712213}, + {0.767507792321741266761137012509, 0.641039615566313392136521542852}, + {-0.641039615566313392136521542852, 0.767507792321741266761137012509}, + {0.0894265053880619747994629165078, 0.995993423740377359365538723068}, + {-0.995993423740377359365538723068, 0.0894265053880619747994629165078}, + {0.998334892407855001295047259191, 0.0576839891217359110320295201291}, + {-0.0576839891217359110320295201291, 0.998334892407855001295047259191}, + {0.665140632442866142071125068469, 0.746718112190607130607133967715}, + {-0.746718112190607130607133967715, 0.665140632442866142071125068469}, + {0.900266466737858483959655586659, 0.435339280179646070934040835709}, + {-0.435339280179646070934040835709, 0.900266466737858483959655586659}, + {0.328753166373295047097968790695, 0.94441588063709125400180255383}, + {-0.94441588063709125400180255383, 0.328753166373295047097968790695}, + 
{0.967898579373632661493331852398, 0.251341083085323824253265456719}, + {-0.251341083085323824253265456719, 0.967898579373632661493331852398}, + {0.50668266473551748507730962956, 0.862132633216325494629472814268}, + {-0.862132633216325494629472814268, 0.50668266473551748507730962956}, + {0.798037618660599412656608819816, 0.602607632877745547084202826227}, + {-0.602607632877745547084202826227, 0.798037618660599412656608819816}, + {0.138189868194246556765136801914, 0.99040575539950126415789100065}, + {-0.99040575539950126415789100065, 0.138189868194246556765136801914}, + {0.987873617371714196622178860707, 0.155260156192514237050161796105}, + {-0.155260156192514237050161796105, 0.987873617371714196622178860707}, + {0.588746624507014537641680362867, 0.808317643090633253244448042096}, + {-0.808317643090633253244448042096, 0.588746624507014537641680362867}, + {0.853260726316321882478632687707, 0.521484547159494438162141705106}, + {-0.521484547159494438162141705106, 0.853260726316321882478632687707}, + {0.234601186117955551324598673091, 0.972091705278904427700581436511}, + {-0.972091705278904427700581436511, 0.234601186117955551324598673091}, + {0.938602148948998404875965206884, 0.345001457951009615765514126906}, + {-0.345001457951009615765514126906, 0.938602148948998404875965206884}, + {0.419739073921698235825061829019, 0.907644814794507093225206517673}, + {-0.907644814794507093225206517673, 0.419739073921698235825061829019}, + {0.73512897248543718209390362972, 0.677927277672543127096105308738}, + {-0.677927277672543127096105308738, 0.73512897248543718209390362972}, + {0.0404477062975607815076273254817, 0.999181656684742347529493144975}, + {-0.999181656684742347529493144975, 0.0404477062975607815076273254817}, + {0.999449848562484532976668560877, 0.0331662510457058634649740724853}, + {-0.0331662510457058634649740724853, 0.999449848562484532976668560877}, + {0.683265684353446811627463830519, 0.730169846395354871226857085276}, + {-0.730169846395354871226857085276, 
0.683265684353446811627463830519}, + {0.910679084069531574741063195688, 0.413114519035919447453863995179}, + {-0.413114519035919447453863995179, 0.910679084069531574741063195688}, + {0.351831278013402026783040810187, 0.936063433647237541634922308731}, + {-0.936063433647237541634922308731, 0.351831278013402026783040810187}, + {0.97377528537474811187735213025, 0.227511963626812280248046249653}, + {-0.227511963626812280248046249653, 0.97377528537474811187735213025}, + {0.527687855358763724389348226396, 0.849438359921935948193549847929}, + {-0.849438359921935948193549847929, 0.527687855358763724389348226396}, + {0.812585996278237132628419203684, 0.582841315155767647482321081043}, + {-0.582841315155767647482321081043, 0.812585996278237132628419203684}, + {0.162454021963239159109804177206, 0.986716114567897095710691246495}, + {-0.986716114567897095710691246495, 0.162454021963239159109804177206}, + {0.991386363281272275926880865882, 0.130969762540569384379196549162}, + {-0.130969762540569384379196549162, 0.991386363281272275926880865882}, + {0.608406413029229264921582398529, 0.79362562747488629710801433248}, + {-0.79362562747488629710801433248, 0.608406413029229264921582398529}, + {0.865801611581300756270707097428, 0.500387419289516577336485170235}, + {-0.500387419289516577336485170235, 0.865801611581300756270707097428}, + {0.25838685331132560385469787434, 0.966041528111432401537683745119}, + {-0.966041528111432401537683745119, 0.25838685331132560385469787434}, + {0.946786219150345997874751446943, 0.321863100132638579875532514052}, + {-0.321863100132638579875532514052, 0.946786219150345997874751446943}, + {0.441887375177668850767531694146, 0.897070536612695867972888663644}, + {-0.897070536612695867972888663644, 0.441887375177668850767531694146}, + {0.751544733626323679942515809671, 0.659682130543596145777485162398}, + {-0.659682130543596145777485162398, 0.751544733626323679942515809671}, + {0.0649566695772448854384606420354, 0.997888085447177108910921106144}, + 
{-0.997888085447177108910921106144, 0.0649566695772448854384606420354}, + {0.996618576644185072410664361087, 0.0821669805196622998755273670213}, + {-0.0821669805196622998755273670213, 0.996618576644185072410664361087}, + {0.646614924686512049234465848713, 0.762816582916664431834874449123}, + {-0.762816582916664431834874449123, 0.646614924686512049234465848713}, + {0.889311562549753853446077300759, 0.457301809219376631521214449094}, + {-0.457301809219376631521214449094, 0.889311562549753853446077300759}, + {0.305477026118637418061751986897, 0.952199446814433581032233178121}, + {-0.952199446814433581032233178121, 0.305477026118637418061751986897}, + {0.961438847460361678542994923191, 0.275018804073633216233929488226}, + {-0.275018804073633216233929488226, 0.961438847460361678542994923191}, + {0.485372267421119774066795571343, 0.874307590049680949384480754816}, + {-0.874307590049680949384480754816, 0.485372267421119774066795571343}, + {0.783008533022029107328876307292, 0.622010962294628599877910346549}, + {-0.622010962294628599877910346549, 0.783008533022029107328876307292}, + {0.113842474015905706363760430122, 0.993498812837709355250126463943}, + {-0.993498812837709355250126463943, 0.113842474015905706363760430122}, + {0.983765813334025240877167561848, 0.179457026931919894074951571383}, + {-0.179457026931919894074951571383, 0.983765813334025240877167561848}, + {0.568732197032851050089163891244, 0.822522758383126051384692800639}, + {-0.822522758383126051384692800639, 0.568732197032851050089163891244}, + {0.840205868695283575320331692637, 0.542267552237826522265606854489}, + {-0.542267552237826522265606854489, 0.840205868695283575320331692637}, + {0.210674203942371407460854015881, 0.977556330751966462422331005655}, + {-0.977556330751966462422331005655, 0.210674203942371407460854015881}, + {0.929852699909718749538001247856, 0.367931999791546449074530755752}, + {-0.367931999791546449074530755752, 0.929852699909718749538001247856}, + {0.397337937542652064326631489166, 
0.917672361678911863158702999499}, + {-0.917672361678911863158702999499, 0.397337937542652064326631489166}, + {0.718270397139768257410707974486, 0.695764066758753796371195221582}, + {-0.695764066758753796371195221582, 0.718270397139768257410707974486}, + {0.0159143788320401796676506478434, 0.999873358254129263045228981355}, + {-0.999873358254129263045228981355, 0.0159143788320401796676506478434}, + {0.999781592746521674541781976586, 0.0208989666067081400480098807293}, + {-0.0208989666067081400480098807293, 0.999781592746521674541781976586}, + {0.692174542929158143067525088554, 0.72173014494394716056291372297}, + {-0.72173014494394716056291372297, 0.692174542929158143067525088554}, + {0.915680062246107651091620027728, 0.401907979026249750909016711375}, + {-0.401907979026249750909016711375, 0.915680062246107651091620027728}, + {0.363291724029100704385086828552, 0.931675438793984622698474140634}, + {-0.931675438793984622698474140634, 0.363291724029100704385086828552}, + {0.976493883658778649703435803531, 0.215545111698214497675252232511}, + {-0.215545111698214497675252232511, 0.976493883658778649703435803531}, + {0.538072036788890595282452977699, 0.842898857055729311404945747199}, + {-0.842898857055729311404945747199, 0.538072036788890595282452977699}, + {0.819677169560613871901466609415, 0.572825748112897548125488356163}, + {-0.572825748112897548125488356163, 0.819677169560613871901466609415}, + {0.174550314051218513622387717987, 0.984648255908992631191267719259}, + {-0.984648255908992631191267719259, 0.174550314051218513622387717987}, + {0.992918914166708299617880584265, 0.118794065045375624167789396779}, + {-0.118794065045375624167789396779, 0.992918914166708299617880584265}, + {0.618099608417362000878370054124, 0.786099786333963934659152528184}, + {-0.786099786333963934659152528184, 0.618099608417362000878370054124}, + {0.87187694168578688813653343459, 0.489725023412770910091751375148}, + {-0.489725023412770910091751375148, 0.87187694168578688813653343459}, + 
{0.270222212854296872652781758006, 0.962797982798119011960125135374}, + {-0.962797982798119011960125135374, 0.270222212854296872652781758006}, + {0.950664683245358910212985392718, 0.31022034109645585386871857736}, + {-0.31022034109645585386871857736, 0.950664683245358910212985392718}, + {0.452862537306046752938470945082, 0.891580351008662286282913100877}, + {-0.891580351008662286282913100877, 0.452862537306046752938470945082}, + {0.759583458211452011710207443684, 0.650409847720290423822575576196}, + {-0.650409847720290423822575576196, 0.759583458211452011710207443684}, + {0.0771974003049192142311696329671, 0.997015828051973307388777811866}, + {-0.997015828051973307388777811866, 0.0771974003049192142311696329671}, + {0.997551848345558433983626400732, 0.0699307504775973087784990411819}, + {-0.0699307504775973087784990411819, 0.997551848345558433983626400732}, + {0.655927168674145355531379664171, 0.754824184426492350574733336543}, + {-0.754824184426492350574733336543, 0.655927168674145355531379664171}, + {0.894856395684630934184156103584, 0.446354154346423837029789183362}, + {-0.446354154346423837029789183362, 0.894856395684630934184156103584}, + {0.317138976227611724123534031605, 0.948379074925898124526213450736}, + {-0.948379074925898124526213450736, 0.317138976227611724123534031605}, + {0.964741356666855343071631523344, 0.26319976203749756438199369768}, + {-0.26319976203749756438199369768, 0.964741356666855343071631523344}, + {0.496064818846842947586139871419, 0.868285491933643238660067709134}, + {-0.868285491933643238660067709134, 0.496064818846842947586139871419}, + {0.79058260525749446401277964469, 0.612355406822110648334955840255}, + {-0.612355406822110648334955840255, 0.79058260525749446401277964469}, + {0.126025660605540323899731447455, 0.99202698192586336034537453088}, + {-0.99202698192586336034537453088, 0.126025660605540323899731447455}, + {0.985893951354205211323744606489, 0.167371194305328430118606775068}, + {-0.167371194305328430118606775068, 
0.985893951354205211323744606489}, + {0.578782992064769685747194216674, 0.815481605001947773558867993415}, + {-0.815481605001947773558867993415, 0.578782992064769685747194216674}, + {0.846797059767020909859525090724, 0.531916102002870649911869804782}, + {-0.531916102002870649911869804782, 0.846797059767020909859525090724}, + {0.222654460501545525819011572821, 0.97489742599863582483976642834}, + {-0.97489742599863582483976642834, 0.222654460501545525819011572821}, + {0.934297775333532531938374177116, 0.356493572181620088645814803385}, + {-0.356493572181620088645814803385, 0.934297775333532531938374177116}, + {0.408569270246806781621273785277, 0.912727314924885901881168592809}, + {-0.912727314924885901881168592809, 0.408569270246806781621273785277}, + {0.726754408082925018597109101393, 0.686897394326137611386684511672}, + {-0.686897394326137611386684511672, 0.726754408082925018597109101393}, + {0.0281831647052698738842302361718, 0.999602775720033531747787947097}, + {-0.999602775720033531747787947097, 0.0281831647052698738842302361718}, + {0.998967590907519298326633361285, 0.0454285407692911619159659153411}, + {-0.0454285407692911619159659153411, 0.998967590907519298326633361285}, + {0.674253928478920516731420775614, 0.738499586953671127709242227866}, + {-0.738499586953671127709242227866, 0.674253928478920516731420775614}, + {0.905540960972635589065760086669, 0.424258845518576954702893999638}, + {-0.424258845518576954702893999638, 0.905540960972635589065760086669}, + {0.340317847501371673057946054541, 0.940310460790495072913586227514}, + {-0.940310460790495072913586227514, 0.340317847501371673057946054541}, + {0.970910040114567163271885874565, 0.2394445530905426333667662675}, + {-0.2394445530905426333667662675, 0.970910040114567163271885874565}, + {0.517224206078608306214050571725, 0.855849940495618244362674431613}, + {-0.855849940495618244362674431613, 0.517224206078608306214050571725}, + {0.805372450533747064049805430841, 0.592769108440434067119895189535}, + 
{-0.592769108440434067119895189535, 0.805372450533747064049805430841}, + {0.150333264897104995627685752879, 0.988635377409374793700180816813}, + {-0.988635377409374793700180816813, 0.150333264897104995627685752879}, + {0.9897045132561318458996879599, 0.143125736471269193739175307201}, + {-0.143125736471269193739175307201, 0.9897045132561318458996879599}, + {0.598621593873188917456218405277, 0.801031951515495332927230265341}, + {-0.801031951515495332927230265341, 0.598621593873188917456218405277}, + {0.859595894938779081506652346434, 0.510974458661486830024500704894}, + {-0.510974458661486830024500704894, 0.859595894938779081506652346434}, + {0.24651258165866721250303328361, 0.969139591123992283350219167914}, + {-0.969139591123992283350219167914, 0.24651258165866721250303328361}, + {0.942765172533282513889218989789, 0.333457387769846846925503314196}, + {-0.333457387769846846925503314196, 0.942765172533282513889218989789}, + {0.43084566643597871360427120635, 0.902425626693600380079374190245}, + {-0.902425626693600380079374190245, 0.43084566643597871360427120635}, + {0.743392829168709967646577752021, 0.668855067664543612160343855066}, + {-0.668855067664543612160343855066, 0.743392829168709967646577752021}, + {0.0527061566140616319375844511796, 0.998610064567233335353080292407}, + {-0.998610064567233335353080292407, 0.0527061566140616319375844511796}, + {0.995535217851020393098337990523, 0.0943908365166949425706377496681}, + {-0.0943908365166949425706377496681, 0.995535217851020393098337990523}, + {0.637205302869657597142349914066, 0.770694104035309135447562312038}, + {-0.770694104035309135447562312038, 0.637205302869657597142349914066}, + {0.883632802364701874786589996802, 0.468180596121949288423991220043}, + {-0.468180596121949288423991220043, 0.883632802364701874786589996802}, + {0.29376907229316245873107504849, 0.955876420968743589767768753518}, + {-0.955876420968743589767768753518, 0.29376907229316245873107504849}, + {0.957991549099890371543608580396, 
0.286796429289474075208232761725}, + {-0.286796429289474075208232761725, 0.957991549099890371543608580396}, + {0.474606620717262561726101921522, 0.880198020658613189404206877953}, + {-0.880198020658613189404206877953, 0.474606620717262561726101921522}, + {0.775316542581622414331832260359, 0.631572845204161126275721471757}, + {-0.631572845204161126275721471757, 0.775316542581622414331832260359}, + {0.101642143168429829880317072366, 0.994821026482717862826632426732}, + {-0.994821026482717862826632426732, 0.101642143168429829880317072366}, + {0.981489523800932128771989937377, 0.191515833990350214932618655439}, + {-0.191515833990350214932618655439, 0.981489523800932128771989937377}, + {0.558595753024020758026324529055, 0.829440042862368165366149241891}, + {-0.829440042862368165366149241891, 0.558595753024020758026324529055}, + {0.83348814571024076869321106642, 0.552537338974032121718948928901}, + {-0.552537338974032121718948928901, 0.83348814571024076869321106642}, + {0.19866222062300423201186561073, 0.980068019117620647939759237488}, + {-0.980068019117620647939759237488, 0.19866222062300423201186561073}, + {0.925267592089565549251517495577, 0.37931501819566443289843959974}, + {-0.37931501819566443289843959974, 0.925267592089565549251517495577}, + {0.386046767206627172708266471091, 0.922479210350733103140896673722}, + {-0.922479210350733103140896673722, 0.386046767206627172708266471091}, + {0.709678217316808468417832500563, 0.704525959682137492556819324818}, + {-0.704525959682137492556819324818, 0.709678217316808468417832500563}, + {0.00364319631189606802160185772266, 0.999993363538295154668844588741}, + {-0.999993363538295154668844588741, 0.00364319631189606802160185772266}, + {0.99998453945682697074914813129, 0.00556065169900967371657829474429}, + {-0.00556065169900967371657829474429, 0.99998453945682697074914813129}, + {0.703163874407442768266207622219, 0.711027823455815277675640118105}, + {-0.711027823455815277675640118105, 0.703163874407442768266207622219}, + 
{0.921737279553523913833146252728, 0.387814888163501236828523133227}, + {-0.387814888163501236828523133227, 0.921737279553523913833146252728}, + {0.377540143579222942094020254444, 0.925993218110251481434147535765}, + {-0.925993218110251481434147535765, 0.377540143579222942094020254444}, + {0.979685287599479925901846399938, 0.200541111149619977194191733361}, + {-0.200541111149619977194191733361, 0.979685287599479925901846399938}, + {0.550938130689703875475515815197, 0.834546089890866871208174870844}, + {-0.834546089890866871208174870844, 0.550938130689703875475515815197}, + {0.828367424770547478019011577999, 0.560185156514353965739871910046}, + {-0.560185156514353965739871910046, 0.828367424770547478019011577999}, + {0.189633500477944194617663242752, 0.981854946260638627997252569912}, + {-0.981854946260638627997252569912, 0.189633500477944194617663242752}, + {0.994624301397859400530876428093, 0.103549500572529068476157476653}, + {-0.103549500572529068476157476653, 0.994624301397859400530876428093}, + {0.630085034208043293091350278701, 0.776526142294674426302947267686}, + {-0.776526142294674426302947267686, 0.630085034208043293091350278701}, + {0.879286356301033245053133668989, 0.476293505753387746981530881385}, + {-0.476293505753387746981530881385, 0.879286356301033245053133668989}, + {0.284958977391936985057441233948, 0.958539712898605733570889242401}, + {-0.958539712898605733570889242401, 0.284958977391936985057441233948}, + {0.955311368931062721010505356389, 0.29560140119941741287945546901}, + {-0.29560140119941741287945546901, 0.955311368931062721010505356389}, + {0.466485391799404847112953120813, 0.884528902432111463483010993514}, + {-0.884528902432111463483010993514, 0.466485391799404847112953120813}, + {0.769470862107824671305422725709, 0.638681917989730729701136624499}, + {-0.638681917989730729701136624499, 0.769470862107824671305422725709}, + {0.0924817492901326004828277405068, 0.995714379753670608685922616132}, + {-0.995714379753670608685922616132, 
0.0924817492901326004828277405068}, + {0.998507166038285487452696997934, 0.0546208693650311050138412838351}, + {-0.0546208693650311050138412838351, 0.998507166038285487452696997934}, + {0.667428401046715524813635056489, 0.744673975291351708527542996308}, + {-0.744673975291351708527542996308, 0.667428401046715524813635056489}, + {0.901597832001245658162247309519, 0.432575252794994702387754159645}, + {-0.432575252794994702387754159645, 0.901597832001245658162247309519}, + {0.331649046286344673095669577378, 0.943402835535996242555256685591}, + {-0.943402835535996242555256685591, 0.331649046286344673095669577378}, + {0.968665127834270056617071986693, 0.248370429233871009566669840751}, + {-0.248370429233871009566669840751, 0.968665127834270056617071986693}, + {0.50932526583306247935922783654, 0.860574095348029977081694141816}, + {-0.860574095348029977081694141816, 0.50932526583306247935922783654}, + {0.799882637106302807339375249285, 0.600156451982203242856428460072}, + {-0.600156451982203242856428460072, 0.799882637106302807339375249285}, + {0.141227739881770508745262304728, 0.989977133820719612522509578412}, + {-0.989977133820719612522509578412, 0.141227739881770508745262304728}, + {0.988345299696566148561771569803, 0.152228671963282741197431846558}, + {-0.152228671963282741197431846558, 0.988345299696566148561771569803}, + {0.591223737333032905993945860246, 0.806507589805552260742160797236}, + {-0.806507589805552260742160797236, 0.591223737333032905993945860246}, + {0.854856602752102845421688925853, 0.518864325938036996532787270553}, + {-0.518864325938036996532787270553, 0.854856602752102845421688925853}, + {0.237582417361533571042997436962, 0.97136738413467949015966951265}, + {-0.97136738413467949015966951265, 0.237582417361533571042997436962}, + {0.939656181264707068834240999422, 0.342120243497849529923371392215}, + {-0.342120243497849529923371392215, 0.939656181264707068834240999422}, + {0.422521713597607817547441300121, 0.906352801914652395076643642824}, + 
{-0.906352801914652395076643642824, 0.422521713597607817547441300121}, + {0.73720536440484119022897857576, 0.675668743314891906415198263858}, + {-0.675668743314891906415198263858, 0.73720536440484119022897857576}, + {0.0435129620640102368622770256934, 0.999052862531515928345982047176}, + {-0.999052862531515928345982047176, 0.0435129620640102368622770256934}, + {0.999546897585375959671694090503, 0.0300998260368702010014718695174}, + {-0.0300998260368702010014718695174, 0.999546897585375959671694090503}, + {0.685502598293388554218097397097, 0.728070180499801211659871569282}, + {-0.728070180499801211659871569282, 0.685502598293388554218097397097}, + {0.911942215722902682628614456917, 0.410318650785463256980278856645}, + {-0.410318650785463256980278856645, 0.911942215722902682628614456917}, + {0.354701424370233830973830890798, 0.934979625205665798581833314529}, + {-0.934979625205665798581833314529, 0.354701424370233830973830890798}, + {0.974468699470289578812298714183, 0.224523392439813174181750810021}, + {-0.224523392439813174181750810021, 0.974468699470289578812298714183}, + {0.530291412120277305675131174212, 0.847815438778677932241123471613}, + {-0.847815438778677932241123471613, 0.530291412120277305675131174212}, + {0.814370304048370963201364247652, 0.580345593490778299816668095445}, + {-0.580345593490778299816668095445, 0.814370304048370963201364247652}, + {0.165480459800492779587344216452, 0.986213068978614493254042372428}, + {-0.986213068978614493254042372428, 0.165480459800492779587344216452}, + {0.991783507196993485699465509242, 0.127927615673986083688618009546}, + {-0.127927615673986083688618009546, 0.991783507196993485699465509242}, + {0.610838358863869168580151836068, 0.79175532795207315128749314681}, + {-0.79175532795207315128749314681, 0.610838358863869168580151836068}, + {0.867332703921159797744167008204, 0.497728822461397990561948745381}, + {-0.497728822461397990561948745381, 0.867332703921159797744167008204}, + {0.261349410933066350626319263029, 
0.965244262041965783716079840815}, + {-0.965244262041965783716079840815, 0.261349410933066350626319263029}, + {0.947769225468909182197307927709, 0.318956886199473654830427449269}, + {-0.318956886199473654830427449269, 0.947769225468909182197307927709}, + {0.444637469191193790418736853098, 0.895710623466781319379492742883}, + {-0.895710623466781319379492742883, 0.444637469191193790418736853098}, + {0.753565072967504190337706404534, 0.657373319205670214593340006104}, + {-0.657373319205670214593340006104, 0.753565072967504190337706404534}, + {0.0680178413792193875542224645869, 0.997684104942096028345588365482}, + {-0.997684104942096028345588365482, 0.0680178413792193875542224645869}, + {0.996865971110961313073062228796, 0.0791090111238923754388707720864}, + {-0.0791090111238923754388707720864, 0.996865971110961313073062228796}, + {0.648952169888007412978936372383, 0.760829206325340012817548540625}, + {-0.760829206325340012817548540625, 0.648952169888007412978936372383}, + {0.890710359458505629426383620739, 0.45457128764727294800707113609}, + {-0.45457128764727294800707113609, 0.890710359458505629426383620739}, + {0.308396895218129185600730579608, 0.951257775274303996049241050059}, + {-0.951257775274303996049241050059, 0.308396895218129185600730579608}, + {0.962278068545965092894789449929, 0.272067856968526922134543610809}, + {-0.272067856968526922134543610809, 0.962278068545965092894789449929}, + {0.488052321050608250985192171356, 0.872814374261282388545168942073}, + {-0.872814374261282388545168942073, 0.488052321050608250985192171356}, + {0.784913150773180023911379521451, 0.619605798668249385663386874512}, + {-0.619605798668249385663386874512, 0.784913150773180023911379521451}, + {0.116889949653388794392760985374, 0.99314487345504043336319455193}, + {-0.99314487345504043336319455193, 0.116889949653388794392760985374}, + {0.984311749944212777130303493323, 0.176438031392785410478296626025}, + {-0.176438031392785410478296626025, 0.984311749944212777130303493323}, + 
{0.571252984730106661004356283229, 0.820774041644193652622618628811}, + {-0.820774041644193652622618628811, 0.571252984730106661004356283229}, + {0.84186556793069533544127125424, 0.53968728494631768022316009592}, + {-0.53968728494631768022316009592, 0.84186556793069533544127125424}, + {0.213672313025560972610250587422, 0.976905390836956488698206158006}, + {-0.976905390836956488698206158006, 0.213672313025560972610250587422}, + {0.930977123312918930508885750896, 0.365077520354242179845272175953}, + {-0.365077520354242179845272175953, 0.930977123312918930508885750896}, + {0.400151446722855130833096382048, 0.916449027325364151685960223404}, + {-0.916449027325364151685960223404, 0.400151446722855130833096382048}, + {0.72040159089654476343156375151, 0.693557169838022291585843959183}, + {-0.693557169838022291585843959183, 0.72040159089654476343156375151}, + {0.0189818721675081777955362838384, 0.999819828033539415024222307693}, + {-0.999819828033539415024222307693, 0.0189818721675081777955362838384}, + {0.99984100470990400211235282768, 0.0178315815532360394390920532715}, + {-0.0178315815532360394390920532715, 0.99984100470990400211235282768}, + {0.694385522303169744340323177312, 0.719603186774318115404014406522}, + {-0.719603186774318115404014406522, 0.694385522303169744340323177312}, + {0.916908789183710992709563925018, 0.39909682073108654343229773076}, + {-0.39909682073108654343229773076, 0.916908789183710992709563925018}, + {0.366148354272405329723483191628, 0.93055649085029179534700460863}, + {-0.93055649085029179534700460863, 0.366148354272405329723483191628}, + {0.977150571175773197118985535781, 0.212548256287508063877567110467}, + {-0.212548256287508063877567110467, 0.977150571175773197118985535781}, + {0.540655481768424150956775520172, 0.841244108468970575032130909676}, + {-0.841244108468970575032130909676, 0.540655481768424150956775520172}, + {0.821430716631691870688314338622, 0.570308318170044903361315391521}, + {-0.570308318170044903361315391521, 
0.821430716631691870688314338622}, + {0.177570350860060705011633785944, 0.984108109150328536074425755942}, + {-0.984108109150328536074425755942, 0.177570350860060705011633785944}, + {0.993278696356479029461183927197, 0.115747273680089718395080922164}, + {-0.115747273680089718395080922164, 0.993278696356479029461183927197}, + {0.620508419678753364578938089835, 0.784199783924846571814271101175}, + {-0.784199783924846571814271101175, 0.620508419678753364578938089835}, + {0.873375293664445995212020079634, 0.487047837913836434431402722112}, + {-0.487047837913836434431402722112, 0.873375293664445995212020079634}, + {0.273174763719801927397412555365, 0.961964421622042320514367474971}, + {-0.961964421622042320514367474971, 0.273174763719801927397412555365}, + {0.951611951829806845815085125651, 0.307302282996181841401295287142}, + {-0.307302282996181841401295287142, 0.951611951829806845815085125651}, + {0.455595736016314978300556504109, 0.890186792377730240488631352491}, + {-0.890186792377730240488631352491, 0.455595736016314978300556504109}, + {0.761575312758068001528499735286, 0.648076417560036532883316340303}, + {-0.648076417560036532883316340303, 0.761575312758068001528499735286}, + {0.0802558384517093187371372664529, 0.996774297619282045879174347647}, + {-0.996774297619282045879174347647, 0.0802558384517093187371372664529}, + {0.997761698195469559635739642545, 0.0668699754306281146654100666638}, + {-0.0668699754306281146654100666638, 0.997761698195469559635739642545}, + {0.658239849717446978694113113306, 0.752808275886996947079410347214}, + {-0.752808275886996947079410347214, 0.658239849717446978694113113306}, + {0.896221579566536030725387718121, 0.443606672987753025694246389321}, + {-0.443606672987753025694246389321, 0.896221579566536030725387718121}, + {0.32004706970797314191656823823, 0.947401643006459903695315460936}, + {-0.947401643006459903695315460936, 0.32004706970797314191656823823}, + {0.965544301900275181438360050379, 0.260238738599598840028903623534}, + 
{-0.260238738599598840028903623534, 0.965544301900275181438360050379}, + {0.49872634661850595572119004828, 0.8667595002009252924679572061}, + {-0.8667595002009252924679572061, 0.49872634661850595572119004828}, + {0.792457564536907077545890842885, 0.609927051710476120227610863367}, + {-0.609927051710476120227610863367, 0.792457564536907077545890842885}, + {0.129068563393027413432889716205, 0.991635671980218735832579568523}, + {-0.991635671980218735832579568523, 0.129068563393027413432889716205}, + {0.986402799137027219700257774093, 0.164345726608992187545155161388}, + {-0.164345726608992187545155161388, 0.986402799137027219700257774093}, + {0.581282130506935112990163361246, 0.813702085995432700116225532838}, + {-0.813702085995432700116225532838, 0.581282130506935112990163361246}, + {0.848424970181277604552860793774, 0.529315661938033255040636504418}, + {-0.529315661938033255040636504418, 0.848424970181277604552860793774}, + {0.22564435579854633107110828405, 0.974209743687805218570474607986}, + {-0.974209743687805218570474607986, 0.22564435579854633107110828405}, + {0.935387085216017766242657671683, 0.353625509276525973323401785819}, + {-0.353625509276525973323401785819, 0.935387085216017766242657671683}, + {0.411367555380587224256316858373, 0.911469546600543023551210808364}, + {-0.911469546600543023551210808364, 0.411367555380587224256316858373}, + {0.728858359344675799107449165604, 0.684664510554904959249711282609}, + {-0.684664510554904959249711282609, 0.728858359344675799107449165604}, + {0.0312497701659798614304275332643, 0.999511606668263441477506603405}, + {-0.999511606668263441477506603405, 0.0312497701659798614304275332643}, + {0.999102262374694127800012211083, 0.0423635375974190722669909803244}, + {-0.0423635375974190722669909803244, 0.999102262374694127800012211083}, + {0.676516440113781092335898392776, 0.736427529534153690882192222489}, + {-0.736427529534153690882192222489, 0.676516440113781092335898392776}, + {0.906838307118793540695378396777, 
0.421478688360302278237412565431}, + {-0.421478688360302278237412565431, 0.906838307118793540695378396777}, + {0.34320107773824654051608717964, 0.93926195506860921113911899738}, + {-0.93926195506860921113911899738, 0.34320107773824654051608717964}, + {0.971640076363043392326801495074, 0.236464716195078805682783240627}, + {-0.236464716195078805682783240627, 0.971640076363043392326801495074}, + {0.519847482536030192079579137499, 0.854259091201815534688535080932}, + {-0.854259091201815534688535080932, 0.519847482536030192079579137499}, + {0.807187250292749958191507175798, 0.590295470899810936771245906129}, + {-0.590295470899810936771245906129, 0.807187250292749958191507175798}, + {0.153365647992363879348332034169, 0.98816950874628906031915676067}, + {-0.98816950874628906031915676067, 0.153365647992363879348332034169}, + {0.990138959089390646717276922573, 0.140088692239516698911927505833}, + {-0.140088692239516698911927505833, 0.990138959089390646717276922573}, + {0.601076308038980156034369883855, 0.799191636539215211065823041281}, + {-0.799191636539215211065823041281, 0.601076308038980156034369883855}, + {0.861159497062963352753683921037, 0.508334850879087363217934125714}, + {-0.508334850879087363217934125714, 0.861159497062963352753683921037}, + {0.249484699886362987220067566341, 0.968378740226473300189979909192}, + {-0.968378740226473300189979909192, 0.249484699886362987220067566341}, + {0.943783768548825063859908368613, 0.330563455668919481400536142246}, + {-0.330563455668919481400536142246, 0.943783768548825063859908368613}, + {0.433612241598717584878386333003, 0.901099563830620953197581002314}, + {-0.901099563830620953197581002314, 0.433612241598717584878386333003}, + {0.745441349048781676245312155515, 0.666571222847440636449789508333}, + {-0.666571222847440636449789508333, 0.745441349048781676245312155515}, + {0.0557696010700070299304087484416, 0.998443664708476341651532948163}, + {-0.998443664708476341651532948163, 0.0557696010700070299304087484416}, + 
{0.995820119677964910032130774198, 0.0913361332910671841478489341171}, + {-0.0913361332910671841478489341171, 0.995820119677964910032130774198}, + {0.639566760248816312817154994264, 0.768735558683760311637911399885}, + {-0.768735558683760311637911399885, 0.639566760248816312817154994264}, + {0.885065001646630933329618073913, 0.465467445542917801493132401447}, + {-0.465467445542917801493132401447, 0.885065001646630933329618073913}, + {0.296700277290238345617723325631, 0.954970651620192789721386361634}, + {-0.954970651620192789721386361634, 0.296700277290238345617723325631}, + {0.958866919654069005396479496994, 0.283856002918939698265177185021}, + {-0.283856002918939698265177185021, 0.958866919654069005396479496994}, + {0.477304796598357894410469270952, 0.878737805687339390559031926387}, + {-0.878737805687339390559031926387, 0.477304796598357894410469270952}, + {0.777250531976084069185617408948, 0.62919123527183240529581098599}, + {-0.62919123527183240529581098599, 0.777250531976084069185617408948}, + {0.104693732717287407707473789742, 0.994504510964993704291714493593}, + {-0.994504510964993704291714493593, 0.104693732717287407707473789742}, + {0.982072467022439998807215033594, 0.188503765258040967278674315821}, + {-0.188503765258040967278674315821, 0.982072467022439998807215033594}, + {0.561137810355011423801840919623, 0.827722391741327223613211572228}, + {-0.827722391741327223613211572228, 0.561137810355011423801840919623}, + {0.83517938382220768822605805326, 0.549977633034614998841504984739}, + {-0.549977633034614998841504984739, 0.83517938382220768822605805326}, + {0.201668091988182529572526391348, 0.979453919627588209806390295853}, + {-0.979453919627588209806390295853, 0.201668091988182529572526391348}, + {0.926426959679452211027239627583, 0.376474552100253767594750797798}, + {-0.376474552100253767594750797798, 0.926426959679452211027239627583}, + {0.388875076729119251162103410024, 0.921290494197634535211705042457}, + {-0.921290494197634535211705042457, 
0.388875076729119251162103410024}, + {0.711836332618670075689237819461, 0.702345381962465875602674714173}, + {-0.702345381962465875602674714173, 0.711836332618670075689237819461}, + {0.00671111556890887934329059660854, 0.999977480210339941812947017752}, + {-0.999977480210339941812947017752, 0.00671111556890887934329059660854}, + {0.999931595513069204628209263319, 0.0116963368063578380257050426394}, + {-0.0116963368063578380257050426394, 0.999931595513069204628209263319}, + {0.6987878528391577859224526037, 0.715328970980792622214039511164}, + {-0.715328970980792622214039511164, 0.6987878528391577859224526037}, + {0.919340340690724344696604930505, 0.393463261281994325369026910266}, + {-0.393463261281994325369026910266, 0.919340340690724344696604930505}, + {0.371851248920489485527696160716, 0.92829232932103467046403011409}, + {-0.92829232932103467046403011409, 0.371851248920489485527696160716}, + {0.978436348175373726832049214863, 0.206548571937059888270482588268}, + {-0.206548571937059888270482588268, 0.978436348175373726832049214863}, + {0.545807080893116136621756595559, 0.837910872615301172139368190983}, + {-0.837910872615301172139368190983, 0.545807080893116136621756595559}, + {0.824914599485333188688684913359, 0.565257378152600797882598726574}, + {-0.565257378152600797882598726574, 0.824914599485333188688684913359}, + {0.183605381988431293205366046095, 0.983000032403296586380747612566}, + {-0.983000032403296586380747612566, 0.183605381988431293205366046095}, + {0.99397021008547692400014739178, 0.109650451265067117034845978196}, + {-0.109650451265067117034845978196, 0.99397021008547692400014739178}, + {0.625308498199164009001549402456, 0.780377653498552037447666407388}, + {-0.780377653498552037447666407388, 0.625308498199164009001549402456}, + {0.876347321973419024487839124049, 0.4816797393185814901706010005}, + {-0.4816797393185814901706010005, 0.876347321973419024487839124049}, + {0.279072124009737854155588365757, 0.960270144074412801415974172414}, + 
{-0.960270144074412801415974172414, 0.279072124009737854155588365757}, + {0.953479609364626612411086625798, 0.301457516950363935137602311443}, + {-0.301457516950363935137602311443, 0.953479609364626612411086625798}, + {0.461049243029566901963534064635, 0.887374551980088854818973231886}, + {-0.887374551980088854818973231886, 0.461049243029566901963534064635}, + {0.765537498460013066114981938881, 0.643391279433895846295854425989}, + {-0.643391279433895846295854425989, 0.765537498460013066114981938881}, + {0.0863704197696647524296054143633, 0.996263093057658144502397590259}, + {-0.996263093057658144502397590259, 0.0863704197696647524296054143633}, + {0.998153222069203760469235930941, 0.0607465659347662806366585641626}, + {-0.0607465659347662806366585641626, 0.998153222069203760469235930941}, + {0.66284660328206679302809334331, 0.748755220694604761710877482983}, + {-0.748755220694604761710877482983, 0.66284660328206679302809334331}, + {0.898926627823621871016257500742, 0.438099209985194470995395477075}, + {-0.438099209985194470995395477075, 0.898926627823621871016257500742}, + {0.325854192110238582991144085099, 0.945420036536239072333387412073}, + {-0.945420036536239072333387412073, 0.325854192110238582991144085099}, + {0.967122920682944364756394861615, 0.254309371218780000400982999054}, + {-0.254309371218780000400982999054, 0.967122920682944364756394861615}, + {0.504035294547763190742273309297, 0.863683056363935830468392396142}, + {-0.863683056363935830468392396142, 0.504035294547763190742273309297}, + {0.796185088780898442273326054419, 0.605053141800745430955998926947}, + {-0.605053141800745430955998926947, 0.796185088780898442273326054419}, + {0.135150695811053850325933467502, 0.990825054902119473076993472205}, + {-0.990825054902119473076993472205, 0.135150695811053850325933467502}, + {0.987392636804146239803969820059, 0.158290179054025176297315624652}, + {-0.158290179054025176297315624652, 0.987392636804146239803969820059}, + {0.586263970173543591535292307526, 
0.810120088182211595118076274957}, + {-0.810120088182211595118076274957, 0.586263970173543591535292307526}, + {0.851656818665622372677148632647, 0.52409985996978780686816890011}, + {-0.52409985996978780686816890011, 0.851656818665622372677148632647}, + {0.231617746718666472904857300819, 0.972806876725780367287654826214}, + {-0.972806876725780367287654826214, 0.231617746718666472904857300819}, + {0.937539282152399233716266735428, 0.347879425119054508375171508305}, + {-0.347879425119054508375171508305, 0.937539282152399233716266735428}, + {0.416952483501768222939887209577, 0.908928284575690637048239750584}, + {-0.908928284575690637048239750584, 0.416952483501768222939887209577}, + {0.733045661252171965038826328964, 0.680179431120469746296919311135}, + {-0.680179431120469746296919311135, 0.733045661252171965038826328964}, + {0.0373820698218952293667705077951, 0.999301046159680073088793506031}, + {-0.999301046159680073088793506031, 0.0373820698218952293667705077951}, + {0.999343392336980218537689779623, 0.0362323638811553883076754800641}, + {-0.0362323638811553883076754800641, 0.999343392336980218537689779623}, + {0.681022339256663666873237161781, 0.732262639654230773622600736417}, + {-0.732262639654230773622600736417, 0.681022339256663666873237161781}, + {0.909407380757791261594036313909, 0.415906498895188825315472058719}, + {-0.415906498895188825315472058719, 0.909407380757791261594036313909}, + {0.348957820086587433561220450429, 0.937138431503274138556491834606}, + {-0.937138431503274138556491834606, 0.348957820086587433561220450429}, + {0.973072705735360532486311058165, 0.230498393384562350050259738055}, + {-0.230498393384562350050259738055, 0.973072705735360532486311058165}, + {0.525079331798186887958479474037, 0.851053285827843786215396448824}, + {-0.851053285827843786215396448824, 0.525079331798186887958479474037}, + {0.810794040139234728492567683134, 0.585331550896324825927763413347}, + {-0.585331550896324825927763413347, 0.810794040139234728492567683134}, + 
{0.159426055046860609687442433824, 0.987209872809320820863376866328}, + {-0.987209872809320820863376866328, 0.159426055046860609687442433824}, + {0.990979888059532743049828695803, 0.134010676669868128429996545492}, + {-0.134010676669868128429996545492, 0.990979888059532743049828695803}, + {0.605968740641719794126629494713, 0.79548845709104298862257564906}, + {-0.79548845709104298862257564906, 0.605968740641719794126629494713}, + {0.864262369986935063970179271564, 0.503041306280673450324059103878}, + {-0.503041306280673450324059103878, 0.864262369986935063970179271564}, + {0.255421863654115455233295506332, 0.966829701430121812855134066922}, + {-0.966829701430121812855134066922, 0.255421863654115455233295506332}, + {0.945794301319306973674372329697, 0.324766284567724272669408946967}, + {-0.324766284567724272669408946967, 0.945794301319306973674372329697}, + {0.439133121951876925770363868651, 0.898422006189072641646475858579}, + {-0.898422006189072641646475858579, 0.439133121951876925770363868651}, + {0.749517320459886171590824233135, 0.661984732702070921561698924052}, + {-0.661984732702070921561698924052, 0.749517320459886171590824233135}, + {0.0618948863783577163388827102608, 0.998082673449554591549315318844}, + {-0.998082673449554591549315318844, 0.0618948863783577163388827102608}, + {0.996361801623805720673487940076, 0.0852241765285184776912430493212}, + {-0.0852241765285184776912430493212, 0.996361801623805720673487940076}, + {0.644271593299083789041503678163, 0.764796779587793462695799462381}, + {-0.764796779587793462695799462381, 0.644271593299083789041503678163}, + {0.887904395101883237018114414241, 0.460028026492689645632339079384}, + {-0.460028026492689645632339079384, 0.887904395101883237018114414241}, + {0.302554281753035614332958402883, 0.953132155890726751046315712301}, + {-0.953132155890726751046315712301, 0.302554281753035614332958402883}, + {0.960590576946164120819560139353, 0.27796716259701537143911309613}, + {-0.27796716259701537143911309613, 
0.960590576946164120819560139353}, + {0.482687645282997457307772037893, 0.87579257652206399242800216598}, + {-0.87579257652206399242800216598, 0.482687645282997457307772037893}, + {0.781096545296358524268498513266, 0.624410271316939380348287613742}, + {-0.624410271316939380348287613742, 0.781096545296358524268498513266}, + {0.110793926849700546943822132562, 0.993843401031180184723723414209}, + {-0.993843401031180184723723414209, 0.110793926849700546943822132562}, + {0.983210617145337639755098280148, 0.182474333353171092042543932621}, + {-0.182474333353171092042543932621, 0.983210617145337639755098280148}, + {0.566206056211556729707012891595, 0.824263733224600558813222050958}, + {-0.824263733224600558813222050958, 0.566206056211556729707012891595}, + {0.838538261122245165424260449072, 0.544842715500612473356056852936}, + {-0.544842715500612473356056852936, 0.838538261122245165424260449072}, + {0.207674111913339565216318760577, 0.978198069534491398968611974851}, + {-0.978198069534491398968611974851, 0.207674111913339565216318760577}, + {0.928719524378774807793490708718, 0.370783016112742613845654204852}, + {-0.370783016112742613845654204852, 0.928719524378774807793490708718}, + {0.394520688466455549114897394247, 0.918887058550697966019527029857}, + {-0.918887058550697966019527029857, 0.394520688466455549114897394247}, + {0.716132442748462327841707519838, 0.697964414883108785758736303251}, + {-0.697964414883108785758736303251, 0.716132442748462327841707519838}, + {0.0128467357043776618519270726893, 0.999917477285871769687730648002}, + {-0.999917477285871769687730648002, 0.0128467357043776618519270726893}, + {0.999712770458023869402097716375, 0.0239661549511472130036260352881}, + {-0.0239661549511472130036260352881, 0.999712770458023869402097716375}, + {0.689957048544735385142701034056, 0.723850309914582878967337364884}, + {-0.723850309914582878967337364884, 0.689957048544735385142701034056}, + {0.914442716579023873180176451569, 0.404715354410448646671483174941}, + 
{-0.404715354410448646671483174941, 0.914442716579023873180176451569}, + {0.36043167434573075702530786657, 0.932785617453621096473170837271}, + {-0.932785617453621096473170837271, 0.36043167434573075702530786657}, + {0.975828005009455545781804630678, 0.218539938316239801263662911879}, + {-0.218539938316239801263662911879, 0.975828005009455545781804630678}, + {0.535483527270423365074236699002, 0.844545671957429244791626388178}, + {-0.844545671957429244791626388178, 0.535483527270423365074236699002}, + {0.817915907375843853621688595013, 0.575337786401649564638205447409}, + {-0.575337786401649564638205447409, 0.817915907375843853621688595013}, + {0.17152863430834341951225496814, 0.985179134783271126529768935143}, + {-0.985179134783271126529768935143, 0.17152863430834341951225496814}, + {0.992549786245966680908736634592, 0.121839738275678885481845270533}, + {-0.121839738275678885481845270533, 0.992549786245966680908736634592}, + {0.615684979367054574517226228636, 0.787992389672507953513047596061}, + {-0.787992389672507953513047596061, 0.615684979367054574517226228636}, + {0.870370383269300273987312266399, 0.4923975994332744354764486161}, + {-0.4923975994332744354764486161, 0.870370383269300273987312266399}, + {0.26726711855441093179663880619, 0.963622481752902215745848479855}, + {-0.963622481752902215745848479855, 0.26726711855441093179663880619}, + {0.949708466642853910144594919984, 0.313135479284732842764071847341}, + {-0.313135479284732842764071847341, 0.949708466642853910144594919984}, + {0.450125076081105746084176644217, 0.892965517745774262081681627023}, + {-0.892965517745774262081681627023, 0.450125076081105746084176644217}, + {0.757584454176041810491426531371, 0.652737155975350424874648069817}, + {-0.652737155975350424874648069817, 0.757584454176041810491426531371}, + {0.0741382355467969794693416929476, 0.997247974191979857927492503222}, + {-0.997247974191979857927492503222, 0.0741382355467969794693416929476}, + {0.997332609157735472393824238679, 
0.0729908673097100502147327460989}, + {-0.0729908673097100502147327460989, 0.997332609157735472393824238679}, + {0.653608313794523887452214694349, 0.756832988273290818703742388607}, + {-0.756832988273290818703742388607, 0.653608313794523887452214694349}, + {0.893482789073525851542001419148, 0.449097434449801047318118207841}, + {-0.449097434449801047318118207841, 0.893482789073525851542001419148}, + {0.314227897714424497799257096631, 0.949347580340295205125755728659}, + {-0.949347580340295205125755728659, 0.314227897714424497799257096631}, + {0.963929330920367144486249344482, 0.266158308138996935188202996869}, + {-0.266158308138996935188202996869, 0.963929330920367144486249344482}, + {0.493398621924179825448675273947, 0.869803311032626647048004997487}, + {-0.869803311032626647048004997487, 0.493398621924179825448675273947}, + {0.7887002047135096560737110849, 0.614777998211442078257960019982}, + {-0.614777998211442078257960019982, 0.7887002047135096560737110849}, + {0.122981571616539039903948093979, 0.992408954535742848790391690272}, + {-0.992408954535742848790391690272, 0.122981571616539039903948093979}, + {0.985375823962037711289951857907, 0.170395086640240944753088569996}, + {-0.170395086640240944753088569996, 0.985375823962037711289951857907}, + {0.576278405896654910556264894694, 0.817253448385022229771834645362}, + {-0.817253448385022229771834645362, 0.576278405896654910556264894694}, + {0.845161178976337135182461679506, 0.534511535470777121581420487928}, + {-0.534511535470777121581420487928, 0.845161178976337135182461679506}, + {0.21966246949596504589585777012, 0.975575932203605722392580901214}, + {-0.975575932203605722392580901214, 0.21966246949596504589585777012}, + {0.933199671484560733247803909762, 0.359358279633443133960213344835}, + {-0.359358279633443133960213344835, 0.933199671484560733247803909762}, + {0.405767139503452056725762986389, 0.913976492312130517348123248667}, + {-0.913976492312130517348123248667, 0.405767139503452056725762986389}, + 
{0.724643616331902551408461476967, 0.68912381275749157172327841181}, + {-0.68912381275749157172327841181, 0.724643616331902551408461476967}, + {0.0251162939738801861688788363836, 0.999684536129782141955502083874}, + {-0.999684536129782141955502083874, 0.0251162939738801861688788363836}, + {0.998823516776924491544775719376, 0.0484931163504361759097882611513}, + {-0.0484931163504361759097882611513, 0.998823516776924491544775719376}, + {0.671985070509296900098661353695, 0.740564693333820245335630261252}, + {-0.740564693333820245335630261252, 0.671985070509296900098661353695}, + {0.904235091530079748700643449411, 0.427035009391019682034595916775}, + {-0.427035009391019682034595916775, 0.904235091530079748700643449411}, + {0.337431414063306844841605425245, 0.941350115952208854608329602343}, + {-0.941350115952208854608329602343, 0.337431414063306844841605425245}, + {0.970170865291024475673964388989, 0.242422136242680913431613021203}, + {-0.242422136242680913431613021203, 0.970170865291024475673964388989}, + {0.514596061309975039144148922787, 0.857432734203832702668535148405}, + {-0.857432734203832702668535148405, 0.514596061309975039144148922787}, + {0.803550070302515684339539348002, 0.5952371666124538496234208651}, + {-0.5952371666124538496234208651, 0.803550070302515684339539348002}, + {0.147299466807902845744138176087, 0.989091940659768797949880081433}, + {-0.989091940659768797949880081433, 0.147299466807902845744138176087}, + {0.989260751947067640266197940946, 0.14616143354908089913024582529}, + {-0.14616143354908089913024582529, 0.989260751947067640266197940946}, + {0.596161245252972538111180256237, 0.802864726873976697341106500971}, + {-0.802864726873976697341106500971, 0.596161245252972538111180256237}, + {0.858024201970656541504922643071, 0.513609256957677895449876359635}, + {-0.513609256957677895449876359635, 0.858024201970656541504922643071}, + {0.243538143160669129372308816528, 0.969891320110585097857835989998}, + {-0.969891320110585097857835989998, 
0.243538143160669129372308816528}, + {0.941737702852886160442835716822, 0.336348181242844046501261345838}, + {-0.336348181242844046501261345838, 0.941737702852886160442835716822}, + {0.428075035989740726449781504925, 0.903743195582894620621061676502}, + {-0.903743195582894620621061676502, 0.428075035989740726449781504925}, + {0.741337312192210662331603998609, 0.671132616963017736289032200148}, + {-0.671132616963017736289032200148, 0.741337312192210662331603998609}, + {0.0496422160676971632731202532796, 0.998767065127744380070851093478}, + {-0.998767065127744380070851093478, 0.0496422160676971632731202532796}, + {0.995240945667458132284366456588, 0.0974446512998208697053925675391}, + {-0.0974446512998208697053925675391, 0.995240945667458132284366456588}, + {0.634837847871510096986469307012, 0.772645395320433858721287379012}, + {-0.772645395320433858721287379012, 0.634837847871510096986469307012}, + {0.882192285994307434648931121046, 0.47088934000690457759574769625}, + {-0.47088934000690457759574769625, 0.882192285994307434648931121046}, + {0.290835102229696829123639645331, 0.956773193244376929378347540478}, + {-0.956773193244376929378347540478, 0.290835102229696829123639645331}, + {0.957107161564402786346761331515, 0.289734156222790251167253927633}, + {-0.289734156222790251167253927633, 0.957107161564402786346761331515}, + {0.471903977657900208786401208272, 0.881649950870895260379711544374}, + {-0.881649950870895260379711544374, 0.471903977657900208786401208272}, + {0.773375255612584577846746469731, 0.633948510532337805578606548806}, + {-0.633948510532337805578606548806, 0.773375255612584577846746469731}, + {0.0985895969250105835834929735029, 0.995128178366065485604963214428}, + {-0.995128178366065485604963214428, 0.0985895969250105835834929735029}, + {0.980897342426228391332188039087, 0.194526100102691612558913902831}, + {-0.194526100102691612558913902831, 0.980897342426228391332188039087}, + {0.556048437977062715020792893483, 0.831149886977835428147898255702}, + 
{-0.831149886977835428147898255702, 0.556048437977062715020792893483}, + {0.831789062490414399242411036539, 0.555091844221582420004779123701}, + {-0.555091844221582420004779123701, 0.831789062490414399242411036539}, + {0.195654479373345374648707206688, 0.98067289383420053194839738353}, + {-0.98067289383420053194839738353, 0.195654479373345374648707206688}, + {0.924099515528716275447607131355, 0.382151914033662609693209333273}, + {-0.382151914033662609693209333273, 0.924099515528716275447607131355}, + {0.383214824064937176206768754128, 0.923659243778179983586085199931}, + {-0.923659243778179983586085199931, 0.383214824064937176206768754128}, + {0.707513422253286283769568854041, 0.706699906135159428011149884696}, + {-0.706699906135159428011149884696, 0.707513422253286283769568854041}, + {0.000575242763732066093342043533454, 0.999999834547867672007726014272}, + {-0.999999834547867672007726014272, 0.000575242763732066093342043533454}}; diff --git a/backends/tfhe-cuda-backend/implementation/src/fft/twiddles.cuh b/backends/tfhe-cuda-backend/implementation/src/fft/twiddles.cuh new file mode 100644 index 000000000..a854a8b68 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/fft/twiddles.cuh @@ -0,0 +1,13 @@ +#ifndef GPU_BOOTSTRAP_TWIDDLES_CUH +#define GPU_BOOTSTRAP_TWIDDLES_CUH + +/* + * 'negtwiddles' are stored in constant memory for faster access times; + * because of its limited size, only twiddles for up to 2^12 polynomial size + * can be stored there, so twiddles for 2^13 are stored in device memory as + * 'negtwiddles13' + */ + +extern __constant__ double2 negtwiddles[4096]; +extern __device__ double2 negtwiddles13[4096]; +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/bitwise_ops.cu b/backends/tfhe-cuda-backend/implementation/src/integer/bitwise_ops.cu new file mode 100644 index 000000000..a5581a0f9 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/bitwise_ops.cu @@ -0,0 +1,51 @@ +#include
"integer/bitwise_ops.cuh" + +void scratch_cuda_integer_radix_bitop_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t lwe_ciphertext_count, uint32_t message_modulus, + uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type, + bool allocate_gpu_memory) { + + int_radix_params params(pbs_type, glwe_dimension, polynomial_size, + big_lwe_dimension, small_lwe_dimension, ks_level, + ks_base_log, pbs_level, pbs_base_log, grouping_factor, + message_modulus, carry_modulus); + + scratch_cuda_integer_radix_bitop_kb( + stream, (int_bitop_buffer **)mem_ptr, lwe_ciphertext_count, + params, op_type, allocate_gpu_memory); +} + +void cuda_bitop_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1, + void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk, + uint32_t lwe_ciphertext_count) { + + host_integer_radix_bitop_kb( + stream, static_cast(lwe_array_out), + static_cast(lwe_array_1), + static_cast(lwe_array_2), + (int_bitop_buffer *)mem_ptr, bsk, static_cast(ksk), + lwe_ciphertext_count); +} + +void cuda_bitnot_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + int8_t *mem_ptr, void *bsk, void *ksk, uint32_t lwe_ciphertext_count) { + + host_integer_radix_bitnot_kb( + stream, static_cast(lwe_array_out), + static_cast(lwe_array_in), + (int_bitop_buffer *)mem_ptr, bsk, static_cast(ksk), + lwe_ciphertext_count); +} + +void cleanup_cuda_integer_bitop(cuda_stream_t *stream, int8_t **mem_ptr_void) { + + int_bitop_buffer *mem_ptr = + (int_bitop_buffer *)(*mem_ptr_void); + mem_ptr->release(stream); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/bitwise_ops.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/bitwise_ops.cuh new 
file mode 100644 index 000000000..eaa9e4bc2 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/bitwise_ops.cuh @@ -0,0 +1,51 @@ +#ifndef CUDA_INTEGER_BITWISE_OPS_CUH +#define CUDA_INTEGER_BITWISE_OPS_CUH + +#include "crypto/keyswitch.cuh" +#include "device.h" +#include "integer.cuh" +#include "integer.h" +#include "pbs/bootstrap_low_latency.cuh" +#include "pbs/bootstrap_multibit.cuh" +#include "polynomial/functions.cuh" +#include "utils/kernel_dimensions.cuh" +#include + +template +__host__ void +host_integer_radix_bitop_kb(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_array_1, Torus *lwe_array_2, + int_bitop_buffer *mem_ptr, void *bsk, + Torus *ksk, uint32_t num_radix_blocks) { + + auto lut = mem_ptr->lut; + + integer_radix_apply_bivariate_lookup_table_kb( + stream, lwe_array_out, lwe_array_1, lwe_array_2, bsk, ksk, + num_radix_blocks, lut); +} + +template +__host__ void +host_integer_radix_bitnot_kb(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_array_in, + int_bitop_buffer *mem_ptr, void *bsk, + Torus *ksk, uint32_t num_radix_blocks) { + + auto lut = mem_ptr->lut; + + integer_radix_apply_univariate_lookup_table_kb( + stream, lwe_array_out, lwe_array_in, bsk, ksk, num_radix_blocks, lut); +} + +template +__host__ void scratch_cuda_integer_radix_bitop_kb( + cuda_stream_t *stream, int_bitop_buffer **mem_ptr, + uint32_t num_radix_blocks, int_radix_params params, BITOP_TYPE op, + bool allocate_gpu_memory) { + + *mem_ptr = new int_bitop_buffer(stream, op, params, num_radix_blocks, + allocate_gpu_memory); +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/cmux.cu b/backends/tfhe-cuda-backend/implementation/src/integer/cmux.cu new file mode 100644 index 000000000..ed00f1f15 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/cmux.cu @@ -0,0 +1,45 @@ +#include "integer/cmux.cuh" + +void scratch_cuda_integer_radix_cmux_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t 
glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t lwe_ciphertext_count, uint32_t message_modulus, + uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) { + + int_radix_params params(pbs_type, glwe_dimension, polynomial_size, + big_lwe_dimension, small_lwe_dimension, ks_level, + ks_base_log, pbs_level, pbs_base_log, grouping_factor, + message_modulus, carry_modulus); + + std::function predicate_lut_f = + [](uint64_t x) -> uint64_t { return x == 1; }; + + scratch_cuda_integer_radix_cmux_kb( + stream, (int_cmux_buffer **)mem_ptr, predicate_lut_f, + lwe_ciphertext_count, params, allocate_gpu_memory); +} + +void cuda_cmux_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_condition, + void *lwe_array_true, void *lwe_array_false, int8_t *mem_ptr, void *bsk, + void *ksk, uint32_t lwe_ciphertext_count) { + + host_integer_radix_cmux_kb( + stream, static_cast(lwe_array_out), + static_cast(lwe_condition), + static_cast(lwe_array_true), + static_cast(lwe_array_false), + (int_cmux_buffer *)mem_ptr, bsk, static_cast(ksk), + + lwe_ciphertext_count); +} + +void cleanup_cuda_integer_radix_cmux(cuda_stream_t *stream, + int8_t **mem_ptr_void) { + + int_cmux_buffer *mem_ptr = + (int_cmux_buffer *)(*mem_ptr_void); + mem_ptr->release(stream); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/cmux.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/cmux.cuh new file mode 100644 index 000000000..d64844d70 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/cmux.cuh @@ -0,0 +1,100 @@ +#ifndef CUDA_INTEGER_CMUX_CUH +#define CUDA_INTEGER_CMUX_CUH + +#include "integer.cuh" +#include + +template +__host__ void zero_out_if(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_array_input, Torus *lwe_condition, + 
int_zero_out_if_buffer *mem_ptr, + int_radix_lut *predicate, void *bsk, + Torus *ksk, uint32_t num_radix_blocks) { + auto params = mem_ptr->params; + + int big_lwe_size = params.big_lwe_dimension + 1; + + // Left message is shifted + int num_blocks = 0, num_threads = 0; + int num_entries = (params.big_lwe_dimension + 1); + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + + // We can't use integer_radix_apply_bivariate_lookup_table_kb since the + // second operand is fixed + auto tmp_lwe_array_input = mem_ptr->tmp; + for (int i = 0; i < num_radix_blocks; i++) { + auto lwe_array_out_block = tmp_lwe_array_input + i * big_lwe_size; + auto lwe_array_input_block = lwe_array_input + i * big_lwe_size; + + device_pack_bivariate_blocks<<stream>>>( + lwe_array_out_block, lwe_array_input_block, lwe_condition, + predicate->lwe_indexes, params.big_lwe_dimension, + params.message_modulus, 1); + check_cuda_error(cudaGetLastError()); + } + + integer_radix_apply_univariate_lookup_table_kb( + stream, lwe_array_out, tmp_lwe_array_input, bsk, ksk, num_radix_blocks, + predicate); +} + +template +__host__ void +host_integer_radix_cmux_kb(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_condition, Torus *lwe_array_true, + Torus *lwe_array_false, + int_cmux_buffer *mem_ptr, void *bsk, + Torus *ksk, uint32_t num_radix_blocks) { + + auto params = mem_ptr->params; + + // Since our CPU threads will be working on different streams we shall assert + // the work in the main stream is completed + stream->synchronize(); + auto true_stream = mem_ptr->zero_if_true_buffer->local_stream; + auto false_stream = mem_ptr->zero_if_false_buffer->local_stream; + +#pragma omp parallel sections + { + // Both sections may be executed in parallel +#pragma omp section + { + auto mem_true = mem_ptr->zero_if_true_buffer; + zero_out_if(true_stream, mem_ptr->tmp_true_ct, lwe_array_true, + lwe_condition, mem_true, mem_ptr->inverted_predicate_lut, bsk, + ksk, num_radix_blocks); + } +#pragma 
omp section + { + auto mem_false = mem_ptr->zero_if_false_buffer; + zero_out_if(false_stream, mem_ptr->tmp_false_ct, lwe_array_false, + lwe_condition, mem_false, mem_ptr->predicate_lut, bsk, ksk, + num_radix_blocks); + } + } + cuda_synchronize_stream(true_stream); + cuda_synchronize_stream(false_stream); + + // If the condition was true, true_ct will have kept its value and false_ct + // will be 0 If the condition was false, true_ct will be 0 and false_ct will + // have kept its value + auto added_cts = mem_ptr->tmp_true_ct; + host_addition(stream, added_cts, mem_ptr->tmp_true_ct, mem_ptr->tmp_false_ct, + params.big_lwe_dimension, num_radix_blocks); + + integer_radix_apply_univariate_lookup_table_kb( + stream, lwe_array_out, added_cts, bsk, ksk, num_radix_blocks, + mem_ptr->message_extract_lut); +} + +template +__host__ void scratch_cuda_integer_radix_cmux_kb( + cuda_stream_t *stream, int_cmux_buffer **mem_ptr, + std::function predicate_lut_f, uint32_t num_radix_blocks, + int_radix_params params, bool allocate_gpu_memory) { + + *mem_ptr = new int_cmux_buffer(stream, predicate_lut_f, params, + num_radix_blocks, allocate_gpu_memory); +} +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/comparison.cu b/backends/tfhe-cuda-backend/implementation/src/integer/comparison.cu new file mode 100644 index 000000000..bb5531526 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/comparison.cu @@ -0,0 +1,83 @@ +#include "integer/comparison.cuh" + +void scratch_cuda_integer_radix_comparison_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t lwe_ciphertext_count, uint32_t message_modulus, + uint32_t carry_modulus, PBS_TYPE pbs_type, COMPARISON_TYPE op_type, + bool allocate_gpu_memory) { + + 
int_radix_params params(pbs_type, glwe_dimension, polynomial_size, + big_lwe_dimension, small_lwe_dimension, ks_level, + ks_base_log, pbs_level, pbs_base_log, grouping_factor, + message_modulus, carry_modulus); + + switch (op_type) { + case EQ: + case NE: + scratch_cuda_integer_radix_equality_check_kb( + stream, (int_comparison_buffer **)mem_ptr, + lwe_ciphertext_count, params, op_type, allocate_gpu_memory); + break; + case GT: + case GE: + case LT: + case LE: + case MAX: + case MIN: + scratch_cuda_integer_radix_difference_check_kb( + stream, (int_comparison_buffer **)mem_ptr, + lwe_ciphertext_count, params, op_type, allocate_gpu_memory); + break; + } +} + +void cuda_comparison_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1, + void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk, + uint32_t lwe_ciphertext_count) { + + int_comparison_buffer *buffer = + (int_comparison_buffer *)mem_ptr; + switch (buffer->op) { + case EQ: + case NE: + host_integer_radix_equality_check_kb( + stream, static_cast(lwe_array_out), + static_cast(lwe_array_1), + static_cast(lwe_array_2), buffer, bsk, + static_cast(ksk), lwe_ciphertext_count); + break; + case GT: + case GE: + case LT: + case LE: + host_integer_radix_difference_check_kb( + stream, static_cast(lwe_array_out), + static_cast(lwe_array_1), + static_cast(lwe_array_2), buffer, + buffer->diff_buffer->operator_f, bsk, static_cast(ksk), + lwe_ciphertext_count); + break; + case MAX: + case MIN: + host_integer_radix_maxmin_kb( + stream, static_cast(lwe_array_out), + static_cast(lwe_array_1), + static_cast(lwe_array_2), buffer, bsk, + static_cast(ksk), lwe_ciphertext_count); + break; + default: + printf("Not implemented\n"); + } +} + +void cleanup_cuda_integer_comparison(cuda_stream_t *stream, + int8_t **mem_ptr_void) { + + int_comparison_buffer *mem_ptr = + (int_comparison_buffer *)(*mem_ptr_void); + mem_ptr->release(stream); +} diff --git 
a/backends/tfhe-cuda-backend/implementation/src/integer/comparison.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/comparison.cuh new file mode 100644 index 000000000..3434c58c4 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/comparison.cuh @@ -0,0 +1,468 @@ +#ifndef CUDA_INTEGER_COMPARISON_OPS_CUH +#define CUDA_INTEGER_COMPARISON_OPS_CUH + +#include "crypto/keyswitch.cuh" +#include "device.h" +#include "integer.cuh" +#include "integer.h" +#include "integer/cmux.cuh" +#include "integer/negation.cuh" +#include "integer/scalar_addition.cuh" +#include "pbs/bootstrap_low_latency.cuh" +#include "pbs/bootstrap_multibit.cuh" +#include "types/complex/operations.cuh" +#include "utils/kernel_dimensions.cuh" + +// lwe_dimension + 1 threads +// todo: This kernel MUST be refactored to a binary reduction +template +__global__ void device_accumulate_all_blocks(Torus *output, Torus *input_block, + uint32_t lwe_dimension, + uint32_t num_blocks) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < lwe_dimension + 1) { + auto block = &input_block[idx]; + + Torus sum = block[0]; + for (int i = 1; i < num_blocks; i++) { + sum += block[i * (lwe_dimension + 1)]; + } + + output[idx] = sum; + } +} + +template +__host__ void accumulate_all_blocks(cuda_stream_t *stream, Torus *output, + Torus *input, uint32_t lwe_dimension, + uint32_t num_radix_blocks) { + + int num_blocks = 0, num_threads = 0; + int num_entries = (lwe_dimension + 1); + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + // Add all blocks and store in sum + device_accumulate_all_blocks<<stream>>>( + output, input, lwe_dimension, num_radix_blocks); + check_cuda_error(cudaGetLastError()); +} + +template +__host__ void +are_all_comparisons_block_true(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_array_in, + int_comparison_buffer *mem_ptr, void *bsk, + Torus *ksk, uint32_t num_radix_blocks) { + + auto params = mem_ptr->params; + auto big_lwe_dimension 
= params.big_lwe_dimension; + auto glwe_dimension = params.glwe_dimension; + auto polynomial_size = params.polynomial_size; + auto message_modulus = params.message_modulus; + auto carry_modulus = params.carry_modulus; + + auto are_all_block_true_buffer = + mem_ptr->eq_buffer->are_all_block_true_buffer; + + uint32_t total_modulus = message_modulus * carry_modulus; + uint32_t max_value = total_modulus - 1; + + cuda_memcpy_async_gpu_to_gpu( + lwe_array_out, lwe_array_in, + num_radix_blocks * (big_lwe_dimension + 1) * sizeof(Torus), stream); + + int lut_num_blocks = 0; + uint32_t remaining_blocks = num_radix_blocks; + while (remaining_blocks > 1) { + // Split in max_value chunks + uint32_t chunk_length = std::min(max_value, remaining_blocks); + int num_chunks = remaining_blocks / chunk_length; + + // Since all blocks encrypt either 0 or 1, we can sum max_value of them + // as in the worst case we will be adding `max_value` ones + auto input_blocks = lwe_array_out; + auto accumulator = are_all_block_true_buffer->tmp_block_accumulated; + for (int i = 0; i < num_chunks; i++) { + accumulate_all_blocks(stream, accumulator, input_blocks, + big_lwe_dimension, chunk_length); + + accumulator += (big_lwe_dimension + 1); + remaining_blocks -= (chunk_length - 1); + input_blocks += (big_lwe_dimension + 1) * chunk_length; + } + accumulator = are_all_block_true_buffer->tmp_block_accumulated; + + // Selects a LUT + int_radix_lut *lut; + if (are_all_block_true_buffer->op == COMPARISON_TYPE::NE) { + // is_non_zero_lut_buffer LUT + lut = mem_ptr->eq_buffer->is_non_zero_lut; + } else if (chunk_length == max_value) { + // is_max_value LUT + lut = are_all_block_true_buffer->is_max_value_lut; + } else { + // is_equal_to_num_blocks LUT + lut = are_all_block_true_buffer->is_equal_to_num_blocks_lut; + if (chunk_length != lut_num_blocks) { + auto is_equal_to_num_blocks_lut_f = [max_value, + chunk_length](Torus x) -> Torus { + return (x & max_value) == chunk_length; + }; + 
generate_device_accumulator( + stream, lut->lut, glwe_dimension, polynomial_size, message_modulus, + carry_modulus, is_equal_to_num_blocks_lut_f); + + // We don't have to generate this lut again + lut_num_blocks = chunk_length; + } + } + + // Applies the LUT + integer_radix_apply_univariate_lookup_table_kb( + stream, lwe_array_out, accumulator, bsk, ksk, num_chunks, lut); + } +} + +// This takes an input slice of blocks. +// +// Each block can encrypt any value as long as it is < message_modulus. +// +// It will compare blocks with 0, for either equality or difference. +// +// This returns a Vec of blocks, where each block encrypts 1 or 0 +// depending on whether all blocks matched the comparison type with 0. +// +// E.g. for ZeroComparisonType::Equality, if all input blocks are zero +// then all returned blocks will encrypt 1. +// +// The returned Vec will have fewer blocks than the number of input blocks. +// The returned blocks potentially need to be 'reduced' to one block +// with e.g. are_all_comparisons_block_true. +// +// This function exists because sometimes it is faster to concatenate +// multiple vecs of 'boolean' shortint blocks before reducing them with +// are_all_comparisons_block_true. +template +__host__ void host_compare_with_zero_equality( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_in, + int_comparison_buffer *mem_ptr, void *bsk, Torus *ksk, + int32_t num_radix_blocks) { + + auto params = mem_ptr->params; + auto big_lwe_dimension = params.big_lwe_dimension; + auto message_modulus = params.message_modulus; + auto carry_modulus = params.carry_modulus; + + // The idea is that we will sum chunks of blocks until carries are full + // then we compare the sum with 0.
+ // + // If all blocks were 0, the sum will be zero + // If at least one block was not zero, the sum won't be zero + uint32_t total_modulus = message_modulus * carry_modulus; + uint32_t message_max = message_modulus - 1; + + uint32_t num_elements_to_fill_carry = (total_modulus - 1) / message_max; + + size_t big_lwe_size = big_lwe_dimension + 1; + size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus); + + int num_sum_blocks = 0; + // Accumulator + auto sum = lwe_array_out; + + if (num_radix_blocks == 1) { + // Just copy + cuda_memcpy_async_gpu_to_gpu(sum, lwe_array_in, big_lwe_size_bytes, stream); + num_sum_blocks = 1; + } else { + uint32_t remainder_blocks = num_radix_blocks; + + auto sum_i = sum; + auto chunk = lwe_array_in; + while (remainder_blocks > 1) { + uint32_t chunk_size = + std::min(remainder_blocks, num_elements_to_fill_carry); + + accumulate_all_blocks(stream, sum_i, chunk, big_lwe_dimension, + chunk_size); + + num_sum_blocks++; + remainder_blocks -= (chunk_size - 1); + + // Update operands + chunk += chunk_size * big_lwe_size; + sum_i += big_lwe_size; + } + } + + auto is_equal_to_zero_lut = mem_ptr->diff_buffer->is_zero_lut; + integer_radix_apply_univariate_lookup_table_kb( + stream, sum, sum, bsk, ksk, num_sum_blocks, is_equal_to_zero_lut); + are_all_comparisons_block_true(stream, lwe_array_out, sum, mem_ptr, bsk, ksk, + num_sum_blocks); + + // The result will be in the first block. Everything else is + // garbage.
+ cuda_memset_async(lwe_array_out + big_lwe_size, 0, + big_lwe_size_bytes * (num_radix_blocks - 1), stream); +} + +template +__host__ void host_integer_radix_equality_check_kb( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_1, + Torus *lwe_array_2, int_comparison_buffer *mem_ptr, void *bsk, + Torus *ksk, uint32_t num_radix_blocks) { + + auto eq_buffer = mem_ptr->eq_buffer; + + auto params = mem_ptr->params; + auto big_lwe_dimension = params.big_lwe_dimension; + + // Applies the LUT for the comparison operation + auto comparisons = mem_ptr->tmp_block_comparisons; + integer_radix_apply_bivariate_lookup_table_kb( + stream, comparisons, lwe_array_1, lwe_array_2, bsk, ksk, num_radix_blocks, + eq_buffer->operator_lut); + + // This takes a Vec of blocks, where each block is either 0 or 1. + // + // It return a block encrypting 1 if all input blocks are 1 + // otherwise the block encrypts 0 + are_all_comparisons_block_true(stream, lwe_array_out, comparisons, mem_ptr, + bsk, ksk, num_radix_blocks); + + // Zero all blocks but the first + size_t big_lwe_size = big_lwe_dimension + 1; + size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus); + cuda_memset_async(lwe_array_out + big_lwe_size, 0, + big_lwe_size_bytes * (num_radix_blocks - 1), stream); +} + +template +__host__ void scratch_cuda_integer_radix_equality_check_kb( + cuda_stream_t *stream, int_comparison_buffer **mem_ptr, + uint32_t num_radix_blocks, int_radix_params params, COMPARISON_TYPE op, + bool allocate_gpu_memory) { + + *mem_ptr = new int_comparison_buffer( + stream, op, params, num_radix_blocks, allocate_gpu_memory); +} + +template +__host__ void +compare_radix_blocks_kb(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_array_left, Torus *lwe_array_right, + int_comparison_buffer *mem_ptr, void *bsk, + Torus *ksk, uint32_t num_radix_blocks) { + + auto params = mem_ptr->params; + auto big_lwe_dimension = params.big_lwe_dimension; + auto message_modulus = params.message_modulus; + auto 
carry_modulus = params.carry_modulus; + + // When rhs > lhs, the subtraction will overflow, and the bit of padding will + // be set to 1 + // meaning that the output of the pbs will be the negative (modulo message + // space) + // + // Example: + // lhs: 1, rhs: 3, message modulus: 4, carry modulus 4 + // lhs - rhs = -2 % (4 * 4) = 14 = 1|1110 (padding_bit|b4b3b2b1) + // Since there was an overflow the bit of padding is 1 and not 0. + // When applying the LUT for an input value of 14 we would expect 1, + // but since the bit of padding is 1, we will get -1 modulus our message + // space, so (-1) % (4 * 4) = 15 = 1|1111 We then add one and get 0 = 0|0000 + + // Subtract + // Here we need the true lwe sub, not the one that comes from shortint. + host_subtraction(stream, lwe_array_out, lwe_array_left, lwe_array_right, + big_lwe_dimension, num_radix_blocks); + + // Apply LUT to compare to 0 + auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut; + integer_radix_apply_univariate_lookup_table_kb( + stream, lwe_array_out, lwe_array_out, bsk, ksk, num_radix_blocks, + is_non_zero_lut); + + // Add one + // Here Lhs can have the following values: (-1) % (message modulus * carry + // modulus), 0, 1 So the output values after the addition will be: 0, 1, 2 + host_integer_radix_add_scalar_one_inplace(stream, lwe_array_out, + big_lwe_dimension, num_radix_blocks, + message_modulus, carry_modulus); +} + +// Reduces a vec containing shortint blocks that encrypts a sign +// (inferior, equal, superior) to one single shortint block containing the +// final sign +template +__host__ void +tree_sign_reduction(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_block_comparisons, + int_tree_sign_reduction_buffer *tree_buffer, + std::function sign_handler_f, void *bsk, + Torus *ksk, uint32_t num_radix_blocks) { + + auto params = tree_buffer->params; + auto big_lwe_dimension = params.big_lwe_dimension; + auto glwe_dimension = params.glwe_dimension; + auto polynomial_size = 
params.polynomial_size; + auto message_modulus = params.message_modulus; + auto carry_modulus = params.carry_modulus; + + // Tree reduction + // Reduces a vec containing shortint blocks that encrypts a sign + // (inferior, equal, superior) to one single shortint block containing the + // final sign + size_t big_lwe_size = big_lwe_dimension + 1; + size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus); + + auto x = tree_buffer->tmp_x; + auto y = tree_buffer->tmp_y; + if (x != lwe_block_comparisons) + cuda_memcpy_async_gpu_to_gpu(x, lwe_block_comparisons, + big_lwe_size_bytes * num_radix_blocks, stream); + + uint32_t partial_block_count = num_radix_blocks; + + auto inner_tree_leaf = tree_buffer->tree_inner_leaf_lut; + while (partial_block_count > 2) { + pack_blocks(stream, y, x, big_lwe_dimension, partial_block_count, 4); + + integer_radix_apply_univariate_lookup_table_kb( + stream, x, y, bsk, ksk, partial_block_count >> 1, inner_tree_leaf); + + if ((partial_block_count % 2) != 0) { + partial_block_count >>= 1; + partial_block_count++; + + auto last_y_block = y + (partial_block_count - 1) * big_lwe_size; + auto last_x_block = x + (partial_block_count - 1) * big_lwe_size; + + cuda_memcpy_async_gpu_to_gpu(last_x_block, last_y_block, + big_lwe_size_bytes, stream); + } else { + partial_block_count >>= 1; + } + } + + auto last_lut = tree_buffer->tree_last_leaf_lut; + auto block_selector_f = tree_buffer->block_selector_f; + std::function f; + + if (partial_block_count == 2) { + pack_blocks(stream, y, x, big_lwe_dimension, partial_block_count, 4); + + f = [block_selector_f, sign_handler_f](Torus x) -> Torus { + int msb = (x >> 2) & 3; + int lsb = x & 3; + + int final_sign = block_selector_f(msb, lsb); + return sign_handler_f(final_sign); + }; + } else { + // partial_block_count == 1 + y = x; + f = sign_handler_f; + } + generate_device_accumulator(stream, last_lut->lut, glwe_dimension, + polynomial_size, message_modulus, + carry_modulus, f); + + // Last leaf + 
integer_radix_apply_univariate_lookup_table_kb(stream, lwe_array_out, y, bsk, + ksk, 1, last_lut); +} + +template +__host__ void host_integer_radix_difference_check_kb( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_left, + Torus *lwe_array_right, int_comparison_buffer *mem_ptr, + std::function reduction_lut_f, void *bsk, Torus *ksk, + uint32_t total_num_radix_blocks) { + + auto diff_buffer = mem_ptr->diff_buffer; + + auto params = mem_ptr->params; + auto big_lwe_dimension = params.big_lwe_dimension; + auto message_modulus = params.message_modulus; + auto carry_modulus = params.carry_modulus; + + uint32_t num_radix_blocks = total_num_radix_blocks; + auto lhs = lwe_array_left; + auto rhs = lwe_array_right; + if (carry_modulus == message_modulus) { + // Packing is possible + // Pack inputs + Torus *packed_left = diff_buffer->tmp_packed_left; + Torus *packed_right = diff_buffer->tmp_packed_right; + pack_blocks(stream, packed_left, lwe_array_left, big_lwe_dimension, + num_radix_blocks, message_modulus); + pack_blocks(stream, packed_right, lwe_array_right, big_lwe_dimension, + num_radix_blocks, message_modulus); + // From this point we have half number of blocks + num_radix_blocks /= 2; + + // Clean noise + auto cleaning_lut = mem_ptr->cleaning_lut; + integer_radix_apply_univariate_lookup_table_kb( + stream, packed_left, packed_left, bsk, ksk, num_radix_blocks, + cleaning_lut); + integer_radix_apply_univariate_lookup_table_kb( + stream, packed_right, packed_right, bsk, ksk, num_radix_blocks, + cleaning_lut); + + lhs = packed_left; + rhs = packed_right; + } + + // comparisons will be assigned + // - 0 if lhs < rhs + // - 1 if lhs == rhs + // - 2 if lhs > rhs + auto comparisons = mem_ptr->tmp_block_comparisons; + compare_radix_blocks_kb(stream, comparisons, lhs, rhs, mem_ptr, bsk, ksk, + num_radix_blocks); + + // Reduces a vec containing radix blocks that encrypts a sign + // (inferior, equal, superior) to one single radix block containing the + // final 
sign + tree_sign_reduction(stream, lwe_array_out, comparisons, + mem_ptr->diff_buffer->tree_buffer, reduction_lut_f, bsk, + ksk, num_radix_blocks); + + // The result will be in the first block. Everything else is garbage. + size_t big_lwe_size = big_lwe_dimension + 1; + size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus); + cuda_memset_async(lwe_array_out + big_lwe_size, 0, + (total_num_radix_blocks - 1) * big_lwe_size_bytes, stream); +} + +template +__host__ void scratch_cuda_integer_radix_difference_check_kb( + cuda_stream_t *stream, int_comparison_buffer **mem_ptr, + uint32_t num_radix_blocks, int_radix_params params, COMPARISON_TYPE op, + bool allocate_gpu_memory) { + + *mem_ptr = new int_comparison_buffer( + stream, op, params, num_radix_blocks, allocate_gpu_memory); +} + +template +__host__ void +host_integer_radix_maxmin_kb(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_array_left, Torus *lwe_array_right, + int_comparison_buffer *mem_ptr, void *bsk, + Torus *ksk, uint32_t total_num_radix_blocks) { + + // Compute the sign + host_integer_radix_difference_check_kb( + stream, mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right, + mem_ptr, mem_ptr->cleaning_lut_f, bsk, ksk, total_num_radix_blocks); + + // Selector + host_integer_radix_cmux_kb( + stream, lwe_array_out, mem_ptr->tmp_lwe_array_out, lwe_array_left, + lwe_array_right, mem_ptr->cmux_buffer, bsk, ksk, total_num_radix_blocks); +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/integer.cu b/backends/tfhe-cuda-backend/implementation/src/integer/integer.cu new file mode 100644 index 000000000..569fb1dda --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/integer.cu @@ -0,0 +1,127 @@ +#include "integer/integer.cuh" +#include + +void cuda_full_propagation_64_inplace( + cuda_stream_t *stream, void *input_blocks, int8_t *mem_ptr, void *ksk, + void *bsk, uint32_t lwe_dimension, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t 
ks_base_log, uint32_t ks_level, + uint32_t pbs_base_log, uint32_t pbs_level, uint32_t grouping_factor, + uint32_t num_blocks) { + + switch (polynomial_size) { + case 256: + host_full_propagate_inplace>( + stream, static_cast(input_blocks), + (int_fullprop_buffer *)mem_ptr, static_cast(ksk), + bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log, + ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks); + break; + case 512: + host_full_propagate_inplace>( + stream, static_cast(input_blocks), + (int_fullprop_buffer *)mem_ptr, static_cast(ksk), + bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log, + ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks); + break; + case 1024: + host_full_propagate_inplace>( + stream, static_cast(input_blocks), + (int_fullprop_buffer *)mem_ptr, static_cast(ksk), + bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log, + ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks); + break; + case 2048: + host_full_propagate_inplace>( + stream, static_cast(input_blocks), + (int_fullprop_buffer *)mem_ptr, static_cast(ksk), + bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log, + ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks); + break; + case 4096: + host_full_propagate_inplace>( + stream, static_cast(input_blocks), + (int_fullprop_buffer *)mem_ptr, static_cast(ksk), + bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log, + ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks); + break; + case 8192: + host_full_propagate_inplace>( + stream, static_cast(input_blocks), + (int_fullprop_buffer *)mem_ptr, static_cast(ksk), + bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log, + ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks); + break; + case 16384: + host_full_propagate_inplace>( + stream, static_cast(input_blocks), + (int_fullprop_buffer *)mem_ptr, static_cast(ksk), + bsk, lwe_dimension, glwe_dimension, 
polynomial_size, ks_base_log, + ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks); + break; + default: + break; + } +} + +void scratch_cuda_full_propagation_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, + uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count, + uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type, + bool allocate_gpu_memory) { + + scratch_cuda_full_propagation( + stream, (int_fullprop_buffer **)mem_ptr, lwe_dimension, + glwe_dimension, polynomial_size, level_count, grouping_factor, + input_lwe_ciphertext_count, message_modulus, carry_modulus, pbs_type, + allocate_gpu_memory); +} + +void cleanup_cuda_full_propagation(cuda_stream_t *stream, + int8_t **mem_ptr_void) { + + int_fullprop_buffer *mem_ptr = + (int_fullprop_buffer *)(*mem_ptr_void); + + cuda_drop_async(mem_ptr->lut_buffer, stream); + cuda_drop_async(mem_ptr->lut_indexes, stream); + + cuda_drop_async(mem_ptr->pbs_buffer, stream); + + cuda_drop_async(mem_ptr->tmp_small_lwe_vector, stream); + cuda_drop_async(mem_ptr->tmp_big_lwe_vector, stream); +} + +void scratch_cuda_propagate_single_carry_low_latency_kb_64_inplace( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus, + PBS_TYPE pbs_type, bool allocate_gpu_memory) { + + int_radix_params params(pbs_type, glwe_dimension, polynomial_size, + big_lwe_dimension, small_lwe_dimension, ks_level, + ks_base_log, pbs_level, pbs_base_log, grouping_factor, + message_modulus, carry_modulus); + + scratch_cuda_propagate_single_carry_low_latency_kb_inplace( + stream, (int_sc_prop_memory **)mem_ptr, num_blocks, params, + allocate_gpu_memory); 
+} + +void cuda_propagate_single_carry_low_latency_kb_64_inplace( + cuda_stream_t *stream, void *lwe_array, int8_t *mem_ptr, void *bsk, + void *ksk, uint32_t num_blocks) { + host_propagate_single_carry_low_latency( + stream, static_cast(lwe_array), + (int_sc_prop_memory *)mem_ptr, bsk, + static_cast(ksk), num_blocks); +} + +void cleanup_cuda_propagate_single_carry_low_latency(cuda_stream_t *stream, + int8_t **mem_ptr_void) { + int_sc_prop_memory *mem_ptr = + (int_sc_prop_memory *)(*mem_ptr_void); + mem_ptr->release(stream); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/integer.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/integer.cuh new file mode 100644 index 000000000..5a0e75e3e --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/integer.cuh @@ -0,0 +1,675 @@ +#ifndef CUDA_INTEGER_CUH +#define CUDA_INTEGER_CUH + +#include "crypto/keyswitch.cuh" +#include "device.h" +#include "integer.h" +#include "integer/scalar_addition.cuh" +#include "linear_algebra.h" +#include "linearalgebra/addition.cuh" +#include "pbs/bootstrap_low_latency.cuh" +#include "pbs/bootstrap_multibit.cuh" +#include "polynomial/functions.cuh" +#include "utils/kernel_dimensions.cuh" +#include + +template +void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_output_indexes, Torus *lut_vector, + Torus *lut_vector_indexes, Torus *lwe_array_in, + Torus *lwe_input_indexes, void *bootstrapping_key, + int8_t *pbs_buffer, uint32_t glwe_dimension, + uint32_t lwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, + uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count, + uint32_t num_lut_vectors, uint32_t lwe_idx, + uint32_t max_shared_memory, PBS_TYPE pbs_type) { + if (sizeof(Torus) == sizeof(uint32_t)) { + // 32 bits + switch (pbs_type) { + case MULTI_BIT: + printf("multibit\n"); + printf("Error: 32-bit multibit PBS is not supported.\n"); + break; + case LOW_LAT: + 
cuda_bootstrap_low_latency_lwe_ciphertext_vector_32( + stream, lwe_array_out, lwe_output_indexes, lut_vector, + lut_vector_indexes, lwe_array_in, lwe_input_indexes, + bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension, + polynomial_size, base_log, level_count, input_lwe_ciphertext_count, + num_lut_vectors, lwe_idx, max_shared_memory); + break; + case AMORTIZED: + cuda_bootstrap_amortized_lwe_ciphertext_vector_32( + stream, lwe_array_out, lwe_output_indexes, lut_vector, + lut_vector_indexes, lwe_array_in, lwe_input_indexes, + bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension, + polynomial_size, base_log, level_count, input_lwe_ciphertext_count, + num_lut_vectors, lwe_idx, max_shared_memory); + break; + default: + break; + } + } else { + // 64 bits + switch (pbs_type) { + case MULTI_BIT: + cuda_multi_bit_pbs_lwe_ciphertext_vector_64( + stream, lwe_array_out, lwe_output_indexes, lut_vector, + lut_vector_indexes, lwe_array_in, lwe_input_indexes, + bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension, + polynomial_size, grouping_factor, base_log, level_count, + input_lwe_ciphertext_count, num_lut_vectors, lwe_idx, + max_shared_memory); + break; + case LOW_LAT: + cuda_bootstrap_low_latency_lwe_ciphertext_vector_64( + stream, lwe_array_out, lwe_output_indexes, lut_vector, + lut_vector_indexes, lwe_array_in, lwe_input_indexes, + bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension, + polynomial_size, base_log, level_count, input_lwe_ciphertext_count, + num_lut_vectors, lwe_idx, max_shared_memory); + break; + case AMORTIZED: + cuda_bootstrap_amortized_lwe_ciphertext_vector_64( + stream, lwe_array_out, lwe_output_indexes, lut_vector, + lut_vector_indexes, lwe_array_in, lwe_input_indexes, + bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension, + polynomial_size, base_log, level_count, input_lwe_ciphertext_count, + num_lut_vectors, lwe_idx, max_shared_memory); + break; + default: + break; + } + } +} + +// function rotates right 
radix ciphertext with specific value
+// grid is one dimensional
+// blockIdx.x represents x_th block of radix ciphertext
+// Radix block b of src is copied to radix block
+// (b + value % blocks_count) % blocks_count of dst. A radix block is lwe_size
+// elements long; the threads of a CUDA block copy it with a blockDim.x stride.
+// NOTE(review): each CUDA block reads one radix block and writes another one,
+// so src and dst presumably must not alias - confirm against callers.
+template
+__global__ void radix_blocks_rotate_right(Torus *dst, Torus *src, uint32_t value,
+                                          uint32_t blocks_count, uint32_t lwe_size) {
+  // Shifts larger than the number of blocks wrap around
+  value %= blocks_count;
+
+  size_t tid = threadIdx.x;
+  size_t src_block_id = blockIdx.x;
+  size_t dst_block_id = (src_block_id + value) % blocks_count;
+  size_t stride = blockDim.x;
+
+  auto cur_src_block = &src[src_block_id * lwe_size];
+  auto cur_dst_block = &dst[dst_block_id * lwe_size];
+
+  for (size_t i = tid; i < lwe_size; i += stride) {
+    cur_dst_block[i] = cur_src_block[i];
+  }
+}
+
+// function rotates left radix ciphertext with specific value
+// grid is one dimensional
+// blockIdx.x represents x_th block of radix ciphertext
+// Radix block b of src is copied to radix block b - value % blocks_count of
+// dst; blocks_count is added back when b is smaller than the shift so the
+// index wraps around instead of underflowing.
+template
+__global__ void radix_blocks_rotate_left(Torus *dst, Torus *src, uint32_t value,
+                                         uint32_t blocks_count, uint32_t lwe_size) {
+  value %= blocks_count;
+  size_t src_block_id = blockIdx.x;
+
+  size_t tid = threadIdx.x;
+  size_t dst_block_id = (src_block_id >= value)
+                            ? src_block_id - value
+                            : src_block_id - value + blocks_count;
+  size_t stride = blockDim.x;
+
+  auto cur_src_block = &src[src_block_id * lwe_size];
+  auto cur_dst_block = &dst[dst_block_id * lwe_size];
+
+  for (size_t i = tid; i < lwe_size; i += stride) {
+    cur_dst_block[i] = cur_src_block[i];
+  }
+}
+
+// One thread per coefficient: the bounds check below expects
+// num_blocks * (lwe_dimension + 1) threads in total (not polynomial_size).
+// For every radix block listed in lwe_indexes, writes
+// lwe_array_1 * message_modulus + lwe_array_2, coefficient by coefficient,
+// into lwe_array_out; the same position is used in all three arrays.
+// NOTE(review): tid, block_id, coeff_id and pos are int while lwe_indexes
+// holds Torus values, so the index arithmetic is narrowed to int - confirm
+// that the sizes in use stay within range.
+template
+__global__ void
+device_pack_bivariate_blocks(Torus *lwe_array_out, Torus *lwe_array_1,
+                             Torus *lwe_array_2, Torus *lwe_indexes,
+                             uint32_t lwe_dimension, uint32_t message_modulus,
+                             uint32_t num_blocks) {
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (tid < num_blocks * (lwe_dimension + 1)) {
+    int block_id = tid / (lwe_dimension + 1);
+    int coeff_id = tid % (lwe_dimension + 1);
+
+    int pos = lwe_indexes[block_id] * (lwe_dimension + 1) + coeff_id;
+    lwe_array_out[pos] = lwe_array_1[pos] * message_modulus + lwe_array_2[pos];
+  }
+}
+
+// Host launcher for device_pack_bivariate_blocks: sizes the launch with
+// getNumBlocksAndThreads for num_radix_blocks * (lwe_dimension + 1) entries
+// (512 is passed as its second argument), launches the kernel and reports a
+// failed launch through check_cuda_error.
+template
+__host__ void pack_bivariate_blocks(cuda_stream_t *stream, Torus *lwe_array_out,
+                                    Torus *lwe_array_1, Torus *lwe_array_2,
+                                    Torus *lwe_indexes, uint32_t lwe_dimension,
+                                    uint32_t message_modulus,
+                                    uint32_t num_radix_blocks) {
+
+  // Left message is shifted
+  int num_blocks = 0, num_threads = 0;
+  int num_entries = num_radix_blocks * (lwe_dimension + 1);
+  getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
+  device_pack_bivariate_blocks<<stream>>>(
+      lwe_array_out, lwe_array_1, lwe_array_2, lwe_indexes, lwe_dimension,
+      message_modulus, num_radix_blocks);
+  check_cuda_error(cudaGetLastError());
+}
+
+// Applies the lookup table held by `lut` to num_radix_blocks blocks:
+// lwe_array_in is keyswitched into lut->tmp_lwe_after_ks, which is then
+// bootstrapped into lwe_array_out using lut->lut and lut->lut_indexes.
+// Every dimension and decomposition parameter is read from lut->params, and
+// lut->lwe_indexes is used for both the input and the output indexes.
+template
+__host__ void integer_radix_apply_univariate_lookup_table_kb(
+    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_in, void *bsk,
+    Torus *ksk, uint32_t num_radix_blocks, int_radix_lut *lut) {
+  // apply_lookup_table
+  auto params = lut->params;
+  auto pbs_type = params.pbs_type;
+  auto big_lwe_dimension = params.big_lwe_dimension;
+  auto small_lwe_dimension = params.small_lwe_dimension;
+  auto ks_level = params.ks_level;
+  auto ks_base_log = params.ks_base_log;
+  auto pbs_level = params.pbs_level;
+  auto pbs_base_log = params.pbs_base_log;
+  auto glwe_dimension = params.glwe_dimension;
+  auto polynomial_size = params.polynomial_size;
+  auto grouping_factor = params.grouping_factor;
+
+  // Compute Keyswitch-PBS
+  cuda_keyswitch_lwe_ciphertext_vector(
+      stream, lut->tmp_lwe_after_ks, lut->lwe_indexes, lwe_array_in,
+      lut->lwe_indexes, ksk, big_lwe_dimension, small_lwe_dimension,
+      ks_base_log, ks_level, num_radix_blocks);
+
+  // The "1, 0" below are execute_pbs' num_lut_vectors and lwe_idx arguments
+  execute_pbs(stream, lwe_array_out, lut->lwe_indexes, lut->lut,
+              lut->lut_indexes, lut->tmp_lwe_after_ks, lut->lwe_indexes, bsk,
+              lut->pbs_buffer, glwe_dimension, small_lwe_dimension,
+              polynomial_size, pbs_base_log, pbs_level, grouping_factor,
+              num_radix_blocks, 1, 0,
+              cuda_get_max_shared_memory(stream->gpu_index), pbs_type);
+}
+
+// Bivariate variant: lwe_array_1 and lwe_array_2 are first packed into
+// lut->tmp_lwe_before_ks as lwe_array_1 * message_modulus + lwe_array_2, then
+// the keyswitch + PBS of the univariate function above is applied to the
+// packed blocks.
+template
+__host__ void integer_radix_apply_bivariate_lookup_table_kb(
+    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_1,
+    Torus *lwe_array_2, void *bsk, Torus *ksk, uint32_t num_radix_blocks,
+    int_radix_lut *lut) {
+  // apply_lookup_table_bivariate
+
+  auto params = lut->params;
+  auto big_lwe_dimension = params.big_lwe_dimension;
+  auto message_modulus = params.message_modulus;
+
+  // Left message is shifted
+  pack_bivariate_blocks(stream, lut->tmp_lwe_before_ks, lwe_array_1,
+                        lwe_array_2, lut->lwe_indexes, big_lwe_dimension,
+                        message_modulus, num_radix_blocks);
+  // NOTE(review): pack_bivariate_blocks already checks cudaGetLastError after
+  // its launch, so this second check looks redundant.
+  check_cuda_error(cudaGetLastError());
+
+  // Apply LUT
+  integer_radix_apply_univariate_lookup_table_kb(stream, lwe_array_out,
+                                                 lut->tmp_lwe_before_ks, bsk,
+                                                 ksk, num_radix_blocks, lut);
+}
+
+// Rotates the slice in-place such that the first mid elements of the slice move
+// to the end while the last array_length - mid elements move to the front.
+// After calling rotate_left, the element previously at index mid will become
+// the first element in the slice.
+template +void rotate_left(Torus *buffer, int mid, uint32_t array_length) { + mid = mid % array_length; + + std::rotate(buffer, buffer + mid, buffer + array_length); +} + +template +void generate_lookup_table(Torus *acc, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t message_modulus, + uint32_t carry_modulus, + std::function f) { + + uint32_t modulus_sup = message_modulus * carry_modulus; + uint32_t box_size = polynomial_size / modulus_sup; + Torus delta = (1ul << 63) / modulus_sup; + + memset(acc, 0, glwe_dimension * polynomial_size * sizeof(Torus)); + + auto body = &acc[glwe_dimension * polynomial_size]; + + // This accumulator extracts the carry bits + for (int i = 0; i < modulus_sup; i++) { + int index = i * box_size; + for (int j = index; j < index + box_size; j++) { + auto f_eval = f(i); + body[j] = f_eval * delta; + } + } + + int half_box_size = box_size / 2; + + // Negate the first half_box_size coefficients + for (int i = 0; i < half_box_size; i++) { + body[i] = -body[i]; + } + + rotate_left(body, half_box_size, polynomial_size); +} + +template +void generate_lookup_table_bivariate(Torus *acc, uint32_t glwe_dimension, + uint32_t polynomial_size, + uint32_t message_modulus, + uint32_t carry_modulus, + std::function f) { + + Torus factor_u64 = message_modulus; + auto wrapped_f = [factor_u64, message_modulus, f](Torus input) -> Torus { + Torus lhs = (input / factor_u64) % message_modulus; + Torus rhs = (input % factor_u64) % message_modulus; + + return f(lhs, rhs); + }; + + generate_lookup_table(acc, glwe_dimension, polynomial_size, + message_modulus, carry_modulus, wrapped_f); +} + +/* + * generate bivariate accumulator for device pointer + * v_stream - cuda stream + * acc - device pointer for bivariate accumulator + * ... 
+ * f - wrapping function with two Torus inputs
+ *
+ * The table is built on the host with generate_lookup_table_bivariate and
+ * copied to acc_bivariate, which must hold
+ * (glwe_dimension + 1) * polynomial_size elements. The stream is synchronized
+ * before returning.
+ */
+template
+void generate_device_accumulator_bivariate(
+    cuda_stream_t *stream, Torus *acc_bivariate, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t message_modulus, uint32_t carry_modulus,
+    std::function f) {
+
+  // host lut
+  // NOTE(review): the result of malloc is not checked before it is filled
+  Torus *h_lut =
+      (Torus *)malloc((glwe_dimension + 1) * polynomial_size * sizeof(Torus));
+
+  // fill bivariate accumulator
+  generate_lookup_table_bivariate(h_lut, glwe_dimension, polynomial_size,
+                                  message_modulus, carry_modulus, f);
+
+  // copy host lut to device
+  cuda_memcpy_async_to_gpu(
+      acc_bivariate, h_lut,
+      (glwe_dimension + 1) * polynomial_size * sizeof(Torus), stream);
+
+  // The copy is asynchronous: wait for it before releasing the host buffer
+  cuda_synchronize_stream(stream);
+  free(h_lut);
+}
+
+/*
+ * generate univariate accumulator for device pointer
+ *  stream - cuda stream
+ *  acc - device pointer for accumulator
+ *  ...
+ *  f - evaluating function with one Torus input
+ *
+ * The table is built on the host with generate_lookup_table and copied to
+ * acc, which must hold (glwe_dimension + 1) * polynomial_size elements. The
+ * stream is synchronized before returning.
+ */
+template
+void generate_device_accumulator(cuda_stream_t *stream, Torus *acc,
+                                 uint32_t glwe_dimension,
+                                 uint32_t polynomial_size,
+                                 uint32_t message_modulus,
+                                 uint32_t carry_modulus,
+                                 std::function f) {
+
+  // host lut
+  // NOTE(review): the result of malloc is not checked before it is filled
+  Torus *h_lut =
+      (Torus *)malloc((glwe_dimension + 1) * polynomial_size * sizeof(Torus));
+
+  // fill accumulator
+  generate_lookup_table(h_lut, glwe_dimension, polynomial_size,
+                        message_modulus, carry_modulus, f);
+
+  // copy host lut to device
+  cuda_memcpy_async_to_gpu(
+      acc, h_lut, (glwe_dimension + 1) * polynomial_size * sizeof(Torus),
+      stream);
+
+  // The copy is asynchronous: wait for it before releasing the host buffer
+  cuda_synchronize_stream(stream);
+  free(h_lut);
+}
+
+// Allocates the scratch object used by host_propagate_single_carry_low_latency
+// and stores it in *mem_ptr. All arguments are forwarded to the
+// int_sc_prop_memory constructor.
+template
+void scratch_cuda_propagate_single_carry_low_latency_kb_inplace(
+    cuda_stream_t *stream, int_sc_prop_memory **mem_ptr,
+    uint32_t num_radix_blocks, int_radix_params params,
+    bool allocate_gpu_memory) {
+
+  *mem_ptr = new int_sc_prop_memory(stream, params, num_radix_blocks,
+                                    allocate_gpu_memory);
+}
+
+// Carry propagation over the num_blocks blocks of lwe_array, in place.
+// Steps, as implemented below:
+//  1. mem->test_vector_array is applied to every block, giving
+//     generates_or_propagates (presumably the per-block carry state -
+//     confirm against int_sc_prop_memory);
+//  2. a prefix scan combines these states with
+//     mem->lut_carry_propagation_sum in ceil(log2(num_blocks)) steps;
+//  3. the result is rotated right by one block and its first block is
+//     zeroed, so that block i receives what was computed for block i - 1;
+//  4. this is added to lwe_array and mem->message_acc is applied to the sum.
+// NOTE(review): num_blocks == 0 makes log2 return -infinity, whose
+// conversion to int is undefined - confirm callers never pass 0.
+// NOTE(review): message_modulus is read but not used in this function.
+template
+void host_propagate_single_carry_low_latency(cuda_stream_t *stream,
+                                             Torus *lwe_array,
+                                             int_sc_prop_memory *mem,
+                                             void *bsk, Torus *ksk,
+                                             uint32_t num_blocks) {
+  auto params = mem->params;
+  auto glwe_dimension = params.glwe_dimension;
+  auto polynomial_size = params.polynomial_size;
+  auto message_modulus = params.message_modulus;
+  auto big_lwe_size = glwe_dimension * polynomial_size + 1;
+  auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
+
+  auto generates_or_propagates = mem->generates_or_propagates;
+  auto step_output = mem->step_output;
+
+  auto test_vector_array = mem->test_vector_array;
+  auto lut_carry_propagation_sum = mem->lut_carry_propagation_sum;
+  auto message_acc = mem->message_acc;
+
+  integer_radix_apply_univariate_lookup_table_kb(
+      stream, generates_or_propagates, lwe_array, bsk, ksk, num_blocks,
+      test_vector_array);
+
+  // compute prefix sum with hillis&steele
+
+  int num_steps = ceil(log2((double)num_blocks));
+  int space = 1;
+  cuda_memcpy_async_gpu_to_gpu(step_output, generates_or_propagates,
+                               big_lwe_size_bytes * num_blocks, stream);
+
+  for (int step = 0; step < num_steps; step++) {
+    // Blocks [space, num_blocks) are combined with the blocks located
+    // `space` positions before them
+    auto cur_blocks = &step_output[space * big_lwe_size];
+    auto prev_blocks = generates_or_propagates;
+    int cur_total_blocks = num_blocks - space;
+
+    integer_radix_apply_bivariate_lookup_table_kb(
+        stream, cur_blocks, cur_blocks, prev_blocks, bsk, ksk, cur_total_blocks,
+        lut_carry_propagation_sum);
+
+    // Publish the updated blocks so that the next step reads them
+    cuda_memcpy_async_gpu_to_gpu(&generates_or_propagates[space * big_lwe_size],
+                                 cur_blocks,
+                                 big_lwe_size_bytes * cur_total_blocks, stream);
+    space *= 2;
+  }
+
+  radix_blocks_rotate_right<<stream>>>(
+      step_output, generates_or_propagates, 1, num_blocks, big_lwe_size);
+  // The block rotated into position 0 is cleared: nothing is added to block 0
+  cuda_memset_async(step_output, 0, big_lwe_size_bytes, stream);
+
+  host_addition(stream, lwe_array, lwe_array, step_output,
+                glwe_dimension * polynomial_size, num_blocks);
+
+  integer_radix_apply_univariate_lookup_table_kb(
+      stream, lwe_array, lwe_array, bsk, ksk, num_blocks, message_acc);
+}
+
+/*
+ * input_blocks: input radix ciphertext propagation will happen
inplace
+ * acc_message_carry: list of two lut s, [(message_acc), (carry_acc)]
+ * tvi_message_carry: tvi for message and carry, should always be {0, 1}
+ * small_lwe_vector: output of keyswitch should have
+ *     size = 2 * (lwe_dimension + 1) * sizeof(Torus)
+ * big_lwe_vector: output of pbs should have
+ *     size = 2 * (glwe_dimension * polynomial_size + 1) * sizeof(Torus)
+ *
+ * In the code below these buffers are the members lut_buffer, lut_indexes,
+ * tmp_small_lwe_vector and tmp_big_lwe_vector of mem_ptr.
+ *
+ * Blocks are processed one after the other, from block 0 upwards: the block
+ * is keyswitched, the keyswitched ciphertext is duplicated, and one PBS over
+ * the two copies applies the two luts. The first output replaces the block,
+ * the second one is added to the next block (no addition after the last
+ * block).
+ */
+template
+void host_full_propagate_inplace(cuda_stream_t *stream, Torus *input_blocks,
+                                 int_fullprop_buffer *mem_ptr,
+                                 Torus *ksk, void *bsk, uint32_t lwe_dimension,
+                                 uint32_t glwe_dimension,
+                                 uint32_t polynomial_size, uint32_t ks_base_log,
+                                 uint32_t ks_level, uint32_t pbs_base_log,
+                                 uint32_t pbs_level, uint32_t grouping_factor,
+                                 uint32_t num_blocks) {
+
+  int big_lwe_size = (glwe_dimension * polynomial_size + 1);
+  int small_lwe_size = (lwe_dimension + 1);
+
+  for (int i = 0; i < num_blocks; i++) {
+    auto cur_input_block = &input_blocks[i * big_lwe_size];
+
+    // Keyswitch a single block (last argument) into the first half of
+    // tmp_small_lwe_vector
+    cuda_keyswitch_lwe_ciphertext_vector(
+        stream, mem_ptr->tmp_small_lwe_vector, mem_ptr->lwe_indexes,
+        cur_input_block, mem_ptr->lwe_indexes, ksk,
+        polynomial_size * glwe_dimension, lwe_dimension, ks_base_log, ks_level,
+        1);
+
+    // Duplicate the keyswitched ciphertext: the PBS below takes two inputs
+    cuda_memcpy_async_gpu_to_gpu(&mem_ptr->tmp_small_lwe_vector[small_lwe_size],
+                                 mem_ptr->tmp_small_lwe_vector,
+                                 small_lwe_size * sizeof(Torus), stream);
+
+    // "2, 2, 0" are execute_pbs' input_lwe_ciphertext_count, num_lut_vectors
+    // and lwe_idx arguments
+    execute_pbs(
+        stream, mem_ptr->tmp_big_lwe_vector, mem_ptr->lwe_indexes,
+        mem_ptr->lut_buffer, mem_ptr->lut_indexes,
+        mem_ptr->tmp_small_lwe_vector, mem_ptr->lwe_indexes, bsk,
+        mem_ptr->pbs_buffer, glwe_dimension, lwe_dimension, polynomial_size,
+        pbs_base_log, pbs_level, grouping_factor, 2, 2, 0,
+        cuda_get_max_shared_memory(stream->gpu_index), mem_ptr->pbs_type);
+
+    // First PBS output replaces the current block
+    cuda_memcpy_async_gpu_to_gpu(cur_input_block, mem_ptr->tmp_big_lwe_vector,
+                                 big_lwe_size * sizeof(Torus), stream);
+
+    if (i < num_blocks - 1) {
+      // Second PBS output is added to the next block
+      auto next_input_block = &input_blocks[(i + 1) * big_lwe_size];
+      host_addition(stream, next_input_block, next_input_block,
+                    &mem_ptr->tmp_big_lwe_vector[big_lwe_size],
+                    glwe_dimension * polynomial_size, 1);
+    }
+  }
+}
+
+// Allocates and initialises the int_fullprop_buffer used by
+// host_full_propagate_inplace and stores it in *mem_ptr: the PBS scratch
+// buffer, the message and carry luts, their indexes {0, 1}, identity lwe
+// indexes and the two temporary ciphertext vectors.
+// NOTE(review): when allocate_gpu_memory is false, lut_buffer, lut_indexes,
+// lwe_indexes, small_lwe_vector and big_lwe_vector are stored in the buffer
+// without having been initialised - confirm this path is only used for size
+// estimation and the pointers are never read.
+template
+void scratch_cuda_full_propagation(
+    cuda_stream_t *stream, int_fullprop_buffer **mem_ptr,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t pbs_level, uint32_t grouping_factor, uint32_t num_radix_blocks,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    bool allocate_gpu_memory) {
+
+  // PBS
+  int8_t *pbs_buffer;
+  if (pbs_type == MULTI_BIT) {
+    uint32_t lwe_chunk_size =
+        get_average_lwe_chunk_size(lwe_dimension, pbs_level, glwe_dimension);
+    // Only 64 bits is supported
+    scratch_cuda_multi_bit_pbs_64(stream, &pbs_buffer, lwe_dimension,
+                                  glwe_dimension, polynomial_size, pbs_level,
+                                  grouping_factor, num_radix_blocks,
+                                  cuda_get_max_shared_memory(stream->gpu_index),
+                                  allocate_gpu_memory, lwe_chunk_size);
+  } else {
+    // Classic
+    // We only use low latency for classic mode
+    if (sizeof(Torus) == sizeof(uint32_t))
+      scratch_cuda_bootstrap_low_latency_32(
+          stream, &pbs_buffer, glwe_dimension, polynomial_size, pbs_level,
+          num_radix_blocks, cuda_get_max_shared_memory(stream->gpu_index),
+          allocate_gpu_memory);
+    else
+      scratch_cuda_bootstrap_low_latency_64(
+          stream, &pbs_buffer, glwe_dimension, polynomial_size, pbs_level,
+          num_radix_blocks, cuda_get_max_shared_memory(stream->gpu_index),
+          allocate_gpu_memory);
+  }
+
+  // LUT
+  Torus *lut_buffer;
+  if (allocate_gpu_memory) {
+    // LUT is used as a trivial encryption, so we only allocate memory for the
+    // body
+    // NOTE(review): the size below covers (glwe_dimension + 1) polynomials
+    // for each of the two luts, i.e. mask and body, which does not match the
+    // sentence above.
+    Torus lut_buffer_size =
+        2 * (glwe_dimension + 1) * polynomial_size * sizeof(Torus);
+
+    lut_buffer = (Torus *)cuda_malloc_async(lut_buffer_size, stream);
+
+    // LUTs
+    auto lut_f_message = [message_modulus](Torus x) -> Torus {
+      return x % message_modulus;
+    };
+    auto lut_f_carry = [message_modulus](Torus x) -> Torus {
+      return x / message_modulus;
+    };
+
+    // Message lut first, carry lut right after it
+    Torus *lut_buffer_message = lut_buffer;
+    Torus *lut_buffer_carry =
+        lut_buffer + (glwe_dimension + 1) * polynomial_size;
+
+    generate_device_accumulator(
+        stream, lut_buffer_message, glwe_dimension, polynomial_size,
+        message_modulus, carry_modulus, lut_f_message);
+
+    generate_device_accumulator(stream, lut_buffer_carry, glwe_dimension,
+                                polynomial_size, message_modulus,
+                                carry_modulus, lut_f_carry);
+  }
+
+  Torus *lut_indexes;
+  if (allocate_gpu_memory) {
+    lut_indexes = (Torus *)cuda_malloc_async(2 * sizeof(Torus), stream);
+
+    // NOTE(review): h_lut_indexes is a stack array that goes out of scope
+    // right after the asynchronous copy is enqueued - confirm that
+    // cuda_memcpy_async_to_gpu has consumed it by then.
+    Torus h_lut_indexes[2] = {0, 1};
+    cuda_memcpy_async_to_gpu(lut_indexes, h_lut_indexes, 2 * sizeof(Torus),
+                             stream);
+  }
+
+  Torus *lwe_indexes;
+  if (allocate_gpu_memory) {
+    Torus lwe_indexes_size = num_radix_blocks * sizeof(Torus);
+
+    // Identity mapping: index i selects block i
+    lwe_indexes = (Torus *)cuda_malloc_async(lwe_indexes_size, stream);
+    Torus *h_lwe_indexes = (Torus *)malloc(lwe_indexes_size);
+    for (int i = 0; i < num_radix_blocks; i++)
+      h_lwe_indexes[i] = i;
+    cuda_memcpy_async_to_gpu(lwe_indexes, h_lwe_indexes, lwe_indexes_size,
+                             stream);
+    // The copy is asynchronous: wait for it before releasing the host buffer
+    cuda_synchronize_stream(stream);
+    free(h_lwe_indexes);
+  }
+
+  // Temporary arrays
+  Torus *small_lwe_vector;
+  Torus *big_lwe_vector;
+  if (allocate_gpu_memory) {
+    Torus small_vector_size = 2 * (lwe_dimension + 1) * sizeof(Torus);
+    Torus big_vector_size =
+        2 * (glwe_dimension * polynomial_size + 1) * sizeof(Torus);
+
+    small_lwe_vector = (Torus *)cuda_malloc_async(small_vector_size, stream);
+    big_lwe_vector = (Torus *)cuda_malloc_async(big_vector_size, stream);
+  }
+
+  *mem_ptr = new int_fullprop_buffer;
+
+  (*mem_ptr)->pbs_type = pbs_type;
+  (*mem_ptr)->pbs_buffer = pbs_buffer;
+
+  (*mem_ptr)->lut_buffer = lut_buffer;
+  (*mem_ptr)->lut_indexes = lut_indexes;
+  // NOTE(review): cleanup_cuda_full_propagation (integer.cu) drops the other
+  // device buffers stored here but not lwe_indexes - this looks like a leak.
+  (*mem_ptr)->lwe_indexes = lwe_indexes;
+
+  (*mem_ptr)->tmp_small_lwe_vector = small_lwe_vector;
+  (*mem_ptr)->tmp_big_lwe_vector = big_lwe_vector;
+}
+
+// Needs at least (lwe_dimension + 1) threads in total: a thread handles one
+// coefficient index and loops over every pair of radix blocks itself (the
+// kernel does not use blockIdx to select a pair).
+// Pair (2 * bid, 2 * bid + 1) of lwe_array_in is written to block bid of
+// lwe_array_out as lsb + factor * msb; an unpaired last block is copied.
+template
+__global__ void device_pack_blocks(Torus *lwe_array_out, Torus *lwe_array_in,
+                                   uint32_t lwe_dimension,
+                                   uint32_t num_radix_blocks, uint32_t factor) {
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+
+  if (tid < (lwe_dimension + 1)) {
+    for (int bid = 0; bid < (num_radix_blocks / 2); bid++) {
+      Torus *lsb_block = lwe_array_in + (2 * bid) * (lwe_dimension + 1);
+      Torus *msb_block = lsb_block + (lwe_dimension + 1);
+
+      Torus *packed_block = lwe_array_out + bid * (lwe_dimension + 1);
+
+      packed_block[tid] = lsb_block[tid] + factor * msb_block[tid];
+    }
+
+    if (num_radix_blocks % 2 != 0) {
+      // We couldn't pack the last block, so we just copy it
+      Torus *lsb_block =
+          lwe_array_in + (num_radix_blocks - 1) * (lwe_dimension + 1);
+      Torus *last_block =
+          lwe_array_out + (num_radix_blocks / 2) * (lwe_dimension + 1);
+
+      last_block[tid] = lsb_block[tid];
+    }
+  }
+}
+
+// Packs the low ciphertext in the message parts of the high ciphertext
+// and moves the high ciphertext into the carry part.
+//
+// This requires the block parameters to have enough room for two ciphertexts,
+// so at least as many carry modulus as the message modulus
+//
+// Expects the carry buffer to be empty
+//
+// lwe_array_out must be a different buffer from lwe_array_in (asserted).
+// NOTE(review): unlike the other launchers in this file, the launch is not
+// followed by check_cuda_error(cudaGetLastError()).
+template
+__host__ void pack_blocks(cuda_stream_t *stream, Torus *lwe_array_out,
+                          Torus *lwe_array_in, uint32_t lwe_dimension,
+                          uint32_t num_radix_blocks, uint32_t factor) {
+  assert(lwe_array_out != lwe_array_in);
+
+  int num_blocks = 0, num_threads = 0;
+  int num_entries = (lwe_dimension + 1);
+  getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
+  device_pack_blocks<<stream>>>(
+      lwe_array_out, lwe_array_in, lwe_dimension, num_radix_blocks, factor);
+}
+
+// One thread per scalar block: writes scalar_input[tid] * delta into the last
+// coefficient (index lwe_dimension) of block tid of lwe_array. The other
+// coefficients are left untouched.
+template
+__global__ void
+device_create_trivial_radix(Torus *lwe_array, Torus *scalar_input,
+                            int32_t num_blocks, uint32_t lwe_dimension,
+                            uint64_t delta) {
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid < num_blocks) {
+    Torus scalar = scalar_input[tid];
+    Torus *body = lwe_array + tid * (lwe_dimension + 1) + lwe_dimension;
+
+    *body = scalar * delta;
+  }
+}
+
+// Builds a radix ciphertext of num_radix_blocks blocks from clear scalars:
+// the whole output is zeroed, then the first num_scalar_blocks blocks
+// receive scalar * delta in their last coefficient, with
+// delta = 2^63 / (message_modulus * carry_modulus).
+template
+__host__ void
+create_trivial_radix(cuda_stream_t *stream, Torus *lwe_array_out,
+                     Torus *scalar_array, uint32_t lwe_dimension,
+                     uint32_t num_radix_blocks, uint32_t num_scalar_blocks,
+                     uint64_t message_modulus, uint64_t carry_modulus) {
+
+  size_t radix_size = (lwe_dimension + 1) * num_radix_blocks;
+  cuda_memset_async(lwe_array_out, 0, radix_size * sizeof(Torus), stream);
+
+  // Nothing to encode: the zeroed output is the result
+  if (num_scalar_blocks == 0)
+    return;
+
+  // Create a 1-dimensional grid of threads
+  int num_blocks = 0, num_threads = 0;
+  int num_entries = num_scalar_blocks;
+  getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
+  dim3 grid(num_blocks, 1, 1);
+  dim3 thds(num_threads, 1, 1);
+
+  // Value of the shift we multiply our messages by
+  // If message_modulus and carry_modulus are always powers of 2 we can simplify
+  // this
+  uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
+
+  device_create_trivial_radix<<stream>>>(
+      lwe_array_out, scalar_array, num_scalar_blocks, lwe_dimension, delta);
+  check_cuda_error(cudaGetLastError());
+}
+
+#endif // CUDA_INTEGER_CUH
diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/multiplication.cu b/backends/tfhe-cuda-backend/implementation/src/integer/multiplication.cu
new file mode 100644
index 000000000..24dd6a08b
--- /dev/null
+++ b/backends/tfhe-cuda-backend/implementation/src/integer/multiplication.cu
@@ -0,0 +1,107 @@
+#include "integer/multiplication.cuh"
+
+/*
+ * This scratch function allocates the necessary amount of data on the GPU for
+ * the integer radix multiplication in keyswitch->bootstrap order.
+ */
+void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
+    cuda_stream_t *stream, int8_t **mem_ptr, uint32_t message_modulus,
+    uint32_t carry_modulus, uint32_t glwe_dimension, uint32_t lwe_dimension,
+    uint32_t polynomial_size, uint32_t pbs_base_log, uint32_t pbs_level,
+    uint32_t ks_base_log, uint32_t ks_level, uint32_t grouping_factor,
+    uint32_t num_radix_blocks, PBS_TYPE pbs_type, uint32_t max_shared_memory,
+    bool allocate_gpu_memory) {
+
+  // NOTE(review): the fourth argument is used as big_lwe_dimension by the
+  // other int_radix_params call visible in this patch (integer.cu). Passing
+  // polynomial_size there equals glwe_dimension * polynomial_size only when
+  // glwe_dimension is 1 - confirm this is intended.
+  int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
+                          polynomial_size, lwe_dimension, ks_level, ks_base_log,
+                          pbs_level, pbs_base_log, grouping_factor,
+                          message_modulus, carry_modulus);
+
+  // Only polynomial_size == 2048 is handled; for any other value nothing is
+  // allocated, *mem_ptr is left untouched and no error is reported.
+  // max_shared_memory is not used by this function.
+  switch (polynomial_size) {
+  case 2048:
+    scratch_cuda_integer_mult_radix_ciphertext_kb(
+        stream, (int_mul_memory **)mem_ptr, num_radix_blocks, params,
+        allocate_gpu_memory);
+    break;
+  default:
+    break;
+  }
+}
+
+/*
+ * Computes a multiplication between two 64 bit radix lwe ciphertexts
+ * encrypting integer values. keyswitch -> bootstrap pattern is used, function
+ * works for single pair of radix ciphertexts, 'stream' can be used for
+ * parallelization
+ * - 'stream' is the Cuda stream (cuda_stream_t) to be used in the kernel
+ * launch
+ * - 'radix_lwe_out' is 64 bit radix big lwe ciphertext, product of
+ * multiplication
+ * - 'radix_lwe_left' left radix big lwe ciphertext
+ * - 'radix_lwe_right' right radix big lwe ciphertext
+ * - 'bsk' bootstrapping key in fourier domain
+ * - 'ksk' keyswitching key
+ * - 'mem_ptr' scratch memory, cast to int_mul_memory by this function
+ * - 'message_modulus' message_modulus
+ * - 'carry_modulus' carry_modulus
+ * - 'glwe_dimension' glwe_dimension
+ * - 'lwe_dimension' is the dimension of small lwe ciphertext
+ * - 'polynomial_size' polynomial size
+ * - 'pbs_base_log' base log used in the pbs
+ * - 'pbs_level' decomposition level count used in the pbs
+ * - 'ks_base_log' base log used in the keyswitch
+ * - 'ks_level' decomposition level count used in the keyswitch
+ * - 'grouping_factor' grouping factor (presumably only relevant to the
+ * multi-bit PBS - confirm)
+ * - 'num_blocks' is the
number of big lwe ciphertext blocks inside radix
+ * ciphertext
+ * - 'pbs_type' selects which PBS implementation should be used
+ * - 'max_shared_memory' maximum shared memory per cuda block
+ *
+ * Only polynomial_size == 2048 is dispatched; for any other value the call
+ * returns without computing anything and without reporting an error.
+ * NOTE(review): apart from polynomial_size and num_blocks, the parameter
+ * arguments (moduli, dimensions, decomposition parameters, pbs_type,
+ * max_shared_memory) are not forwarded by this function - presumably the
+ * values stored in mem_ptr at scratch time are the ones in effect; confirm.
+ */
+void cuda_integer_mult_radix_ciphertext_kb_64(
+    cuda_stream_t *stream, void *radix_lwe_out, void *radix_lwe_left,
+    void *radix_lwe_right, void *bsk, void *ksk, int8_t *mem_ptr,
+    uint32_t message_modulus, uint32_t carry_modulus, uint32_t glwe_dimension,
+    uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t pbs_base_log,
+    uint32_t pbs_level, uint32_t ks_base_log, uint32_t ks_level,
+    uint32_t grouping_factor, uint32_t num_blocks, PBS_TYPE pbs_type,
+    uint32_t max_shared_memory) {
+
+  switch (polynomial_size) {
+  case 2048:
+    host_integer_mult_radix_kb>(
+        stream, static_cast(radix_lwe_out),
+        static_cast(radix_lwe_left),
+        static_cast(radix_lwe_right), bsk,
+        static_cast(ksk), (int_mul_memory *)mem_ptr,
+        num_blocks);
+    break;
+  default:
+    break;
+  }
+}
+
+// Releases the multiplication scratch memory through its release() method.
+// Only release() is called here: the int_mul_memory object itself is not
+// deleted and *mem_ptr_void is not reset by this function.
+void cleanup_cuda_integer_mult(cuda_stream_t *stream, int8_t **mem_ptr_void) {
+
+  int_mul_memory *mem_ptr =
+      (int_mul_memory *)(*mem_ptr_void);
+
+  mem_ptr->release(stream);
+}
+
+// In-place variant: forwards to the function below with lwe_array used as
+// both output and input.
+void cuda_small_scalar_multiplication_integer_radix_ciphertext_64_inplace(
+    cuda_stream_t *stream, void *lwe_array, uint64_t scalar,
+    uint32_t lwe_dimension, uint32_t lwe_ciphertext_count) {
+
+  cuda_small_scalar_multiplication_integer_radix_ciphertext_64(
+      stream, lwe_array, lwe_array, scalar, lwe_dimension,
+      lwe_ciphertext_count);
+}
+
+// Type-erased entry point: casts the two ciphertext arrays and forwards
+// every argument to host_integer_small_scalar_mult_radix.
+void cuda_small_scalar_multiplication_integer_radix_ciphertext_64(
+    cuda_stream_t *stream, void *output_lwe_array, void *input_lwe_array,
+    uint64_t scalar, uint32_t lwe_dimension, uint32_t lwe_ciphertext_count) {
+
+  host_integer_small_scalar_mult_radix(
+      stream, static_cast(output_lwe_array),
+      static_cast(input_lwe_array), scalar, lwe_dimension,
+      lwe_ciphertext_count);
+}
diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/multiplication.cuh
b/backends/tfhe-cuda-backend/implementation/src/integer/multiplication.cuh new file mode 100644 index 000000000..536a3316b --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/multiplication.cuh @@ -0,0 +1,639 @@ +#ifndef CUDA_INTEGER_MULT_CUH +#define CUDA_INTEGER_MULT_CUH + +#ifdef __CDT_PARSER__ +#undef __CUDA_RUNTIME_H__ +#include +#endif + +#include "bootstrap.h" +#include "bootstrap_multibit.h" +#include "crypto/keyswitch.cuh" +#include "device.h" +#include "integer.h" +#include "integer/integer.cuh" +#include "linear_algebra.h" +#include "pbs/bootstrap_amortized.cuh" +#include "pbs/bootstrap_low_latency.cuh" +#include "pbs/bootstrap_multibit.cuh" +#include "utils/helper.cuh" +#include "utils/kernel_dimensions.cuh" +#include +#include +#include +#include +#include +#include + +template +__global__ void +all_shifted_lhs_rhs(Torus *radix_lwe_left, Torus *lsb_ciphertext, + Torus *msb_ciphertext, Torus *radix_lwe_right, + Torus *lsb_rhs, Torus *msb_rhs, int num_blocks) { + + size_t block_id = blockIdx.x; + double D = sqrt((2 * num_blocks + 1) * (2 * num_blocks + 1) - 8 * block_id); + size_t radix_id = int((2 * num_blocks + 1 - D) / 2.); + size_t local_block_id = + block_id - (2 * num_blocks - radix_id + 1) / 2. * radix_id; + bool process_msb = (local_block_id < (num_blocks - radix_id - 1)); + auto cur_lsb_block = &lsb_ciphertext[block_id * (params::degree + 1)]; + auto cur_msb_block = + (process_msb) + ? &msb_ciphertext[(block_id - radix_id) * (params::degree + 1)] + : nullptr; + + auto cur_lsb_rhs_block = &lsb_rhs[block_id * (params::degree + 1)]; + auto cur_msb_rhs_block = + (process_msb) ? 
&msb_rhs[(block_id - radix_id) * (params::degree + 1)] + : nullptr; + + auto cur_ct_right = &radix_lwe_right[radix_id * (params::degree + 1)]; + auto cur_src = &radix_lwe_left[local_block_id * (params::degree + 1)]; + + size_t tid = threadIdx.x; + + for (int i = 0; i < params::opt; i++) { + Torus value = cur_src[tid]; + if (process_msb) { + cur_lsb_block[tid] = cur_msb_block[tid] = value; + cur_lsb_rhs_block[tid] = cur_msb_rhs_block[tid] = cur_ct_right[tid]; + } else { + cur_lsb_block[tid] = value; + cur_lsb_rhs_block[tid] = cur_ct_right[tid]; + } + tid += params::degree / params::opt; + } + if (threadIdx.x == 0) { + Torus value = cur_src[params::degree]; + if (process_msb) { + cur_lsb_block[params::degree] = cur_msb_block[params::degree] = value; + cur_lsb_rhs_block[params::degree] = cur_msb_rhs_block[params::degree] = + cur_ct_right[params::degree]; + } else { + cur_lsb_block[params::degree] = value; + cur_lsb_rhs_block[params::degree] = cur_ct_right[params::degree]; + } + } +} + +template +void compress_device_array_with_map(cuda_stream_t *stream, Torus *src, + Torus *dst, int *S, int *F, int num_blocks, + uint32_t map_size, uint32_t unit_size, + int &total_copied, bool is_message) { + for (int i = 0; i < map_size; i++) { + int s_index = i * num_blocks + S[i]; + int number_of_unit = F[i] - S[i] + is_message; + auto cur_dst = &dst[total_copied * unit_size]; + auto cur_src = &src[s_index * unit_size]; + size_t copy_size = unit_size * number_of_unit * sizeof(Torus); + cuda_memcpy_async_gpu_to_gpu(cur_dst, cur_src, copy_size, stream); + total_copied += number_of_unit; + } +} + +template +void extract_message_carry_to_full_radix(cuda_stream_t *stream, Torus *src, + Torus *dst, int *S, int *F, + uint32_t map_size, uint32_t unit_size, + int &total_copied, + int &total_radix_copied, + int num_blocks, bool is_message) { + size_t radix_size = unit_size * num_blocks; + for (int i = 0; i < map_size; i++) { + auto cur_dst_radix = &dst[total_radix_copied * radix_size]; + + 
int s_index = S[i]; + int number_of_unit = F[i] - s_index + is_message; + + if (!is_message) { + int zero_block_count = num_blocks - number_of_unit; + cuda_memset_async(cur_dst_radix, 0, + zero_block_count * unit_size * sizeof(Torus), stream); + s_index = zero_block_count; + } + + auto cur_dst = &cur_dst_radix[s_index * unit_size]; + auto cur_src = &src[total_copied * unit_size]; + + size_t copy_size = unit_size * number_of_unit * sizeof(Torus); + cuda_memcpy_async_gpu_to_gpu(cur_dst, cur_src, copy_size, stream); + total_copied += number_of_unit; + ++total_radix_copied; + } +} + +template +__global__ void tree_add_chunks(Torus *result_blocks, Torus *input_blocks, + uint32_t chunk_size, uint32_t num_blocks) { + + extern __shared__ Torus result[]; + size_t chunk_id = blockIdx.x; + size_t chunk_elem_size = chunk_size * num_blocks * (params::degree + 1); + size_t radix_elem_size = num_blocks * (params::degree + 1); + auto src_chunk = &input_blocks[chunk_id * chunk_elem_size]; + auto dst_radix = &result_blocks[chunk_id * radix_elem_size]; + size_t block_stride = blockIdx.y * (params::degree + 1); + auto dst_block = &dst_radix[block_stride]; + + // init shared mem with first radix of chunk + size_t tid = threadIdx.x; + for (int i = 0; i < params::opt; i++) { + result[tid] = src_chunk[block_stride + tid]; + tid += params::degree / params::opt; + } + + if (threadIdx.x == 0) { + result[params::degree] = src_chunk[block_stride + params::degree]; + } + + // accumulate rest of the radixes + for (int r_id = 1; r_id < chunk_size; r_id++) { + auto cur_src_radix = &src_chunk[r_id * radix_elem_size]; + tid = threadIdx.x; + for (int i = 0; i < params::opt; i++) { + result[tid] += cur_src_radix[block_stride + tid]; + tid += params::degree / params::opt; + } + if (threadIdx.x == 0) { + result[params::degree] += cur_src_radix[block_stride + params::degree]; + } + } + + // put result from shared mem to global mem + tid = threadIdx.x; + for (int i = 0; i < params::opt; i++) { + 
dst_block[tid] = result[tid]; + tid += params::degree / params::opt; + } + + if (threadIdx.x == 0) { + dst_block[params::degree] = result[params::degree]; + } +} + +template +__global__ void fill_radix_from_lsb_msb(Torus *result_blocks, Torus *lsb_blocks, + Torus *msb_blocks, + uint32_t glwe_dimension, + uint32_t lsb_count, uint32_t msb_count, + uint32_t num_blocks) { + size_t big_lwe_dimension = glwe_dimension * params::degree + 1; + size_t big_lwe_id = blockIdx.x; + size_t radix_id = big_lwe_id / num_blocks; + size_t block_id = big_lwe_id % num_blocks; + size_t lsb_block_id = block_id - radix_id; + size_t msb_block_id = block_id - radix_id - 1; + + bool process_lsb = (radix_id <= block_id); + bool process_msb = (radix_id + 1 <= block_id); + + auto cur_res_lsb_ct = &result_blocks[big_lwe_id * big_lwe_dimension]; + auto cur_res_msb_ct = + &result_blocks[num_blocks * num_blocks * big_lwe_dimension + + big_lwe_id * big_lwe_dimension]; + Torus *cur_lsb_radix = &lsb_blocks[(2 * num_blocks - radix_id + 1) * + radix_id / 2 * (params::degree + 1)]; + Torus *cur_msb_radix = (process_msb) + ? &msb_blocks[(2 * num_blocks - radix_id - 1) * + radix_id / 2 * (params::degree + 1)] + : nullptr; + Torus *cur_lsb_ct = (process_lsb) + ? &cur_lsb_radix[lsb_block_id * (params::degree + 1)] + : nullptr; + Torus *cur_msb_ct = (process_msb) + ? &cur_msb_radix[msb_block_id * (params::degree + 1)] + : nullptr; + size_t tid = threadIdx.x; + + for (int i = 0; i < params::opt; i++) { + cur_res_lsb_ct[tid] = (process_lsb) ? cur_lsb_ct[tid] : 0; + cur_res_msb_ct[tid] = (process_msb) ? cur_msb_ct[tid] : 0; + tid += params::degree / params::opt; + } + + if (threadIdx.x == 0) { + cur_res_lsb_ct[params::degree] = + (process_lsb) ? cur_lsb_ct[params::degree] : 0; + cur_res_msb_ct[params::degree] = + (process_msb) ? 
cur_msb_ct[params::degree] : 0; + } +} + +template +__host__ void host_integer_mult_radix_kb( + cuda_stream_t *stream, uint64_t *radix_lwe_out, uint64_t *radix_lwe_left, + uint64_t *radix_lwe_right, void *bsk, uint64_t *ksk, + int_mul_memory *mem_ptr, uint32_t num_blocks) { + + auto glwe_dimension = mem_ptr->params.glwe_dimension; + auto polynomial_size = mem_ptr->params.polynomial_size; + auto lwe_dimension = mem_ptr->params.small_lwe_dimension; + auto message_modulus = mem_ptr->params.message_modulus; + auto carry_modulus = mem_ptr->params.carry_modulus; + + int big_lwe_dimension = glwe_dimension * polynomial_size; + int big_lwe_size = big_lwe_dimension + 1; + + // 'vector_result_lsb' contains blocks from all possible right shifts of + // radix_lwe_left, only nonzero blocks are kept + int lsb_vector_block_count = num_blocks * (num_blocks + 1) / 2; + + // 'vector_result_msb' contains blocks from all possible shifts of + // radix_lwe_left except the last blocks of each shift. Only nonzero blocks + // are kept + int msb_vector_block_count = num_blocks * (num_blocks - 1) / 2; + + // total number of blocks msb and lsb + int total_block_count = lsb_vector_block_count + msb_vector_block_count; + + // buffer to keep all lsb and msb shifts + // for lsb all nonzero blocks of each right shifts are kept + // for 0 shift num_blocks blocks + // for 1 shift num_blocks - 1 blocks + // for num_blocks - 1 shift 1 block + // (num_blocks + 1) * num_blocks / 2 blocks + // for msb we don't keep track for last blocks so + // for 0 shift num_blocks - 1 blocks + // for 1 shift num_blocks - 2 blocks + // for num_blocks - 1 shift 0 blocks + // (num_blocks - 1) * num_blocks / 2 blocks + // in total num_blocks^2 blocks + // in each block three is big polynomial with + // glwe_dimension * polynomial_size + 1 coefficients + auto vector_result_sb = mem_ptr->vector_result_sb; + + // buffer to keep lsb_vector + msb_vector + // addition will happen in full terms so there will be + // num_blocks 
terms and each term will have num_blocks block + // num_blocks^2 blocks in total + // and each blocks has big lwe ciphertext with + // glwe_dimension * polynomial_size + 1 coefficients + auto block_mul_res = mem_ptr->block_mul_res; + + // buffer to keep keyswitch result of num_blocks^2 ciphertext + // in total it has num_blocks^2 small lwe ciphertexts with + // lwe_dimension +1 coefficients + auto small_lwe_vector = mem_ptr->small_lwe_vector; + + // buffer to keep pbs result for num_blocks^2 lwe_ciphertext + // in total it has num_blocks^2 big lwe ciphertexts with + // glwe_dimension * polynomial_size + 1 coefficients + auto lwe_pbs_out_array = mem_ptr->lwe_pbs_out_array; + + // it contains two test vector, first for lsb extraction, + // second for msb extraction, with total length = + // 2 * (glwe_dimension + 1) * polynomial_size + auto test_vector_array = mem_ptr->test_vector_array; + + // accumulator to extract message + // with length (glwe_dimension + 1) * polynomial_size + auto test_vector_message = mem_ptr->test_vector_message; + + // accumulator to extract carry + // with length (glwe_dimension + 1) * polynomial_size + auto test_vector_carry = mem_ptr->test_vector_carry; + + // to be used as default indexing + auto lwe_indexes = test_vector_array->lwe_indexes; + + auto vector_result_lsb = &vector_result_sb[0]; + auto vector_result_msb = + &vector_result_sb[lsb_vector_block_count * + (polynomial_size * glwe_dimension + 1)]; + + auto vector_lsb_rhs = &block_mul_res[0]; + auto vector_msb_rhs = &block_mul_res[lsb_vector_block_count * + (polynomial_size * glwe_dimension + 1)]; + + dim3 grid(lsb_vector_block_count, 1, 1); + dim3 thds(params::degree / params::opt, 1, 1); + + all_shifted_lhs_rhs<<stream>>>( + radix_lwe_left, vector_result_lsb, vector_result_msb, radix_lwe_right, + vector_lsb_rhs, vector_msb_rhs, num_blocks); + + integer_radix_apply_bivariate_lookup_table_kb( + stream, block_mul_res, block_mul_res, vector_result_sb, bsk, ksk, + total_block_count, 
test_vector_array); + + vector_result_lsb = &block_mul_res[0]; + vector_result_msb = &block_mul_res[lsb_vector_block_count * + (polynomial_size * glwe_dimension + 1)]; + + fill_radix_from_lsb_msb + <<stream>>>(vector_result_sb, vector_result_lsb, + vector_result_msb, glwe_dimension, + lsb_vector_block_count, msb_vector_block_count, + num_blocks); + + auto new_blocks = block_mul_res; + auto old_blocks = vector_result_sb; + + // amount of current radixes after block_mul + size_t r = 2 * num_blocks; + + size_t total_modulus = message_modulus * carry_modulus; + size_t message_max = message_modulus - 1; + size_t chunk_size = (total_modulus - 1) / message_max; + size_t ch_amount = r / chunk_size; + + int terms_degree[r * num_blocks]; + int f_b[ch_amount]; + int l_b[ch_amount]; + + for (int i = 0; i < num_blocks * num_blocks; i++) { + size_t r_id = i / num_blocks; + size_t b_id = i % num_blocks; + terms_degree[i] = (b_id >= r_id) ? 3 : 0; + } + auto terms_degree_msb = &terms_degree[num_blocks * num_blocks]; + for (int i = 0; i < num_blocks * num_blocks; i++) { + size_t r_id = i / num_blocks; + size_t b_id = i % num_blocks; + terms_degree_msb[i] = (b_id > r_id) ? 
2 : 0; + } + + auto max_shared_memory = cuda_get_max_shared_memory(stream->gpu_index); + while (r > chunk_size) { + int cur_total_blocks = r * num_blocks; + ch_amount = r / chunk_size; + dim3 add_grid(ch_amount, num_blocks, 1); + size_t sm_size = big_lwe_size * sizeof(Torus); + cuda_memset_async(new_blocks, 0, + ch_amount * num_blocks * big_lwe_size * sizeof(Torus), + stream); + + tree_add_chunks<<stream>>>( + new_blocks, old_blocks, chunk_size, num_blocks); + + for (int c_id = 0; c_id < ch_amount; c_id++) { + auto cur_chunk = &terms_degree[c_id * chunk_size * num_blocks]; + int mx = 0; + int mn = num_blocks; + for (int r_id = 1; r_id < chunk_size; r_id++) { + auto cur_radix = &cur_chunk[r_id * num_blocks]; + for (int i = 0; i < num_blocks; i++) { + if (cur_radix[i]) { + mn = min(mn, i); + mx = max(mx, i); + } + } + } + f_b[c_id] = mn; + l_b[c_id] = mx; + } + + int total_copied = 0; + int message_count = 0; + int carry_count = 0; + compress_device_array_with_map(stream, new_blocks, old_blocks, f_b, + l_b, num_blocks, ch_amount, + big_lwe_size, total_copied, true); + + message_count = total_copied; + compress_device_array_with_map(stream, new_blocks, old_blocks, f_b, + l_b, num_blocks, ch_amount, + big_lwe_size, total_copied, false); + carry_count = total_copied - message_count; + + auto message_blocks_vector = old_blocks; + auto carry_blocks_vector = + &old_blocks[message_count * (glwe_dimension * polynomial_size + 1)]; + + cuda_keyswitch_lwe_ciphertext_vector( + stream, small_lwe_vector, lwe_indexes, old_blocks, lwe_indexes, ksk, + polynomial_size * glwe_dimension, lwe_dimension, + mem_ptr->params.ks_base_log, mem_ptr->params.ks_level, total_copied); + + execute_pbs( + stream, message_blocks_vector, lwe_indexes, test_vector_message->lut, + test_vector_message->lut_indexes, small_lwe_vector, lwe_indexes, bsk, + test_vector_message->pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, mem_ptr->params.pbs_base_log, + mem_ptr->params.pbs_level, 
mem_ptr->params.grouping_factor, + message_count, 1, 0, max_shared_memory, mem_ptr->params.pbs_type); + + execute_pbs(stream, carry_blocks_vector, lwe_indexes, + test_vector_carry->lut, test_vector_carry->lut_indexes, + &small_lwe_vector[message_count * (lwe_dimension + 1)], + lwe_indexes, bsk, test_vector_carry->pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, + mem_ptr->params.pbs_base_log, mem_ptr->params.pbs_level, + mem_ptr->params.grouping_factor, carry_count, 1, 0, + max_shared_memory, mem_ptr->params.pbs_type); + + int rem_blocks = r % chunk_size * num_blocks; + int new_blocks_created = 2 * ch_amount * num_blocks; + int copy_size = rem_blocks * big_lwe_size * sizeof(Torus); + + auto cur_dst = &new_blocks[new_blocks_created * big_lwe_size]; + auto cur_src = &old_blocks[(cur_total_blocks - rem_blocks) * big_lwe_size]; + cuda_memcpy_async_gpu_to_gpu(cur_dst, cur_src, copy_size, stream); + + total_copied = 0; + int total_radix_copied = 0; + extract_message_carry_to_full_radix( + stream, old_blocks, new_blocks, f_b, l_b, ch_amount, big_lwe_size, + total_copied, total_radix_copied, num_blocks, true); + extract_message_carry_to_full_radix( + stream, old_blocks, new_blocks, f_b, l_b, ch_amount, big_lwe_size, + total_copied, total_radix_copied, num_blocks, false); + + std::swap(new_blocks, old_blocks); + r = (new_blocks_created + rem_blocks) / num_blocks; + } + + dim3 add_grid(1, num_blocks, 1); + size_t sm_size = big_lwe_size * sizeof(Torus); + cuda_memset_async(radix_lwe_out, 0, num_blocks * big_lwe_size * sizeof(Torus), + stream); + tree_add_chunks<<stream>>>( + radix_lwe_out, old_blocks, r, num_blocks); + + integer_radix_apply_univariate_lookup_table_kb( + stream, vector_result_sb, radix_lwe_out, bsk, ksk, num_blocks, + test_vector_message); + integer_radix_apply_univariate_lookup_table_kb( + stream, &block_mul_res[big_lwe_size], radix_lwe_out, bsk, ksk, num_blocks, + test_vector_carry); + + cuda_memset_async(block_mul_res, 0, big_lwe_size * 
sizeof(Torus), stream); + + host_addition(stream, radix_lwe_out, vector_result_sb, block_mul_res, + big_lwe_size, num_blocks); + + host_propagate_single_carry_low_latency( + stream, radix_lwe_out, mem_ptr->scp_mem, bsk, ksk, num_blocks); +} + +template +__host__ void scratch_cuda_integer_mult_radix_ciphertext_kb( + cuda_stream_t *stream, int_mul_memory **mem_ptr, + uint32_t num_radix_blocks, int_radix_params params, + bool allocate_gpu_memory) { + *mem_ptr = new int_mul_memory(stream, params, num_radix_blocks, + allocate_gpu_memory); +} + +// Function to apply lookup table, +// It has two mode +// lsb_msb_mode == true - extracts lsb and msb +// lsb_msb_mode == false - extracts message and carry +template +void apply_lookup_table(Torus *input_ciphertexts, Torus *output_ciphertexts, + int_mul_memory *mem_ptr, uint32_t glwe_dimension, + uint32_t lwe_dimension, uint32_t polynomial_size, + uint32_t pbs_base_log, uint32_t pbs_level, + uint32_t ks_base_log, uint32_t ks_level, + uint32_t grouping_factor, + uint32_t lsb_message_blocks_count, + uint32_t msb_carry_blocks_count, + uint32_t max_shared_memory, bool lsb_msb_mode) { + + int total_blocks_count = lsb_message_blocks_count + msb_carry_blocks_count; + int gpu_n = mem_ptr->p2p_gpu_count; + if (total_blocks_count < gpu_n) + gpu_n = total_blocks_count; + int gpu_blocks_count = total_blocks_count / gpu_n; + int big_lwe_size = glwe_dimension * polynomial_size + 1; + // int small_lwe_size = lwe_dimension + 1; + +#pragma omp parallel for num_threads(gpu_n) + for (int i = 0; i < gpu_n; i++) { + cudaSetDevice(i); + auto this_stream = mem_ptr->streams[i]; + // Index where input and output blocks start for current gpu + int big_lwe_start_index = i * gpu_blocks_count * big_lwe_size; + + // Last gpu might have extra blocks to process if total blocks number is not + // divisible by gpu_n + if (i == gpu_n - 1) { + gpu_blocks_count += total_blocks_count % gpu_n; + } + + int can_access_peer; + cudaDeviceCanAccessPeer(&can_access_peer, 
i, 0); + if (i == 0) { + check_cuda_error( + cudaMemcpyAsync(mem_ptr->pbs_output_multi_gpu[i], + &input_ciphertexts[big_lwe_start_index], + gpu_blocks_count * big_lwe_size * sizeof(Torus), + cudaMemcpyDeviceToDevice, *this_stream)); + } else if (can_access_peer) { + check_cuda_error(cudaMemcpyPeerAsync( + mem_ptr->pbs_output_multi_gpu[i], i, + &input_ciphertexts[big_lwe_start_index], 0, + gpu_blocks_count * big_lwe_size * sizeof(Torus), *this_stream)); + } else { + // Uses host memory as middle ground + cuda_memcpy_async_to_cpu(mem_ptr->device_to_device_buffer[i], + &input_ciphertexts[big_lwe_start_index], + gpu_blocks_count * big_lwe_size * sizeof(Torus), + this_stream, i); + cuda_memcpy_async_to_gpu( + mem_ptr->pbs_output_multi_gpu[i], mem_ptr->device_to_device_buffer[i], + gpu_blocks_count * big_lwe_size * sizeof(Torus), this_stream, i); + } + + // when lsb and msb have to be extracted + // for first lsb_count blocks we need lsb_acc + // for last msb_count blocks we need msb_acc + // when message and carry have tobe extracted + // for first message_count blocks we need message_acc + // for last carry_count blocks we need carry_acc + Torus *cur_tvi; + if (lsb_msb_mode) { + cur_tvi = (big_lwe_start_index < lsb_message_blocks_count) + ? mem_ptr->tvi_lsb_multi_gpu[i] + : mem_ptr->tvi_msb_multi_gpu[i]; + + } else { + cur_tvi = (big_lwe_start_index < lsb_message_blocks_count) + ? 
mem_ptr->tvi_message_multi_gpu[i] + : mem_ptr->tvi_carry_multi_gpu[i]; + } + + // execute keyswitch on a current gpu with corresponding input and output + // blocks pbs_output_multi_gpu[i] is an input for keyswitch and + // pbs_input_multi_gpu[i] is an output for keyswitch + cuda_keyswitch_lwe_ciphertext_vector( + this_stream, i, mem_ptr->pbs_input_multi_gpu[i], + mem_ptr->pbs_output_multi_gpu[i], mem_ptr->ksk_multi_gpu[i], + polynomial_size * glwe_dimension, lwe_dimension, ks_base_log, ks_level, + gpu_blocks_count); + + // execute pbs on a current gpu with corresponding input and output + cuda_multi_bit_pbs_lwe_ciphertext_vector_64( + this_stream, i, mem_ptr->pbs_output_multi_gpu[i], + mem_ptr->test_vector_multi_gpu[i], cur_tvi, + mem_ptr->pbs_input_multi_gpu[i], mem_ptr->bsk_multi_gpu[i], + mem_ptr->pbs_buffer_multi_gpu[i], lwe_dimension, glwe_dimension, + polynomial_size, grouping_factor, pbs_base_log, pbs_level, + grouping_factor, gpu_blocks_count, 2, 0, max_shared_memory); + + // lookup table is applied and now data from current gpu have to be copied + // back to gpu_0 in 'output_ciphertexts' buffer + if (i == 0) { + check_cuda_error( + cudaMemcpyAsync(&output_ciphertexts[big_lwe_start_index], + mem_ptr->pbs_output_multi_gpu[i], + gpu_blocks_count * big_lwe_size * sizeof(Torus), + cudaMemcpyDeviceToDevice, *this_stream)); + } else if (can_access_peer) { + check_cuda_error(cudaMemcpyPeerAsync( + &output_ciphertexts[big_lwe_start_index], 0, + mem_ptr->pbs_output_multi_gpu[i], i, + gpu_blocks_count * big_lwe_size * sizeof(Torus), *this_stream)); + } else { + // Uses host memory as middle ground + cuda_memcpy_async_to_cpu( + mem_ptr->device_to_device_buffer[i], mem_ptr->pbs_output_multi_gpu[i], + gpu_blocks_count * big_lwe_size * sizeof(Torus), this_stream, i); + cuda_memcpy_async_to_gpu(&output_ciphertexts[big_lwe_start_index], + mem_ptr->device_to_device_buffer[i], + gpu_blocks_count * big_lwe_size * sizeof(Torus), + this_stream, i); + } + } +} + +template 
+__global__ void device_small_scalar_radix_multiplication(T *output_lwe_array, + T *input_lwe_array, + T scalar, + uint32_t lwe_dimension, + uint32_t num_blocks) { + + int index = blockIdx.x * blockDim.x + threadIdx.x; + int lwe_size = lwe_dimension + 1; + if (index < num_blocks * lwe_size) { + // Here we take advantage of the wrapping behaviour of uint + output_lwe_array[index] = input_lwe_array[index] * scalar; + } +} + +template +__host__ void host_integer_small_scalar_mult_radix( + cuda_stream_t *stream, T *output_lwe_array, T *input_lwe_array, T scalar, + uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) { + + cudaSetDevice(stream->gpu_index); + // lwe_size includes the presence of the body + // whereas lwe_dimension is the number of elements in the mask + int lwe_size = input_lwe_dimension + 1; + // Create a 1-dimensional grid of threads + int num_blocks = 0, num_threads = 0; + int num_entries = input_lwe_ciphertext_count * lwe_size; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + device_small_scalar_radix_multiplication<<stream>>>( + output_lwe_array, input_lwe_array, scalar, input_lwe_dimension, + input_lwe_ciphertext_count); + check_cuda_error(cudaGetLastError()); +} +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/negation.cu b/backends/tfhe-cuda-backend/implementation/src/integer/negation.cu new file mode 100644 index 000000000..27142de0c --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/negation.cu @@ -0,0 +1,12 @@ +#include "integer/negation.cuh" + +void cuda_negate_integer_radix_ciphertext_64_inplace( + cuda_stream_t *stream, void *lwe_array, uint32_t lwe_dimension, + uint32_t lwe_ciphertext_count, uint32_t message_modulus, + uint32_t carry_modulus) { + + host_integer_radix_negation(stream, static_cast(lwe_array), + static_cast(lwe_array), lwe_dimension, + lwe_ciphertext_count, message_modulus, + 
carry_modulus); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/negation.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/negation.cuh new file mode 100644 index 000000000..5a5ff3b61 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/negation.cuh @@ -0,0 +1,79 @@ +#ifndef CUDA_INTEGER_NEGATE_CUH +#define CUDA_INTEGER_NEGATE_CUH + +#ifdef __CDT_PARSER__ +#undef __CUDA_RUNTIME_H__ +#include +#endif + +#include "device.h" +#include "integer.h" +#include "utils/kernel_dimensions.cuh" + +template +__global__ void +device_integer_radix_negation(Torus *output, Torus *input, int32_t num_blocks, + uint64_t lwe_dimension, uint64_t message_modulus, + uint64_t carry_modulus, uint64_t delta) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < lwe_dimension + 1) { + bool is_body = (tid == lwe_dimension); + + // z = ceil( degree / 2^p ) * 2^p + uint64_t z = (2 * message_modulus - 1) / message_modulus; + __syncthreads(); + z *= message_modulus; + + // (0,Delta*z) - ct + output[tid] = (is_body ? z * delta - input[tid] : -input[tid]); + + for (int radix_block_id = 1; radix_block_id < num_blocks; + radix_block_id++) { + tid += (lwe_dimension + 1); + + // Subtract z/B to the next ciphertext to compensate for the addition of z + uint64_t zb = z / message_modulus; + + uint64_t encoded_zb = zb * delta; + + __syncthreads(); + + // (0,Delta*z) - ct + output[tid] = + (is_body ? 
z * delta - (input[tid] + encoded_zb) : -input[tid]); + __syncthreads(); + } + } +} + +template +__host__ void host_integer_radix_negation(cuda_stream_t *stream, Torus *output, + Torus *input, uint32_t lwe_dimension, + uint32_t input_lwe_ciphertext_count, + uint64_t message_modulus, + uint64_t carry_modulus) { + cudaSetDevice(stream->gpu_index); + + // lwe_size includes the presence of the body + // whereas lwe_dimension is the number of elements in the mask + int lwe_size = lwe_dimension + 1; + // Create a 1-dimensional grid of threads + int num_blocks = 0, num_threads = 0; + int num_entries = lwe_size; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + uint64_t shared_mem = input_lwe_ciphertext_count * sizeof(uint32_t); + + // Value of the shift we multiply our messages by + // If message_modulus and carry_modulus are always powers of 2 we can simplify + // this + uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus); + + device_integer_radix_negation<<stream>>>( + output, input, input_lwe_ciphertext_count, lwe_dimension, message_modulus, + carry_modulus, delta); + check_cuda_error(cudaGetLastError()); +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_addition.cu b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_addition.cu new file mode 100644 index 000000000..281a0cb80 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_addition.cu @@ -0,0 +1,12 @@ +#include "integer/scalar_addition.cuh" + +void cuda_scalar_addition_integer_radix_ciphertext_64_inplace( + cuda_stream_t *stream, void *lwe_array, void *scalar_input, + uint32_t lwe_dimension, uint32_t lwe_ciphertext_count, + uint32_t message_modulus, uint32_t carry_modulus) { + + host_integer_radix_scalar_addition_inplace( + stream, static_cast(lwe_array), + static_cast(scalar_input), lwe_dimension, + lwe_ciphertext_count, 
message_modulus, carry_modulus); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_addition.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_addition.cuh new file mode 100644 index 000000000..2a3675995 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_addition.cuh @@ -0,0 +1,130 @@ +#ifndef CUDA_INTEGER_ADD_CUH +#define CUDA_INTEGER_ADD_CUH + +#ifdef __CDT_PARSER__ +#undef __CUDA_RUNTIME_H__ +#include +#endif + +#include "device.h" +#include "integer.h" +#include "utils/kernel_dimensions.cuh" +#include + +template +__global__ void device_integer_radix_scalar_addition_inplace( + Torus *lwe_array, Torus *scalar_input, int32_t num_blocks, + uint32_t lwe_dimension, uint64_t delta) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < num_blocks) { + Torus scalar = scalar_input[tid]; + Torus *body = lwe_array + tid * (lwe_dimension + 1) + lwe_dimension; + + *body += scalar * delta; + } +} + +template +__host__ void host_integer_radix_scalar_addition_inplace( + cuda_stream_t *stream, Torus *lwe_array, Torus *scalar_input, + uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count, + uint32_t message_modulus, uint32_t carry_modulus) { + cudaSetDevice(stream->gpu_index); + + // Create a 1-dimensional grid of threads + int num_blocks = 0, num_threads = 0; + int num_entries = input_lwe_ciphertext_count; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + // Value of the shift we multiply our messages by + // If message_modulus and carry_modulus are always powers of 2 we can simplify + // this + uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus); + + device_integer_radix_scalar_addition_inplace<<stream>>>( + lwe_array, scalar_input, input_lwe_ciphertext_count, lwe_dimension, + delta); + check_cuda_error(cudaGetLastError()); +} + +template +__global__ void 
device_integer_radix_add_scalar_one_inplace( + Torus *lwe_array, int32_t num_blocks, uint32_t lwe_dimension, + uint64_t delta) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < num_blocks) { + Torus *body = lwe_array + tid * (lwe_dimension + 1) + lwe_dimension; + *body += delta; + } +} + +template +__host__ void host_integer_radix_add_scalar_one_inplace( + cuda_stream_t *stream, Torus *lwe_array, uint32_t lwe_dimension, + uint32_t input_lwe_ciphertext_count, uint32_t message_modulus, + uint32_t carry_modulus) { + cudaSetDevice(stream->gpu_index); + + // Create a 1-dimensional grid of threads + int num_blocks = 0, num_threads = 0; + int num_entries = input_lwe_ciphertext_count; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + // Value of the shift we multiply our messages by + // If message_modulus and carry_modulus are always powers of 2 we can simplify + // this + uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus); + + device_integer_radix_add_scalar_one_inplace<<stream>>>( + lwe_array, input_lwe_ciphertext_count, lwe_dimension, delta); + check_cuda_error(cudaGetLastError()); +} + +template +__global__ void device_integer_radix_scalar_subtraction_inplace( + Torus *lwe_array, Torus *scalar_input, int32_t num_blocks, + uint32_t lwe_dimension, uint64_t delta) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < num_blocks) { + Torus scalar = scalar_input[tid]; + Torus *body = lwe_array + tid * (lwe_dimension + 1) + lwe_dimension; + + *body -= scalar * delta; + } +} + +template +__host__ void host_integer_radix_scalar_subtraction_inplace( + cuda_stream_t *stream, Torus *lwe_array, Torus *scalar_input, + uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count, + uint32_t message_modulus, uint32_t carry_modulus) { + cudaSetDevice(stream->gpu_index); + + // Create a 1-dimensional grid of threads + int num_blocks = 0, num_threads 
= 0; + int num_entries = input_lwe_ciphertext_count; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + // Value of the shift we multiply our messages by + // If message_modulus and carry_modulus are always powers of 2 we can simplify + // this + uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus); + + device_integer_radix_scalar_subtraction_inplace<<stream>>>( + lwe_array, scalar_input, input_lwe_ciphertext_count, lwe_dimension, + delta); + check_cuda_error(cudaGetLastError()); +} +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_bitops.cu b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_bitops.cu new file mode 100644 index 000000000..0a9c42d60 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_bitops.cu @@ -0,0 +1,14 @@ +#include "integer/scalar_bitops.cuh" + +void cuda_scalar_bitop_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_input, + void *clear_blocks, uint32_t num_clear_blocks, int8_t *mem_ptr, void *bsk, + void *ksk, uint32_t lwe_ciphertext_count, BITOP_TYPE op) { + + host_integer_radix_scalar_bitop_kb( + stream, static_cast(lwe_array_out), + static_cast(lwe_array_input), + static_cast(clear_blocks), num_clear_blocks, + (int_bitop_buffer *)mem_ptr, bsk, static_cast(ksk), + lwe_ciphertext_count, op); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_bitops.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_bitops.cuh new file mode 100644 index 000000000..96519d29d --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_bitops.cuh @@ -0,0 +1,51 @@ +#ifndef CUDA_INTEGER_SCALAR_BITWISE_OPS_CUH +#define CUDA_INTEGER_SCALAR_BITWISE_OPS_CUH + +#include "integer/bitwise_ops.cuh" +#include + +template +__host__ void host_integer_radix_scalar_bitop_kb( + cuda_stream_t 
*stream, Torus *lwe_array_out, Torus *lwe_array_input, + Torus *clear_blocks, uint32_t num_clear_blocks, + int_bitop_buffer *mem_ptr, void *bsk, Torus *ksk, + uint32_t num_radix_blocks, BITOP_TYPE op) { + + auto lut = mem_ptr->lut; + auto params = lut->params; + auto big_lwe_dimension = params.big_lwe_dimension; + + uint32_t lwe_size = big_lwe_dimension + 1; + + if (num_clear_blocks == 0) { + if (op == SCALAR_BITAND) { + auto lwe_array_out_block = lwe_array_out + num_clear_blocks * lwe_size; + cuda_memset_async(lwe_array_out, 0, + num_radix_blocks * lwe_size * sizeof(Torus), stream); + } else { + cuda_memcpy_async_gpu_to_gpu(lwe_array_out, lwe_array_input, + num_radix_blocks * lwe_size * sizeof(Torus), + stream); + } + } else { + auto lut_buffer = lut->lut; + // We have all possible LUTs pre-computed and we use the decomposed scalar + // as index to recover the right one + cuda_memcpy_async_gpu_to_gpu(lut->lut_indexes, clear_blocks, + num_clear_blocks * sizeof(Torus), stream); + + integer_radix_apply_univariate_lookup_table_kb( + stream, lwe_array_out, lwe_array_input, bsk, ksk, num_clear_blocks, + lut); + + if (op == SCALAR_BITAND) { + auto lwe_array_out_block = lwe_array_out + num_clear_blocks * lwe_size; + cuda_memset_async(lwe_array_out_block, 0, + (num_radix_blocks - num_clear_blocks) * lwe_size * + sizeof(Torus), + stream); + } + } +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_comparison.cu b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_comparison.cu new file mode 100644 index 000000000..e6ac9982a --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_comparison.cu @@ -0,0 +1,44 @@ +#include "integer/scalar_comparison.cuh" + +void cuda_scalar_comparison_integer_radix_ciphertext_kb_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + void *scalar_blocks, int8_t *mem_ptr, void *bsk, void *ksk, + uint32_t lwe_ciphertext_count, uint32_t num_scalar_blocks) { + + 
int_comparison_buffer *buffer = + (int_comparison_buffer *)mem_ptr; + switch (buffer->op) { + // case EQ: + // case NE: + // host_integer_radix_equality_check_kb( + // stream, static_cast(lwe_array_out), + // static_cast(lwe_array_1), + // static_cast(lwe_array_2), buffer, bsk, + // static_cast(ksk), glwe_dimension, polynomial_size, + // big_lwe_dimension, small_lwe_dimension, ks_level, ks_base_log, + // pbs_level, pbs_base_log, grouping_factor, lwe_ciphertext_count, + // message_modulus, carry_modulus); + // break; + case GT: + case GE: + case LT: + case LE: + host_integer_radix_scalar_difference_check_kb( + stream, static_cast(lwe_array_out), + static_cast(lwe_array_in), + static_cast(scalar_blocks), buffer, + buffer->diff_buffer->operator_f, bsk, static_cast(ksk), + lwe_ciphertext_count, num_scalar_blocks); + break; + case MAX: + case MIN: + host_integer_radix_scalar_maxmin_kb( + stream, static_cast(lwe_array_out), + static_cast(lwe_array_in), + static_cast(scalar_blocks), buffer, bsk, + static_cast(ksk), lwe_ciphertext_count, num_scalar_blocks); + break; + default: + printf("Not implemented\n"); + } +} diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_comparison.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_comparison.cuh new file mode 100644 index 000000000..3cf22f0ce --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_comparison.cuh @@ -0,0 +1,298 @@ +#ifndef CUDA_INTEGER_SCALAR_COMPARISON_OPS_CUH +#define CUDA_INTEGER_SCALAR_COMPARISON_OPS_CUH + +#include "integer/comparison.cuh" +#include + +template +__host__ void host_integer_radix_scalar_difference_check_kb( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_in, + Torus *scalar_blocks, int_comparison_buffer *mem_ptr, + std::function sign_handler_f, void *bsk, Torus *ksk, + uint32_t total_num_radix_blocks, uint32_t total_num_scalar_blocks) { + + auto params = mem_ptr->params; + auto big_lwe_dimension = 
params.big_lwe_dimension; + auto glwe_dimension = params.glwe_dimension; + auto polynomial_size = params.polynomial_size; + auto message_modulus = params.message_modulus; + auto carry_modulus = params.carry_modulus; + + auto diff_buffer = mem_ptr->diff_buffer; + + size_t big_lwe_size = big_lwe_dimension + 1; + size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus); + + // Reducing the signs is the bottleneck of the comparison algorithms, + // however if the scalar case there is an improvement: + // + // The idea is to reduce the number of signs block we have to + // reduce. We can do that by splitting the comparison problem in two parts. + // + // - One part where we compute the signs block between the scalar with just + // enough blocks + // from the ciphertext that can represent the scalar value + // + // - The other part is to compare the ciphertext blocks not considered for the + // sign + // computation with zero, and create a single sign block from that. + // + // The smaller the scalar value is compared to the ciphertext num bits + // encrypted, the more the comparisons with zeros we have to do, and the less + // signs block we will have to reduce. + // + // This will create a speedup as comparing a bunch of blocks with 0 + // is faster + if (total_num_scalar_blocks == 0) { + // We only have to compare blocks with zero + // means scalar is zero + host_compare_with_zero_equality(stream, mem_ptr->tmp_lwe_array_out, + lwe_array_in, mem_ptr, bsk, ksk, + total_num_radix_blocks); + + auto scalar_last_leaf_lut_f = [sign_handler_f](Torus x) -> Torus { + x = (x == 1 ? 
IS_EQUAL : IS_SUPERIOR); + + return sign_handler_f(x); + }; + + auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut; + generate_device_accumulator(stream, lut->lut, glwe_dimension, + polynomial_size, message_modulus, + carry_modulus, scalar_last_leaf_lut_f); + + integer_radix_apply_univariate_lookup_table_kb( + stream, lwe_array_out, mem_ptr->tmp_lwe_array_out, bsk, ksk, 1, lut); + + // The result will be in the two first block. Everything else is + // garbage. + cuda_memset_async(lwe_array_out + big_lwe_size, 0, + big_lwe_size_bytes * (total_num_radix_blocks - 1), + stream); + + } else if (total_num_scalar_blocks < total_num_radix_blocks) { + // We have to handle both part of the work described above + + uint32_t num_lsb_radix_blocks = total_num_scalar_blocks; + uint32_t num_msb_radix_blocks = + total_num_radix_blocks - num_lsb_radix_blocks; + + auto lsb = lwe_array_in; + auto msb = lwe_array_in + num_lsb_radix_blocks * big_lwe_size; + + auto lwe_array_lsb_out = mem_ptr->tmp_lwe_array_out; + auto lwe_array_msb_out = lwe_array_lsb_out + big_lwe_size; + + cuda_synchronize_stream(stream); + auto lsb_stream = diff_buffer->lsb_stream; + auto msb_stream = diff_buffer->msb_stream; + +#pragma omp parallel sections + { + // Both sections may be executed in parallel +#pragma omp section + { + ////////////// + // lsb + Torus *lhs = diff_buffer->tmp_packed_left; + Torus *rhs = diff_buffer->tmp_packed_right; + + pack_blocks(lsb_stream, lhs, lwe_array_in, big_lwe_dimension, + num_lsb_radix_blocks, message_modulus); + pack_blocks(lsb_stream, rhs, scalar_blocks, 0, total_num_scalar_blocks, + message_modulus); + + // From this point we have half number of blocks + num_lsb_radix_blocks /= 2; + num_lsb_radix_blocks += (total_num_scalar_blocks % 2); + + // comparisons will be assigned + // - 0 if lhs < rhs + // - 1 if lhs == rhs + // - 2 if lhs > rhs + + auto comparisons = mem_ptr->tmp_block_comparisons; + scalar_compare_radix_blocks_kb(lsb_stream, comparisons, 
lhs, rhs, + mem_ptr, bsk, ksk, num_lsb_radix_blocks); + + // Reduces a vec containing radix blocks that encrypts a sign + // (inferior, equal, superior) to one single radix block containing the + // final sign + tree_sign_reduction(lsb_stream, lwe_array_lsb_out, comparisons, + mem_ptr->diff_buffer->tree_buffer, + mem_ptr->cleaning_lut_f, bsk, ksk, + num_lsb_radix_blocks); + } +#pragma omp section + { + ////////////// + // msb + host_compare_with_zero_equality(msb_stream, lwe_array_msb_out, msb, + mem_ptr, bsk, ksk, + num_msb_radix_blocks); + } + } + cuda_synchronize_stream(lsb_stream); + cuda_synchronize_stream(msb_stream); + + ////////////// + // Reduce the two blocks into one final + + auto scalar_bivariate_last_leaf_lut_f = + [sign_handler_f](Torus lsb, Torus msb) -> Torus { + if (msb == 1) + return sign_handler_f(lsb); + else + return sign_handler_f(IS_SUPERIOR); + }; + + auto lut = diff_buffer->tree_buffer->tree_last_leaf_scalar_lut; + generate_device_accumulator_bivariate( + stream, lut->lut, glwe_dimension, polynomial_size, message_modulus, + carry_modulus, scalar_bivariate_last_leaf_lut_f); + + integer_radix_apply_bivariate_lookup_table_kb( + stream, lwe_array_out, lwe_array_lsb_out, lwe_array_msb_out, bsk, ksk, + 1, lut); + + // The result will be in the first block. Everything else is garbage. 
+ cuda_memset_async(lwe_array_out + big_lwe_size, 0, + (total_num_radix_blocks - 1) * big_lwe_size_bytes, + stream); + } else { + // We only have to do the regular comparison + // And not the part where we compare most significant blocks with zeros + // total_num_radix_blocks == total_num_scalar_blocks + uint32_t num_lsb_radix_blocks = total_num_radix_blocks; + uint32_t num_scalar_blocks = total_num_scalar_blocks; + + auto lsb = lwe_array_in; + + Torus *lhs = diff_buffer->tmp_packed_left; + Torus *rhs = diff_buffer->tmp_packed_right; + + pack_blocks(stream, lhs, lwe_array_in, big_lwe_dimension, + num_lsb_radix_blocks, message_modulus); + pack_blocks(stream, rhs, scalar_blocks, 0, num_scalar_blocks, + message_modulus); + + // From this point we have half number of blocks + num_lsb_radix_blocks /= 2; + num_scalar_blocks /= 2; + + // comparisons will be assigned + // - 0 if lhs < rhs + // - 1 if lhs == rhs + // - 2 if lhs > rhs + auto comparisons = mem_ptr->tmp_lwe_array_out; + scalar_compare_radix_blocks_kb(stream, comparisons, lhs, rhs, mem_ptr, bsk, + ksk, num_lsb_radix_blocks); + + // Reduces a vec containing radix blocks that encrypts a sign + // (inferior, equal, superior) to one single radix block containing the + // final sign + tree_sign_reduction(stream, lwe_array_out, comparisons, + mem_ptr->diff_buffer->tree_buffer, sign_handler_f, bsk, + ksk, num_lsb_radix_blocks); + + // The result will be in the first block. Everything else is garbage. 
+ cuda_memset_async(lwe_array_out + big_lwe_size, 0, + (total_num_radix_blocks - 1) * big_lwe_size_bytes, + stream); + } +} + +template +__host__ void +scalar_compare_radix_blocks_kb(cuda_stream_t *stream, Torus *lwe_array_out, + Torus *lwe_array_in, Torus *scalar_blocks, + int_comparison_buffer *mem_ptr, void *bsk, + Torus *ksk, uint32_t num_radix_blocks) { + + auto params = mem_ptr->params; + auto pbs_type = params.pbs_type; + auto big_lwe_dimension = params.big_lwe_dimension; + auto small_lwe_dimension = params.small_lwe_dimension; + auto ks_level = params.ks_level; + auto ks_base_log = params.ks_base_log; + auto pbs_level = params.pbs_level; + auto pbs_base_log = params.pbs_base_log; + auto glwe_dimension = params.glwe_dimension; + auto polynomial_size = params.polynomial_size; + auto grouping_factor = params.grouping_factor; + auto message_modulus = params.message_modulus; + auto carry_modulus = params.carry_modulus; + + // When rhs > lhs, the subtraction will overflow, and the bit of padding will + // be set to 1 + // meaning that the output of the pbs will be the negative (modulo message + // space) + // + // Example: + // lhs: 1, rhs: 3, message modulus: 4, carry modulus 4 + // lhs - rhs = -2 % (4 * 4) = 14 = 1|1110 (padding_bit|b4b3b2b1) + // Since there was an overflow the bit of padding is 1 and not 0. + // When applying the LUT for an input value of 14 we would expect 1, + // but since the bit of padding is 1, we will get -1 modulus our message + // space, so (-1) % (4 * 4) = 15 = 1|1111 We then add one and get 0 = 0|0000 + + auto subtracted_blocks = mem_ptr->tmp_block_comparisons; + cuda_memcpy_async_gpu_to_gpu( + subtracted_blocks, lwe_array_in, + num_radix_blocks * (big_lwe_dimension + 1) * sizeof(Torus), stream); + // Subtract + // Here we need the true lwe sub, not the one that comes from shortint. 
+ host_integer_radix_scalar_subtraction_inplace( + stream, subtracted_blocks, scalar_blocks, big_lwe_dimension, + num_radix_blocks, message_modulus, carry_modulus); + + // Apply LUT to compare to 0 + auto sign_lut = mem_ptr->eq_buffer->is_non_zero_lut; + integer_radix_apply_univariate_lookup_table_kb(stream, lwe_array_out, + subtracted_blocks, bsk, ksk, + num_radix_blocks, sign_lut); + + // Add one + // Here Lhs can have the following values: (-1) % (message modulus * carry + // modulus), 0, 1 So the output values after the addition will be: 0, 1, 2 + host_integer_radix_add_scalar_one_inplace(stream, lwe_array_out, + big_lwe_dimension, num_radix_blocks, + message_modulus, carry_modulus); +} + +template +__host__ void host_integer_radix_scalar_maxmin_kb( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_in, + Torus *scalar_blocks, int_comparison_buffer *mem_ptr, void *bsk, + Torus *ksk, uint32_t total_num_radix_blocks, + uint32_t total_num_scalar_blocks) { + + auto params = mem_ptr->params; + + // Calculates the difference sign between the ciphertext and the scalar + // - 0 if lhs < rhs + // - 1 if lhs == rhs + // - 2 if lhs > rhs + auto sign = mem_ptr->tmp_lwe_array_out; + host_integer_radix_scalar_difference_check_kb( + stream, sign, lwe_array_in, scalar_blocks, mem_ptr, + mem_ptr->cleaning_lut_f, bsk, ksk, total_num_radix_blocks, + total_num_scalar_blocks); + + // There is no optimized CMUX for scalars, so we convert to a trivial + // ciphertext + auto lwe_array_left = lwe_array_in; + auto lwe_array_right = mem_ptr->tmp_block_comparisons; + + create_trivial_radix(stream, lwe_array_right, scalar_blocks, + params.big_lwe_dimension, total_num_radix_blocks, + total_num_scalar_blocks, params.message_modulus, + params.carry_modulus); + + // Selector + // CMUX for Max or Min + host_integer_radix_cmux_kb( + stream, lwe_array_out, mem_ptr->tmp_lwe_array_out, lwe_array_left, + lwe_array_right, mem_ptr->cmux_buffer, bsk, ksk, total_num_radix_blocks); +} 
+#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_rotate.cu b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_rotate.cu new file mode 100644 index 000000000..4ce397f5c --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_rotate.cu @@ -0,0 +1,40 @@ +#include "scalar_rotate.cuh" + +void scratch_cuda_integer_radix_scalar_rotate_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus, + PBS_TYPE pbs_type, SHIFT_TYPE shift_type, bool allocate_gpu_memory) { + + int_radix_params params(pbs_type, glwe_dimension, polynomial_size, + big_lwe_dimension, small_lwe_dimension, ks_level, + ks_base_log, pbs_level, pbs_base_log, grouping_factor, + message_modulus, carry_modulus); + + scratch_cuda_integer_radix_scalar_rotate_kb( + stream, (int_shift_buffer **)mem_ptr, num_blocks, params, + shift_type, allocate_gpu_memory); +} + +void cuda_integer_radix_scalar_rotate_kb_64_inplace(cuda_stream_t *stream, + void *lwe_array, uint32_t n, + int8_t *mem_ptr, void *bsk, + void *ksk, + uint32_t num_blocks) { + + host_integer_radix_scalar_rotate_kb_inplace( + stream, static_cast(lwe_array), n, + (int_shift_buffer *)mem_ptr, bsk, static_cast(ksk), + num_blocks); +} + +void cleanup_cuda_integer_radix_scalar_rotate(cuda_stream_t *stream, + int8_t **mem_ptr_void) { + + int_shift_buffer *mem_ptr = + (int_shift_buffer *)(*mem_ptr_void); + + mem_ptr->release(stream); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_rotate.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_rotate.cuh new file mode 100644 index 000000000..d45ae1225 --- /dev/null +++ 
b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_rotate.cuh @@ -0,0 +1,104 @@ +#ifndef CUDA_INTEGER_SCALAR_ROTATE_OPS_CUH +#define CUDA_INTEGER_SCALAR_ROTATE_OPS_CUH + +#include "crypto/keyswitch.cuh" +#include "device.h" +#include "integer.cuh" +#include "integer.h" +#include "pbs/bootstrap_low_latency.cuh" +#include "pbs/bootstrap_multibit.cuh" +#include "types/complex/operations.cuh" +#include "utils/helper.cuh" +#include "utils/kernel_dimensions.cuh" + +template +__host__ void scratch_cuda_integer_radix_scalar_rotate_kb( + cuda_stream_t *stream, int_shift_buffer **mem_ptr, + uint32_t num_radix_blocks, int_radix_params params, SHIFT_TYPE shift_type, + bool allocate_gpu_memory) { + + *mem_ptr = new int_shift_buffer(stream, shift_type, params, + num_radix_blocks, allocate_gpu_memory); +} + +// Rotates the radix ciphertext in lwe_array in place by n bits (n is reduced +// modulo the total number of message bits): whole blocks are rotated first, +// then the in-block remainder is applied through a bivariate lookup table. +template +__host__ void host_integer_radix_scalar_rotate_kb_inplace( + cuda_stream_t *stream, Torus *lwe_array, uint32_t n, + int_shift_buffer *mem, void *bsk, Torus *ksk, uint32_t num_blocks) { + + auto params = mem->params; + auto glwe_dimension = params.glwe_dimension; + auto polynomial_size = params.polynomial_size; + auto message_modulus = params.message_modulus; + + size_t big_lwe_size = glwe_dimension * polynomial_size + 1; + size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus); + + size_t num_bits_in_message = (size_t)log2(message_modulus); + size_t total_num_bits = num_bits_in_message * num_blocks; + n = n % total_num_bits; + + if (n == 0) { + return; + } + size_t rotations = n / num_bits_in_message; + size_t shift_within_block = n % num_bits_in_message; + + Torus *rotated_buffer = 
mem->tmp_rotated; + + // rotate right all the blocks in radix ciphertext + // copy result in new buffer + // 256 threads are used in every block + // block_count blocks will be used in the grid + // one block is responsible to process single lwe ciphertext + if (mem->shift_type == LEFT_SHIFT) { + radix_blocks_rotate_right<<stream>>>( + rotated_buffer, lwe_array, rotations, num_blocks, big_lwe_size); + + cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer, + num_blocks * big_lwe_size_bytes, stream); + + if (shift_within_block == 0) { + return; + } + + auto receiver_blocks = lwe_array; + auto giver_blocks = rotated_buffer; + // shift_within_block > 0 here, so the index below cannot underflow + auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1]; + radix_blocks_rotate_right<<stream>>>( + giver_blocks, lwe_array, 1, num_blocks, big_lwe_size); + + integer_radix_apply_bivariate_lookup_table_kb( + stream, lwe_array, receiver_blocks, giver_blocks, bsk, ksk, num_blocks, + lut_bivariate); + + } else { + // shift_type is not LEFT_SHIFT: rotate the blocks the other way + radix_blocks_rotate_left<<stream>>>( + rotated_buffer, lwe_array, rotations, num_blocks, big_lwe_size); + + cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer, + num_blocks * big_lwe_size_bytes, stream); + + if (shift_within_block == 0) { + return; + } + + auto receiver_blocks = lwe_array; + auto giver_blocks = rotated_buffer; + // shift_within_block > 0 here, so the index below cannot underflow + auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1]; + radix_blocks_rotate_left<<stream>>>( + giver_blocks, lwe_array, 1, num_blocks, big_lwe_size); + + integer_radix_apply_bivariate_lookup_table_kb( + stream, lwe_array, receiver_blocks, giver_blocks, bsk, ksk, num_blocks, + lut_bivariate); + } +} + +#endif // CUDA_INTEGER_SCALAR_ROTATE_OPS_CUH diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_shifts.cu b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_shifts.cu new file mode 100644 index 000000000..c63770729 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_shifts.cu @@ -0,0 +1,38 @@ +#include "scalar_shifts.cuh" + +void 
scratch_cuda_integer_radix_scalar_shift_kb_64( + cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t big_lwe_dimension, + uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log, + uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor, + uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus, + PBS_TYPE pbs_type, SHIFT_TYPE shift_type, bool allocate_gpu_memory) { + + int_radix_params params(pbs_type, glwe_dimension, polynomial_size, + big_lwe_dimension, small_lwe_dimension, ks_level, + ks_base_log, pbs_level, pbs_base_log, grouping_factor, + message_modulus, carry_modulus); + + scratch_cuda_integer_radix_scalar_shift_kb( + stream, (int_shift_buffer **)mem_ptr, num_blocks, params, + shift_type, allocate_gpu_memory); +} + +void cuda_integer_radix_scalar_shift_kb_64_inplace( + cuda_stream_t *stream, void *lwe_array, uint32_t shift, int8_t *mem_ptr, + void *bsk, void *ksk, uint32_t num_blocks) { + + host_integer_radix_scalar_shift_kb_inplace( + stream, static_cast(lwe_array), shift, + (int_shift_buffer *)mem_ptr, bsk, static_cast(ksk), + num_blocks); +} + +void cleanup_cuda_integer_radix_scalar_shift(cuda_stream_t *stream, + int8_t **mem_ptr_void) { + + int_shift_buffer *mem_ptr = + (int_shift_buffer *)(*mem_ptr_void); + + mem_ptr->release(stream); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/scalar_shifts.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_shifts.cuh new file mode 100644 index 000000000..098aa901a --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/integer/scalar_shifts.cuh @@ -0,0 +1,125 @@ +#ifndef CUDA_INTEGER_SHIFT_OPS_CUH +#define CUDA_INTEGER_SHIFT_OPS_CUH + +#include "crypto/keyswitch.cuh" +#include "device.h" +#include "integer.cuh" +#include "integer.h" +#include "pbs/bootstrap_low_latency.cuh" +#include "pbs/bootstrap_multibit.cuh" +#include "types/complex/operations.cuh" +#include 
"utils/helper.cuh" +#include "utils/kernel_dimensions.cuh" + +template +__host__ void scratch_cuda_integer_radix_scalar_shift_kb( + cuda_stream_t *stream, int_shift_buffer **mem_ptr, + uint32_t num_radix_blocks, int_radix_params params, SHIFT_TYPE shift_type, + bool allocate_gpu_memory) { + + *mem_ptr = new int_shift_buffer(stream, shift_type, params, + num_radix_blocks, allocate_gpu_memory); +} + +template +__host__ void host_integer_radix_scalar_shift_kb_inplace( + cuda_stream_t *stream, Torus *lwe_array, uint32_t shift, + int_shift_buffer *mem, void *bsk, Torus *ksk, uint32_t num_blocks) { + + auto params = mem->params; + auto glwe_dimension = params.glwe_dimension; + auto polynomial_size = params.polynomial_size; + auto message_modulus = params.message_modulus; + + size_t big_lwe_size = glwe_dimension * polynomial_size + 1; + size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus); + + size_t num_bits_in_block = (size_t)log2(message_modulus); + size_t total_num_bits = num_bits_in_block * num_blocks; + shift = shift % total_num_bits; + + if (shift == 0) { + return; + } + size_t rotations = std::min(shift / num_bits_in_block, (size_t)num_blocks); + size_t shift_within_block = shift % num_bits_in_block; + + Torus *rotated_buffer = mem->tmp_rotated; + auto lut_univariate = mem->lut_buffers_univariate[shift_within_block]; + + // rotate right all the blocks in radix ciphertext + // copy result in new buffer + // 256 threads are used in every block + // block_count blocks will be used in the grid + // one block is responsible to process single lwe ciphertext + if (mem->shift_type == LEFT_SHIFT) { + radix_blocks_rotate_right<<stream>>>( + rotated_buffer, lwe_array, rotations, num_blocks, big_lwe_size); + + // create trivial assign for value = 0 + cuda_memset_async(rotated_buffer, 0, rotations * big_lwe_size_bytes, + stream); + cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer, + 
num_blocks * big_lwe_size_bytes, stream); + + if (shift_within_block == 0 || rotations == num_blocks) { + return; + } + // shift_within_block > 0 past the early return, so the index cannot underflow + auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1]; + // check if we have enough blocks for partial processing + if (rotations < num_blocks - 1) { + auto partial_current_blocks = &lwe_array[(rotations + 1) * big_lwe_size]; + auto partial_previous_blocks = &lwe_array[rotations * big_lwe_size]; + + size_t partial_block_count = num_blocks - rotations - 1; + + integer_radix_apply_bivariate_lookup_table_kb( + stream, partial_current_blocks, partial_current_blocks, + partial_previous_blocks, bsk, ksk, partial_block_count, + lut_bivariate); + } + + auto rest = &lwe_array[rotations * big_lwe_size]; + + integer_radix_apply_univariate_lookup_table_kb( + stream, rest, rest, bsk, ksk, 1, lut_univariate); + + } else { + // right shift + radix_blocks_rotate_left<<stream>>>( + rotated_buffer, lwe_array, rotations, num_blocks, big_lwe_size); + + // rotate left as the blocks are from LSB to MSB + // create trivial assign for value = 0 + cuda_memset_async(rotated_buffer + (num_blocks - rotations) * big_lwe_size, + 0, rotations * big_lwe_size_bytes, stream); + cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer, + num_blocks * big_lwe_size_bytes, stream); + + if (shift_within_block == 0 || rotations == num_blocks) { + return; + } + // shift_within_block > 0 past the early return, so the index cannot underflow + auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1]; + // check if we have enough blocks for partial processing + if (rotations < num_blocks - 1) { + auto partial_current_blocks = lwe_array; + auto partial_next_blocks = &lwe_array[big_lwe_size]; + + size_t partial_block_count = num_blocks - rotations - 1; + + integer_radix_apply_bivariate_lookup_table_kb( + stream, partial_current_blocks, partial_current_blocks, + partial_next_blocks, bsk, ksk, partial_block_count, lut_bivariate); + } + + // The right-most block is done separately as it does not + // need to recuperate the shifted bits from its right neighbour. 
+ auto last_block = &lwe_array[(num_blocks - rotations - 1) * big_lwe_size]; + integer_radix_apply_univariate_lookup_table_kb( + stream, last_block, last_block, bsk, ksk, 1, lut_univariate); + } +} + +#endif // CUDA_SCALAR_OPS_CUH diff --git a/backends/tfhe-cuda-backend/implementation/src/integer/shifts.cuh b/backends/tfhe-cuda-backend/implementation/src/integer/shifts.cuh new file mode 100644 index 000000000..e69de29bb diff --git a/backends/tfhe-cuda-backend/implementation/src/linearalgebra/addition.cu b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/addition.cu new file mode 100644 index 000000000..1e5ac750c --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/addition.cu @@ -0,0 +1,109 @@ +#include "linearalgebra/addition.cuh" + +/* + * Perform the addition of two u32 input LWE ciphertext vectors. + * See the equivalent operation on u64 ciphertexts for more details. + */ +void cuda_add_lwe_ciphertext_vector_32(cuda_stream_t *stream, + void *lwe_array_out, + void *lwe_array_in_1, + void *lwe_array_in_2, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + host_addition(stream, static_cast(lwe_array_out), + static_cast(lwe_array_in_1), + static_cast(lwe_array_in_2), input_lwe_dimension, + input_lwe_ciphertext_count); +} + +/* + * Perform the addition of two u64 input LWE ciphertext vectors. + * - `v_stream` is a void pointer to the Cuda stream to be used in the kernel + * launch + * - `gpu_index` is the index of the GPU to be used in the kernel launch + * - `lwe_array_out` is an array of size + * `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have + * been allocated on the GPU before calling this function, and that will hold + * the result of the computation. + * - `lwe_array_in_1` is the first LWE ciphertext vector used as input, it + * should have been allocated and initialized before calling this function. It + * has the same size as the output array. 
+ * - `lwe_array_in_2` is the second LWE ciphertext vector used as input, it + * should have been allocated and initialized before calling this function. It + * has the same size as the output array. + * - `input_lwe_dimension` is the number of mask elements in the two input and + * in the output ciphertext vectors + * - `input_lwe_ciphertext_count` is the number of ciphertexts contained in each + * input LWE ciphertext vector, as well as in the output. + * + * Each element (mask element or body) of the input LWE ciphertext vector 1 is + * added to the corresponding element in the input LWE ciphertext 2. The result + * is stored in the output LWE ciphertext vector. The two input LWE ciphertext + * vectors are left unchanged. This function is a wrapper to a device function + * that performs the operation on the GPU. + */ +void cuda_add_lwe_ciphertext_vector_64(cuda_stream_t *stream, + void *lwe_array_out, + void *lwe_array_in_1, + void *lwe_array_in_2, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + host_addition(stream, static_cast(lwe_array_out), + static_cast(lwe_array_in_1), + static_cast(lwe_array_in_2), input_lwe_dimension, + input_lwe_ciphertext_count); +} +/* + * Perform the addition of a u32 input LWE ciphertext vector with a u32 + * plaintext vector. See the equivalent operation on u64 data for more details. + */ +void cuda_add_lwe_ciphertext_vector_plaintext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + void *plaintext_array_in, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + host_addition_plaintext(stream, static_cast(lwe_array_out), + static_cast(lwe_array_in), + static_cast(plaintext_array_in), + input_lwe_dimension, input_lwe_ciphertext_count); +} +/* + * Perform the addition of a u64 input LWE ciphertext vector with a u64 input + * plaintext vector. 
+ * - `v_stream` is a void pointer to the Cuda stream to be used in the kernel + * launch + * - `gpu_index` is the index of the GPU to be used in the kernel launch + * - `lwe_array_out` is an array of size + * `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have + * been allocated on the GPU before calling this function, and that will hold + * the result of the computation. + * - `lwe_array_in` is the LWE ciphertext vector used as input, it should have + * been allocated and initialized before calling this function. It has the same + * size as the output array. + * - `plaintext_array_in` is the plaintext vector used as input, it should have + * been allocated and initialized before calling this function. It should be of + * size `input_lwe_ciphertext_count`. + * - `input_lwe_dimension` is the number of mask elements in the input and + * output LWE ciphertext vectors + * - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the + * input LWE ciphertext vector, as well as in the output. It is also the number + * of plaintexts in the input plaintext vector. + * + * Each plaintext of the input plaintext vector is added to the body of the + * corresponding LWE ciphertext in the LWE ciphertext vector. The result of the + * operation is stored in the output LWE ciphertext vector. The two input + * vectors are unchanged. This function is a wrapper to a device function that + * performs the operation on the GPU. 
+ */ +void cuda_add_lwe_ciphertext_vector_plaintext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + void *plaintext_array_in, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + host_addition_plaintext(stream, static_cast(lwe_array_out), + static_cast(lwe_array_in), + static_cast(plaintext_array_in), + input_lwe_dimension, input_lwe_ciphertext_count); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/linearalgebra/addition.cuh b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/addition.cuh new file mode 100644 index 000000000..06e09323f --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/addition.cuh @@ -0,0 +1,154 @@ +#ifndef CUDA_ADD_CUH +#define CUDA_ADD_CUH + +#ifdef __CDT_PARSER__ +#undef __CUDA_RUNTIME_H__ +#include +#endif + +#include "../utils/kernel_dimensions.cuh" +#include "device.h" +#include "linear_algebra.h" +#include + +template +__global__ void plaintext_addition(T *output, T *lwe_input, T *plaintext_input, + uint32_t input_lwe_dimension, + uint32_t num_entries) { + + int tid = threadIdx.x; + int plaintext_index = blockIdx.x * blockDim.x + tid; + if (plaintext_index < num_entries) { + int index = + plaintext_index * (input_lwe_dimension + 1) + input_lwe_dimension; + // Here we take advantage of the wrapping behaviour of uint + output[index] = lwe_input[index] + plaintext_input[plaintext_index]; + } +} + +template +__host__ void host_addition_plaintext(cuda_stream_t *stream, T *output, + T *lwe_input, T *plaintext_input, + uint32_t lwe_dimension, + uint32_t lwe_ciphertext_count) { + // Copies lwe_input into output, then adds each plaintext to the last element of the matching ciphertext. + cudaSetDevice(stream->gpu_index); + int num_blocks = 0, num_threads = 0; + int num_entries = lwe_ciphertext_count; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + // The copy size is given in bytes, hence the sizeof(T) factor. + cuda_memcpy_async_gpu_to_gpu( + output, lwe_input, (lwe_dimension + 1) * lwe_ciphertext_count * sizeof(T), stream); + 
plaintext_addition<<stream>>>( + output, lwe_input, plaintext_input, lwe_dimension, num_entries); + check_cuda_error(cudaGetLastError()); +} + +template +__global__ void addition(T *output, T *input_1, T *input_2, + uint32_t num_entries) { + + int tid = threadIdx.x; + int index = blockIdx.x * blockDim.x + tid; + if (index < num_entries) { + // Here we take advantage of the wrapping behaviour of uint + output[index] = input_1[index] + input_2[index]; + } +} + +// Coefficient-wise addition +template +__host__ void host_addition(cuda_stream_t *stream, T *output, T *input_1, + T *input_2, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + cudaSetDevice(stream->gpu_index); + // lwe_size includes the presence of the body + // whereas lwe_dimension is the number of elements in the mask + int lwe_size = input_lwe_dimension + 1; + // Create a 1-dimensional grid of threads + int num_blocks = 0, num_threads = 0; + int num_entries = input_lwe_ciphertext_count * lwe_size; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + addition<<stream>>>(output, input_1, input_2, + num_entries); + check_cuda_error(cudaGetLastError()); +} + +template +__global__ void subtraction(T *output, T *input_1, T *input_2, + uint32_t num_entries) { + + int tid = threadIdx.x; + int index = blockIdx.x * blockDim.x + tid; + if (index < num_entries) { + // Here we take advantage of the wrapping behaviour of uint + output[index] = input_1[index] - input_2[index]; + } +} + +// Coefficient-wise subtraction +template +__host__ void host_subtraction(cuda_stream_t *stream, T *output, T *input_1, + T *input_2, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + cudaSetDevice(stream->gpu_index); + // lwe_size includes the presence of the body + // whereas lwe_dimension is the number of elements in the mask + int lwe_size = input_lwe_dimension + 1; + // Create a 1-dimensional grid of 
threads + int num_blocks = 0, num_threads = 0; + int num_entries = input_lwe_ciphertext_count * lwe_size; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + subtraction<<stream>>>(output, input_1, input_2, + num_entries); + check_cuda_error(cudaGetLastError()); +} + +template +__global__ void radix_body_subtraction_inplace(T *lwe_ct, T *plaintext_input, + uint32_t input_lwe_dimension, + uint32_t num_entries) { + + int tid = threadIdx.x; + int plaintext_index = blockIdx.x * blockDim.x + tid; + if (plaintext_index < num_entries) { + int index = + plaintext_index * (input_lwe_dimension + 1) + input_lwe_dimension; + // Here we take advantage of the wrapping behaviour of uint + lwe_ct[index] -= plaintext_input[plaintext_index]; + } +} + +template +__host__ void host_subtraction_plaintext(cuda_stream_t *stream, T *output, + T *lwe_input, T *plaintext_input, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + cudaSetDevice(stream->gpu_index); + int num_blocks = 0, num_threads = 0; + int num_entries = input_lwe_ciphertext_count; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + cuda_memcpy_async_gpu_to_gpu(output, lwe_input, + input_lwe_ciphertext_count * + (input_lwe_dimension + 1) * sizeof(T), + stream); + + radix_body_subtraction_inplace<<stream>>>( + output, plaintext_input, input_lwe_dimension, num_entries); + check_cuda_error(cudaGetLastError()); +} +#endif // CUDA_ADD_H diff --git a/backends/tfhe-cuda-backend/implementation/src/linearalgebra/multiplication.cu b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/multiplication.cu new file mode 100644 index 000000000..d0aa8593a --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/multiplication.cu @@ -0,0 +1,56 @@ +#include "linearalgebra/multiplication.cuh" + +/* + * Perform the 
multiplication of a u32 input LWE ciphertext vector with a u32 + * cleartext vector. See the equivalent operation on u64 data for more details. + */ +void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + void *cleartext_array_in, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + host_cleartext_multiplication(stream, static_cast(lwe_array_out), + static_cast(lwe_array_in), + static_cast(cleartext_array_in), + input_lwe_dimension, + input_lwe_ciphertext_count); +} +/* + * Perform the multiplication of a u64 input LWE ciphertext vector with a u64 + * input cleartext vector. + * - `v_stream` is a void pointer to the Cuda stream to be used in the kernel + * launch + * - `gpu_index` is the index of the GPU to be used in the kernel launch + * - `lwe_array_out` is an array of size + * `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have + * been allocated on the GPU before calling this function, and that will hold + * the result of the computation. + * - `lwe_array_in` is the LWE ciphertext vector used as input, it should have + * been allocated and initialized before calling this function. It has the same + * size as the output array. + * - `cleartext_array_in` is the cleartext vector used as input, it should have + * been allocated and initialized before calling this function. It should be of + * size `input_lwe_ciphertext_count`. + * - `input_lwe_dimension` is the number of mask elements in the input and + * output LWE ciphertext vectors + * - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the + * input LWE ciphertext vector, as well as in the output. It is also the number + * of cleartexts in the input cleartext vector. + * + * Each cleartext of the input cleartext vector is multiplied to the mask and + * body of the corresponding LWE ciphertext in the LWE ciphertext vector. 
The + * result of the operation is stored in the output LWE ciphertext vector. The + * two input vectors are unchanged. This function is a wrapper to a device + * function that performs the operation on the GPU. + */ +void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in, + void *cleartext_array_in, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + host_cleartext_multiplication(stream, static_cast(lwe_array_out), + static_cast(lwe_array_in), + static_cast(cleartext_array_in), + input_lwe_dimension, + input_lwe_ciphertext_count); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/linearalgebra/multiplication.cuh b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/multiplication.cuh new file mode 100644 index 000000000..2a542946f --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/multiplication.cuh @@ -0,0 +1,52 @@ +#ifndef CUDA_MULT_CUH +#define CUDA_MULT_CUH + +#ifdef __CDT_PARSER__ +#undef __CUDA_RUNTIME_H__ +#include +#endif + +#include "../utils/kernel_dimensions.cuh" +#include "device.h" +#include "linear_algebra.h" +#include +#include +#include + +template +__global__ void +cleartext_multiplication(T *output, T *lwe_input, T *cleartext_input, + uint32_t input_lwe_dimension, uint32_t num_entries) { + + int tid = threadIdx.x; + int index = blockIdx.x * blockDim.x + tid; + if (index < num_entries) { + int cleartext_index = index / (input_lwe_dimension + 1); + // Here we take advantage of the wrapping behaviour of uint + output[index] = lwe_input[index] * cleartext_input[cleartext_index]; + } +} + +template +__host__ void +host_cleartext_multiplication(cuda_stream_t *stream, T *output, T *lwe_input, + T *cleartext_input, uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + cudaSetDevice(stream->gpu_index); + // lwe_size includes the presence of the body + // whereas lwe_dimension is the number of 
elements in the mask + int lwe_size = input_lwe_dimension + 1; + // Create a 1-dimensional grid of threads + int num_blocks = 0, num_threads = 0; + int num_entries = input_lwe_ciphertext_count * lwe_size; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + cleartext_multiplication<<stream>>>( + output, lwe_input, cleartext_input, input_lwe_dimension, num_entries); + check_cuda_error(cudaGetLastError()); +} + +#endif // CUDA_MULT_H diff --git a/backends/tfhe-cuda-backend/implementation/src/linearalgebra/negation.cu b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/negation.cu new file mode 100644 index 000000000..cf7903c4b --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/negation.cu @@ -0,0 +1,49 @@ +#include "linearalgebra/negation.cuh" + +/* + * Perform the negation of a u32 input LWE ciphertext vector. + * See the equivalent operation on u64 ciphertexts for more details. + */ +void cuda_negate_lwe_ciphertext_vector_32(cuda_stream_t *stream, + void *lwe_array_out, + void *lwe_array_in, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + host_negation(stream, static_cast(lwe_array_out), + static_cast(lwe_array_in), input_lwe_dimension, + input_lwe_ciphertext_count); +} + +/* + * Perform the negation of a u64 input LWE ciphertext vector. + * - `v_stream` is a void pointer to the Cuda stream to be used in the kernel + * launch + * - `gpu_index` is the index of the GPU to be used in the kernel launch + * - `lwe_array_out` is an array of size + * `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have + * been allocated on the GPU before calling this function, and that will hold + * the result of the computation. + * - `lwe_array_in` is the LWE ciphertext vector used as input, it should have + * been allocated and initialized before calling this function. It has the same + * size as the output array. 
+ * - `input_lwe_dimension` is the number of mask elements in the two input and + * in the output ciphertext vectors + * - `input_lwe_ciphertext_count` is the number of ciphertexts contained in each + * input LWE ciphertext vector, as well as in the output. + * + * Each element (mask element or body) of the input LWE ciphertext vector is + * negated. The result is stored in the output LWE ciphertext vector. The input + * LWE ciphertext vector is left unchanged. This function is a wrapper to a + * device function that performs the operation on the GPU. + */ +void cuda_negate_lwe_ciphertext_vector_64(cuda_stream_t *stream, + void *lwe_array_out, + void *lwe_array_in, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + host_negation(stream, static_cast(lwe_array_out), + static_cast(lwe_array_in), input_lwe_dimension, + input_lwe_ciphertext_count); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/linearalgebra/negation.cuh b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/negation.cuh new file mode 100644 index 000000000..438d52c64 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/linearalgebra/negation.cuh @@ -0,0 +1,44 @@ +#ifndef CUDA_NEGATE_CUH +#define CUDA_NEGATE_CUH + +#ifdef __CDT_PARSER__ +#undef __CUDA_RUNTIME_H__ +#include +#endif + +#include "../utils/kernel_dimensions.cuh" +#include "device.h" +#include "linear_algebra.h" + +template +__global__ void negation(T *output, T *input, uint32_t num_entries) { + + int tid = threadIdx.x; + int index = blockIdx.x * blockDim.x + tid; + if (index < num_entries) { + // Here we take advantage of the wrapping behaviour of uint + output[index] = -input[index]; + } +} + +template +__host__ void host_negation(cuda_stream_t *stream, T *output, T *input, + uint32_t input_lwe_dimension, + uint32_t input_lwe_ciphertext_count) { + + cudaSetDevice(stream->gpu_index); + // lwe_size includes the presence of the body + // whereas lwe_dimension is the number of 
elements in the mask + int lwe_size = input_lwe_dimension + 1; + // Create a 1-dimensional grid of threads + int num_blocks = 0, num_threads = 0; + int num_entries = input_lwe_ciphertext_count * lwe_size; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + negation<<stream>>>(output, input, num_entries); + check_cuda_error(cudaGetLastError()); +} + +#endif // CUDA_NEGATE_H diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap.cu b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap.cu new file mode 100644 index 000000000..73ade26d9 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap.cu @@ -0,0 +1 @@ +#include "bootstrapping_key.cuh" diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_amortized.cu b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_amortized.cu new file mode 100644 index 000000000..0206aad7d --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_amortized.cu @@ -0,0 +1,377 @@ +#include "bootstrap_amortized.cuh" + +/* + * Returns the buffer size for 64 bits executions + */ +uint64_t get_buffer_size_bootstrap_amortized_64( + uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) { + return get_buffer_size_bootstrap_amortized( + glwe_dimension, polynomial_size, input_lwe_ciphertext_count, + max_shared_memory); +} + +/* + * Runs standard checks to validate the inputs + */ +void checks_fast_bootstrap_amortized(int polynomial_size) { + assert( + ("Error (GPU amortized PBS): polynomial size should be one of 256, 512, " + "1024, 2048, 4096, 8192, 16384", + polynomial_size == 256 || polynomial_size == 512 || + polynomial_size == 1024 || polynomial_size == 2048 || + polynomial_size == 4096 || polynomial_size == 8192 || + polynomial_size == 16384)); +} + +/* + * Runs standard checks to validate the inputs + 
*/ +void checks_bootstrap_amortized(int nbits, int base_log, int polynomial_size) { + assert(("Error (GPU amortized PBS): base log should be <= nbits", + base_log <= nbits)); + checks_fast_bootstrap_amortized(polynomial_size); +} + +/* + * This scratch function allocates the necessary amount of data on the GPU for + * the amortized PBS on 32 bits inputs, into `pbs_buffer`. It also + * configures SM options on the GPU in case FULLSM or PARTIALSM mode is going to + * be used. + */ +void scratch_cuda_bootstrap_amortized_32( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count, + uint32_t max_shared_memory, bool allocate_gpu_memory) { + checks_fast_bootstrap_amortized(polynomial_size); + + switch (polynomial_size) { + case 256: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 512: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 1024: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 2048: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 4096: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 8192: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 16384: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + 
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + default: + break; + } +} + +/* + * This scratch function allocates the necessary amount of data on the GPU for + * the amortized PBS on 64 bits inputs, into `pbs_buffer`. It also + * configures SM options on the GPU in case FULLSM or PARTIALSM mode is going to + * be used. + */ +void scratch_cuda_bootstrap_amortized_64( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count, + uint32_t max_shared_memory, bool allocate_gpu_memory) { + checks_fast_bootstrap_amortized(polynomial_size); + + switch (polynomial_size) { + case 256: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 512: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 1024: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 2048: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 4096: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 8192: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 16384: + scratch_bootstrap_amortized>( + stream, pbs_buffer, glwe_dimension, polynomial_size, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + default: + break; + } +} + +/* Perform the programmable 
bootstrapping on a batch of input u32 LWE + * ciphertexts. See the corresponding operation on 64 bits for more details. + */ +void cuda_bootstrap_amortized_lwe_ciphertext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, uint32_t num_samples, + uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory) { + + checks_bootstrap_amortized(32, base_log, polynomial_size); + + switch (polynomial_size) { + case 256: + host_bootstrap_amortized>( + stream, (uint32_t *)lwe_array_out, (uint32_t *)lwe_output_indexes, + (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, + (uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 512: + host_bootstrap_amortized>( + stream, (uint32_t *)lwe_array_out, (uint32_t *)lwe_output_indexes, + (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, + (uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 1024: + host_bootstrap_amortized>( + stream, (uint32_t *)lwe_array_out, (uint32_t *)lwe_output_indexes, + (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, + (uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 2048: + host_bootstrap_amortized>( + stream, 
(uint32_t *)lwe_array_out, (uint32_t *)lwe_output_indexes, + (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, + (uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 4096: + host_bootstrap_amortized>( + stream, (uint32_t *)lwe_array_out, (uint32_t *)lwe_output_indexes, + (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, + (uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 8192: + host_bootstrap_amortized>( + stream, (uint32_t *)lwe_array_out, (uint32_t *)lwe_output_indexes, + (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, + (uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 16384: + host_bootstrap_amortized>( + stream, (uint32_t *)lwe_array_out, (uint32_t *)lwe_output_indexes, + (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, + (uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + default: + break; + } +} + +/* Perform the programmable bootstrapping on a batch of input u64 LWE + * ciphertexts. This functions performs best for large numbers of inputs (> 10). 
+ * - `v_stream` is a void pointer to the Cuda stream to be used in the kernel + * launch + * - `gpu_index` is the index of the GPU to be used in the kernel launch + * - lwe_array_out: output batch of num_samples bootstrapped ciphertexts c = + * (a0,..an-1,b) where n is the LWE dimension + * - lut_vector: should hold as many test vectors of size polynomial_size + * as there are input ciphertexts, but actually holds + * num_lut_vectors vectors to reduce memory usage + * - lut_vector_indexes: stores the index corresponding to + * which test vector of lut_vector to use for each LWE input in + * lwe_array_in + * - lwe_array_in: input batch of num_samples LWE ciphertexts, containing n + * mask values + 1 body value + * - bootstrapping_key: GGSW encryption of the LWE secret key sk1 + * under secret key sk2 + * bsk = Z + sk1 H + * where H is the gadget matrix and Z is a matrix (k+1).l + * containing GLWE encryptions of 0 under sk2. + * bsk is thus a tensor of size (k+1)^2.l.N.n + * where l is the number of decomposition levels and + * k is the GLWE dimension, N is the polynomial size for + * GLWE. The polynomial size for GLWE and the test vector + * are the same because they have to be in the same ring + * to be multiplied. + * - input_lwe_dimension: size of the Torus vector used to encrypt the input + * LWE ciphertexts - referred to as n above (~ 600) + * - polynomial_size: size of the test polynomial (test vector) and size of the + * GLWE polynomials (~1024) (where `size` refers to the polynomial degree + 1). + * - base_log: log of the base used for the gadget matrix - B = 2^base_log (~8) + * - level_count: number of decomposition levels in the gadget matrix (~4) + * - num_samples: number of encrypted input messages + * - num_lut_vectors: parameter to set the actual number of test vectors to be + * used + * - lwe_idx: the index of the LWE input to consider for the GPU of index + * gpu_index. 
In case of multi-GPU computing, it is assumed that only a part of + * the input LWE array is copied to each GPU, but the whole LUT array is copied + * (because the case when the number of LUTs is smaller than the number of input + * LWEs is not trivial to take into account in the data repartition on the + * GPUs). `lwe_idx` is used to determine which LUT to consider for a given LWE + * input in the LUT array `lut_vector`. + * - 'max_shared_memory' maximum amount of shared memory to be used inside + * device functions + * + * This function calls a wrapper to a device kernel that performs the + * bootstrapping: + * - the kernel is templatized based on integer discretization and + * polynomial degree + * - num_samples blocks of threads are launched, where each thread is going + * to handle one or more polynomial coefficients at each stage: + * - perform the blind rotation + * - round the result + * - decompose into level_count levels, then for each level: + * - switch to the FFT domain + * - multiply with the bootstrapping key + * - come back to the coefficients representation + * - between each stage a synchronization of the threads is necessary + * - in case the device has enough shared memory, temporary arrays used for + * the different stages (accumulators) are stored into the shared memory + * - the accumulators serve to combine the results for all decomposition + * levels + * - the constant memory (64K) is used for storing the roots of identity + * values for the FFT + */ +void cuda_bootstrap_amortized_lwe_ciphertext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, uint32_t num_samples, + uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory) { + + 
checks_bootstrap_amortized(64, base_log, polynomial_size); + + switch (polynomial_size) { + case 256: + host_bootstrap_amortized>( + stream, (uint64_t *)lwe_array_out, (uint64_t *)lwe_output_indexes, + (uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes, + (uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 512: + host_bootstrap_amortized>( + stream, (uint64_t *)lwe_array_out, (uint64_t *)lwe_output_indexes, + (uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes, + (uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 1024: + host_bootstrap_amortized>( + stream, (uint64_t *)lwe_array_out, (uint64_t *)lwe_output_indexes, + (uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes, + (uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 2048: + host_bootstrap_amortized>( + stream, (uint64_t *)lwe_array_out, (uint64_t *)lwe_output_indexes, + (uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes, + (uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 4096: + host_bootstrap_amortized>( + stream, (uint64_t *)lwe_array_out, (uint64_t *)lwe_output_indexes, + (uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes, + (uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes, + (double2 
*)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 8192: + host_bootstrap_amortized>( + stream, (uint64_t *)lwe_array_out, (uint64_t *)lwe_output_indexes, + (uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes, + (uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + case 16384: + host_bootstrap_amortized>( + stream, (uint64_t *)lwe_array_out, (uint64_t *)lwe_output_indexes, + (uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes, + (uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes, + (double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension, + polynomial_size, base_log, level_count, num_samples, num_lut_vectors, + lwe_idx, max_shared_memory); + break; + default: + break; + } +} + +/* + * This cleanup function frees the data for the amortized PBS on GPU in + * pbs_buffer for 32 or 64 bits inputs. 
+ */ +void cleanup_cuda_bootstrap_amortized(cuda_stream_t *stream, + int8_t **pbs_buffer) { + + // Free memory + cuda_drop_async(*pbs_buffer, stream); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_amortized.cuh b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_amortized.cuh new file mode 100644 index 000000000..69d059b1c --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_amortized.cuh @@ -0,0 +1,363 @@ +#ifndef CUDA_AMORTIZED_PBS_CUH +#define CUDA_AMORTIZED_PBS_CUH + +#ifdef __CDT_PARSER__ +#undef __CUDA_RUNTIME_H__ +#include +#endif + +#include "bootstrap.h" +#include "crypto/gadget.cuh" +#include "crypto/torus.cuh" +#include "device.h" +#include "fft/bnsmfft.cuh" +#include "fft/twiddles.cuh" +#include "polynomial/functions.cuh" +#include "polynomial/parameters.cuh" +#include "polynomial/polynomial_math.cuh" +#include "types/complex/operations.cuh" + +template +/* + * Kernel launched by host_bootstrap_amortized + * + * Uses shared memory to increase performance + * - lwe_array_out: output batch of num_samples bootstrapped ciphertexts c = + * (a0,..an-1,b) where n is the LWE dimension + * - lut_vector: should hold as many test vectors of size polynomial_size + * as there are input ciphertexts, but actually holds + * num_lut_vectors vectors to reduce memory usage + * - lut_vector_indexes: stores the index corresponding to which test vector + * to use for each sample in lut_vector + * - lwe_array_in: input batch of num_samples LWE ciphertexts, containing n + * mask values + 1 body value + * - bootstrapping_key: RGSW encryption of the LWE secret key sk1 under secret + * key sk2 + * - device_mem: pointer to the device's global memory in case we use it (SMD + * == NOSM or PARTIALSM) + * - lwe_dimension: size of the Torus vector used to encrypt the input + * LWE ciphertexts - referred to as n above (~ 600) + * - polynomial_size: size of the test polynomial (test vector) and size of the + * GLWE 
polynomial (~1024) + * - base_log: log base used for the gadget matrix - B = 2^base_log (~8) + * - level_count: number of decomposition levels in the gadget matrix (~4) + * - gpu_num: index of the current GPU (useful for multi-GPU computations) + * - lwe_idx: equal to the number of samples per gpu x gpu_num + * - device_memory_size_per_sample: amount of global memory to allocate if SMD + * is not FULLSM + */ +__global__ void device_bootstrap_amortized( + Torus *lwe_array_out, Torus *lwe_output_indexes, Torus *lut_vector, + Torus *lut_vector_indexes, Torus *lwe_array_in, Torus *lwe_input_indexes, + double2 *bootstrapping_key, int8_t *device_mem, uint32_t glwe_dimension, + uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log, + uint32_t level_count, uint32_t lwe_idx, + size_t device_memory_size_per_sample) { + // We use shared memory for the polynomials that are used often during the + // bootstrap, since shared memory is kept in L1 cache and accessing it is + // much faster than global memory + extern __shared__ int8_t sharedmem[]; + int8_t *selected_memory; + + if constexpr (SMD == FULLSM) + selected_memory = sharedmem; + else + selected_memory = &device_mem[blockIdx.x * device_memory_size_per_sample]; + + // For GPU bootstrapping the GLWE dimension is hard-set to 1: there is only + // one mask polynomial and 1 body to handle. 
+ Torus *accumulator = (Torus *)selected_memory; + Torus *accumulator_rotated = + (Torus *)accumulator + + (ptrdiff_t)((glwe_dimension + 1) * polynomial_size); + double2 *res_fft = + (double2 *)accumulator_rotated + (glwe_dimension + 1) * polynomial_size / + (sizeof(double2) / sizeof(Torus)); + double2 *accumulator_fft = (double2 *)sharedmem; + if constexpr (SMD != PARTIALSM) + accumulator_fft = (double2 *)res_fft + + (ptrdiff_t)((glwe_dimension + 1) * polynomial_size / 2); + + auto block_lwe_array_in = + &lwe_array_in[lwe_input_indexes[blockIdx.x] * (lwe_dimension + 1)]; + Torus *block_lut_vector = + &lut_vector[lut_vector_indexes[lwe_idx + blockIdx.x] * params::degree * + (glwe_dimension + 1)]; + + // Put "b", the body, in [0, 2N[ + Torus b_hat = 0; + rescale_torus_element(block_lwe_array_in[lwe_dimension], b_hat, + 2 * params::degree); // 2 * params::log2_degree + 1); + + divide_by_monomial_negacyclic_inplace( + accumulator, block_lut_vector, b_hat, false, glwe_dimension + 1); + + // Loop over all the mask elements of the sample to accumulate + // (X^a_i-1) multiplication, decomposition of the resulting polynomial + // into level_count polynomials, and performing polynomial multiplication + // via an FFT with the RGSW encrypted secret key + for (int iteration = 0; iteration < lwe_dimension; iteration++) { + synchronize_threads_in_block(); + + // Put "a" in [0, 2N[ instead of Zq + Torus a_hat = 0; + rescale_torus_element(block_lwe_array_in[iteration], a_hat, + 2 * params::degree); // 2 * params::log2_degree + 1); + + // Perform ACC * (X^ä - 1) + multiply_by_monomial_negacyclic_and_sub_polynomial< + Torus, params::opt, params::degree / params::opt>( + accumulator, accumulator_rotated, a_hat, glwe_dimension + 1); + + synchronize_threads_in_block(); + + // Perform a rounding to increase the accuracy of the + // bootstrapped ciphertext + round_to_closest_multiple_inplace( + accumulator_rotated, base_log, level_count, glwe_dimension + 1); + + // Initialize the 
polynomial multiplication via FFT arrays + // The polynomial multiplications happens at the block level + // and each thread handles two or more coefficients + int pos = threadIdx.x; + for (int i = 0; i < (glwe_dimension + 1); i++) + for (int j = 0; j < params::opt / 2; j++) { + res_fft[pos].x = 0; + res_fft[pos].y = 0; + pos += params::degree / params::opt; + } + + GadgetMatrix gadget(base_log, level_count, + accumulator_rotated, glwe_dimension + 1); + // Now that the rotation is done, decompose the resulting polynomial + // coefficients so as to multiply each decomposed level with the + // corresponding part of the bootstrapping key + for (int level = level_count - 1; level >= 0; level--) { + for (int i = 0; i < (glwe_dimension + 1); i++) { + gadget.decompose_and_compress_next_polynomial(accumulator_fft, i); + + // Switch to the FFT space + NSMFFT_direct>(accumulator_fft); + + // Get the bootstrapping key piece necessary for the multiplication + // It is already in the Fourier domain + auto bsk_slice = get_ith_mask_kth_block(bootstrapping_key, iteration, i, + level, polynomial_size, + glwe_dimension, level_count); + + // Perform the coefficient-wise product with the two pieces of + // bootstrapping key + for (int j = 0; j < (glwe_dimension + 1); j++) { + auto bsk_poly = bsk_slice + j * params::degree / 2; + auto res_fft_poly = res_fft + j * params::degree / 2; + polynomial_product_accumulate_in_fourier_domain( + res_fft_poly, accumulator_fft, bsk_poly); + } + } + synchronize_threads_in_block(); + } + + // Come back to the coefficient representation + if constexpr (SMD == FULLSM || SMD == NOSM) { + synchronize_threads_in_block(); + + for (int i = 0; i < (glwe_dimension + 1); i++) { + auto res_fft_slice = res_fft + i * params::degree / 2; + NSMFFT_inverse>(res_fft_slice); + } + synchronize_threads_in_block(); + + for (int i = 0; i < (glwe_dimension + 1); i++) { + auto accumulator_slice = accumulator + i * params::degree; + auto res_fft_slice = res_fft + i * 
params::degree / 2; + add_to_torus(res_fft_slice, accumulator_slice); + } + synchronize_threads_in_block(); + } else { +#pragma unroll + for (int i = 0; i < (glwe_dimension + 1); i++) { + auto accumulator_slice = accumulator + i * params::degree; + auto res_fft_slice = res_fft + i * params::degree / 2; + int tid = threadIdx.x; + for (int j = 0; j < params::opt / 2; j++) { + accumulator_fft[tid] = res_fft_slice[tid]; + tid = tid + params::degree / params::opt; + } + synchronize_threads_in_block(); + + NSMFFT_inverse>(accumulator_fft); + synchronize_threads_in_block(); + + add_to_torus(accumulator_fft, accumulator_slice); + } + synchronize_threads_in_block(); + } + } + + auto block_lwe_array_out = + &lwe_array_out[lwe_output_indexes[blockIdx.x] * + (glwe_dimension * polynomial_size + 1)]; + + // The blind rotation for this block is over + // Now we can perform the sample extraction: for the body it's just + // the resulting constant coefficient of the accumulator + // For the mask it's more complicated + sample_extract_mask(block_lwe_array_out, accumulator, + glwe_dimension); + sample_extract_body(block_lwe_array_out, accumulator, + glwe_dimension); +} + +template +__host__ __device__ uint64_t get_buffer_size_full_sm_bootstrap_amortized( + uint32_t polynomial_size, uint32_t glwe_dimension) { + return sizeof(Torus) * polynomial_size * (glwe_dimension + 1) + // accumulator + sizeof(Torus) * polynomial_size * + (glwe_dimension + 1) + // accumulator rotated + sizeof(double2) * polynomial_size / 2 + // accumulator fft + sizeof(double2) * polynomial_size / 2 * + (glwe_dimension + 1); // res fft +} + +template +__host__ __device__ uint64_t +get_buffer_size_partial_sm_bootstrap_amortized(uint32_t polynomial_size) { + return sizeof(double2) * polynomial_size / 2; // accumulator fft +} + +template +__host__ __device__ uint64_t get_buffer_size_bootstrap_amortized( + uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t input_lwe_ciphertext_count, uint32_t 
max_shared_memory) { + + uint64_t full_sm = get_buffer_size_full_sm_bootstrap_amortized( + polynomial_size, glwe_dimension); + uint64_t partial_sm = + get_buffer_size_partial_sm_bootstrap_amortized(polynomial_size); + uint64_t partial_dm = full_sm - partial_sm; + uint64_t full_dm = full_sm; + uint64_t device_mem = 0; + if (max_shared_memory < partial_sm) { + device_mem = full_dm * input_lwe_ciphertext_count; + } else if (max_shared_memory < full_sm) { + device_mem = partial_dm * input_lwe_ciphertext_count; + } + return device_mem + device_mem % sizeof(double2); +} + +template +__host__ void scratch_bootstrap_amortized( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count, + uint32_t max_shared_memory, bool allocate_gpu_memory) { + cudaSetDevice(stream->gpu_index); + + uint64_t full_sm = get_buffer_size_full_sm_bootstrap_amortized( + polynomial_size, glwe_dimension); + uint64_t partial_sm = + get_buffer_size_partial_sm_bootstrap_amortized(polynomial_size); + if (max_shared_memory >= partial_sm && max_shared_memory < full_sm) { + cudaFuncSetAttribute(device_bootstrap_amortized, + cudaFuncAttributeMaxDynamicSharedMemorySize, + partial_sm); + cudaFuncSetCacheConfig(device_bootstrap_amortized, + cudaFuncCachePreferShared); + } else if (max_shared_memory >= partial_sm) { + check_cuda_error(cudaFuncSetAttribute( + device_bootstrap_amortized, + cudaFuncAttributeMaxDynamicSharedMemorySize, full_sm)); + check_cuda_error(cudaFuncSetCacheConfig( + device_bootstrap_amortized, + cudaFuncCachePreferShared)); + } + if (allocate_gpu_memory) { + uint64_t buffer_size = get_buffer_size_bootstrap_amortized( + glwe_dimension, polynomial_size, input_lwe_ciphertext_count, + max_shared_memory); + *pbs_buffer = (int8_t *)cuda_malloc_async(buffer_size, stream); + check_cuda_error(cudaGetLastError()); + } +} + +template +__host__ void host_bootstrap_amortized( + cuda_stream_t *stream, Torus *lwe_array_out, Torus 
*lwe_output_indexes, + Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in, + Torus *lwe_input_indexes, double2 *bootstrapping_key, int8_t *pbs_buffer, + uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t num_lut_vectors, + uint32_t lwe_idx, uint32_t max_shared_memory) { + + cudaSetDevice(stream->gpu_index); + uint64_t SM_FULL = get_buffer_size_full_sm_bootstrap_amortized( + polynomial_size, glwe_dimension); + + uint64_t SM_PART = + get_buffer_size_partial_sm_bootstrap_amortized(polynomial_size); + + uint64_t DM_PART = SM_FULL - SM_PART; + + uint64_t DM_FULL = SM_FULL; + + // Create a 1-dimensional grid of threads + // where each block handles 1 sample and each thread + // handles opt polynomial coefficients + // (actually opt/2 coefficients since we compress the real polynomial into a + // complex) + dim3 grid(input_lwe_ciphertext_count, 1, 1); + dim3 thds(polynomial_size / params::opt, 1, 1); + + // Launch the kernel using polynomial_size/opt threads + // where each thread computes opt polynomial coefficients + // Depending on the required amount of shared memory, choose + // from one of three templates (no use, partial use or full use + // of shared memory) + if (max_shared_memory < SM_PART) { + device_bootstrap_amortized + <<stream>>>( + lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes, + lwe_array_in, lwe_input_indexes, bootstrapping_key, pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, base_log, + level_count, lwe_idx, DM_FULL); + } else if (max_shared_memory < SM_FULL) { + device_bootstrap_amortized + <<stream>>>( + lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes, + lwe_array_in, lwe_input_indexes, bootstrapping_key, pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, base_log, + level_count, lwe_idx, DM_PART); + } else { + // For devices with compute capability 7.x a single 
thread block can + // address the full capacity of shared memory. Shared memory on the + // device then has to be allocated dynamically. + // For lower compute capabilities, this call + // just does nothing and the amount of shared memory used is 48 KB + device_bootstrap_amortized + <<stream>>>( + lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes, + lwe_array_in, lwe_input_indexes, bootstrapping_key, pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, base_log, + level_count, lwe_idx, 0); + } + check_cuda_error(cudaGetLastError()); +} + +template +int cuda_get_pbs_per_gpu(int polynomial_size) { + + int blocks_per_sm = 0; + int num_threads = polynomial_size / params::opt; + cudaGetDeviceCount(0); + cudaDeviceProp device_properties; + cudaGetDeviceProperties(&device_properties, 0); + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &blocks_per_sm, device_bootstrap_amortized, num_threads, + 0); + + return device_properties.multiProcessorCount * blocks_per_sm; +} + +#endif // CNCRT_PBS_H diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_fast_low_latency.cuh b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_fast_low_latency.cuh new file mode 100644 index 000000000..adeafaf5a --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_fast_low_latency.cuh @@ -0,0 +1,453 @@ +#ifndef CUDA_FAST_LOWLAT_PBS_CUH +#define CUDA_FAST_LOWLAT_PBS_CUH + +#ifdef __CDT_PARSER__ +#undef __CUDA_RUNTIME_H__ +#include +#endif + +#include "cooperative_groups.h" + +#include "bootstrap.h" +#include "crypto/gadget.cuh" +#include "crypto/torus.cuh" +#include "device.h" +#include "fft/bnsmfft.cuh" +#include "fft/twiddles.cuh" +#include "polynomial/parameters.cuh" +#include "polynomial/polynomial_math.cuh" +#include "types/complex/operations.cuh" + +// Cooperative groups are used in the low latency PBS +using namespace cooperative_groups; +namespace cg = cooperative_groups; + +template +__device__ void 
mul_ggsw_glwe(Torus *accumulator, double2 *fft, + double2 *join_buffer, double2 *bootstrapping_key, + int polynomial_size, uint32_t glwe_dimension, + int level_count, int iteration, + grid_group &grid) { + + // Switch to the FFT space + NSMFFT_direct>(fft); + synchronize_threads_in_block(); + + // Get the pieces of the bootstrapping key that will be needed for the + // external product; blockIdx.x is the ID of the block that's executing + // this function, so we end up getting the lines of the bootstrapping key + // needed to perform the external product in this block (corresponding to + // the same decomposition level) + auto bsk_slice = get_ith_mask_kth_block( + bootstrapping_key, iteration, blockIdx.y, blockIdx.x, polynomial_size, + glwe_dimension, level_count); + + // Selects all GLWEs in a particular decomposition level + auto level_join_buffer = + join_buffer + blockIdx.x * (glwe_dimension + 1) * params::degree / 2; + + // Perform the matrix multiplication between the GGSW and the GLWE, + // each block operating on a single level for mask and body + + // The first product is used to initialize level_join_buffer + auto bsk_poly = bsk_slice + blockIdx.y * params::degree / 2; + auto buffer_slice = level_join_buffer + blockIdx.y * params::degree / 2; + + int tid = threadIdx.x; + for (int i = 0; i < params::opt / 2; i++) { + buffer_slice[tid] = fft[tid] * bsk_poly[tid]; + tid += params::degree / params::opt; + } + + grid.sync(); + + // Continues multiplying fft by every polynomial in that particular bsk level + // Each y-block accumulates in a different polynomial at each iteration + for (int j = 1; j < (glwe_dimension + 1); j++) { + int idx = (j + blockIdx.y) % (glwe_dimension + 1); + + auto bsk_poly = bsk_slice + idx * params::degree / 2; + auto buffer_slice = level_join_buffer + idx * params::degree / 2; + + int tid = threadIdx.x; + for (int i = 0; i < params::opt / 2; i++) { + buffer_slice[tid] += fft[tid] * bsk_poly[tid]; + tid += params::degree / 
params::opt; + } + grid.sync(); + } + + // ----------------------------------------------------------------- + // All blocks are synchronized here; after this sync, level_join_buffer has + // the values needed from every other block + + auto src_acc = join_buffer + blockIdx.y * params::degree / 2; + + // copy first product into fft buffer + tid = threadIdx.x; + for (int i = 0; i < params::opt / 2; i++) { + fft[tid] = src_acc[tid]; + tid += params::degree / params::opt; + } + synchronize_threads_in_block(); + + // accumulate rest of the products into fft buffer + for (int l = 1; l < gridDim.x; l++) { + auto cur_src_acc = &src_acc[l * (glwe_dimension + 1) * params::degree / 2]; + tid = threadIdx.x; + for (int i = 0; i < params::opt / 2; i++) { + fft[tid] += cur_src_acc[tid]; + tid += params::degree / params::opt; + } + } + + synchronize_threads_in_block(); + + // Perform the inverse FFT on the result of the GGSW x GLWE and add to the + // accumulator + NSMFFT_inverse>(fft); + synchronize_threads_in_block(); + + add_to_torus(fft, accumulator); + + __syncthreads(); +} + +/* + * Kernel launched by the low latency version of the + * bootstrapping, that uses cooperative groups + * + * - lwe_array_out: vector of output lwe s, with length + * (glwe_dimension * polynomial_size+1)*num_samples + * - lut_vector: vector of look up tables with + * length (glwe_dimension+1) * polynomial_size * num_samples + * - lut_vector_indexes: mapping between lwe_array_in and lut_vector + * lwe_array_in: vector of lwe inputs with length (lwe_dimension + 1) * + * num_samples + * + * Each y-block computes one element of the lwe_array_out. 
+ */ +template +__global__ void device_bootstrap_fast_low_latency( + Torus *lwe_array_out, Torus *lwe_output_indexes, Torus *lut_vector, + Torus *lut_vector_indexes, Torus *lwe_array_in, Torus *lwe_input_indexes, + double2 *bootstrapping_key, double2 *join_buffer, uint32_t lwe_dimension, + uint32_t polynomial_size, uint32_t base_log, uint32_t level_count, + int8_t *device_mem, uint64_t device_memory_size_per_block) { + + grid_group grid = this_grid(); + + // We use shared memory for the polynomials that are used often during the + // bootstrap, since shared memory is kept in L1 cache and accessing it is + // much faster than global memory + extern __shared__ int8_t sharedmem[]; + int8_t *selected_memory; + uint32_t glwe_dimension = gridDim.y - 1; + + if constexpr (SMD == FULLSM) { + selected_memory = sharedmem; + } else { + int block_index = blockIdx.x + blockIdx.y * gridDim.x + + blockIdx.z * gridDim.x * gridDim.y; + selected_memory = &device_mem[block_index * device_memory_size_per_block]; + } + + // We always compute the pointer with most restrictive alignment to avoid + // alignment issues + double2 *accumulator_fft = (double2 *)selected_memory; + Torus *accumulator = + (Torus *)accumulator_fft + + (ptrdiff_t)(sizeof(double2) * polynomial_size / 2 / sizeof(Torus)); + Torus *accumulator_rotated = + (Torus *)accumulator + (ptrdiff_t)polynomial_size; + + if constexpr (SMD == PARTIALSM) + accumulator_fft = (double2 *)sharedmem; + + // The third dimension of the block is used to determine on which ciphertext + // this block is operating, in the case of batch bootstraps + Torus *block_lwe_array_in = + &lwe_array_in[lwe_input_indexes[blockIdx.z] * (lwe_dimension + 1)]; + + Torus *block_lut_vector = &lut_vector[lut_vector_indexes[blockIdx.z] * + params::degree * (glwe_dimension + 1)]; + + double2 *block_join_buffer = + &join_buffer[blockIdx.z * level_count * (glwe_dimension + 1) * + params::degree / 2]; + // Since the space is L1 cache is small, we use the same memory 
location for + // the rotated accumulator and the fft accumulator, since we know that the + // rotated array is not in use anymore by the time we perform the fft + + // Put "b" in [0, 2N[ + Torus b_hat = 0; + rescale_torus_element(block_lwe_array_in[lwe_dimension], b_hat, + 2 * params::degree); + + divide_by_monomial_negacyclic_inplace( + accumulator, &block_lut_vector[blockIdx.y * params::degree], b_hat, + false); + + for (int i = 0; i < lwe_dimension; i++) { + synchronize_threads_in_block(); + + // Put "a" in [0, 2N[ + Torus a_hat = 0; + rescale_torus_element(block_lwe_array_in[i], a_hat, + 2 * params::degree); // 2 * params::log2_degree + 1); + + // Perform ACC * (X^ä - 1) + multiply_by_monomial_negacyclic_and_sub_polynomial< + Torus, params::opt, params::degree / params::opt>( + accumulator, accumulator_rotated, a_hat); + + // Perform a rounding to increase the accuracy of the + // bootstrapped ciphertext + round_to_closest_multiple_inplace( + accumulator_rotated, base_log, level_count); + + synchronize_threads_in_block(); + + // Decompose the accumulator. Each block gets one level of the + // decomposition, for the mask and the body (so block 0 will have the + // accumulator decomposed at level 0, 1 at 1, etc.) 
+ GadgetMatrix gadget_acc(base_log, level_count, + accumulator_rotated); + gadget_acc.decompose_and_compress_level(accumulator_fft, blockIdx.x); + + // We are using the same memory space for accumulator_fft and + // accumulator_rotated, so we need to synchronize here to make sure they + // don't modify the same memory space at the same time + synchronize_threads_in_block(); + + // Perform G^-1(ACC) * GGSW -> GLWE + mul_ggsw_glwe( + accumulator, accumulator_fft, block_join_buffer, bootstrapping_key, + polynomial_size, glwe_dimension, level_count, i, grid); + + synchronize_threads_in_block(); + } + + auto block_lwe_array_out = + &lwe_array_out[lwe_output_indexes[blockIdx.z] * + (glwe_dimension * polynomial_size + 1) + + blockIdx.y * polynomial_size]; + + if (blockIdx.x == 0 && blockIdx.y < glwe_dimension) { + // Perform a sample extract. At this point, all blocks have the result, but + // we do the computation at block 0 to avoid waiting for extra blocks, in + // case they're not synchronized + sample_extract_mask(block_lwe_array_out, accumulator); + } else if (blockIdx.x == 0 && blockIdx.y == glwe_dimension) { + sample_extract_body(block_lwe_array_out, accumulator, 0); + } +} + +template +__host__ __device__ uint64_t +get_buffer_size_full_sm_bootstrap_fast_low_latency(uint32_t polynomial_size) { + return sizeof(Torus) * polynomial_size + // accumulator_rotated + sizeof(Torus) * polynomial_size + // accumulator + sizeof(double2) * polynomial_size / 2; // accumulator fft +} + +template +__host__ __device__ uint64_t +get_buffer_size_partial_sm_bootstrap_fast_low_latency( + uint32_t polynomial_size) { + return sizeof(double2) * polynomial_size / 2; // accumulator fft mask & body +} + +template +__host__ __device__ uint64_t get_buffer_size_bootstrap_fast_low_latency( + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) { + + uint64_t full_sm = 
get_buffer_size_full_sm_bootstrap_fast_low_latency( + polynomial_size); + uint64_t partial_sm = + get_buffer_size_partial_sm_bootstrap_fast_low_latency( + polynomial_size); + uint64_t partial_dm = full_sm - partial_sm; + uint64_t full_dm = full_sm; + uint64_t device_mem = 0; + if (max_shared_memory < partial_sm) { + device_mem = full_dm * input_lwe_ciphertext_count * level_count * + (glwe_dimension + 1); + } else if (max_shared_memory < full_sm) { + device_mem = partial_dm * input_lwe_ciphertext_count * level_count * + (glwe_dimension + 1); + } + uint64_t buffer_size = device_mem + (glwe_dimension + 1) * level_count * + input_lwe_ciphertext_count * + polynomial_size / 2 * sizeof(double2); + return buffer_size + buffer_size % sizeof(double2); +} + +template +__host__ void scratch_bootstrap_fast_low_latency( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory, + bool allocate_gpu_memory) { + cudaSetDevice(stream->gpu_index); + + uint64_t full_sm = get_buffer_size_full_sm_bootstrap_fast_low_latency( + polynomial_size); + uint64_t partial_sm = + get_buffer_size_partial_sm_bootstrap_fast_low_latency( + polynomial_size); + if (max_shared_memory >= partial_sm && max_shared_memory < full_sm) { + check_cuda_error(cudaFuncSetAttribute( + device_bootstrap_fast_low_latency, + cudaFuncAttributeMaxDynamicSharedMemorySize, partial_sm)); + cudaFuncSetCacheConfig( + device_bootstrap_fast_low_latency, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + } else if (max_shared_memory >= partial_sm) { + check_cuda_error(cudaFuncSetAttribute( + device_bootstrap_fast_low_latency, + cudaFuncAttributeMaxDynamicSharedMemorySize, full_sm)); + cudaFuncSetCacheConfig( + device_bootstrap_fast_low_latency, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + } + if (allocate_gpu_memory) { + uint64_t buffer_size = 
get_buffer_size_bootstrap_fast_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + *pbs_buffer = (int8_t *)cuda_malloc_async(buffer_size, stream); + check_cuda_error(cudaGetLastError()); + } +} + +/* + * Host wrapper to the low latency version + * of bootstrapping + */ +template +__host__ void host_bootstrap_fast_low_latency( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes, + Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in, + Torus *lwe_input_indexes, double2 *bootstrapping_key, int8_t *pbs_buffer, + uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t num_lut_vectors, + uint32_t max_shared_memory) { + cudaSetDevice(stream->gpu_index); + + // With SM each block corresponds to either the mask or body, no need to + // duplicate data for each + uint64_t full_sm = get_buffer_size_full_sm_bootstrap_fast_low_latency( + polynomial_size); + + uint64_t partial_sm = + get_buffer_size_partial_sm_bootstrap_fast_low_latency( + polynomial_size); + + uint64_t full_dm = full_sm; + + uint64_t partial_dm = full_dm - partial_sm; + + int8_t *d_mem = pbs_buffer; + double2 *buffer_fft = + (double2 *)d_mem + + (ptrdiff_t)(get_buffer_size_bootstrap_fast_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory) / + sizeof(double2) - + (glwe_dimension + 1) * level_count * + input_lwe_ciphertext_count * polynomial_size / 2); + + int thds = polynomial_size / params::opt; + dim3 grid(level_count, glwe_dimension + 1, input_lwe_ciphertext_count); + + void *kernel_args[14]; + kernel_args[0] = &lwe_array_out; + kernel_args[1] = &lwe_output_indexes; + kernel_args[2] = &lut_vector; + kernel_args[3] = &lut_vector_indexes; + kernel_args[4] = &lwe_array_in; + kernel_args[5] = &lwe_input_indexes; + kernel_args[6] = &bootstrapping_key; + 
kernel_args[7] = &buffer_fft; + kernel_args[8] = &lwe_dimension; + kernel_args[9] = &polynomial_size; + kernel_args[10] = &base_log; + kernel_args[11] = &level_count; + kernel_args[12] = &d_mem; + + if (max_shared_memory < partial_sm) { + kernel_args[13] = &full_dm; + check_cuda_error(cudaLaunchCooperativeKernel( + (void *)device_bootstrap_fast_low_latency, grid, + thds, (void **)kernel_args, 0, stream->stream)); + } else if (max_shared_memory < full_sm) { + kernel_args[13] = &partial_dm; + check_cuda_error(cudaLaunchCooperativeKernel( + (void *)device_bootstrap_fast_low_latency, + grid, thds, (void **)kernel_args, partial_sm, stream->stream)); + } else { + int no_dm = 0; + kernel_args[13] = &no_dm; + check_cuda_error(cudaLaunchCooperativeKernel( + (void *)device_bootstrap_fast_low_latency, grid, + thds, (void **)kernel_args, full_sm, stream->stream)); + } + + check_cuda_error(cudaGetLastError()); +} + +// Verify if the grid size for the low latency kernel satisfies the cooperative +// group constraints +template +__host__ bool verify_cuda_bootstrap_fast_low_latency_grid_size( + int glwe_dimension, int level_count, int num_samples, + uint32_t max_shared_memory) { + + // If Cooperative Groups is not supported, no need to check anything else + if (!cuda_check_support_cooperative_groups()) + return false; + + // Calculate the dimension of the kernel + uint64_t full_sm = + get_buffer_size_full_sm_bootstrap_fast_low_latency(params::degree); + + uint64_t partial_sm = + get_buffer_size_partial_sm_bootstrap_fast_low_latency( + params::degree); + + int thds = params::degree / params::opt; + + // Get the maximum number of active blocks per streaming multiprocessors + int number_of_blocks = level_count * (glwe_dimension + 1) * num_samples; + int max_active_blocks_per_sm; + + if (max_shared_memory < partial_sm) { + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &max_active_blocks_per_sm, + (void *)device_bootstrap_fast_low_latency, thds, + 0); + } else if (max_shared_memory 
< full_sm) { + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &max_active_blocks_per_sm, + (void *)device_bootstrap_fast_low_latency, + thds, 0); + } else { + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &max_active_blocks_per_sm, + (void *)device_bootstrap_fast_low_latency, thds, + 0); + } + + // Get the number of streaming multiprocessors + int number_of_sm = 0; + cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0); + return number_of_blocks <= max_active_blocks_per_sm * number_of_sm; +} + +#endif // LOWLAT_FAST_PBS_H diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_fast_multibit.cuh b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_fast_multibit.cuh new file mode 100644 index 000000000..a06dc11b0 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_fast_multibit.cuh @@ -0,0 +1,321 @@ +#ifndef CUDA_FAST_MULTIBIT_PBS_CUH +#define CUDA_FAST_MULTIBIT_PBS_CUH + +#include "bootstrap.h" +#include "bootstrap_multibit.cuh" +#include "bootstrap_multibit.h" +#include "cooperative_groups.h" +#include "crypto/gadget.cuh" +#include "crypto/ggsw.cuh" +#include "crypto/torus.cuh" +#include "device.h" +#include "fft/bnsmfft.cuh" +#include "fft/twiddles.cuh" +#include "polynomial/functions.cuh" +#include "polynomial/parameters.cuh" +#include "polynomial/polynomial_math.cuh" +#include "types/complex/operations.cuh" +#include + +template +__global__ void device_multi_bit_bootstrap_fast_accumulate( + Torus *lwe_array_out, Torus *lwe_output_indexes, Torus *lut_vector, + Torus *lut_vector_indexes, Torus *lwe_array_in, Torus *lwe_input_indexes, + double2 *keybundle_array, double2 *join_buffer, Torus *global_accumulator, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, uint32_t grouping_factor, + uint32_t lwe_offset, uint32_t lwe_chunk_size, + uint32_t keybundle_size_per_input) { + + grid_group grid = this_grid(); + + // We use 
shared memory for the polynomials that are used often during the + // bootstrap, since shared memory is kept in L1 cache and accessing it is + // much faster than global memory + extern __shared__ int8_t sharedmem[]; + int8_t *selected_memory; + + selected_memory = sharedmem; + + // We always compute the pointer with most restrictive alignment to avoid + // alignment issues + double2 *accumulator_fft = (double2 *)selected_memory; + Torus *accumulator = + (Torus *)accumulator_fft + + (ptrdiff_t)(sizeof(double2) * polynomial_size / 2 / sizeof(Torus)); + + // The third dimension of the block is used to determine on which ciphertext + // this block is operating, in the case of batch bootstraps + Torus *block_lwe_array_in = + &lwe_array_in[lwe_input_indexes[blockIdx.z] * (lwe_dimension + 1)]; + + Torus *block_lut_vector = &lut_vector[lut_vector_indexes[blockIdx.z] * + params::degree * (glwe_dimension + 1)]; + + double2 *block_join_buffer = + &join_buffer[blockIdx.z * level_count * (glwe_dimension + 1) * + params::degree / 2]; + + Torus *global_slice = + global_accumulator + + (blockIdx.y + blockIdx.z * (glwe_dimension + 1)) * params::degree; + + double2 *keybundle = keybundle_array + + // select the input + blockIdx.z * keybundle_size_per_input; + + if (lwe_offset == 0) { + // Put "b" in [0, 2N[ + Torus b_hat = 0; + rescale_torus_element(block_lwe_array_in[lwe_dimension], b_hat, + 2 * params::degree); + + divide_by_monomial_negacyclic_inplace( + accumulator, &block_lut_vector[blockIdx.y * params::degree], b_hat, + false); + } else { + // Load the accumulator calculated in previous iterations + copy_polynomial( + global_slice, accumulator); + } + + for (int i = 0; (i + lwe_offset) < lwe_dimension && i < lwe_chunk_size; i++) { + // Perform a rounding to increase the accuracy of the + // bootstrapped ciphertext + round_to_closest_multiple_inplace( + accumulator, base_log, level_count); + + // Decompose the accumulator. 
Each block gets one level of the + // decomposition, for the mask and the body (so block 0 will have the + // accumulator decomposed at level 0, 1 at 1, etc.) + GadgetMatrix gadget_acc(base_log, level_count, accumulator); + gadget_acc.decompose_and_compress_level(accumulator_fft, blockIdx.x); + + // We are using the same memory space for accumulator_fft and + // accumulator_rotated, so we need to synchronize here to make sure they + // don't modify the same memory space at the same time + synchronize_threads_in_block(); + + // Perform G^-1(ACC) * GGSW -> GLWE + mul_ggsw_glwe(accumulator, accumulator_fft, + block_join_buffer, keybundle, polynomial_size, + glwe_dimension, level_count, i, grid); + + synchronize_threads_in_block(); + } + + if (lwe_offset + lwe_chunk_size >= (lwe_dimension / grouping_factor)) { + auto block_lwe_array_out = + &lwe_array_out[lwe_output_indexes[blockIdx.z] * + (glwe_dimension * polynomial_size + 1) + + blockIdx.y * polynomial_size]; + + if (blockIdx.x == 0 && blockIdx.y < glwe_dimension) { + // Perform a sample extract. 
At this point, all blocks have the result, + // but we do the computation at block 0 to avoid waiting for extra blocks, + // in case they're not synchronized + sample_extract_mask(block_lwe_array_out, accumulator); + } else if (blockIdx.x == 0 && blockIdx.y == glwe_dimension) { + sample_extract_body(block_lwe_array_out, accumulator, 0); + } + } else { + // Load the accumulator calculated in previous iterations + copy_polynomial( + accumulator, global_slice); + } +} + +template +__host__ __device__ uint64_t +get_buffer_size_full_sm_fast_multibit_bootstrap(uint32_t polynomial_size) { + return sizeof(Torus) * polynomial_size * 2; // accumulator +} + +template +__host__ __device__ uint64_t get_buffer_size_fast_multibit_bootstrap( + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t level_count, uint32_t input_lwe_ciphertext_count, + uint32_t grouping_factor, uint32_t lwe_chunk_size, + uint32_t max_shared_memory) { + + uint64_t buffer_size = 0; + buffer_size += input_lwe_ciphertext_count * lwe_chunk_size * level_count * + (glwe_dimension + 1) * (glwe_dimension + 1) * + (polynomial_size / 2) * sizeof(double2); // keybundle fft + buffer_size += input_lwe_ciphertext_count * (glwe_dimension + 1) * + level_count * (polynomial_size / 2) * + sizeof(double2); // join buffer + buffer_size += input_lwe_ciphertext_count * (glwe_dimension + 1) * + polynomial_size * sizeof(Torus); // global_accumulator + + return buffer_size + buffer_size % sizeof(double2); +} + +template +__host__ void scratch_fast_multi_bit_pbs( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t grouping_factor, + uint32_t max_shared_memory, bool allocate_gpu_memory, + uint32_t lwe_chunk_size = 0) { + + cudaSetDevice(stream->gpu_index); + + uint64_t full_sm_keybundle = + get_buffer_size_full_sm_multibit_bootstrap_keybundle( + 
polynomial_size); + uint64_t full_sm_accumulate = + get_buffer_size_full_sm_fast_multibit_bootstrap(polynomial_size); + + check_cuda_error(cudaFuncSetAttribute( + device_multi_bit_bootstrap_keybundle, + cudaFuncAttributeMaxDynamicSharedMemorySize, full_sm_keybundle)); + cudaFuncSetCacheConfig(device_multi_bit_bootstrap_keybundle, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + + check_cuda_error(cudaFuncSetAttribute( + device_multi_bit_bootstrap_fast_accumulate, + cudaFuncAttributeMaxDynamicSharedMemorySize, full_sm_accumulate)); + cudaFuncSetCacheConfig( + device_multi_bit_bootstrap_fast_accumulate, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + + if (allocate_gpu_memory) { + if (!lwe_chunk_size) + lwe_chunk_size = get_average_lwe_chunk_size(lwe_dimension, level_count, + glwe_dimension); + + uint64_t buffer_size = get_buffer_size_fast_multibit_bootstrap( + lwe_dimension, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, grouping_factor, lwe_chunk_size, + max_shared_memory); + *pbs_buffer = (int8_t *)cuda_malloc_async(buffer_size, stream); + check_cuda_error(cudaGetLastError()); + } +} + +template +__host__ void host_fast_multi_bit_pbs( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes, + Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in, + Torus *lwe_input_indexes, uint64_t *bootstrapping_key, int8_t *pbs_buffer, + uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size, + uint32_t grouping_factor, uint32_t base_log, uint32_t level_count, + uint32_t num_samples, uint32_t num_lut_vectors, uint32_t lwe_idx, + uint32_t max_shared_memory, uint32_t lwe_chunk_size = 0) { + cudaSetDevice(stream->gpu_index); + + if (!lwe_chunk_size) + lwe_chunk_size = + get_average_lwe_chunk_size(lwe_dimension, level_count, glwe_dimension); + + // + double2 *keybundle_fft = (double2 *)pbs_buffer; + double2 *buffer_fft = (double2 *)keybundle_fft + + 
num_samples * lwe_chunk_size * level_count * + (glwe_dimension + 1) * (glwe_dimension + 1) * + (polynomial_size / 2); + Torus *global_accumulator = + (Torus *)buffer_fft + + (ptrdiff_t)(sizeof(double2) * num_samples * (glwe_dimension + 1) * + level_count * (polynomial_size / 2) / sizeof(Torus)); + + // + uint64_t full_sm_keybundle = + get_buffer_size_full_sm_multibit_bootstrap_keybundle( + polynomial_size); + uint64_t full_sm_accumulate = + get_buffer_size_full_sm_fast_multibit_bootstrap(polynomial_size); + + uint32_t keybundle_size_per_input = + lwe_chunk_size * level_count * (glwe_dimension + 1) * + (glwe_dimension + 1) * (polynomial_size / 2); + + // + void *kernel_args[18]; + kernel_args[0] = &lwe_array_out; + kernel_args[1] = &lwe_output_indexes; + kernel_args[2] = &lut_vector; + kernel_args[3] = &lut_vector_indexes; + kernel_args[4] = &lwe_array_in; + kernel_args[5] = &lwe_input_indexes; + kernel_args[6] = &keybundle_fft; + kernel_args[7] = &buffer_fft; + kernel_args[8] = &global_accumulator; + kernel_args[9] = &lwe_dimension; + kernel_args[10] = &glwe_dimension; + kernel_args[11] = &polynomial_size; + kernel_args[12] = &base_log; + kernel_args[13] = &level_count; + kernel_args[14] = &grouping_factor; + kernel_args[17] = &keybundle_size_per_input; + + // + dim3 grid_accumulate(level_count, glwe_dimension + 1, num_samples); + dim3 thds(polynomial_size / params::opt, 1, 1); + + for (uint32_t lwe_offset = 0; lwe_offset < (lwe_dimension / grouping_factor); + lwe_offset += lwe_chunk_size) { + + uint32_t chunk_size = std::min( + lwe_chunk_size, (lwe_dimension / grouping_factor) - lwe_offset); + + // Compute a keybundle + dim3 grid_keybundle(num_samples * chunk_size, + (glwe_dimension + 1) * (glwe_dimension + 1), + level_count); + device_multi_bit_bootstrap_keybundle + <<stream>>>( + lwe_array_in, lwe_input_indexes, keybundle_fft, bootstrapping_key, + lwe_dimension, glwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, lwe_offset, chunk_size, + 
keybundle_size_per_input); + check_cuda_error(cudaGetLastError()); + + kernel_args[15] = &lwe_offset; + kernel_args[16] = &chunk_size; + + check_cuda_error(cudaLaunchCooperativeKernel( + (void *)device_multi_bit_bootstrap_fast_accumulate, + grid_accumulate, thds, (void **)kernel_args, full_sm_accumulate, + stream->stream)); + } +} + +// Verify if the grid size for the low latency kernel satisfies the cooperative +// group constraints +template +__host__ bool +verify_cuda_bootstrap_fast_multi_bit_grid_size(int glwe_dimension, + int level_count, int num_samples, + uint32_t max_shared_memory) { + + // If Cooperative Groups is not supported, no need to check anything else + if (!cuda_check_support_cooperative_groups()) + return false; + + // Calculate the dimension of the kernel + uint64_t full_sm = + get_buffer_size_full_sm_fast_multibit_bootstrap(params::degree); + + int thds = params::degree / params::opt; + + // Get the maximum number of active blocks per streaming multiprocessors + int number_of_blocks = level_count * (glwe_dimension + 1) * num_samples; + int max_active_blocks_per_sm; + + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &max_active_blocks_per_sm, + (void *)device_multi_bit_bootstrap_fast_accumulate, thds, + full_sm); + + // Get the number of streaming multiprocessors + int number_of_sm = 0; + cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0); + return number_of_blocks <= max_active_blocks_per_sm * number_of_sm; +} +#endif // FASTMULTIBIT_PBS_H diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_low_latency.cu b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_low_latency.cu new file mode 100644 index 000000000..928af7b6b --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_low_latency.cu @@ -0,0 +1,845 @@ +#include "bootstrap_fast_low_latency.cuh" +#include "bootstrap_low_latency.cuh" +/* + * Returns the buffer size for 64 bits executions + */ +uint64_t 
get_buffer_size_bootstrap_low_latency_64( + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) { + + switch (polynomial_size) { + case 256: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + return get_buffer_size_bootstrap_fast_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + else + return get_buffer_size_bootstrap_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + break; + case 512: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + return get_buffer_size_bootstrap_fast_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + else + return get_buffer_size_bootstrap_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + break; + case 1024: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + return get_buffer_size_bootstrap_fast_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + else + return get_buffer_size_bootstrap_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + break; + case 2048: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + return get_buffer_size_bootstrap_fast_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + else + return get_buffer_size_bootstrap_low_latency( + glwe_dimension, polynomial_size, 
level_count, + input_lwe_ciphertext_count, max_shared_memory); + break; + case 4096: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + return get_buffer_size_bootstrap_fast_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + else + return get_buffer_size_bootstrap_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + break; + case 8192: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + return get_buffer_size_bootstrap_fast_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + else + return get_buffer_size_bootstrap_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + break; + case 16384: + if (verify_cuda_bootstrap_fast_low_latency_grid_size< + uint64_t, AmortizedDegree<16384>>(glwe_dimension, level_count, + input_lwe_ciphertext_count, + max_shared_memory)) + return get_buffer_size_bootstrap_fast_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + else + return get_buffer_size_bootstrap_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + break; + default: + return 0; + break; + } +} + +/* + * Runs standard checks to validate the inputs + */ +void checks_fast_bootstrap_low_latency(int glwe_dimension, int level_count, + int polynomial_size, int num_samples) { + + assert(( + "Error (GPU low latency PBS): polynomial size should be one of 256, 512, " + "1024, 2048, 4096, 8192, 16384", + polynomial_size == 256 || polynomial_size == 512 || + polynomial_size == 1024 || polynomial_size == 2048 || + polynomial_size == 4096 || polynomial_size == 
8192 || + polynomial_size == 16384)); +} + +/* + * Runs standard checks to validate the inputs + */ +void checks_bootstrap_low_latency(int nbits, int glwe_dimension, + int level_count, int base_log, + int polynomial_size, int num_samples) { + assert(("Error (GPU low latency PBS): base log should be <= nbits", + base_log <= nbits)); + checks_fast_bootstrap_low_latency(glwe_dimension, level_count, + polynomial_size, num_samples); +} + +/* + * This scratch function allocates the necessary amount of data on the GPU for + * the low latency PBS on 32 bits inputs, into `pbs_buffer`. It also + * configures SM options on the GPU in case FULLSM or PARTIALSM mode is going to + * be used. + */ +void scratch_cuda_bootstrap_low_latency_32( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory, + bool allocate_gpu_memory) { + checks_fast_bootstrap_low_latency( + glwe_dimension, level_count, polynomial_size, input_lwe_ciphertext_count); + + switch (polynomial_size) { + case 256: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 512: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, 
pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 2048: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 4096: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 8192: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 16384: + if (verify_cuda_bootstrap_fast_low_latency_grid_size< + uint32_t, AmortizedDegree<16384>>(glwe_dimension, level_count, + input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + 
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + default: + break; + } +} + +/* + * This scratch function allocates the necessary amount of data on the GPU for + * the low_latency PBS on 64 bits inputs, into `pbs_buffer`. It also + * configures SM options on the GPU in case FULLSM or PARTIALSM mode is going to + * be used. + */ +void scratch_cuda_bootstrap_low_latency_64( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory, + bool allocate_gpu_memory) { + + checks_fast_bootstrap_low_latency( + glwe_dimension, level_count, polynomial_size, input_lwe_ciphertext_count); + + switch (polynomial_size) { + case 256: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 512: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 1024: + 
if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 2048: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 4096: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 8192: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + 
input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + case 16384: + if (verify_cuda_bootstrap_fast_low_latency_grid_size< + uint64_t, AmortizedDegree<16384>>(glwe_dimension, level_count, + input_lwe_ciphertext_count, + max_shared_memory)) + scratch_bootstrap_fast_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + else + scratch_bootstrap_low_latency>( + stream, pbs_buffer, glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory, allocate_gpu_memory); + break; + default: + break; + } +} + +/* Perform bootstrapping on a batch of input u32 LWE ciphertexts. + * This function performs best for small numbers of inputs. Beyond a certain + * number of inputs (the exact number depends on the cryptographic parameters), + * the kernel cannot be launched and it is necessary to split the kernel call + * into several calls on smaller batches of inputs. For more details on this + * operation, head on to the equivalent u64 operation. 
+ */ +void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, uint32_t num_samples, + uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory) { + + checks_bootstrap_low_latency(32, glwe_dimension, level_count, base_log, + polynomial_size, num_samples); + + switch (polynomial_size) { + case 256: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 512: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, 
glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 1024: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 2048: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + 
num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 4096: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 8192: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, 
static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 16384: + if (verify_cuda_bootstrap_fast_low_latency_grid_size< + uint32_t, AmortizedDegree<16384>>(glwe_dimension, level_count, + num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + default: + break; + } +} + +/* Perform bootstrapping on a batch of input u64 LWE ciphertexts. + * This function performs best for small numbers of inputs. Beyond a certain + * number of inputs (the exact number depends on the cryptographic parameters), + * the kernel cannot be launched and it is necessary to split the kernel call + * into several calls on smaller batches of inputs. 
+ * + * - `v_stream` is a void pointer to the Cuda stream to be used in the kernel + * launch + * - `gpu_index` is the index of the GPU to be used in the kernel launch + * - lwe_array_out: output batch of num_samples bootstrapped ciphertexts c = + * (a0,..an-1,b) where n is the LWE dimension + * - lut_vector: should hold as many test vectors of size polynomial_size + * as there are input ciphertexts, but actually holds + * num_lut_vectors vectors to reduce memory usage + * - lut_vector_indexes: stores the index corresponding to + * which test vector to use for each sample in + * lut_vector + * - lwe_array_in: input batch of num_samples LWE ciphertexts, containing n + * mask values + 1 body value + * - bootstrapping_key: GGSW encryption of the LWE secret key sk1 + * under secret key sk2 + * bsk = Z + sk1 H + * where H is the gadget matrix and Z is a matrix (k+1).l + * containing GLWE encryptions of 0 under sk2. + * bsk is thus a tensor of size (k+1)^2.l.N.n + * where l is the number of decomposition levels and + * k is the GLWE dimension, N is the polynomial size for + * GLWE. The polynomial size for GLWE and the test vector + * are the same because they have to be in the same ring + * to be multiplied. + * - lwe_dimension: size of the Torus vector used to encrypt the input + * LWE ciphertexts - referred to as n above (~ 600) + * - glwe_dimension: size of the polynomial vector used to encrypt the LUT + * GLWE ciphertexts - referred to as k above. Only the value 1 is supported for + * this parameter. 
+ * - polynomial_size: size of the test polynomial (test vector) and size of the + * GLWE polynomial (~1024) + * - base_log: log base used for the gadget matrix - B = 2^base_log (~8) + * - level_count: number of decomposition levels in the gadget matrix (~4) + * - num_samples: number of encrypted input messages + * - num_lut_vectors: parameter to set the actual number of test vectors to be + * used + * - lwe_idx: the index of the LWE input to consider for the GPU of index + * gpu_index. In case of multi-GPU computing, it is assumed that only a part of + * the input LWE array is copied to each GPU, but the whole LUT array is copied + * (because the case when the number of LUTs is smaller than the number of input + * LWEs is not trivial to take into account in the data repartition on the + * GPUs). `lwe_idx` is used to determine which LUT to consider for a given LWE + * input in the LUT array `lut_vector`. + * - 'max_shared_memory' maximum amount of shared memory to be used inside + * device functions + * + * This function calls a wrapper to a device kernel that performs the + * bootstrapping: + * - the kernel is templatized based on integer discretization and + * polynomial degree + * - num_samples * level_count * (glwe_dimension + 1) blocks of threads are + * launched, where each thread is going to handle one or more polynomial + * coefficients at each stage, for a given level of decomposition, either for + * the LUT mask or its body: + * - perform the blind rotation + * - round the result + * - get the decomposition for the current level + * - switch to the FFT domain + * - multiply with the bootstrapping key + * - come back to the coefficients representation + * - between each stage a synchronization of the threads is necessary (some + * synchronizations happen at the block level, some happen between blocks, using + * cooperative groups). 
+ * - in case the device has enough shared memory, temporary arrays used for + * the different stages (accumulators) are stored into the shared memory + * - the accumulators serve to combine the results for all decomposition + * levels + * - the constant memory (64K) is used for storing the roots of identity + * values for the FFT + */ +void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, uint32_t num_samples, + uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory) { + checks_bootstrap_low_latency(64, glwe_dimension, level_count, base_log, + polynomial_size, num_samples); + + switch (polynomial_size) { + case 256: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 512: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + 
glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 1024: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 2048: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + 
host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 4096: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 8192: + if (verify_cuda_bootstrap_fast_low_latency_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + 
static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + break; + case 16384: + if (verify_cuda_bootstrap_fast_low_latency_grid_size< + uint64_t, AmortizedDegree<16384>>(glwe_dimension, level_count, + num_samples, max_shared_memory)) + host_bootstrap_fast_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + else + host_bootstrap_low_latency>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, glwe_dimension, + lwe_dimension, polynomial_size, base_log, level_count, num_samples, + num_lut_vectors, max_shared_memory); + default: + break; + } +} + +/* + * This cleanup function frees the data for the low latency PBS on GPU in + * pbs_buffer for 32 or 64 bits inputs. 
+ */ +void cleanup_cuda_bootstrap_low_latency(cuda_stream_t *stream, + int8_t **pbs_buffer) { + // Free memory + cuda_drop_async(*pbs_buffer, stream); +} diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_low_latency.cuh b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_low_latency.cuh new file mode 100644 index 000000000..17404b7a4 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_low_latency.cuh @@ -0,0 +1,487 @@ +#ifndef CUDA_LOWLAT_PBS_CUH +#define CUDA_LOWLAT_PBS_CUH + +#ifdef __CDT_PARSER__ +#undef __CUDA_RUNTIME_H__ +#include +#endif + +#include "bootstrap.h" +#include "crypto/gadget.cuh" +#include "crypto/torus.cuh" +#include "device.h" +#include "fft/bnsmfft.cuh" +#include "fft/twiddles.cuh" +#include "polynomial/parameters.cuh" +#include "polynomial/polynomial_math.cuh" +#include "types/complex/operations.cuh" + +template +__global__ void device_bootstrap_low_latency_step_one( + Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in, + Torus *lwe_input_indexes, double2 *bootstrapping_key, + Torus *global_accumulator, double2 *global_accumulator_fft, + uint32_t lwe_iteration, uint32_t lwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, int8_t *device_mem, + uint64_t device_memory_size_per_block) { + + // We use shared memory for the polynomials that are used often during the + // bootstrap, since shared memory is kept in L1 cache and accessing it is + // much faster than global memory + extern __shared__ int8_t sharedmem[]; + int8_t *selected_memory; + uint32_t glwe_dimension = gridDim.y - 1; + + if constexpr (SMD == FULLSM) { + selected_memory = sharedmem; + } else { + int block_index = blockIdx.x + blockIdx.y * gridDim.x + + blockIdx.z * gridDim.x * gridDim.y; + selected_memory = &device_mem[block_index * device_memory_size_per_block]; + } + + Torus *accumulator = (Torus *)selected_memory; + double2 *accumulator_fft = + (double2 *)accumulator + + 
(ptrdiff_t)(sizeof(Torus) * polynomial_size / sizeof(double2)); + + if constexpr (SMD == PARTIALSM) + accumulator_fft = (double2 *)sharedmem; + + // The third dimension of the block is used to determine on which ciphertext + // this block is operating, in the case of batch bootstraps + Torus *block_lwe_array_in = + &lwe_array_in[lwe_input_indexes[blockIdx.z] * (lwe_dimension + 1)]; + + Torus *block_lut_vector = &lut_vector[lut_vector_indexes[blockIdx.z] * + params::degree * (glwe_dimension + 1)]; + + Torus *global_slice = + global_accumulator + + (blockIdx.y + blockIdx.z * (glwe_dimension + 1)) * params::degree; + + double2 *global_fft_slice = + global_accumulator_fft + + (blockIdx.y + blockIdx.x * (glwe_dimension + 1) + + blockIdx.z * level_count * (glwe_dimension + 1)) * + (polynomial_size / 2); + + if (lwe_iteration == 0) { + // First iteration + // Put "b" in [0, 2N[ + Torus b_hat = 0; + rescale_torus_element(block_lwe_array_in[lwe_dimension], b_hat, + 2 * params::degree); + // The y-dimension is used to select the element of the GLWE this block will + // compute + divide_by_monomial_negacyclic_inplace( + accumulator, &block_lut_vector[blockIdx.y * params::degree], b_hat, + false); + + // Persist + int tid = threadIdx.x; + for (int i = 0; i < params::opt; i++) { + global_slice[tid] = accumulator[tid]; + tid += params::degree / params::opt; + } + } + + // Put "a" in [0, 2N[ + Torus a_hat = 0; + rescale_torus_element(block_lwe_array_in[lwe_iteration], a_hat, + 2 * params::degree); // 2 * params::log2_degree + 1); + + synchronize_threads_in_block(); + + // Perform ACC * (X^ä - 1) + multiply_by_monomial_negacyclic_and_sub_polynomial< + Torus, params::opt, params::degree / params::opt>(global_slice, + accumulator, a_hat); + + // Perform a rounding to increase the accuracy of the + // bootstrapped ciphertext + round_to_closest_multiple_inplace( + accumulator, base_log, level_count); + + synchronize_threads_in_block(); + + // Decompose the accumulator. 
Each block gets one level of the + // decomposition, for the mask and the body (so block 0 will have the + // accumulator decomposed at level 0, 1 at 1, etc.) + GadgetMatrix gadget_acc(base_log, level_count, accumulator); + gadget_acc.decompose_and_compress_level(accumulator_fft, blockIdx.x); + + // We are using the same memory space for accumulator_fft and + // accumulator_rotated, so we need to synchronize here to make sure they + // don't modify the same memory space at the same time + // Switch to the FFT space + NSMFFT_direct>(accumulator_fft); + + int tid = threadIdx.x; + for (int i = 0; i < params::opt / 2; i++) { + global_fft_slice[tid] = accumulator_fft[tid]; + tid += params::degree / params::opt; + } +} + +template +__global__ void device_bootstrap_low_latency_step_two( + Torus *lwe_array_out, Torus *lwe_output_indexes, Torus *lut_vector, + Torus *lut_vector_indexes, double2 *bootstrapping_key, + Torus *global_accumulator, double2 *global_accumulator_fft, + uint32_t lwe_iteration, uint32_t lwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, int8_t *device_mem, + uint64_t device_memory_size_per_block) { + + // We use shared memory for the polynomials that are used often during the + // bootstrap, since shared memory is kept in L1 cache and accessing it is + // much faster than global memory + extern __shared__ int8_t sharedmem[]; + int8_t *selected_memory; + uint32_t glwe_dimension = gridDim.y - 1; + + if constexpr (SMD == FULLSM) { + selected_memory = sharedmem; + } else { + int block_index = blockIdx.x + blockIdx.y * gridDim.x + + blockIdx.z * gridDim.x * gridDim.y; + selected_memory = &device_mem[block_index * device_memory_size_per_block]; + } + + // We always compute the pointer with most restrictive alignment to avoid + // alignment issues + double2 *accumulator_fft = (double2 *)selected_memory; + Torus *accumulator = + (Torus *)accumulator_fft + + (ptrdiff_t)(sizeof(double2) * params::degree / 2 / sizeof(Torus)); + + 
if constexpr (SMD == PARTIALSM) + accumulator_fft = (double2 *)sharedmem; + + for (int level = 0; level < level_count; level++) { + double2 *global_fft_slice = global_accumulator_fft + + (level + blockIdx.x * level_count) * + (glwe_dimension + 1) * (params::degree / 2); + + for (int j = 0; j < (glwe_dimension + 1); j++) { + double2 *fft = global_fft_slice + j * params::degree / 2; + + // Get the bootstrapping key piece necessary for the multiplication + // It is already in the Fourier domain + auto bsk_slice = + get_ith_mask_kth_block(bootstrapping_key, lwe_iteration, j, level, + polynomial_size, glwe_dimension, level_count); + auto bsk_poly = bsk_slice + blockIdx.y * params::degree / 2; + + polynomial_product_accumulate_in_fourier_domain( + accumulator_fft, fft, bsk_poly, !level && !j); + } + } + + Torus *global_slice = + global_accumulator + + (blockIdx.y + blockIdx.x * (glwe_dimension + 1)) * params::degree; + + // Load the persisted accumulator + int tid = threadIdx.x; + for (int i = 0; i < params::opt; i++) { + accumulator[tid] = global_slice[tid]; + tid += params::degree / params::opt; + } + + // Perform the inverse FFT on the result of the GGSW x GLWE and add to the + // accumulator + NSMFFT_inverse>(accumulator_fft); + add_to_torus(accumulator_fft, accumulator); + + if (lwe_iteration + 1 == lwe_dimension) { + // Last iteration + auto block_lwe_array_out = + &lwe_array_out[lwe_output_indexes[blockIdx.x] * + (glwe_dimension * polynomial_size + 1) + + blockIdx.y * polynomial_size]; + + if (blockIdx.y < glwe_dimension) { + // Perform a sample extract. 
At this point, all blocks have the result, + // but we do the computation at block 0 to avoid waiting for extra blocks, + // in case they're not synchronized + sample_extract_mask(block_lwe_array_out, accumulator); + } else if (blockIdx.y == glwe_dimension) { + sample_extract_body(block_lwe_array_out, accumulator, 0); + } + } else { + // Persist the updated accumulator + tid = threadIdx.x; + for (int i = 0; i < params::opt; i++) { + global_slice[tid] = accumulator[tid]; + tid += params::degree / params::opt; + } + } +} + +template +__host__ __device__ uint64_t +get_buffer_size_full_sm_bootstrap_low_latency_step_one( + uint32_t polynomial_size) { + return sizeof(Torus) * polynomial_size + // accumulator_rotated + sizeof(double2) * polynomial_size / 2; // accumulator fft +} +template +__host__ __device__ uint64_t +get_buffer_size_full_sm_bootstrap_low_latency_step_two( + uint32_t polynomial_size) { + return sizeof(Torus) * polynomial_size + // accumulator + sizeof(double2) * polynomial_size / 2; // accumulator fft +} + +template +__host__ __device__ uint64_t +get_buffer_size_partial_sm_bootstrap_low_latency(uint32_t polynomial_size) { + return sizeof(double2) * polynomial_size / 2; // accumulator fft +} + +template +__host__ __device__ uint64_t get_buffer_size_bootstrap_low_latency( + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) { + + uint64_t full_sm_step_one = + get_buffer_size_full_sm_bootstrap_low_latency_step_one( + polynomial_size); + uint64_t full_sm_step_two = + get_buffer_size_full_sm_bootstrap_low_latency_step_two( + polynomial_size); + uint64_t partial_sm = + get_buffer_size_partial_sm_bootstrap_low_latency(polynomial_size); + + uint64_t partial_dm_step_one = full_sm_step_one - partial_sm; + uint64_t partial_dm_step_two = full_sm_step_two - partial_sm; + uint64_t full_dm = full_sm_step_one; + + uint64_t device_mem = 0; + if (max_shared_memory < partial_sm) { + 
device_mem = full_dm * input_lwe_ciphertext_count * level_count * + (glwe_dimension + 1); + } else if (max_shared_memory < full_sm_step_two) { + device_mem = (partial_dm_step_two + partial_dm_step_one * level_count) * + input_lwe_ciphertext_count * (glwe_dimension + 1); + } else if (max_shared_memory < full_sm_step_one) { + device_mem = partial_dm_step_one * input_lwe_ciphertext_count * + level_count * (glwe_dimension + 1); + } + // Otherwise, both kernels run all in shared memory + uint64_t buffer_size = device_mem + + // global_accumulator_fft + (glwe_dimension + 1) * level_count * + input_lwe_ciphertext_count * + (polynomial_size / 2) * sizeof(double2) + + // global_accumulator + (glwe_dimension + 1) * input_lwe_ciphertext_count * + polynomial_size * sizeof(Torus); + return buffer_size + buffer_size % sizeof(double2); +} + +template +__host__ void scratch_bootstrap_low_latency( + cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory, + bool allocate_gpu_memory) { + cudaSetDevice(stream->gpu_index); + + uint64_t full_sm_step_one = + get_buffer_size_full_sm_bootstrap_low_latency_step_one( + polynomial_size); + uint64_t full_sm_step_two = + get_buffer_size_full_sm_bootstrap_low_latency_step_two( + polynomial_size); + uint64_t partial_sm = + get_buffer_size_partial_sm_bootstrap_low_latency(polynomial_size); + + // Configure step one + if (max_shared_memory >= partial_sm && max_shared_memory < full_sm_step_one) { + check_cuda_error(cudaFuncSetAttribute( + device_bootstrap_low_latency_step_one, + cudaFuncAttributeMaxDynamicSharedMemorySize, partial_sm)); + cudaFuncSetCacheConfig( + device_bootstrap_low_latency_step_one, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + } else if (max_shared_memory >= partial_sm) { + check_cuda_error(cudaFuncSetAttribute( + device_bootstrap_low_latency_step_one, + 
cudaFuncAttributeMaxDynamicSharedMemorySize, full_sm_step_one)); + cudaFuncSetCacheConfig( + device_bootstrap_low_latency_step_one, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + } + + // Configure step two + if (max_shared_memory >= partial_sm && max_shared_memory < full_sm_step_two) { + check_cuda_error(cudaFuncSetAttribute( + device_bootstrap_low_latency_step_two, + cudaFuncAttributeMaxDynamicSharedMemorySize, partial_sm)); + cudaFuncSetCacheConfig( + device_bootstrap_low_latency_step_two, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + } else if (max_shared_memory >= partial_sm) { + check_cuda_error(cudaFuncSetAttribute( + device_bootstrap_low_latency_step_two, + cudaFuncAttributeMaxDynamicSharedMemorySize, full_sm_step_two)); + cudaFuncSetCacheConfig( + device_bootstrap_low_latency_step_two, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + } + + if (allocate_gpu_memory) { + uint64_t buffer_size = get_buffer_size_bootstrap_low_latency( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, max_shared_memory); + *pbs_buffer = (int8_t *)cuda_malloc_async(buffer_size, stream); + check_cuda_error(cudaGetLastError()); + } +} + +template +__host__ void execute_low_latency_step_one( + cuda_stream_t *stream, Torus *lut_vector, Torus *lut_vector_indexes, + Torus *lwe_array_in, Torus *lwe_input_indexes, double2 *bootstrapping_key, + Torus *global_accumulator, double2 *global_accumulator_fft, + uint32_t input_lwe_ciphertext_count, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, + uint32_t level_count, int8_t *d_mem, uint32_t max_shared_memory, + int lwe_iteration, uint64_t partial_sm, uint64_t partial_dm, + uint64_t full_sm, uint64_t full_dm) { + + int thds = polynomial_size / params::opt; + dim3 grid(level_count, glwe_dimension + 1, input_lwe_ciphertext_count); + + if (max_shared_memory < partial_sm) { + 
device_bootstrap_low_latency_step_one + <<stream>>>( + lut_vector, lut_vector_indexes, lwe_array_in, lwe_input_indexes, + bootstrapping_key, global_accumulator, global_accumulator_fft, + lwe_iteration, lwe_dimension, polynomial_size, base_log, + level_count, d_mem, full_dm); + } else if (max_shared_memory < full_sm) { + device_bootstrap_low_latency_step_one + <<stream>>>( + lut_vector, lut_vector_indexes, lwe_array_in, lwe_input_indexes, + bootstrapping_key, global_accumulator, global_accumulator_fft, + lwe_iteration, lwe_dimension, polynomial_size, base_log, + level_count, d_mem, partial_dm); + } else { + device_bootstrap_low_latency_step_one + <<stream>>>( + lut_vector, lut_vector_indexes, lwe_array_in, lwe_input_indexes, + bootstrapping_key, global_accumulator, global_accumulator_fft, + lwe_iteration, lwe_dimension, polynomial_size, base_log, + level_count, d_mem, 0); + } + check_cuda_error(cudaGetLastError()); +} + +template +__host__ void execute_low_latency_step_two( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes, + Torus *lut_vector, Torus *lut_vector_indexes, double2 *bootstrapping_key, + Torus *global_accumulator, double2 *global_accumulator_fft, + uint32_t input_lwe_ciphertext_count, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, + uint32_t level_count, int8_t *d_mem, uint32_t max_shared_memory, + int lwe_iteration, uint64_t partial_sm, uint64_t partial_dm, + uint64_t full_sm, uint64_t full_dm) { + + int thds = polynomial_size / params::opt; + dim3 grid(input_lwe_ciphertext_count, glwe_dimension + 1); + + if (max_shared_memory < partial_sm) { + device_bootstrap_low_latency_step_two + <<stream>>>( + lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes, + bootstrapping_key, global_accumulator, global_accumulator_fft, + lwe_iteration, lwe_dimension, polynomial_size, base_log, + level_count, d_mem, full_dm); + } else if (max_shared_memory < full_sm) { + 
device_bootstrap_low_latency_step_two + <<stream>>>( + lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes, + bootstrapping_key, global_accumulator, global_accumulator_fft, + lwe_iteration, lwe_dimension, polynomial_size, base_log, + level_count, d_mem, partial_dm); + } else { + device_bootstrap_low_latency_step_two + <<stream>>>( + lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes, + bootstrapping_key, global_accumulator, global_accumulator_fft, + lwe_iteration, lwe_dimension, polynomial_size, base_log, + level_count, d_mem, 0); + } + check_cuda_error(cudaGetLastError()); +} +/* + * Host wrapper to the low latency version + * of bootstrapping + */ +template +__host__ void host_bootstrap_low_latency( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes, + Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in, + Torus *lwe_input_indexes, double2 *bootstrapping_key, int8_t *pbs_buffer, + uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size, + uint32_t base_log, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, uint32_t num_lut_vectors, + uint32_t max_shared_memory) { + cudaSetDevice(stream->gpu_index); + + // With SM each block corresponds to either the mask or body, no need to + // duplicate data for each + uint64_t full_sm_step_one = + get_buffer_size_full_sm_bootstrap_low_latency_step_one( + polynomial_size); + uint64_t full_sm_step_two = + get_buffer_size_full_sm_bootstrap_low_latency_step_two( + polynomial_size); + + uint64_t partial_sm = + get_buffer_size_partial_sm_bootstrap_low_latency(polynomial_size); + + uint64_t partial_dm_step_one = full_sm_step_one - partial_sm; + uint64_t partial_dm_step_two = full_sm_step_two - partial_sm; + uint64_t full_dm_step_one = full_sm_step_one; + uint64_t full_dm_step_two = full_sm_step_two; + + double2 *global_accumulator_fft = (double2 *)pbs_buffer; + Torus *global_accumulator = + (Torus *)global_accumulator_fft + + 
(ptrdiff_t)(sizeof(double2) * (glwe_dimension + 1) * level_count * + input_lwe_ciphertext_count * (polynomial_size / 2) / + sizeof(Torus)); + int8_t *d_mem = (int8_t *)global_accumulator + + (ptrdiff_t)(sizeof(Torus) * (glwe_dimension + 1) * + input_lwe_ciphertext_count * polynomial_size / + sizeof(int8_t)); + + for (int i = 0; i < lwe_dimension; i++) { + execute_low_latency_step_one( + stream, lut_vector, lut_vector_indexes, lwe_array_in, lwe_input_indexes, + bootstrapping_key, global_accumulator, global_accumulator_fft, + input_lwe_ciphertext_count, lwe_dimension, glwe_dimension, + polynomial_size, base_log, level_count, d_mem, max_shared_memory, i, + partial_sm, partial_dm_step_one, full_sm_step_one, full_dm_step_one); + execute_low_latency_step_two( + stream, lwe_array_out, lwe_output_indexes, lut_vector, + lut_vector_indexes, bootstrapping_key, global_accumulator, + global_accumulator_fft, input_lwe_ciphertext_count, lwe_dimension, + glwe_dimension, polynomial_size, base_log, level_count, d_mem, + max_shared_memory, i, partial_sm, partial_dm_step_two, full_sm_step_two, + full_dm_step_two); + } +} + +#endif // LOWLAT_PBS_H diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_multibit.cu b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_multibit.cu new file mode 100644 index 000000000..14b0fdbbf --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_multibit.cu @@ -0,0 +1,485 @@ +#include "../polynomial/parameters.cuh" +#include "bootstrap_fast_multibit.cuh" +#include "bootstrap_multibit.cuh" +#include "bootstrap_multibit.h" + +void checks_multi_bit_pbs(int polynomial_size) { + assert( + ("Error (GPU multi-bit PBS): polynomial size should be one of 256, 512, " + "1024, 2048, 4096, 8192, 16384", + polynomial_size == 256 || polynomial_size == 512 || + polynomial_size == 1024 || polynomial_size == 2048 || + polynomial_size == 4096 || polynomial_size == 8192 || + polynomial_size == 16384)); +} + +void 
cuda_multi_bit_pbs_lwe_ciphertext_vector_64( + cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes, + void *lut_vector, void *lut_vector_indexes, void *lwe_array_in, + void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t grouping_factor, uint32_t base_log, uint32_t level_count, + uint32_t num_samples, uint32_t num_lut_vectors, uint32_t lwe_idx, + uint32_t max_shared_memory, uint32_t lwe_chunk_size) { + + checks_multi_bit_pbs(polynomial_size); + + switch (polynomial_size) { + case 256: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) { + host_fast_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } else { + host_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } + break; + case 512: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) { + host_fast_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + 
static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } else { + host_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } + break; + case 1024: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) { + host_fast_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } else { + host_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } + break; + case 2048: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) { + host_fast_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + 
static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } else { + host_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } + break; + case 4096: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) { + host_fast_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } else { + host_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } + break; + case 8192: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) { + host_fast_multi_bit_pbs>( + stream, 
static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } else { + host_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } + break; + case 16384: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, num_samples, max_shared_memory)) { + host_fast_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } else { + host_multi_bit_pbs>( + stream, static_cast(lwe_array_out), + static_cast(lwe_output_indexes), + static_cast(lut_vector), + static_cast(lut_vector_indexes), + static_cast(lwe_array_in), + static_cast(lwe_input_indexes), + static_cast(bootstrapping_key), pbs_buffer, + glwe_dimension, lwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, num_samples, num_lut_vectors, lwe_idx, + max_shared_memory, lwe_chunk_size); + } + break; + default: + break; + } +} + +void scratch_cuda_multi_bit_pbs_64( + cuda_stream_t *stream, int8_t 
**pbs_buffer, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count, + uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count, + uint32_t max_shared_memory, bool allocate_gpu_memory, + uint32_t lwe_chunk_size) { + + switch (polynomial_size) { + case 256: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) { + scratch_fast_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } else { + scratch_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } + break; + case 512: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) { + scratch_fast_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } else { + scratch_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } + break; + case 1024: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) { + scratch_fast_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } else { + scratch_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + 
level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } + break; + case 2048: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) { + scratch_fast_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } else { + scratch_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } + break; + case 4096: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) { + scratch_fast_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } else { + scratch_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } + break; + case 8192: + if (verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) { + scratch_fast_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } else { + scratch_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } + break; + case 16384: + if 
(verify_cuda_bootstrap_fast_multi_bit_grid_size>( + glwe_dimension, level_count, input_lwe_ciphertext_count, + max_shared_memory)) { + scratch_fast_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } else { + scratch_multi_bit_pbs>( + stream, pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size, + level_count, input_lwe_ciphertext_count, grouping_factor, + max_shared_memory, allocate_gpu_memory, lwe_chunk_size); + } + break; + default: + break; + } +} + +void cleanup_cuda_multi_bit_pbs(cuda_stream_t *stream, int8_t **pbs_buffer) { + + // Free memory + cuda_drop_async(*pbs_buffer, stream); +} + +// Pick the best possible chunk size for each GPU +__host__ uint32_t get_lwe_chunk_size(uint32_t lwe_dimension, + uint32_t level_count, + uint32_t glwe_dimension, + uint32_t num_samples) { + + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); // Assuming device 0 + + const char *v100Name = "V100"; // Known name of V100 GPU + const char *a100Name = "A100"; // Known name of A100 GPU + const char *h100Name = "H100"; // Known name of H100 GPU + + if (std::strstr(deviceProp.name, v100Name) != nullptr) { + // Tesla V100 + if (num_samples == 1) + return 60; + else if (num_samples == 2) + return 40; + else if (num_samples <= 4) + return 20; + else if (num_samples <= 8) + return 10; + else if (num_samples <= 16) + return 40; + else if (num_samples <= 32) + return 27; + else if (num_samples <= 64) + return 20; + else if (num_samples <= 128) + return 18; + else if (num_samples <= 256) + return 16; + else if (num_samples <= 512) + return 15; + else if (num_samples <= 1024) + return 15; + else + return 12; + } else if (std::strstr(deviceProp.name, a100Name) != nullptr) { + // Tesla A100 + if (num_samples < 4) + return 11; + else if (num_samples < 8) + return 6; + else if (num_samples < 16) + return 13; + 
else if (num_samples < 64) + return 19; + else if (num_samples < 128) + return 1; + else if (num_samples < 512) + return 19; + else if (num_samples < 1024) + return 17; + else if (num_samples < 8192) + return 19; + else if (num_samples < 16384) + return 12; + else + return 9; + } else if (std::strstr(deviceProp.name, h100Name) != nullptr) { + // Tesla H100 + return 45; + } + + // Generic case + return 1; +} + +// Returns a chunk size that is not optimal but close to +__host__ uint32_t get_average_lwe_chunk_size(uint32_t lwe_dimension, + uint32_t level_count, + uint32_t glwe_dimension) { + + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); // Assuming device 0 + + const char *v100Name = "V100"; // Known name of V100 GPU + const char *a100Name = "A100"; // Known name of A100 GPU + const char *h100Name = "H100"; // Known name of H100 GPU + + if (std::strstr(deviceProp.name, v100Name) != nullptr) { + // Tesla V100 + return 18; + } else if (std::strstr(deviceProp.name, a100Name) != nullptr) { + // Tesla A100 + return 45; + } else if (std::strstr(deviceProp.name, h100Name) != nullptr) { + // Tesla H100 + return 45; + } + + // Generic case + return 10; +} + +// Returns the maximum buffer size required to execute batches up to +// max_input_lwe_ciphertext_count +// todo: Deprecate this function +__host__ uint64_t get_max_buffer_size_multibit_bootstrap( + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t level_count, uint32_t max_input_lwe_ciphertext_count) { + + uint64_t max_buffer_size = 0; + for (uint32_t input_lwe_ciphertext_count = 1; + input_lwe_ciphertext_count <= max_input_lwe_ciphertext_count; + input_lwe_ciphertext_count *= 2) { + max_buffer_size = std::max( + max_buffer_size, + get_buffer_size_multibit_bootstrap( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, + get_lwe_chunk_size(lwe_dimension, level_count, glwe_dimension, + input_lwe_ciphertext_count))); + } + + return 
max_buffer_size; +} diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_multibit.cuh b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_multibit.cuh new file mode 100644 index 000000000..7cb2b8abc --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrap_multibit.cuh @@ -0,0 +1,476 @@ +#ifndef CUDA_MULTIBIT_PBS_CUH +#define CUDA_MULTIBIT_PBS_CUH + +#include "bootstrap.h" +#include "bootstrap_fast_low_latency.cuh" +#include "bootstrap_multibit.h" +#include "cooperative_groups.h" +#include "crypto/gadget.cuh" +#include "crypto/ggsw.cuh" +#include "crypto/torus.cuh" +#include "device.h" +#include "fft/bnsmfft.cuh" +#include "fft/twiddles.cuh" +#include "polynomial/functions.cuh" +#include "polynomial/parameters.cuh" +#include "polynomial/polynomial_math.cuh" +#include "types/complex/operations.cuh" +#include + +template +__device__ Torus calculates_monomial_degree(Torus *lwe_array_group, + uint32_t ggsw_idx, + uint32_t grouping_factor) { + Torus x = 0; + for (int i = 0; i < grouping_factor; i++) { + uint32_t mask_position = grouping_factor - (i + 1); + int selection_bit = (ggsw_idx >> mask_position) & 1; + x += selection_bit * lwe_array_group[i]; + } + + return rescale_torus_element( + x, 2 * params::degree); // 2 * params::log2_degree + 1); +} + +template +__global__ void device_multi_bit_bootstrap_keybundle( + Torus *lwe_array_in, Torus *lwe_input_indexes, double2 *keybundle_array, + Torus *bootstrapping_key, uint32_t lwe_dimension, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log, + uint32_t level_count, uint32_t lwe_offset, uint32_t lwe_chunk_size, + uint32_t keybundle_size_per_input) { + + extern __shared__ int8_t sharedmem[]; + int8_t *selected_memory = sharedmem; + + // Ids + uint32_t level_id = blockIdx.z; + uint32_t glwe_id = blockIdx.y / (glwe_dimension + 1); + uint32_t poly_id = blockIdx.y % (glwe_dimension + 1); + uint32_t lwe_iteration = (blockIdx.x % 
lwe_chunk_size + lwe_offset); + uint32_t input_idx = blockIdx.x / lwe_chunk_size; + + if (lwe_iteration < (lwe_dimension / grouping_factor)) { + // + Torus *accumulator = (Torus *)selected_memory; + + Torus *block_lwe_array_in = + &lwe_array_in[lwe_input_indexes[input_idx] * (lwe_dimension + 1)]; + + double2 *keybundle = keybundle_array + + // select the input + input_idx * keybundle_size_per_input; + + //////////////////////////////////////////////////////////// + // Computes all keybundles + uint32_t rev_lwe_iteration = + ((lwe_dimension / grouping_factor) - lwe_iteration - 1); + + // //////////////////////////////// + // Keygen guarantees the first term is a constant term of the polynomial, no + // polynomial multiplication required + Torus *bsk_slice = get_multi_bit_ith_lwe_gth_group_kth_block( + bootstrapping_key, 0, rev_lwe_iteration, glwe_id, level_id, + grouping_factor, 2 * polynomial_size, glwe_dimension, level_count); + Torus *bsk_poly = bsk_slice + poly_id * params::degree; + + copy_polynomial( + bsk_poly, accumulator); + + // Accumulate the other terms + for (int g = 1; g < (1 << grouping_factor); g++) { + + Torus *bsk_slice = get_multi_bit_ith_lwe_gth_group_kth_block( + bootstrapping_key, g, rev_lwe_iteration, glwe_id, level_id, + grouping_factor, 2 * polynomial_size, glwe_dimension, level_count); + Torus *bsk_poly = bsk_slice + poly_id * params::degree; + + // Calculates the monomial degree + Torus *lwe_array_group = + block_lwe_array_in + rev_lwe_iteration * grouping_factor; + uint32_t monomial_degree = calculates_monomial_degree( + lwe_array_group, g, grouping_factor); + + synchronize_threads_in_block(); + // Multiply by the bsk element + polynomial_product_accumulate_by_monomial( + accumulator, bsk_poly, monomial_degree, false); + } + + synchronize_threads_in_block(); + + double2 *fft = (double2 *)sharedmem; + + // Move accumulator to local memory + double2 temp[params::opt / 2]; + int tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < 
params::opt / 2; i++) { + temp[i].x = __ll2double_rn((int64_t)accumulator[tid]); + temp[i].y = + __ll2double_rn((int64_t)accumulator[tid + params::degree / 2]); + temp[i].x /= (double)std::numeric_limits::max(); + temp[i].y /= (double)std::numeric_limits::max(); + tid += params::degree / params::opt; + } + + synchronize_threads_in_block(); + // Move from local memory back to shared memory but as complex + tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + fft[tid] = temp[i]; + tid += params::degree / params::opt; + } + synchronize_threads_in_block(); + NSMFFT_direct>(fft); + + // lwe iteration + auto keybundle_out = get_ith_mask_kth_block( + keybundle, blockIdx.x % lwe_chunk_size, glwe_id, level_id, + polynomial_size, glwe_dimension, level_count); + auto keybundle_poly = keybundle_out + poly_id * params::degree / 2; + + copy_polynomial( + fft, keybundle_poly); + } +} + +template +__global__ void device_multi_bit_bootstrap_accumulate_step_one( + Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *lut_vector, + Torus *lut_vector_indexes, Torus *global_accumulator, + double2 *global_accumulator_fft, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, + uint32_t level_count, uint32_t lwe_iteration) { + + // We use shared memory for the polynomials that are used often during the + // bootstrap, since shared memory is kept in L1 cache and accessing it is + // much faster than global memory + extern __shared__ int8_t sharedmem[]; + int8_t *selected_memory; + + selected_memory = sharedmem; + + Torus *accumulator = (Torus *)selected_memory; + double2 *accumulator_fft = + (double2 *)accumulator + + (ptrdiff_t)(sizeof(Torus) * polynomial_size / sizeof(double2)); + + Torus *block_lwe_array_in = + &lwe_array_in[lwe_input_indexes[blockIdx.z] * (lwe_dimension + 1)]; + + Torus *block_lut_vector = &lut_vector[lut_vector_indexes[blockIdx.z] * + params::degree * (glwe_dimension + 1)]; + + Torus 
*global_slice = + global_accumulator + + (blockIdx.y + blockIdx.z * (glwe_dimension + 1)) * params::degree; + + double2 *global_fft_slice = + global_accumulator_fft + + (blockIdx.y + blockIdx.x * (glwe_dimension + 1) + + blockIdx.z * level_count * (glwe_dimension + 1)) * + (polynomial_size / 2); + + if (lwe_iteration == 0) { + // First iteration + //////////////////////////////////////////////////////////// + // Initializes the accumulator with the body of LWE + // Put "b" in [0, 2N[ + Torus b_hat = 0; + rescale_torus_element(block_lwe_array_in[lwe_dimension], b_hat, + 2 * params::degree); + + divide_by_monomial_negacyclic_inplace( + accumulator, &block_lut_vector[blockIdx.y * params::degree], b_hat, + false); + + // Persist + copy_polynomial( + accumulator, global_slice); + } else { + // Load the accumulator calculated in previous iterations + copy_polynomial( + global_slice, accumulator); + } + + // Perform a rounding to increase the accuracy of the + // bootstrapped ciphertext + round_to_closest_multiple_inplace( + accumulator, base_log, level_count); + + // Decompose the accumulator. Each block gets one level of the + // decomposition, for the mask and the body (so block 0 will have the + // accumulator decomposed at level 0, 1 at 1, etc.) 
+ GadgetMatrix gadget_acc(base_log, level_count, accumulator); + gadget_acc.decompose_and_compress_next_polynomial(accumulator_fft, + blockIdx.x); + + // We are using the same memory space for accumulator_fft and + // accumulator_rotated, so we need to synchronize here to make sure they + // don't modify the same memory space at the same time + // Switch to the FFT space + NSMFFT_direct>(accumulator_fft); + + copy_polynomial( + accumulator_fft, global_fft_slice); +} + +template +__global__ void device_multi_bit_bootstrap_accumulate_step_two( + Torus *lwe_array_out, Torus *lwe_output_indexes, double2 *keybundle_array, + Torus *global_accumulator, double2 *global_accumulator_fft, + uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size, + uint32_t level_count, uint32_t grouping_factor, uint32_t iteration, + uint32_t lwe_offset, uint32_t lwe_chunk_size) { + // We use shared memory for the polynomials that are used often during the + // bootstrap, since shared memory is kept in L1 cache and accessing it is + // much faster than global memory + extern __shared__ int8_t sharedmem[]; + int8_t *selected_memory; + + selected_memory = sharedmem; + double2 *accumulator_fft = (double2 *)selected_memory; + + double2 *keybundle = keybundle_array + + // select the input + blockIdx.x * lwe_chunk_size * level_count * + (glwe_dimension + 1) * (glwe_dimension + 1) * + (polynomial_size / 2); + + double2 *global_accumulator_fft_input = + global_accumulator_fft + + blockIdx.x * level_count * (glwe_dimension + 1) * (polynomial_size / 2); + + for (int level = 0; level < level_count; level++) { + double2 *global_fft_slice = + global_accumulator_fft_input + + level * (glwe_dimension + 1) * (polynomial_size / 2); + + for (int j = 0; j < (glwe_dimension + 1); j++) { + double2 *fft = global_fft_slice + j * params::degree / 2; + + // Get the bootstrapping key piece necessary for the multiplication + // It is already in the Fourier domain + auto bsk_slice = + 
get_ith_mask_kth_block(keybundle, iteration, j, level, + polynomial_size, glwe_dimension, level_count); + auto bsk_poly = bsk_slice + blockIdx.y * params::degree / 2; + + polynomial_product_accumulate_in_fourier_domain( + accumulator_fft, fft, bsk_poly, !level && !j); + } + } + + // Perform the inverse FFT on the result of the GGSW x GLWE and add to the + // accumulator + NSMFFT_inverse>(accumulator_fft); + Torus *global_slice = + global_accumulator + + (blockIdx.y + blockIdx.x * (glwe_dimension + 1)) * params::degree; + + add_to_torus(accumulator_fft, global_slice, true); + synchronize_threads_in_block(); + + uint32_t lwe_iteration = iteration + lwe_offset; + if (lwe_iteration + 1 == (lwe_dimension / grouping_factor)) { + // Last iteration + auto block_lwe_array_out = + &lwe_array_out[lwe_output_indexes[blockIdx.x] * + (glwe_dimension * polynomial_size + 1) + + blockIdx.y * polynomial_size]; + + if (blockIdx.y < glwe_dimension) { + // Perform a sample extract. At this point, all blocks have the result, + // but we do the computation at block 0 to avoid waiting for extra blocks, + // in case they're not synchronized + sample_extract_mask(block_lwe_array_out, global_slice); + } else if (blockIdx.y == glwe_dimension) { + sample_extract_body(block_lwe_array_out, global_slice, 0); + } + } +} +template +__host__ __device__ uint64_t +get_buffer_size_full_sm_multibit_bootstrap_keybundle(uint32_t polynomial_size) { + return sizeof(Torus) * polynomial_size; // accumulator +} + +template +__host__ __device__ uint64_t +get_buffer_size_full_sm_multibit_bootstrap_step_one(uint32_t polynomial_size) { + return sizeof(Torus) * polynomial_size * 2; // accumulator +} +template +__host__ __device__ uint64_t +get_buffer_size_full_sm_multibit_bootstrap_step_two(uint32_t polynomial_size) { + return sizeof(Torus) * polynomial_size; // accumulator +} + +template +__host__ __device__ uint64_t get_buffer_size_multibit_bootstrap( + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t 
level_count, + uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size) { + + uint64_t buffer_size = 0; + buffer_size += input_lwe_ciphertext_count * lwe_chunk_size * level_count * + (glwe_dimension + 1) * (glwe_dimension + 1) * + (polynomial_size / 2) * sizeof(double2); // keybundle fft + buffer_size += input_lwe_ciphertext_count * (glwe_dimension + 1) * + level_count * (polynomial_size / 2) * + sizeof(double2); // global_accumulator_fft + buffer_size += input_lwe_ciphertext_count * (glwe_dimension + 1) * + polynomial_size * sizeof(Torus); // global_accumulator + + return buffer_size + buffer_size % sizeof(double2); +} + +template +__host__ void +scratch_multi_bit_pbs(cuda_stream_t *stream, int8_t **pbs_buffer, + uint32_t lwe_dimension, uint32_t glwe_dimension, + uint32_t polynomial_size, uint32_t level_count, + uint32_t input_lwe_ciphertext_count, + uint32_t grouping_factor, uint32_t max_shared_memory, + bool allocate_gpu_memory, uint32_t lwe_chunk_size = 0) { + + cudaSetDevice(stream->gpu_index); + + uint64_t full_sm_keybundle = + get_buffer_size_full_sm_multibit_bootstrap_keybundle( + polynomial_size); + uint64_t full_sm_accumulate_step_one = + get_buffer_size_full_sm_multibit_bootstrap_step_one( + polynomial_size); + uint64_t full_sm_accumulate_step_two = + get_buffer_size_full_sm_multibit_bootstrap_step_two( + polynomial_size); + + check_cuda_error(cudaFuncSetAttribute( + device_multi_bit_bootstrap_keybundle, + cudaFuncAttributeMaxDynamicSharedMemorySize, full_sm_keybundle)); + cudaFuncSetCacheConfig(device_multi_bit_bootstrap_keybundle, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + + check_cuda_error(cudaFuncSetAttribute( + device_multi_bit_bootstrap_accumulate_step_one, + cudaFuncAttributeMaxDynamicSharedMemorySize, + full_sm_accumulate_step_one)); + cudaFuncSetCacheConfig( + device_multi_bit_bootstrap_accumulate_step_one, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + + 
check_cuda_error(cudaFuncSetAttribute( + device_multi_bit_bootstrap_accumulate_step_two, + cudaFuncAttributeMaxDynamicSharedMemorySize, + full_sm_accumulate_step_two)); + cudaFuncSetCacheConfig( + device_multi_bit_bootstrap_accumulate_step_two, + cudaFuncCachePreferShared); + check_cuda_error(cudaGetLastError()); + + if (allocate_gpu_memory) { + if (!lwe_chunk_size) + lwe_chunk_size = get_average_lwe_chunk_size(lwe_dimension, level_count, + glwe_dimension); + + uint64_t buffer_size = get_buffer_size_multibit_bootstrap( + glwe_dimension, polynomial_size, level_count, + input_lwe_ciphertext_count, lwe_chunk_size); + *pbs_buffer = (int8_t *)cuda_malloc_async(buffer_size, stream); + check_cuda_error(cudaGetLastError()); + } +} + +template +__host__ void host_multi_bit_pbs( + cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes, + Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in, + Torus *lwe_input_indexes, uint64_t *bootstrapping_key, int8_t *pbs_buffer, + uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size, + uint32_t grouping_factor, uint32_t base_log, uint32_t level_count, + uint32_t num_samples, uint32_t num_lut_vectors, uint32_t lwe_idx, + uint32_t max_shared_memory, uint32_t lwe_chunk_size = 0) { + cudaSetDevice(stream->gpu_index); + + // If a chunk size is not passed to this function, select one. 
+ if (!lwe_chunk_size) + lwe_chunk_size = + get_average_lwe_chunk_size(lwe_dimension, level_count, glwe_dimension); + // + double2 *keybundle_fft = (double2 *)pbs_buffer; + double2 *global_accumulator_fft = + (double2 *)keybundle_fft + + num_samples * lwe_chunk_size * level_count * (glwe_dimension + 1) * + (glwe_dimension + 1) * (polynomial_size / 2); + Torus *global_accumulator = + (Torus *)global_accumulator_fft + + (ptrdiff_t)(sizeof(double2) * num_samples * (glwe_dimension + 1) * + level_count * (polynomial_size / 2) / sizeof(Torus)); + + // + uint64_t full_sm_keybundle = + get_buffer_size_full_sm_multibit_bootstrap_keybundle( + polynomial_size); + uint64_t full_sm_accumulate_step_one = + get_buffer_size_full_sm_multibit_bootstrap_step_one( + polynomial_size); + uint64_t full_sm_accumulate_step_two = + get_buffer_size_full_sm_multibit_bootstrap_step_two( + polynomial_size); + + uint32_t keybundle_size_per_input = + lwe_chunk_size * level_count * (glwe_dimension + 1) * + (glwe_dimension + 1) * (polynomial_size / 2); + + // + dim3 grid_accumulate_step_one(level_count, glwe_dimension + 1, num_samples); + dim3 grid_accumulate_step_two(num_samples, glwe_dimension + 1); + dim3 thds(polynomial_size / params::opt, 1, 1); + + for (uint32_t lwe_offset = 0; lwe_offset < (lwe_dimension / grouping_factor); + lwe_offset += lwe_chunk_size) { + + uint32_t chunk_size = std::min( + lwe_chunk_size, (lwe_dimension / grouping_factor) - lwe_offset); + + // Compute a keybundle + dim3 grid_keybundle(num_samples * chunk_size, + (glwe_dimension + 1) * (glwe_dimension + 1), + level_count); + device_multi_bit_bootstrap_keybundle + <<stream>>>( + lwe_array_in, lwe_input_indexes, keybundle_fft, bootstrapping_key, + lwe_dimension, glwe_dimension, polynomial_size, grouping_factor, + base_log, level_count, lwe_offset, chunk_size, + keybundle_size_per_input); + check_cuda_error(cudaGetLastError()); + + // Accumulate + for (int j = 0; j < chunk_size; j++) { + 
device_multi_bit_bootstrap_accumulate_step_one + <<stream>>>(lwe_array_in, lwe_input_indexes, lut_vector, + lut_vector_indexes, global_accumulator, + global_accumulator_fft, lwe_dimension, + glwe_dimension, polynomial_size, base_log, + level_count, j + lwe_offset); + check_cuda_error(cudaGetLastError()); + + device_multi_bit_bootstrap_accumulate_step_two + <<stream>>>(lwe_array_out, lwe_output_indexes, keybundle_fft, + global_accumulator, global_accumulator_fft, + lwe_dimension, glwe_dimension, polynomial_size, + level_count, grouping_factor, j, lwe_offset, + lwe_chunk_size); + check_cuda_error(cudaGetLastError()); + } + } +} +#endif // MULTIBIT_PBS_H diff --git a/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrapping_key.cuh b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrapping_key.cuh new file mode 100644 index 000000000..d009b6e7c --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/pbs/bootstrapping_key.cuh @@ -0,0 +1,500 @@ +#ifndef CUDA_BSK_CUH +#define CUDA_BSK_CUH + +#include "bootstrap.h" +#include "bootstrap_multibit.h" +#include "device.h" +#include "fft/bnsmfft.cuh" +#include "polynomial/parameters.cuh" +#include +#include + +__device__ inline int get_start_ith_ggsw(int i, uint32_t polynomial_size, + int glwe_dimension, + uint32_t level_count) { + return i * polynomial_size / 2 * (glwe_dimension + 1) * (glwe_dimension + 1) * + level_count; +} + +//////////////////////////////////////////////// +template +__device__ T *get_ith_mask_kth_block(T *ptr, int i, int k, int level, + uint32_t polynomial_size, + int glwe_dimension, uint32_t level_count) { + return &ptr[get_start_ith_ggsw(i, polynomial_size, glwe_dimension, + level_count) + + level * polynomial_size / 2 * (glwe_dimension + 1) * + (glwe_dimension + 1) + + k * polynomial_size / 2 * (glwe_dimension + 1)]; +} + +template +__device__ T *get_ith_body_kth_block(T *ptr, int i, int k, int level, + uint32_t polynomial_size, + int glwe_dimension, uint32_t level_count) { + 
return &ptr[get_start_ith_ggsw(i, polynomial_size, glwe_dimension, + level_count) + + level * polynomial_size / 2 * (glwe_dimension + 1) * + (glwe_dimension + 1) + + k * polynomial_size / 2 * (glwe_dimension + 1) + + glwe_dimension * polynomial_size / 2]; +} + +//////////////////////////////////////////////// +__device__ inline int get_start_ith_lwe(uint32_t i, uint32_t grouping_factor, + uint32_t polynomial_size, + uint32_t glwe_dimension, + uint32_t level_count) { + return i * (1 << grouping_factor) * polynomial_size / 2 * + (glwe_dimension + 1) * (glwe_dimension + 1) * level_count; +} + +template +__device__ T *get_multi_bit_ith_lwe_gth_group_kth_block( + T *ptr, int g, int i, int k, int level, uint32_t grouping_factor, + uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t level_count) { + T *ptr_group = ptr + get_start_ith_lwe(i, grouping_factor, polynomial_size, + glwe_dimension, level_count); + return get_ith_mask_kth_block(ptr_group, g, k, level, polynomial_size, + glwe_dimension, level_count); +} +//////////////////////////////////////////////// +template +void cuda_convert_lwe_bootstrap_key(double2 *dest, ST *src, + cuda_stream_t *stream, + uint32_t input_lwe_dim, uint32_t glwe_dim, + uint32_t level_count, + uint32_t polynomial_size, + uint32_t total_polynomials) { + + cudaSetDevice(stream->gpu_index); + int shared_memory_size = sizeof(double) * polynomial_size; + + // Here the buffer size is the size of double2 times the number of polynomials + // times the polynomial size over 2 because the polynomials are compressed + // into the complex domain to perform the FFT + size_t buffer_size = + total_polynomials * polynomial_size / 2 * sizeof(double2); + + int gridSize = total_polynomials; + int blockSize = polynomial_size / choose_opt_amortized(polynomial_size); + + double2 *h_bsk = (double2 *)malloc(buffer_size); + + double2 *d_bsk = (double2 *)cuda_malloc_async(buffer_size, stream); + + // compress real bsk to complex and divide it on DOUBLE_MAX + 
for (int i = 0; i < total_polynomials; i++) { + int complex_current_poly_idx = i * polynomial_size / 2; + int torus_current_poly_idx = i * polynomial_size; + for (int j = 0; j < polynomial_size / 2; j++) { + h_bsk[complex_current_poly_idx + j].x = src[torus_current_poly_idx + j]; + h_bsk[complex_current_poly_idx + j].y = + src[torus_current_poly_idx + j + polynomial_size / 2]; + h_bsk[complex_current_poly_idx + j].x /= + (double)std::numeric_limits::max(); + h_bsk[complex_current_poly_idx + j].y /= + (double)std::numeric_limits::max(); + } + } + + cuda_memcpy_async_to_gpu(d_bsk, h_bsk, buffer_size, stream); + + double2 *buffer; + switch (polynomial_size) { + case 256: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncCachePreferShared)); + batch_NSMFFT, ForwardFFT>, FULLSM> + <<stream>>>( + d_bsk, dest, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_NSMFFT, ForwardFFT>, NOSM> + <<stream>>>(d_bsk, dest, buffer); + } + break; + case 512: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncCachePreferShared)); + batch_NSMFFT, ForwardFFT>, FULLSM> + <<stream>>>( + d_bsk, dest, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_NSMFFT, ForwardFFT>, NOSM> + <<stream>>>(d_bsk, dest, buffer); + } + break; + case 
1024: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncCachePreferShared)); + batch_NSMFFT, ForwardFFT>, FULLSM> + <<stream>>>( + d_bsk, dest, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_NSMFFT, ForwardFFT>, NOSM> + <<stream>>>(d_bsk, dest, buffer); + } + break; + case 2048: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncCachePreferShared)); + batch_NSMFFT, ForwardFFT>, FULLSM> + <<stream>>>( + d_bsk, dest, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_NSMFFT, ForwardFFT>, NOSM> + <<stream>>>(d_bsk, dest, buffer); + } + break; + case 4096: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncCachePreferShared)); + batch_NSMFFT, ForwardFFT>, FULLSM> + <<stream>>>( + d_bsk, dest, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_NSMFFT, ForwardFFT>, NOSM> + <<stream>>>(d_bsk, dest, buffer); + } + break; + case 
8192: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncCachePreferShared)); + batch_NSMFFT, ForwardFFT>, FULLSM> + <<stream>>>( + d_bsk, dest, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_NSMFFT, ForwardFFT>, NOSM> + <<stream>>>(d_bsk, dest, buffer); + } + break; + case 16384: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_NSMFFT, ForwardFFT>, FULLSM>, + cudaFuncCachePreferShared)); + batch_NSMFFT, ForwardFFT>, FULLSM> + <<stream>>>( + d_bsk, dest, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_NSMFFT, ForwardFFT>, NOSM> + <<stream>>>(d_bsk, dest, buffer); + } + break; + default: + break; + } + + cuda_drop_async(d_bsk, stream); + cuda_drop_async(buffer, stream); + free(h_bsk); +} + +void cuda_convert_lwe_bootstrap_key_32(void *dest, void *src, + cuda_stream_t *stream, + uint32_t input_lwe_dim, + uint32_t glwe_dim, uint32_t level_count, + uint32_t polynomial_size) { + uint32_t total_polynomials = + input_lwe_dim * (glwe_dim + 1) * (glwe_dim + 1) * level_count; + cuda_convert_lwe_bootstrap_key( + (double2 *)dest, (int32_t *)src, stream, input_lwe_dim, glwe_dim, + level_count, polynomial_size, total_polynomials); +} + +void cuda_convert_lwe_bootstrap_key_64(void *dest, void *src, + cuda_stream_t *stream, + uint32_t input_lwe_dim, + 
uint32_t glwe_dim, uint32_t level_count, + uint32_t polynomial_size) { + uint32_t total_polynomials = + input_lwe_dim * (glwe_dim + 1) * (glwe_dim + 1) * level_count; + cuda_convert_lwe_bootstrap_key( + (double2 *)dest, (int64_t *)src, stream, input_lwe_dim, glwe_dim, + level_count, polynomial_size, total_polynomials); +} + +void cuda_convert_lwe_multi_bit_bootstrap_key_64( + void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim, + uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size, + uint32_t grouping_factor) { + uint32_t total_polynomials = input_lwe_dim * (glwe_dim + 1) * (glwe_dim + 1) * + level_count * (1 << grouping_factor) / + grouping_factor; + size_t buffer_size = total_polynomials * polynomial_size * sizeof(uint64_t); + + cuda_memcpy_async_to_gpu((uint64_t *)dest, (uint64_t *)src, buffer_size, + stream); +} + +void cuda_fourier_polynomial_mul(void *_input1, void *_input2, void *_output, + cuda_stream_t *stream, + uint32_t polynomial_size, + uint32_t total_polynomials) { + + auto input1 = (double2 *)_input1; + auto input2 = (double2 *)_input2; + auto output = (double2 *)_output; + + size_t shared_memory_size = sizeof(double2) * polynomial_size / 2; + + int gridSize = total_polynomials; + int blockSize = polynomial_size / choose_opt_amortized(polynomial_size); + + double2 *buffer; + switch (polynomial_size) { + case 256: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncCachePreferShared)); + batch_polynomial_mul, ForwardFFT>, FULLSM> + <<stream>>>( + input1, input2, output, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + 
batch_polynomial_mul, ForwardFFT>, NOSM> + <<stream>>>(input1, input2, output, + buffer); + } + break; + case 512: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncCachePreferShared)); + batch_polynomial_mul, ForwardFFT>, FULLSM> + <<stream>>>( + input1, input2, output, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_polynomial_mul, ForwardFFT>, NOSM> + <<stream>>>(input1, input2, output, + buffer); + } + break; + case 1024: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncCachePreferShared)); + batch_polynomial_mul, ForwardFFT>, FULLSM> + <<stream>>>( + input1, input2, output, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_polynomial_mul, ForwardFFT>, NOSM> + <<stream>>>(input1, input2, output, + buffer); + } + break; + case 2048: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncCachePreferShared)); + batch_polynomial_mul, 
ForwardFFT>, FULLSM> + <<stream>>>( + input1, input2, output, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_polynomial_mul, ForwardFFT>, NOSM> + <<stream>>>(input1, input2, output, + buffer); + } + break; + case 4096: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncCachePreferShared)); + batch_polynomial_mul, ForwardFFT>, FULLSM> + <<stream>>>( + input1, input2, output, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_polynomial_mul, ForwardFFT>, NOSM> + <<stream>>>(input1, input2, output, + buffer); + } + break; + case 8192: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncCachePreferShared)); + batch_polynomial_mul, ForwardFFT>, FULLSM> + <<stream>>>( + input1, input2, output, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_polynomial_mul, ForwardFFT>, NOSM> + <<stream>>>(input1, input2, output, + buffer); + } + break; + case 16384: + if (shared_memory_size <= cuda_get_max_shared_memory(stream->gpu_index)) { + buffer = (double2 *)cuda_malloc_async(0, stream); + check_cuda_error(cudaFuncSetAttribute( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + 
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size)); + check_cuda_error(cudaFuncSetCacheConfig( + batch_polynomial_mul, ForwardFFT>, + FULLSM>, + cudaFuncCachePreferShared)); + batch_polynomial_mul, ForwardFFT>, + FULLSM> + <<stream>>>( + input1, input2, output, buffer); + } else { + buffer = (double2 *)cuda_malloc_async( + shared_memory_size * total_polynomials, stream); + batch_polynomial_mul, ForwardFFT>, NOSM> + <<stream>>>(input1, input2, output, + buffer); + } + break; + default: + break; + } + cuda_drop_async(buffer, stream); +} + +// We need these lines so the compiler knows how to specialize these functions +template __device__ uint64_t *get_ith_mask_kth_block(uint64_t *ptr, int i, + int k, int level, + uint32_t polynomial_size, + int glwe_dimension, + uint32_t level_count); +template __device__ uint32_t *get_ith_mask_kth_block(uint32_t *ptr, int i, + int k, int level, + uint32_t polynomial_size, + int glwe_dimension, + uint32_t level_count); +template __device__ double2 *get_ith_mask_kth_block(double2 *ptr, int i, int k, + int level, + uint32_t polynomial_size, + int glwe_dimension, + uint32_t level_count); +template __device__ uint64_t *get_ith_body_kth_block(uint64_t *ptr, int i, + int k, int level, + uint32_t polynomial_size, + int glwe_dimension, + uint32_t level_count); +template __device__ uint32_t *get_ith_body_kth_block(uint32_t *ptr, int i, + int k, int level, + uint32_t polynomial_size, + int glwe_dimension, + uint32_t level_count); +template __device__ double2 *get_ith_body_kth_block(double2 *ptr, int i, int k, + int level, + uint32_t polynomial_size, + int glwe_dimension, + uint32_t level_count); + +template __device__ uint64_t *get_multi_bit_ith_lwe_gth_group_kth_block( + uint64_t *ptr, int g, int i, int k, int level, uint32_t grouping_factor, + uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t level_count); + +template __device__ double2 *get_multi_bit_ith_lwe_gth_group_kth_block( + double2 *ptr, int g, int i, int k, 
int level, uint32_t grouping_factor, + uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t level_count); +#endif // CNCRT_BSK_H diff --git a/backends/tfhe-cuda-backend/implementation/src/polynomial/functions.cuh b/backends/tfhe-cuda-backend/implementation/src/polynomial/functions.cuh new file mode 100644 index 000000000..902ff0583 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/polynomial/functions.cuh @@ -0,0 +1,305 @@ +#ifndef GPU_POLYNOMIAL_FUNCTIONS_CUH +#define GPU_POLYNOMIAL_FUNCTIONS_CUH + +#include "crypto/torus.cuh" +#include "device.h" + +// Return A if C == 0 and B if C == 1 +#define SEL(A, B, C) ((-(C) & ((A) ^ (B))) ^ (A)) + +/* + * function compresses decomposed buffer into half size complex buffer for fft + */ +template +__device__ void real_to_complex_compressed(int16_t *src, double2 *dst) { + int tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + dst[tid].x = __int2double_rn(src[2 * tid]); + dst[tid].y = __int2double_rn(src[2 * tid + 1]); + tid += params::degree / params::opt; + } +} + +/* + * copy source polynomial to specific slice of batched polynomials + * used only in low latency version + */ +template +__device__ void copy_into_ith_polynomial_low_lat(T *source, T *dst, int i) { + int tid = threadIdx.x; + int begin = i * (params::degree / 2 + 1); +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + dst[tid + begin] = source[tid]; + tid = tid + params::degree / params::opt; + } + + if (threadIdx.x == 0) { + dst[params::degree / 2 + begin] = source[params::degree / 2]; + } +} + +template +__device__ void copy_polynomial(T *source, T *dst) { + int tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < elems_per_thread; i++) { + dst[tid] = source[tid]; + tid = tid + block_size; + } +} + +/* + * accumulates source polynomial into specific slice of batched polynomial + * used only in low latency version + */ +template +__device__ void add_polynomial_inplace_low_lat(T *source, T 
*dst, int p_id) { + int tid = threadIdx.x; + int begin = p_id * (params::degree / 2 + 1); +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + dst[tid] += source[tid + begin]; + tid = tid + params::degree / params::opt; + } + + if (threadIdx.x == 0) { + dst[params::degree / 2] += source[params::degree / 2 + begin]; + } +} + +/* + * Receives num_poly concatenated polynomials of type T. For each: + * + * Performs acc = input / X^j in the negacyclic ring if zeroAcc = false + * Performs acc = 0 if zeroAcc + * reads from input and writes the result to accumulator. + * + * By default, it works on a single polynomial. + */ +template +__device__ void divide_by_monomial_negacyclic_inplace(T *accumulator, T *input, + uint32_t j, bool zeroAcc, + uint32_t num_poly = 1) { + constexpr int degree = block_size * elems_per_thread; + for (int z = 0; z < num_poly; z++) { + T *accumulator_slice = (T *)accumulator + (ptrdiff_t)(z * degree); + T *input_slice = (T *)input + (ptrdiff_t)(z * degree); + + int tid = threadIdx.x; + if (zeroAcc) { + for (int i = 0; i < elems_per_thread; i++) { + accumulator_slice[tid] = 0; + tid += block_size; + } + } else { + tid = threadIdx.x; + for (int i = 0; i < elems_per_thread; i++) { + if (j < degree) { + // if (tid < degree - j) + // accumulator_slice[tid] = input_slice[tid + j]; + // else + // accumulator_slice[tid] = -input_slice[tid - degree + j]; + int x = tid + j - SEL(degree, 0, tid < degree - j); + accumulator_slice[tid] = + SEL(-1, 1, tid < degree - j) * input_slice[x]; + } else { + int32_t jj = j - degree; + // if (tid < degree - jj) + // accumulator_slice[tid] = -input_slice[tid + jj]; + // else + // accumulator_slice[tid] = input_slice[tid - degree + jj]; + int x = tid + jj - SEL(degree, 0, tid < degree - jj); + accumulator_slice[tid] = + SEL(1, -1, tid < degree - jj) * input_slice[x]; + } + tid += block_size; + } + } + } +} + +/* + * Receives num_poly concatenated polynomials of type T. 
For each: + * + * Performs result_acc = acc * X^j - acc, i.e. acc * (X^j - 1), negacyclically + * reads acc and writes the rotated-and-subtracted polynomial to result_acc + * + * By default, it works on a single polynomial. + */ +template +__device__ void multiply_by_monomial_negacyclic_and_sub_polynomial( + T *acc, T *result_acc, uint32_t j, uint32_t num_poly = 1) { + constexpr int degree = block_size * elems_per_thread; + for (int z = 0; z < num_poly; z++) { + T *acc_slice = (T *)acc + (ptrdiff_t)(z * degree); + T *result_acc_slice = (T *)result_acc + (ptrdiff_t)(z * degree); + int tid = threadIdx.x; + for (int i = 0; i < elems_per_thread; i++) { + if (j < degree) { + // if (tid < j) + // result_acc_slice[tid] = -acc_slice[tid - j + degree]-acc_slice[tid]; + // else + // result_acc_slice[tid] = acc_slice[tid - j] - acc_slice[tid]; + int x = tid - j + SEL(0, degree, tid < j); + result_acc_slice[tid] = + SEL(1, -1, tid < j) * acc_slice[x] - acc_slice[tid]; + } else { + int32_t jj = j - degree; + // if (tid < jj) + // result_acc_slice[tid] = acc_slice[tid - jj + degree]-acc_slice[tid]; + // else + // result_acc_slice[tid] = -acc_slice[tid - jj] - acc_slice[tid]; + int x = tid - jj + SEL(0, degree, tid < jj); + result_acc_slice[tid] = + SEL(-1, 1, tid < jj) * acc_slice[x] - acc_slice[tid]; + } + tid += block_size; + } + } +} + +/* + * Receives num_poly concatenated polynomials of type T. For each performs a + * rounding to increase accuracy of the PBS. Calculates inplace. + * + * By default, it works on a single polynomial. 
+ */ +template +__device__ void round_to_closest_multiple_inplace(T *rotated_acc, int base_log, + int level_count, + uint32_t num_poly = 1) { + constexpr int degree = block_size * elems_per_thread; + for (int z = 0; z < num_poly; z++) { + T *rotated_acc_slice = (T *)rotated_acc + (ptrdiff_t)(z * degree); + int tid = threadIdx.x; + for (int i = 0; i < elems_per_thread; i++) { + T x_acc = rotated_acc_slice[tid]; + T shift = sizeof(T) * 8 - level_count * base_log; + T mask = 1ll << (shift - 1); + T b_acc = (x_acc & mask) >> (shift - 1); + T res_acc = x_acc >> shift; + res_acc += b_acc; + res_acc <<= shift; + rotated_acc_slice[tid] = res_acc; + tid = tid + block_size; + } + } +} + +template +__device__ void add_to_torus(double2 *m_values, Torus *result, + bool init_torus = false) { + Torus mx = (sizeof(Torus) == 4) ? UINT32_MAX : UINT64_MAX; + int tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt / 2; i++) { + double v1 = m_values[tid].x; + double v2 = m_values[tid].y; + + double frac = v1 - floor(v1); + frac *= mx; + double carry = frac - floor(frac); + frac += (carry >= 0.5); + + Torus V1 = 0; + typecast_double_to_torus(frac, V1); + + frac = v2 - floor(v2); + frac *= mx; + carry = frac - floor(frac); // round on the scaled value, exactly as for V1 + frac += (carry >= 0.5); + + Torus V2 = 0; + typecast_double_to_torus(frac, V2); + + if (init_torus) { + result[tid] = V1; + result[tid + params::degree / 2] = V2; + } else { + result[tid] += V1; + result[tid + params::degree / 2] += V2; + } + tid = tid + params::degree / params::opt; + } +} + +// Extracts the body of a GLWE. 
+// k is the offset to find the body element / polynomial in the lwe_array_out / +// accumulator +template +__device__ void sample_extract_body(Torus *lwe_array_out, Torus *accumulator, + uint32_t k) { + // Set first coefficient of the accumulator as the body of the LWE sample + lwe_array_out[k * params::degree] = accumulator[k * params::degree]; +} + +// Extracts the mask from num_poly polynomials individually +template +__device__ void sample_extract_mask(Torus *lwe_array_out, Torus *accumulator, + uint32_t num_poly = 1) { + for (int z = 0; z < num_poly; z++) { + Torus *lwe_array_out_slice = + (Torus *)lwe_array_out + (ptrdiff_t)(z * params::degree); + Torus *accumulator_slice = + (Torus *)accumulator + (ptrdiff_t)(z * params::degree); + + // Set ACC = -ACC + int tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt; i++) { + accumulator_slice[tid] = -accumulator_slice[tid]; + tid = tid + params::degree / params::opt; + } + synchronize_threads_in_block(); + + // Reverse the accumulator + tid = threadIdx.x; + Torus result[params::opt]; +#pragma unroll + for (int i = 0; i < params::opt; i++) { + result[i] = accumulator_slice[params::degree - tid - 1]; + tid = tid + params::degree / params::opt; + } + synchronize_threads_in_block(); + tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt; i++) { + accumulator_slice[tid] = result[i]; + tid = tid + params::degree / params::opt; + } + synchronize_threads_in_block(); + + // Perform ACC * X + // (equivalent to multiply_by_monomial_negacyclic_inplace(1)) + tid = threadIdx.x; + result[params::opt]; + for (int i = 0; i < params::opt; i++) { + // if (tid < 1) + // result[i] = -accumulator_slice[tid - 1 + params::degree]; + // else + // result[i] = accumulator_slice[tid - 1]; + int x = tid - 1 + SEL(0, params::degree, tid < 1); + result[i] = SEL(1, -1, tid < 1) * accumulator_slice[x]; + tid += params::degree / params::opt; + } + synchronize_threads_in_block(); + tid = threadIdx.x; + for (int i = 
0; i < params::opt; i++) { + accumulator_slice[tid] = result[i]; + tid += params::degree / params::opt; + } + synchronize_threads_in_block(); + + // Copy to the mask of the LWE sample + tid = threadIdx.x; +#pragma unroll + for (int i = 0; i < params::opt; i++) { + lwe_array_out_slice[tid] = accumulator_slice[tid]; + tid = tid + params::degree / params::opt; + } + } +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/polynomial/parameters.cuh b/backends/tfhe-cuda-backend/implementation/src/polynomial/parameters.cuh new file mode 100644 index 000000000..d165c7394 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/polynomial/parameters.cuh @@ -0,0 +1,91 @@ +#ifndef CUDA_PARAMETERS_CUH +#define CUDA_PARAMETERS_CUH + +constexpr int log2(int n) { return (n <= 2) ? 1 : 1 + log2(n / 2); } + +constexpr int choose_opt_amortized(int degree) { + if (degree <= 1024) + return 4; + else if (degree == 2048) + return 8; + else if (degree == 4096) + return 16; + else if (degree == 8192) + return 32; + else + return 64; +} + +constexpr int choose_opt(int degree) { + if (degree <= 1024) + return 4; + else if (degree == 2048) + return 4; + else if (degree == 4096) + return 4; + else if (degree == 8192) + return 8; + else if (degree == 16384) + return 16; + else + return 64; +} +template class HalfDegree { +public: + constexpr static int degree = params::degree / 2; + constexpr static int opt = params::opt / 2; + constexpr static int log2_degree = params::log2_degree - 1; +}; + +template class Degree { +public: + constexpr static int degree = N; + constexpr static int opt = choose_opt(N); + constexpr static int log2_degree = log2(N); +}; + +template class AmortizedDegree { +public: + constexpr static int degree = N; + constexpr static int opt = choose_opt_amortized(N); + constexpr static int log2_degree = log2(N); +}; +enum sharedMemDegree { + NOSM = 0, + PARTIALSM = 1, + FULLSM = 2 + +}; + +class ForwardFFT { +public: + constexpr static int direction 
= 0; +}; + +class BackwardFFT { +public: + constexpr static int direction = 1; +}; + +struct ReorderFFT { + constexpr static int reorder = 1; +}; +struct NoReorderFFT { + constexpr static int reorder = 0; +}; + +template +class FFTDegree : public params { +public: + constexpr static int fft_direction = direction::direction; + constexpr static int fft_reorder = reorder::reorder; +}; + +template +class FFTParams : public Degree { +public: + constexpr static int fft_direction = direction::direction; + constexpr static int fft_reorder = reorder::reorder; +}; + +#endif // CUDA_PARAMETERS_CUH diff --git a/backends/tfhe-cuda-backend/implementation/src/polynomial/polynomial_math.cuh b/backends/tfhe-cuda-backend/implementation/src/polynomial/polynomial_math.cuh new file mode 100644 index 000000000..3997d5151 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/polynomial/polynomial_math.cuh @@ -0,0 +1,86 @@ +#ifndef CUDA_POLYNOMIAL_MATH_CUH +#define CUDA_POLYNOMIAL_MATH_CUH + +#include "crypto/torus.cuh" +#include "parameters.cuh" + +template +__device__ T *get_chunk(T *data, int chunk_num, int chunk_size) { + int pos = chunk_num * chunk_size; + T *ptr = &data[pos]; + return ptr; +} + +template +__device__ void sub_polynomial(FT *result, FT *first, FT *second) { + int tid = threadIdx.x; + for (int i = 0; i < params::opt; i++) { + result[tid] = first[tid] - second[tid]; + tid += params::degree / params::opt; + } +} + +template +__device__ void polynomial_product_in_fourier_domain(T *result, T *first, + T *second) { + int tid = threadIdx.x; + for (int i = 0; i < params::opt / 2; i++) { + result[tid] = first[tid] * second[tid]; + tid += params::degree / params::opt; + } + + if (threadIdx.x == 0) { + result[params::degree / 2] = + first[params::degree / 2] * second[params::degree / 2]; + } +} + +// Computes result += first * second +// If init_accumulator is set, assumes that result was not initialized and does +// that with the outcome of first * second +template 
+__device__ void +polynomial_product_accumulate_in_fourier_domain(T *result, T *first, T *second, + bool init_accumulator = false) { + int tid = threadIdx.x; + if (init_accumulator) { + for (int i = 0; i < params::opt / 2; i++) { + result[tid] = first[tid] * second[tid]; + tid += params::degree / params::opt; + } + } else { + for (int i = 0; i < params::opt / 2; i++) { + result[tid] += first[tid] * second[tid]; + tid += params::degree / params::opt; + } + } +} + +// Computes result += poly * X^monomial_degree (negacyclic). If init_accumulator +// is set, result is overwritten with that product instead of accumulated into +template +__device__ void +polynomial_product_accumulate_by_monomial(T *result, T *poly, + uint64_t monomial_degree, + bool init_accumulator = false) { + // monomial_degree \in [0, 2 * params::degree) + int full_cycles_count = monomial_degree / params::degree; + int remainder_degrees = monomial_degree % params::degree; + + int pos = threadIdx.x; + for (int i = 0; i < params::opt; i++) { + T element = poly[pos]; + int new_pos = (pos + monomial_degree) % params::degree; + + T x = SEL(element, -element, full_cycles_count % 2); // monomial coefficient + x = SEL(-x, x, new_pos >= remainder_degrees); + + if (init_accumulator) + result[new_pos] = x; + else + result[new_pos] += x; + pos += params::degree / params::opt; + } +} + +#endif // CUDA_POLYNOMIAL_MATH_CUH diff --git a/backends/tfhe-cuda-backend/implementation/src/types/complex/operations.cuh b/backends/tfhe-cuda-backend/implementation/src/types/complex/operations.cuh new file mode 100644 index 000000000..0e4f8e11e --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/types/complex/operations.cuh @@ -0,0 +1,97 @@ +#ifndef GPU_BOOTSTRAP_COMMON_CUH +#define GPU_BOOTSTRAP_COMMON_CUH + +#include +#include +#include + +#define SNT 1 +#define dPI 6.283185307179586231995926937088 + +using sTorus = int32_t; +// using Torus = uint32_t; +using sTorus = int32_t; +using u32 = uint32_t; +using i32 = int32_t; + 
+//-------------------------------------------------- +// Basic double2 operations + +__device__ inline double2 conjugate(const double2 num) { + double2 res; + res.x = num.x; + res.y = -num.y; + return res; +} + +__device__ inline void operator+=(double2 &lh, const double2 rh) { + lh.x += rh.x; + lh.y += rh.y; +} + +__device__ inline void operator-=(double2 &lh, const double2 rh) { + lh.x -= rh.x; + lh.y -= rh.y; +} + +__device__ inline double2 operator+(const double2 a, const double2 b) { + double2 res; + res.x = a.x + b.x; + res.y = a.y + b.y; + return res; +} + +__device__ inline double2 operator-(const double2 a, const double2 b) { + double2 res; + res.x = a.x - b.x; + res.y = a.y - b.y; + return res; +} + +__device__ inline double2 operator*(const double2 a, const double2 b) { + double xx = a.x * b.x; + double xy = a.x * b.y; + double yx = a.y * b.x; + double yy = a.y * b.y; + + double2 res; + // asm volatile("fma.rn.f64 %0, %1, %2, %3;": "=d"(res.x) : "d"(a.x), + // "d"(b.x), "d"(yy)); + res.x = xx - yy; + res.y = xy + yx; + return res; +} + +__device__ inline double2 operator*(const double2 a, double b) { + double2 res; + res.x = a.x * b; + res.y = a.y * b; + return res; +} + +__device__ inline void operator*=(double2 &a, const double2 b) { + double tmp = a.x; + a.x *= b.x; + a.x -= a.y * b.y; + a.y *= b.x; + a.y += b.y * tmp; +} + +__device__ inline void operator*=(double2 &a, const double b) { + a.x *= b; + a.y *= b; +} + +__device__ inline void operator/=(double2 &a, const double b) { + a.x /= b; + a.y /= b; +} + +__device__ inline double2 operator*(double a, double2 b) { + double2 res; + res.x = b.x * a; + res.y = b.y * a; + return res; +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/types/int128.cuh b/backends/tfhe-cuda-backend/implementation/src/types/int128.cuh new file mode 100644 index 000000000..ae925fadb --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/types/int128.cuh @@ -0,0 +1,76 @@ +#ifndef 
CNCRT_INT128_CUH +#define CNCRT_INT128_CUH + +// abseil's int128 type +// licensed under Apache license + +class uint128 { +public: + __device__ uint128(uint64_t high, uint64_t low) : hi_(high), lo_(low) {} + + uint64_t lo_; + uint64_t hi_; +}; + +class int128 { +public: + int128() = default; + + __device__ operator unsigned long long() const { + return static_cast(lo_); + } + + __device__ int128(int64_t high, uint64_t low) : hi_(high), lo_(low) {} + + uint64_t lo_; + int64_t hi_; +}; + +__device__ inline uint128 make_uint128(uint64_t high, uint64_t low) { + return uint128(high, low); +} + +template __device__ uint128 make_uint128_from_float(T v) { + if (v >= ldexp(static_cast(1), 64)) { + uint64_t hi = static_cast(ldexp(v, -64)); + uint64_t lo = static_cast(v - ldexp(static_cast(hi), 64)); + return make_uint128(hi, lo); + } + + return make_uint128(0, static_cast(v)); +} + +__device__ inline int128 make_int128(int64_t high, uint64_t low) { + return int128(high, low); +} + +__device__ inline int64_t bitcast_to_signed(uint64_t v) { + return v & (uint64_t{1} << 63) ? ~static_cast(~v) + : static_cast(v); +} + +__device__ inline uint64_t uint128_high64(uint128 v) { return v.hi_; } +__device__ inline uint64_t uint128_low64(uint128 v) { return v.lo_; } + +__device__ __forceinline__ uint128 operator-(uint128 val) { + uint64_t hi = ~uint128_high64(val); + uint64_t lo = ~uint128_low64(val) + 1; + if (lo == 0) + ++hi; // carry + return make_uint128(hi, lo); +} + +template __device__ int128 make_int128_from_float(T v) { + + // We must convert the absolute value and then negate as needed, because + // floating point types are typically sign-magnitude. Otherwise, the + // difference between the high and low 64 bits when interpreted as two's + // complement overwhelms the precision of the mantissa. + uint128 result = + v < 0 ? 
-make_uint128_from_float(-v) : make_uint128_from_float(v); + + return make_int128(bitcast_to_signed(uint128_high64(result)), + uint128_low64(result)); +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/utils/helper.cuh b/backends/tfhe-cuda-backend/implementation/src/utils/helper.cuh new file mode 100644 index 000000000..46716b46b --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/utils/helper.cuh @@ -0,0 +1,36 @@ +#ifndef HELPER_CUH +#define HELPER_CUH + +#include + +template __global__ void print_debug_kernel(T *src, int N) { + for (int i = 0; i < N; i++) { + printf("%lu, ", src[i]); + } +} + +template void print_debug(const char *name, T *src, int N) { + printf("%s: ", name); + cudaDeviceSynchronize(); + print_debug_kernel<<<1, 1>>>(src, N); + cudaDeviceSynchronize(); + printf("\n"); +} + +template +__global__ void print_body_kernel(T *src, int N, int lwe_dimension) { + for (int i = 0; i < N; i++) { + printf("%lu, ", src[i * (lwe_dimension + 1) + lwe_dimension]); + } +} + +template +void print_body(const char *name, T *src, int n, int lwe_dimension) { + printf("%s: ", name); + cudaDeviceSynchronize(); + print_body_kernel<<<1, 1>>>(src, n, lwe_dimension); + cudaDeviceSynchronize(); + printf("\n"); +} + +#endif diff --git a/backends/tfhe-cuda-backend/implementation/src/utils/kernel_dimensions.cuh b/backends/tfhe-cuda-backend/implementation/src/utils/kernel_dimensions.cuh new file mode 100644 index 000000000..156aa8617 --- /dev/null +++ b/backends/tfhe-cuda-backend/implementation/src/utils/kernel_dimensions.cuh @@ -0,0 +1,21 @@ +#ifndef KERNEL_DIMENSIONS_CUH +#define KERNEL_DIMENSIONS_CUH + +inline int nextPow2(int x) { + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return ++x; +} + +inline void getNumBlocksAndThreads(const int n, const int maxBlockSize, + int &blocks, int &threads) { + threads = + (n < maxBlockSize * 2) ? 
max(128, nextPow2((n + 1) / 2)) : maxBlockSize; + blocks = (n + threads - 1) / threads; +} + +#endif // KERNEL_DIMENSIONS_H diff --git a/backends/tfhe-cuda-backend/rust_api/Cargo.toml b/backends/tfhe-cuda-backend/rust_api/Cargo.toml new file mode 100644 index 000000000..27c6164c8 --- /dev/null +++ b/backends/tfhe-cuda-backend/rust_api/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "tfhe-cuda-backend" +version = "0.1.2" +edition = "2021" +authors = ["Zama team"] +license = "BSD-3-Clause-Clear" +description = "Cuda implementation of TFHE-rs primitives." +homepage = "https://www.zama.ai/" +documentation = "https://docs.zama.ai/tfhe-rs" +repository = "https://github.com/zama-ai/tfhe-rs" +readme = "README.md" +keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"] + +[build-dependencies] +cmake = { version = "0.1" } + +[dependencies] +thiserror = "1.0" diff --git a/backends/tfhe-cuda-backend/rust_api/LICENSE b/backends/tfhe-cuda-backend/rust_api/LICENSE new file mode 100644 index 000000000..c04b2b236 --- /dev/null +++ b/backends/tfhe-cuda-backend/rust_api/LICENSE @@ -0,0 +1,28 @@ +BSD 3-Clause Clear License + +Copyright © 2023 ZAMA. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or other +materials provided with the distribution. + +3. Neither the name of ZAMA nor the names of its contributors may be used to endorse +or promote products derived from this software without specific prior written permission. + +NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE. 
+THIS SOFTWARE IS PROVIDED BY THE ZAMA AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +ZAMA OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/backends/tfhe-cuda-backend/rust_api/build.rs b/backends/tfhe-cuda-backend/rust_api/build.rs new file mode 100644 index 000000000..7a1a2219a --- /dev/null +++ b/backends/tfhe-cuda-backend/rust_api/build.rs @@ -0,0 +1,28 @@ +use std::env; +use std::process::Command; + +fn main() { + println!("Build tfhe-cuda-backend"); + if env::consts::OS == "linux" { + let output = Command::new("./get_os_name.sh").output().unwrap(); + let distribution = String::from_utf8(output.stdout).unwrap(); + if distribution != "Ubuntu\n" { + println!( + "cargo:warning=This Linux distribution is not officially supported. \ + Only Ubuntu is supported by tfhe-cuda-backend at this time. 
Build may fail\n" + ); + } + let dest = cmake::build("../implementation"); + println!("cargo:rustc-link-search=native={}", dest.display()); + println!("cargo:rustc-link-lib=static=tfhe_cuda_backend"); + println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64"); + println!("cargo:rustc-link-lib=gomp"); + println!("cargo:rustc-link-lib=cudart"); + println!("cargo:rustc-link-search=native=/usr/lib/x86_64-linux-gnu/"); + println!("cargo:rustc-link-lib=stdc++"); + } else { + panic!( + "Error: platform not supported, tfhe-cuda-backend not built (only Linux is supported)" + ); + } +} diff --git a/backends/tfhe-cuda-backend/rust_api/get_os_name.sh b/backends/tfhe-cuda-backend/rust_api/get_os_name.sh new file mode 100755 index 000000000..331ff5c13 --- /dev/null +++ b/backends/tfhe-cuda-backend/rust_api/get_os_name.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +cat /etc/os-release | grep "\" | sed "s/NAME=\"//g" | sed "s/\"//g" diff --git a/backends/tfhe-cuda-backend/rust_api/src/cuda_bind.rs b/backends/tfhe-cuda-backend/rust_api/src/cuda_bind.rs new file mode 100644 index 000000000..abc06915c --- /dev/null +++ b/backends/tfhe-cuda-backend/rust_api/src/cuda_bind.rs @@ -0,0 +1,794 @@ +use std::ffi::c_void; + +#[link(name = "tfhe_cuda_backend", kind = "static")] +extern "C" { + + /// Create a new Cuda stream on GPU `gpu_index` + pub fn cuda_create_stream(gpu_index: u32) -> *mut c_void; + + /// Destroy the Cuda stream `v_stream` on GPU `gpu_index` + pub fn cuda_destroy_stream(v_stream: *mut c_void) -> i32; + + /// Allocate `size` memory on GPU `gpu_index` asynchronously + pub fn cuda_malloc_async(size: u64, v_stream: *const c_void) -> *mut c_void; + + /// Copy `size` memory asynchronously from `src` on GPU `gpu_index` to `dest` on CPU using + /// the Cuda stream `v_stream`. 
+ pub fn cuda_memcpy_async_to_cpu( + dest: *mut c_void, + src: *const c_void, + size: u64, + v_stream: *const c_void, + ) -> i32; + + /// Copy `size` memory asynchronously from `src` on CPU to `dest` on GPU `gpu_index` using + /// the Cuda stream `v_stream`. + pub fn cuda_memcpy_async_to_gpu( + dest: *mut c_void, + src: *const c_void, + size: u64, + v_stream: *const c_void, + ) -> i32; + + /// Copy `size` memory asynchronously from `src` to `dest` on the same GPU `gpu_index` using + /// the Cuda stream `v_stream`. + pub fn cuda_memcpy_async_gpu_to_gpu( + dest: *mut c_void, + src: *const c_void, + size: u64, + v_stream: *const c_void, + ) -> i32; + + /// Set `size` memory at `dest` on the GPU to `value` asynchronously, using + /// the Cuda stream `v_stream`. + pub fn cuda_memset_async( + dest: *mut c_void, + value: u64, + size: u64, + v_stream: *const c_void, + ) -> i32; + + /// Get the total number of Nvidia GPUs detected on the platform + pub fn cuda_get_number_of_gpus() -> i32; + + /// Synchronize all streams on GPU `gpu_index` + pub fn cuda_synchronize_device(gpu_index: u32) -> i32; + + /// Synchronize Cuda stream + pub fn cuda_synchronize_stream(v_stream: *const c_void) -> i32; + + /// Free memory for pointer `ptr` on GPU `gpu_index` asynchronously, using stream `v_stream` + pub fn cuda_drop_async(ptr: *mut c_void, v_stream: *const c_void) -> i32; + + /// Free memory for pointer `ptr` on GPU `gpu_index` synchronously + pub fn cuda_drop(ptr: *mut c_void) -> i32; + + /// Get the maximum amount of shared memory on GPU `gpu_index` + pub fn cuda_get_max_shared_memory(gpu_index: u32) -> i32; + + /// Copy a bootstrap key `src` represented with 64 bits in the standard domain from the CPU to + /// the GPU `gpu_index` using the stream `v_stream`, and convert it to the Fourier domain on the + /// GPU. The resulting bootstrap key `dest` on the GPU is an array of f64 values. 
+ pub fn cuda_convert_lwe_bootstrap_key_64( + dest: *mut c_void, + src: *const c_void, + v_stream: *const c_void, + input_lwe_dim: u32, + glwe_dim: u32, + level_count: u32, + polynomial_size: u32, + ); + + /// Copy a multi-bit bootstrap key `src` represented with 64 bits in the standard domain from + /// the CPU to the GPU `gpu_index` using the stream `v_stream`. The resulting bootstrap key + /// `dest` on the GPU is an array of uint64_t values. + pub fn cuda_convert_lwe_multi_bit_bootstrap_key_64( + dest: *mut c_void, + src: *const c_void, + v_stream: *const c_void, + input_lwe_dim: u32, + glwe_dim: u32, + level_count: u32, + polynomial_size: u32, + grouping_factor: u32, + ); + + /// Copy `number_of_cts` LWE ciphertext represented with 64 bits in the standard domain from the + /// CPU to the GPU `gpu_index` using the stream `v_stream`. All ciphertexts must be + /// concatenated. + pub fn cuda_convert_lwe_ciphertext_vector_to_gpu_64( + dest: *mut c_void, + src: *mut c_void, + v_stream: *const c_void, + number_of_cts: u32, + lwe_dimension: u32, + ); + + /// Copy `number_of_cts` LWE ciphertext represented with 64 bits in the standard domain from the + /// GPU to the CPU `gpu_index` using the stream `v_stream`. All ciphertexts must be + /// concatenated. + pub fn cuda_convert_lwe_ciphertext_vector_to_cpu_64( + dest: *mut c_void, + src: *mut c_void, + v_stream: *const c_void, + number_of_cts: u32, + lwe_dimension: u32, + ); + + /// This scratch function allocates the necessary amount of data on the GPU for + /// the low latency PBS on 64-bit inputs, into `pbs_buffer`. It also configures SM + /// options on the GPU in case FULLSM or PARTIALSM mode are going to be used. 
+ pub fn scratch_cuda_bootstrap_low_latency_64( + v_stream: *const c_void, + pbs_buffer: *mut *mut i8, + glwe_dimension: u32, + polynomial_size: u32, + level_count: u32, + input_lwe_ciphertext_count: u32, + max_shared_memory: u32, + allocate_gpu_memory: bool, + ); + + /// Perform bootstrapping on a batch of input u64 LWE ciphertexts. + /// + /// - `v_stream` is a void pointer to the Cuda stream to be used in the kernel launch + /// - `gpu_index` is the index of the GPU to be used in the kernel launch + /// - `lwe_array_out`: output batch of num_samples bootstrapped ciphertexts c = + /// (a0,..an-1,b) where n is the LWE dimension + /// - `lut_vector`: should hold as many test vectors of size polynomial_size + /// as there are input ciphertexts, but actually holds + /// `num_lut_vectors` vectors to reduce memory usage + /// - `lut_vector_indexes`: stores the index corresponding to + /// which test vector to use for each sample in + /// `lut_vector` + /// - `lwe_array_in`: input batch of num_samples LWE ciphertexts, containing n + /// mask values + 1 body value + /// - `bootstrapping_key`: GGSW encryption of the LWE secret key sk1 + /// under secret key sk2. + /// bsk = Z + sk1 H + /// where H is the gadget matrix and Z is a matrix (k+1).l + /// containing GLWE encryptions of 0 under sk2. + /// bsk is thus a tensor of size (k+1)^2.l.N.n + /// where l is the number of decomposition levels and + /// k is the GLWE dimension, N is the polynomial size for + /// GLWE. The polynomial size for GLWE and the test vector + /// are the same because they have to be in the same ring + /// to be multiplied. + /// - `pbs_buffer`: a preallocated buffer to store temporary results + /// - `lwe_dimension`: size of the Torus vector used to encrypt the input + /// LWE ciphertexts - referred to as n above (~ 600) + /// - `glwe_dimension`: size of the polynomial vector used to encrypt the LUT + /// GLWE ciphertexts - referred to as k above. Only the value 1 is supported for this parameter. 
+ /// - `polynomial_size`: size of the test polynomial (test vector) and size of the + /// GLWE polynomial (~1024) + /// - `base_log`: log base used for the gadget matrix - B = 2^base_log (~8) + /// - `level_count`: number of decomposition levels in the gadget matrix (~4) + /// - `num_samples`: number of encrypted input messages + /// - `num_lut_vectors`: parameter to set the actual number of test vectors to be + /// used + /// - `lwe_idx`: the index of the LWE input to consider for the GPU of index gpu_index. In + /// case of multi-GPU computing, it is assumed that only a part of the input LWE array is + /// copied to each GPU, but the whole LUT array is copied (because the case when the number + /// of LUTs is smaller than the number of input LWEs is not trivial to take into account in + /// the data repartition on the GPUs). `lwe_idx` is used to determine which LUT to consider + /// for a given LWE input in the LUT array `lut_vector`. + /// - `max_shared_memory` maximum amount of shared memory to be used inside + /// device functions + /// + /// This function calls a wrapper to a device kernel that performs the + /// bootstrapping: + /// - the kernel is templatized based on integer discretization and + /// polynomial degree + /// - num_samples * level_count * (glwe_dimension + 1) blocks of threads are launched, where + /// each thread is going to handle one or more polynomial coefficients at each stage, + /// for a given level of decomposition, either for the LUT mask or its body: + /// - perform the blind rotation + /// - round the result + /// - get the decomposition for the current level + /// - switch to the FFT domain + /// - multiply with the bootstrapping key + /// - come back to the coefficients representation + /// - between each stage a synchronization of the threads is necessary (some + /// synchronizations + /// happen at the block level, some happen between blocks, using cooperative groups). 
+ /// - in case the device has enough shared memory, temporary arrays used for + /// the different stages (accumulators) are stored into the shared memory + /// - the accumulators serve to combine the results for all decomposition + /// levels + /// - the constant memory (64K) is used for storing the roots of unity + /// values for the FFT + pub fn cuda_bootstrap_low_latency_lwe_ciphertext_vector_64( + v_stream: *const c_void, + lwe_array_out: *mut c_void, + lwe_output_indexes: *const c_void, + lut_vector: *const c_void, + lut_vector_indexes: *const c_void, + lwe_array_in: *const c_void, + lwe_input_indexes: *const c_void, + bootstrapping_key: *const c_void, + pbs_buffer: *mut i8, + lwe_dimension: u32, + glwe_dimension: u32, + polynomial_size: u32, + base_log: u32, + level: u32, + num_samples: u32, + num_lut_vectors: u32, + lwe_idx: u32, + max_shared_memory: u32, + ); + + /// This cleanup function frees the data for the low latency PBS on GPU + /// contained in pbs_buffer for 32 or 64-bit inputs. + pub fn cleanup_cuda_bootstrap_low_latency(v_stream: *const c_void, pbs_buffer: *mut *mut i8); + + /// This scratch function allocates the necessary amount of data on the GPU for + /// the multi-bit PBS on 64-bit inputs into `pbs_buffer`. + pub fn scratch_cuda_multi_bit_pbs_64( + v_stream: *const c_void, + pbs_buffer: *mut *mut i8, + lwe_dimension: u32, + glwe_dimension: u32, + polynomial_size: u32, + level_count: u32, + grouping_factor: u32, + input_lwe_ciphertext_count: u32, + max_shared_memory: u32, + allocate_gpu_memory: bool, + lwe_chunk_size: u32, + ); + + /// Perform bootstrapping on a batch of input u64 LWE ciphertexts using the multi-bit algorithm.
+ /// + /// - `v_stream` is a void pointer to the Cuda stream to be used in the kernel launch + /// - `gpu_index` is the index of the GPU to be used in the kernel launch + /// - `lwe_array_out`: output batch of num_samples bootstrapped ciphertexts c = + /// (a0,..an-1,b) where n is the LWE dimension + /// - `lut_vector`: should hold as many test vectors of size polynomial_size + /// as there are input ciphertexts, but actually holds + /// `num_lut_vectors` vectors to reduce memory usage + /// - `lut_vector_indexes`: stores the index corresponding to + /// which test vector to use for each sample in + /// `lut_vector` + /// - `lwe_array_in`: input batch of num_samples LWE ciphertexts, containing n + /// mask values + 1 body value + /// - `bootstrapping_key`: GGSW encryption of elements of the LWE secret key as in the + /// classical PBS, but this time we follow Zhou's trick and encrypt combinations of elements + /// of the key + /// - `pbs_buffer`: a preallocated buffer to store temporary results + /// - `lwe_dimension`: size of the Torus vector used to encrypt the input + /// LWE ciphertexts - referred to as n above (~ 600) + /// - `glwe_dimension`: size of the polynomial vector used to encrypt the LUT + /// GLWE ciphertexts - referred to as k above. Only the value 1 is supported for this parameter. + /// - `polynomial_size`: size of the test polynomial (test vector) and size of the + /// GLWE polynomial (~1024) + /// - `grouping_factor`: number of elements of the LWE secret key combined per GGSW of the + /// bootstrap key + /// - `base_log`: log base used for the gadget matrix - B = 2^base_log (~8) + /// - `level_count`: number of decomposition levels in the gadget matrix (~4) + /// - `num_samples`: number of encrypted input messages + /// - `num_lut_vectors`: parameter to set the actual number of test vectors to be + /// used + /// - `lwe_idx`: the index of the LWE input to consider for the GPU of index gpu_index. 
In + /// case of multi-GPU computing, it is assumed that only a part of the input LWE array is + /// copied to each GPU, but the whole LUT array is copied (because the case when the number + /// of LUTs is smaller than the number of input LWEs is not trivial to take into account in + /// the data repartition on the GPUs). `lwe_idx` is used to determine which LUT to consider + /// for a given LWE input in the LUT array `lut_vector`. + /// - `max_shared_memory` maximum amount of shared memory to be used inside + /// device functions + pub fn cuda_multi_bit_pbs_lwe_ciphertext_vector_64( + v_stream: *const c_void, + lwe_array_out: *mut c_void, + lwe_output_indexes: *const c_void, + lut_vector: *const c_void, + lut_vector_indexes: *const c_void, + lwe_array_in: *const c_void, + lwe_input_indexes: *const c_void, + bootstrapping_key: *const c_void, + pbs_buffer: *mut i8, + lwe_dimension: u32, + glwe_dimension: u32, + polynomial_size: u32, + grouping_factor: u32, + base_log: u32, + level: u32, + num_samples: u32, + num_lut_vectors: u32, + lwe_idx: u32, + max_shared_memory: u32, + lwe_chunk_size: u32, + ); + + /// This cleanup function frees the data for the multi-bit PBS on GPU + /// contained in pbs_buffer for 64-bit inputs. + pub fn cleanup_cuda_multi_bit_pbs(v_stream: *const c_void, pbs_buffer: *mut *mut i8); + + /// Perform keyswitch on a batch of 64 bits input LWE ciphertexts. 
+ /// + /// - `v_stream` is a void pointer to the Cuda stream to be used in the kernel launch + /// - `gpu_index` is the index of the GPU to be used in the kernel launch + /// - `lwe_array_out`: output batch of num_samples keyswitched ciphertexts c = + /// (a0,..an-1,b) where n is the output LWE dimension (lwe_dimension_out) + /// - `lwe_array_in`: input batch of num_samples LWE ciphertexts, containing lwe_dimension_in + /// mask values + 1 body value + /// - `ksk`: the keyswitch key to be used in the operation + /// - `base_log`: the log of the base used in the decomposition (should be the one used to + /// create the ksk). + /// - `level_count`: the number of levels used in the decomposition (should be the one used to + /// create the ksk). + /// - `num_samples`: the number of input and output LWE ciphertexts. + /// + /// This function calls a wrapper to a device kernel that performs the keyswitch. + /// `num_samples` blocks of threads are launched + pub fn cuda_keyswitch_lwe_ciphertext_vector_64( + v_stream: *const c_void, + lwe_array_out: *mut c_void, + lwe_output_indexes: *const c_void, + lwe_array_in: *const c_void, + lwe_input_indexes: *const c_void, + keyswitch_key: *const c_void, + input_lwe_dimension: u32, + output_lwe_dimension: u32, + base_log: u32, + level_count: u32, + num_samples: u32, + ); + + /// Perform the negation of a u64 input LWE ciphertext vector. + /// - `v_stream` is a void pointer to the Cuda stream to be used in the kernel launch + /// - `gpu_index` is the index of the GPU to be used in the kernel launch + /// - `lwe_array_out` is an array of size + /// `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have been allocated on + /// the GPU before calling this function, and that will hold the result of the computation. + /// - `lwe_array_in` is the LWE ciphertext vector used as input, it should have been + /// allocated and initialized before calling this function. It has the same size as the output + /// array. 
+ /// - `input_lwe_dimension` is the number of mask elements in the input and in the output + /// ciphertext vectors + /// - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the input LWE + /// ciphertext vector, as well as in the output. + /// + /// Each element (mask element or body) of the input LWE ciphertext vector is negated. + /// The result is stored in the output LWE ciphertext vector. The input LWE ciphertext vector + /// is left unchanged. This function is a wrapper to a device function that performs the + /// operation on the GPU. + pub fn cuda_negate_lwe_ciphertext_vector_64( + v_stream: *const c_void, + lwe_array_out: *mut c_void, + lwe_array_in: *const c_void, + input_lwe_dimension: u32, + input_lwe_ciphertext_count: u32, + ); + + pub fn cuda_negate_integer_radix_ciphertext_64_inplace( + v_stream: *const c_void, + lwe_array: *mut c_void, + lwe_dimension: u32, + lwe_ciphertext_count: u32, + message_modulus: u32, + carry_modulus: u32, + ); + + /// Perform the addition of two u64 input LWE ciphertext vectors. + /// - `v_stream` is a void pointer to the Cuda stream to be used in the kernel launch + /// - `gpu_index` is the index of the GPU to be used in the kernel launch + /// - `lwe_array_out` is an array of size + /// `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have been allocated on + /// the GPU before calling this function, and that will hold the result of the computation. + /// - `lwe_array_in_1` is the first LWE ciphertext vector used as input, it should have been + /// allocated and initialized before calling this function. It has the same size as the output + /// array. + /// - `lwe_array_in_2` is the second LWE ciphertext vector used as input, it should have been + /// allocated and initialized before calling this function. It has the same size as the output + /// array.
+ /// - `input_lwe_dimension` is the number of mask elements in the two input and in the output + /// ciphertext vectors + /// - `input_lwe_ciphertext_count` is the number of ciphertexts contained in each input LWE + /// ciphertext vector, as well as in the output. + /// + /// Each element (mask element or body) of the input LWE ciphertext vector 1 is added to the + /// corresponding element in the input LWE ciphertext 2. The result is stored in the output LWE + /// ciphertext vector. The two input LWE ciphertext vectors are left unchanged. This function is + /// a wrapper to a device function that performs the operation on the GPU. + pub fn cuda_add_lwe_ciphertext_vector_64( + v_stream: *const c_void, + lwe_array_out: *mut c_void, + lwe_array_in_1: *const c_void, + lwe_array_in_2: *const c_void, + input_lwe_dimension: u32, + input_lwe_ciphertext_count: u32, + ); + + /// Perform the addition of a u64 input LWE ciphertext vector with a u64 input plaintext vector. + /// - `v_stream` is a void pointer to the Cuda stream to be used in the kernel launch + /// - `gpu_index` is the index of the GPU to be used in the kernel launch + /// - `lwe_array_out` is an array of size + /// `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have been allocated + /// on the GPU before calling this function, and that will hold the result of the computation. + /// - `lwe_array_in` is the LWE ciphertext vector used as input, it should have been + /// allocated and initialized before calling this function. It has the same size as the output + /// array. + /// - `plaintext_array_in` is the plaintext vector used as input, it should have been + /// allocated and initialized before calling this function. It should be of size + /// `input_lwe_ciphertext_count`. 
+ /// - `input_lwe_dimension` is the number of mask elements in the input and output LWE + /// ciphertext vectors + /// - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the input LWE + /// ciphertext vector, as well as in the output. It is also the number of plaintexts in the + /// input plaintext vector. + /// + /// Each plaintext of the input plaintext vector is added to the body of the corresponding LWE + /// ciphertext in the LWE ciphertext vector. The result of the operation is stored in the output + /// LWE ciphertext vector. The two input vectors are unchanged. This function is a + /// wrapper to a device function that performs the operation on the GPU. + pub fn cuda_add_lwe_ciphertext_vector_plaintext_vector_64( + v_stream: *const c_void, + lwe_array_out: *mut c_void, + lwe_array_in: *const c_void, + plaintext_array_in: *const c_void, + input_lwe_dimension: u32, + input_lwe_ciphertext_count: u32, + ); + + /// Perform the multiplication of a u64 input LWE ciphertext vector with a u64 input cleartext + /// vector. + /// - `v_stream` is a void pointer to the Cuda stream to be used in the kernel launch + /// - `gpu_index` is the index of the GPU to be used in the kernel launch + /// - `lwe_array_out` is an array of size + /// `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have been allocated + /// on the GPU before calling this function, and that will hold the result of the computation. + /// - `lwe_array_in` is the LWE ciphertext vector used as input, it should have been + /// allocated and initialized before calling this function. It has the same size as the output + /// array. + /// - `cleartext_array_in` is the cleartext vector used as input, it should have been + /// allocated and initialized before calling this function. It should be of size + /// `input_lwe_ciphertext_count`. 
+ /// - `input_lwe_dimension` is the number of mask elements in the input and output LWE + /// ciphertext vectors + /// - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the input LWE + /// ciphertext vector, as well as in the output. It is also the number of cleartexts in the + /// input cleartext vector. + /// + /// Each cleartext of the input cleartext vector is multiplied to the mask and body of the + /// corresponding LWE ciphertext in the LWE ciphertext vector. + /// The result of the operation is stored in the output + /// LWE ciphertext vector. The two input vectors are unchanged. This function is a + /// wrapper to a device function that performs the operation on the GPU. + pub fn cuda_mult_lwe_ciphertext_vector_cleartext_vector_64( + v_stream: *const c_void, + lwe_array_out: *mut c_void, + lwe_array_in: *const c_void, + cleartext_array_in: *const c_void, + input_lwe_dimension: u32, + input_lwe_ciphertext_count: u32, + ); + + pub fn scratch_cuda_integer_mult_radix_ciphertext_kb_64( + v_stream: *const c_void, + mem_ptr: *mut *mut i8, + message_modulus: u32, + carry_modulus: u32, + glwe_dimension: u32, + lwe_dimension: u32, + polynomial_size: u32, + pbs_base_log: u32, + pbs_level: u32, + ks_base_log: u32, + ks_level: u32, + grouping_factor: u32, + num_blocks: u32, + pbs_type: u32, + max_shared_memory: u32, + allocate_gpu_memory: bool, + ); + + pub fn cuda_integer_mult_radix_ciphertext_kb_64( + v_stream: *const c_void, + radix_lwe_out: *mut c_void, + radix_lwe_left: *const c_void, + radix_lwe_right: *const c_void, + bsk: *const c_void, + ksk: *const c_void, + mem_ptr: *mut i8, + message_modulus: u32, + carry_modulus: u32, + glwe_dimension: u32, + lwe_dimension: u32, + polynomial_size: u32, + pbs_base_log: u32, + pbs_level: u32, + ks_base_log: u32, + ks_level: u32, + grouping_factor: u32, + num_blocks: u32, + pbs_type: u32, + max_shared_memory: u32, + ); + + pub fn cleanup_cuda_integer_mult(v_stream: *const c_void, mem_ptr: *mut *mut 
i8); + + pub fn cuda_scalar_addition_integer_radix_ciphertext_64_inplace( + v_stream: *const c_void, + lwe_array: *mut c_void, + scalar_input: *const c_void, + lwe_dimension: u32, + lwe_ciphertext_count: u32, + message_modulus: u32, + carry_modulus: u32, + ); + + pub fn cuda_small_scalar_multiplication_integer_radix_ciphertext_64_inplace( + v_stream: *const c_void, + lwe_array: *mut c_void, + scalar_input: u64, + lwe_dimension: u32, + lwe_ciphertext_count: u32, + ); + + pub fn scratch_cuda_integer_radix_bitop_kb_64( + v_stream: *const c_void, + mem_ptr: *mut *mut i8, + glwe_dimension: u32, + polynomial_size: u32, + big_lwe_dimension: u32, + small_lwe_dimension: u32, + ks_level: u32, + ks_base_log: u32, + pbs_level: u32, + pbs_base_log: u32, + grouping_factor: u32, + num_blocks: u32, + message_modulus: u32, + carry_modulus: u32, + pbs_type: u32, + op_type: u32, + allocate_gpu_memory: bool, + ); + + pub fn cuda_bitop_integer_radix_ciphertext_kb_64( + v_stream: *const c_void, + radix_lwe_out: *mut c_void, + radix_lwe_left: *const c_void, + radix_lwe_right: *const c_void, + mem_ptr: *mut i8, + bsk: *const c_void, + ksk: *const c_void, + num_blocks: u32, + ); + + pub fn cuda_bitnot_integer_radix_ciphertext_kb_64( + v_stream: *const c_void, + radix_lwe_out: *mut c_void, + radix_lwe_in: *const c_void, + mem_ptr: *mut i8, + bsk: *const c_void, + ksk: *const c_void, + num_blocks: u32, + ); + + pub fn cuda_scalar_bitop_integer_radix_ciphertext_kb_64( + v_stream: *const c_void, + radix_lwe_output: *mut c_void, + radix_lwe_input: *mut c_void, + clear_blocks: *const c_void, + num_clear_blocks: u32, + mem_ptr: *mut i8, + bsk: *const c_void, + ksk: *const c_void, + num_blocks: u32, + op_type: u32, + ); + + pub fn cleanup_cuda_integer_bitop(v_stream: *const c_void, mem_ptr: *mut *mut i8); + + pub fn scratch_cuda_integer_radix_comparison_kb_64( + v_stream: *const c_void, + mem_ptr: *mut *mut i8, + glwe_dimension: u32, + polynomial_size: u32, + big_lwe_dimension: u32, + 
small_lwe_dimension: u32, + ks_level: u32, + ks_base_log: u32, + pbs_level: u32, + pbs_base_log: u32, + grouping_factor: u32, + num_blocks: u32, + message_modulus: u32, + carry_modulus: u32, + pbs_type: u32, + op_type: u32, + allocate_gpu_memory: bool, + ); + + pub fn cuda_comparison_integer_radix_ciphertext_kb_64( + v_stream: *const c_void, + radix_lwe_out: *mut c_void, + radix_lwe_left: *const c_void, + radix_lwe_right: *const c_void, + mem_ptr: *mut i8, + bsk: *const c_void, + ksk: *const c_void, + num_blocks: u32, + ); + + pub fn cuda_scalar_comparison_integer_radix_ciphertext_kb_64( + v_stream: *const c_void, + radix_lwe_out: *mut c_void, + radix_lwe_in: *const c_void, + scalar_blocks: *const c_void, + mem_ptr: *mut i8, + bsk: *const c_void, + ksk: *const c_void, + num_blocks: u32, + num_scalar_blocks: u32, + ); + + pub fn cleanup_cuda_integer_comparison(v_stream: *const c_void, mem_ptr: *mut *mut i8); + + pub fn scratch_cuda_full_propagation_64( + v_stream: *const c_void, + mem_ptr: *mut *mut i8, + lwe_dimension: u32, + glwe_dimension: u32, + polynomial_size: u32, + pbs_level: u32, + grouping_factor: u32, + num_blocks: u32, + message_modulus: u32, + carry_modulus: u32, + pbs_type: u32, + allocate_gpu_memory: bool, + ); + + pub fn cuda_full_propagation_64_inplace( + v_stream: *const c_void, + radix_lwe_right: *mut c_void, + mem_ptr: *mut i8, + ksk: *const c_void, + bsk: *const c_void, + lwe_dimension: u32, + glwe_dimension: u32, + polynomial_size: u32, + ks_base_log: u32, + ks_level: u32, + pbs_base_log: u32, + pbs_level: u32, + grouping_factor: u32, + num_blocks: u32, + ); + + pub fn cleanup_cuda_full_propagation(v_stream: *const c_void, mem_ptr: *mut *mut i8); + + pub fn scratch_cuda_integer_radix_scalar_shift_kb_64( + v_stream: *const c_void, + mem_ptr: *mut *mut i8, + glwe_dimension: u32, + polynomial_size: u32, + big_lwe_dimension: u32, + small_lwe_dimension: u32, + ks_level: u32, + ks_base_log: u32, + pbs_level: u32, + pbs_base_log: u32, + 
grouping_factor: u32, + num_blocks: u32, + message_modulus: u32, + carry_modulus: u32, + pbs_type: u32, + shift_type: u32, + allocate_gpu_memory: bool, + ); + + pub fn cuda_integer_radix_scalar_shift_kb_64_inplace( + v_stream: *const c_void, + radix_lwe: *mut c_void, + shift: u32, + mem_ptr: *mut i8, + bsk: *const c_void, + ksk: *const c_void, + num_blocks: u32, + ); + + pub fn cleanup_cuda_integer_radix_scalar_shift(v_stream: *const c_void, mem_ptr: *mut *mut i8); + + pub fn scratch_cuda_integer_radix_cmux_kb_64( + v_stream: *const c_void, + mem_ptr: *mut *mut i8, + glwe_dimension: u32, + polynomial_size: u32, + big_lwe_dimension: u32, + small_lwe_dimension: u32, + ks_level: u32, + ks_base_log: u32, + pbs_level: u32, + pbs_base_log: u32, + grouping_factor: u32, + num_blocks: u32, + message_modulus: u32, + carry_modulus: u32, + pbs_type: u32, + allocate_gpu_memory: bool, + ); + + pub fn cuda_cmux_integer_radix_ciphertext_kb_64( + v_stream: *const c_void, + lwe_array_out: *mut c_void, + lwe_condition: *const c_void, + lwe_array_true: *const c_void, + lwe_array_false: *const c_void, + mem_ptr: *mut i8, + bsk: *const c_void, + ksk: *const c_void, + num_blocks: u32, + ); + + pub fn cleanup_cuda_integer_radix_cmux(v_stream: *const c_void, mem_ptr: *mut *mut i8); + + pub fn scratch_cuda_integer_radix_scalar_rotate_kb_64( + v_stream: *const c_void, + mem_ptr: *mut *mut i8, + glwe_dimension: u32, + polynomial_size: u32, + big_lwe_dimension: u32, + small_lwe_dimension: u32, + ks_level: u32, + ks_base_log: u32, + pbs_level: u32, + pbs_base_log: u32, + grouping_factor: u32, + num_blocks: u32, + message_modulus: u32, + carry_modulus: u32, + pbs_type: u32, + shift_type: u32, + allocate_gpu_memory: bool, + ); + + pub fn cuda_integer_radix_scalar_rotate_kb_64_inplace( + v_stream: *const c_void, + radix_lwe: *mut c_void, + n: u32, + mem_ptr: *mut i8, + bsk: *const c_void, + ksk: *const c_void, + num_blocks: u32, + ); + + pub fn cleanup_cuda_integer_radix_scalar_rotate(v_stream: 
*const c_void, mem_ptr: *mut *mut i8); + + pub fn scratch_cuda_propagate_single_carry_low_latency_kb_64_inplace( + v_stream: *const c_void, + mem_ptr: *mut *mut i8, + glwe_dimension: u32, + polynomial_size: u32, + big_lwe_dimension: u32, + small_lwe_dimension: u32, + ks_level: u32, + ks_base_log: u32, + pbs_level: u32, + pbs_base_log: u32, + grouping_factor: u32, + num_blocks: u32, + message_modulus: u32, + carry_modulus: u32, + pbs_type: u32, + allocate_gpu_memory: bool, + ); + + pub fn cuda_propagate_single_carry_low_latency_kb_64_inplace( + v_stream: *const c_void, + radix_lwe: *mut c_void, + mem_ptr: *mut i8, + bsk: *const c_void, + ksk: *const c_void, + num_blocks: u32, + ); + + pub fn cleanup_cuda_propagate_single_carry_low_latency( + v_stream: *const c_void, + mem_ptr: *mut *mut i8, + ); + +} diff --git a/backends/tfhe-cuda-backend/rust_api/src/lib.rs b/backends/tfhe-cuda-backend/rust_api/src/lib.rs new file mode 100644 index 000000000..26b2e30eb --- /dev/null +++ b/backends/tfhe-cuda-backend/rust_api/src/lib.rs @@ -0,0 +1 @@ +pub mod cuda_bind; diff --git a/ci/ec2_products_cost.json b/ci/ec2_products_cost.json index 4d531c481..506018a42 100644 --- a/ci/ec2_products_cost.json +++ b/ci/ec2_products_cost.json @@ -1,4 +1,7 @@ { "m6i.metal": 7.168, - "hpc7a.96xlarge": 7.7252 + "hpc7a.96xlarge": 7.7252, + "p3.2xlarge": 3.06, + "p4d.24xlarge": 32.7726, + "p5.48xlarge": 98.32 } diff --git a/ci/slab.toml b/ci/slab.toml index 946101f95..d37fc9ce0 100644 --- a/ci/slab.toml +++ b/ci/slab.toml @@ -18,6 +18,31 @@ region = "eu-west-1" image_id = "ami-0e88d98b86aff13de" instance_type = "hpc7a.96xlarge" +[profile.gpu-test] +region = "us-east-1" +image_id = "ami-05b4b37bcbb24dc48" +instance_type = "p3.2xlarge" +# One spawn attempt every 30 seconds for 1 hour +spawn_retry_attempts = 120 +spawn_retry_duration = 60 + +[profile.gpu-bench] +region = "us-east-1" +image_id = "ami-05b4b37bcbb24dc48" +instance_type = "p4d.24xlarge" +# One spawn attempt every 30 seconds for 6 hours 
+spawn_retry_attempts = 720 +spawn_retry_duration = 360 +max_spot_hourly_price = "100.0" + +[profile.gpu-bench-big] +region = "us-east-1" +image_id = "ami-05b4b37bcbb24dc48" +instance_type = "p5.48xlarge" +spawn_retry_attempts = 720 +spawn_retry_duration = 360 +max_spot_hourly_price = "150.0" + [command.cpu_test] workflow = "aws_tfhe_tests.yml" profile = "cpu-big" @@ -43,21 +68,36 @@ workflow = "aws_tfhe_fast_tests.yml" profile = "cpu-big" check_run_name = "CPU AWS Fast Tests" -[command.integer_full_bench] -workflow = "integer_full_benchmark.yml" -profile = "bench" -check_run_name = "Integer CPU AWS Benchmarks Full Suite" +[command.gpu_test] +workflow = "aws_tfhe_gpu_tests.yml" +profile = "gpu-test" +check_run_name = "GPU AWS Tests" [command.signed_integer_full_bench] workflow = "signed_integer_full_benchmark.yml" profile = "bench" check_run_name = "Signed Integer CPU AWS Benchmarks Full Suite" +[command.integer_full_bench] +workflow = "integer_full_benchmark.yml" +profile = "bench" +check_run_name = "Integer CPU AWS Benchmarks Full Suite" + +[command.integer_gpu_full_bench] +workflow = "integer_gpu_full_benchmark.yml" +profile = "gpu-test" # p3.2xlarge is the baseline for GPU benchmarks +check_run_name = "Integer GPU AWS Benchmarks Full Suite" + [command.integer_bench] workflow = "integer_benchmark.yml" profile = "bench" check_run_name = "Integer CPU AWS Benchmarks" +[command.integer_gpu_bench] +workflow = "integer_gpu_benchmark.yml" +profile = "gpu-test" +check_run_name = "Integer GPU AWS Benchmarks" + [command.integer_multi_bit_bench] workflow = "integer_multi_bit_benchmark.yml" profile = "bench" @@ -73,6 +113,11 @@ workflow = "signed_integer_multi_bit_benchmark.yml" profile = "bench" check_run_name = "Signed integer multi bit CPU AWS Benchmarks" +[command.integer_multi_bit_gpu_bench] +workflow = "integer_multi_bit_gpu_benchmark.yml" +profile = "gpu-bench" +check_run_name = "Integer multi bit GPU AWS Benchmarks" + [command.shortint_full_bench] workflow = 
"shortint_full_benchmark.yml" profile = "bench" diff --git a/tfhe/Cargo.toml b/tfhe/Cargo.toml index 0ba6a2e46..dba04bd85 100644 --- a/tfhe/Cargo.toml +++ b/tfhe/Cargo.toml @@ -60,6 +60,7 @@ rayon = { version = "1.5.0" } bincode = "1.3.3" concrete-fft = { version = "0.3.0", features = ["serde", "fft128"] } pulp = "0.13" +tfhe-cuda-backend = {path = "../backends/tfhe-cuda-backend/rust_api", optional = true} aligned-vec = { version = "0.5", features = ["serde"] } dyn-stack = { version = "0.9" } paste = "1.0.7" @@ -83,6 +84,7 @@ boolean = [] shortint = [] integer = ["shortint"] internal-keycache = ["dep:lazy_static", "dep:fs2"] +gpu = ["tfhe-cuda-backend"] # Experimental section experimental = [] diff --git a/tfhe/benches/integer/bench.rs b/tfhe/benches/integer/bench.rs index 286ac5dd9..d490781d5 100644 --- a/tfhe/benches/integer/bench.rs +++ b/tfhe/benches/integer/bench.rs @@ -21,6 +21,7 @@ use tfhe::integer::U256; use tfhe::shortint::parameters::{ PARAM_MESSAGE_1_CARRY_1_KS_PBS, PARAM_MESSAGE_2_CARRY_2_KS_PBS, PARAM_MESSAGE_3_CARRY_3_KS_PBS, PARAM_MESSAGE_4_CARRY_4_KS_PBS, PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS, + PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS, }; /// The type used to hold scalar values @@ -57,6 +58,11 @@ impl Default for ParamsAndNumBlocksIter { Err(_) => false, }; + let is_gpu = match env::var("__TFHE_RS_BENCH_OP_FLAVOR") { + Ok(val) => val.contains("gpu"), + Err(_) => false, + }; + let bit_sizes = if is_fast_bench { FAST_BENCH_BIT_SIZES.to_vec() } else { @@ -64,7 +70,18 @@ impl Default for ParamsAndNumBlocksIter { }; if is_multi_bit { - let params = vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS.into()]; + let params = if is_gpu { + vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS.into()] + } else { + vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS.into()] + }; + + let bit_sizes = if is_fast_bench { + vec![32] + } else { + BENCH_BIT_SIZES.to_vec() + }; + let params_and_bit_sizes = iproduct!(params, bit_sizes); Self { 
params_and_bit_sizes, @@ -77,6 +94,7 @@ impl Default for ParamsAndNumBlocksIter { // PARAM_MESSAGE_3_CARRY_3_KS_PBS.into(), // PARAM_MESSAGE_4_CARRY_4_KS_PBS.into(), ]; + let params_and_bit_sizes = iproduct!(params, bit_sizes); Self { params_and_bit_sizes, @@ -1136,6 +1154,709 @@ define_server_key_bench_default_fn!( display_name: rotate_right ); +#[cfg(feature = "gpu")] +mod cuda { + use super::{default_scalar, shift_scalar, ParamsAndNumBlocksIter, ScalarType}; + use crate::utilities::{write_to_json, OperatorType}; + use criterion::{criterion_group, Criterion}; + use rand::prelude::*; + use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + use tfhe::integer::gpu::server_key::CudaServerKey; + use tfhe::integer::keycache::KEY_CACHE; + use tfhe::integer::IntegerKeyKind; + use tfhe::keycache::NamedParam; + + fn bench_cuda_server_key_unary_function_clean_inputs( + c: &mut Criterion, + bench_name: &str, + display_name: &str, + unary_op: F, + ) where + F: Fn(&CudaServerKey, &mut CudaRadixCiphertext, &CudaStream), + { + let mut bench_group = c.benchmark_group(bench_name); + bench_group + .sample_size(15) + .measurement_time(std::time::Duration::from_secs(60)); + let mut rng = rand::thread_rng(); + + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() { + let param_name = param.name(); + + let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + + bench_group.bench_function(&bench_id, |b| { + let (cks, _cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let gpu_sks = CudaServerKey::new(&cks, &stream); + + let encrypt_two_values = || { + let clearlow = rng.gen::(); + let clearhigh = rng.gen::(); + let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh)); + let ct_0 = cks.encrypt_radix(clear_0, num_block); + + let d_ctxt_1 = 
CudaRadixCiphertext::from_radix_ciphertext(&ct_0, &stream); + + d_ctxt_1 + }; + + b.iter_batched( + encrypt_two_values, + |mut ct_0| { + unary_op(&gpu_sks, &mut ct_0, &stream); + }, + criterion::BatchSize::SmallInput, + ) + }); + + write_to_json::( + &bench_id, + param, + param.name(), + display_name, + &OperatorType::Atomic, + bit_size as u32, + vec![param.message_modulus().0.ilog2(); num_block], + ); + } + + bench_group.finish() + } + + /// Base function to bench a server key function that is a binary operation, input ciphertext + /// will contain only zero carries + fn bench_cuda_server_key_binary_function_clean_inputs( + c: &mut Criterion, + bench_name: &str, + display_name: &str, + binary_op: F, + ) where + F: Fn(&CudaServerKey, &mut CudaRadixCiphertext, &mut CudaRadixCiphertext, &CudaStream), + { + let mut bench_group = c.benchmark_group(bench_name); + bench_group + .sample_size(15) + .measurement_time(std::time::Duration::from_secs(60)); + let mut rng = rand::thread_rng(); + + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() { + let param_name = param.name(); + + let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + + bench_group.bench_function(&bench_id, |b| { + let (cks, _cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let gpu_sks = CudaServerKey::new(&cks, &stream); + + let encrypt_two_values = || { + let clearlow = rng.gen::(); + let clearhigh = rng.gen::(); + let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh)); + let ct_0 = cks.encrypt_radix(clear_0, num_block); + + let clearlow = rng.gen::(); + let clearhigh = rng.gen::(); + let clear_1 = tfhe::integer::U256::from((clearlow, clearhigh)); + let ct_1 = cks.encrypt_radix(clear_1, num_block); + + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ct_0, &stream); + let d_ctxt_2 = 
CudaRadixCiphertext::from_radix_ciphertext(&ct_1, &stream); + + (d_ctxt_1, d_ctxt_2) + }; + + b.iter_batched( + encrypt_two_values, + |(mut ct_0, mut ct_1)| { + binary_op(&gpu_sks, &mut ct_0, &mut ct_1, &stream); + }, + criterion::BatchSize::SmallInput, + ) + }); + + write_to_json::( + &bench_id, + param, + param.name(), + display_name, + &OperatorType::Atomic, + bit_size as u32, + vec![param.message_modulus().0.ilog2(); num_block], + ); + } + + bench_group.finish() + } + + fn bench_cuda_server_key_binary_scalar_function_clean_inputs( + c: &mut Criterion, + bench_name: &str, + display_name: &str, + binary_op: F, + rng_func: G, + ) where + F: Fn(&CudaServerKey, &mut CudaRadixCiphertext, ScalarType, &CudaStream), + G: Fn(&mut ThreadRng, usize) -> ScalarType, + { + let mut bench_group = c.benchmark_group(bench_name); + bench_group + .sample_size(15) + .measurement_time(std::time::Duration::from_secs(60)); + let mut rng = rand::thread_rng(); + + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() { + if bit_size > ScalarType::BITS as usize { + break; + } + + let param_name = param.name(); + + let max_value_for_bit_size = ScalarType::MAX >> (ScalarType::BITS as usize - bit_size); + + let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}"); + bench_group.bench_function(&bench_id, |b| { + let (cks, _cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let gpu_sks = CudaServerKey::new(&cks, &stream); + + let encrypt_one_value = || { + let clearlow = rng.gen::(); + let clearhigh = rng.gen::(); + let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh)); + let ct_0 = cks.encrypt_radix(clear_0, num_block); + + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ct_0, &stream); + + let clear_1 = rng_func(&mut rng, bit_size) & max_value_for_bit_size; + + (d_ctxt_1, clear_1) + }; + + 
b.iter_batched( + encrypt_one_value, + |(mut ct_0, clear_1)| { + binary_op(&gpu_sks, &mut ct_0, clear_1, &stream); + }, + criterion::BatchSize::SmallInput, + ) + }); + + write_to_json::( + &bench_id, + param, + param.name(), + display_name, + &OperatorType::Atomic, + bit_size as u32, + vec![param.message_modulus().0.ilog2(); num_block], + ); + } + + bench_group.finish() + } + + fn cuda_default_if_then_else(c: &mut Criterion) { + let mut bench_group = c.benchmark_group("integer::cuda::if_then_else"); + bench_group + .sample_size(15) + .measurement_time(std::time::Duration::from_secs(60)); + let mut rng = rand::thread_rng(); + + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() { + if bit_size > ScalarType::BITS as usize { + break; + } + + let param_name = param.name(); + + let bench_id = format!("if_then_else:{param_name}::{bit_size}_bits_scalar_{bit_size}"); + bench_group.bench_function(&bench_id, |b| { + let (cks, _cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let gpu_sks = CudaServerKey::new(&cks, &stream); + + let encrypt_tree_values = || { + let clear_cond = rng.gen::(); + let ct_cond = + cks.encrypt_radix(tfhe::integer::U256::from(clear_cond), num_block); + + let clearlow = rng.gen::(); + let clearhigh = rng.gen::(); + let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh)); + let ct_then = cks.encrypt_radix(clear_0, num_block); + + let clearlow = rng.gen::(); + let clearhigh = rng.gen::(); + let clear_1 = tfhe::integer::U256::from((clearlow, clearhigh)); + let ct_else = cks.encrypt_radix(clear_1, num_block); + + let d_ct_cond = CudaRadixCiphertext::from_radix_ciphertext(&ct_cond, &stream); + let d_ct_then = CudaRadixCiphertext::from_radix_ciphertext(&ct_then, &stream); + let d_ct_else = CudaRadixCiphertext::from_radix_ciphertext(&ct_else, &stream); + + (d_ct_cond, d_ct_then, d_ct_else) + }; + + 
b.iter_batched( + encrypt_tree_values, + |(ct_cond, ct_then, ct_else)| { + let _ = gpu_sks.if_then_else(&ct_cond, &ct_then, &ct_else, &stream); + }, + criterion::BatchSize::SmallInput, + ) + }); + + write_to_json::( + &bench_id, + param, + param.name(), + "if_then_else", + &OperatorType::Atomic, + bit_size as u32, + vec![param.message_modulus().0.ilog2(); num_block], + ); + } + + bench_group.finish() + } + + macro_rules! define_cuda_server_key_bench_clean_input_unary_fn ( + (method_name: $server_key_method:ident, display_name:$name:ident) => { + ::paste::paste!{ + fn [](c: &mut Criterion) { + bench_cuda_server_key_unary_function_clean_inputs( + c, + concat!("integer::cuda::", stringify!($server_key_method)), + stringify!($name), + |server_key, lhs, stream| { + server_key.$server_key_method(lhs, stream); + } + ) + } + } + } + ); + + macro_rules! define_cuda_server_key_bench_clean_input_fn ( + (method_name: $server_key_method:ident, display_name:$name:ident) => { + ::paste::paste!{ + fn [](c: &mut Criterion) { + bench_cuda_server_key_binary_function_clean_inputs( + c, + concat!("integer::cuda::", stringify!($server_key_method)), + stringify!($name), + |server_key, lhs, rhs, stream| { + server_key.$server_key_method(lhs, rhs, stream); + } + ) + } + } + } + ); + + macro_rules! 
define_cuda_server_key_bench_clean_input_scalar_fn ( + (method_name: $server_key_method:ident, display_name:$name:ident, rng_func:$($rng_fn:tt)*) => { + ::paste::paste!{ + fn [](c: &mut Criterion) { + bench_cuda_server_key_binary_scalar_function_clean_inputs( + c, + concat!("integer::cuda::", stringify!($server_key_method)), + stringify!($name), + |server_key, lhs, rhs, stream| { + server_key.$server_key_method(lhs, rhs, stream); + }, + $($rng_fn)* + ) + } + } + } + ); + + //=========================================== + // Unchecked + //=========================================== + define_cuda_server_key_bench_clean_input_unary_fn!( + method_name: unchecked_neg, + display_name: negation + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: unchecked_bitand, + display_name: bitand + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: unchecked_bitor, + display_name: bitor + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: unchecked_bitxor, + display_name: bitxor + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: unchecked_mul, + display_name: mul + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: unchecked_add, + display_name: add + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: unchecked_sub, + display_name: sub + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: unchecked_eq, + display_name: equal + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: unchecked_ne, + display_name: not_equal + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_bitand, + display_name: bitand, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_bitor, + display_name: bitand, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_bitxor, + display_name: bitand, + 
rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_add, + display_name: add, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_sub, + display_name: sub, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_left_shift, + display_name: left_shift, + rng_func: shift_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_right_shift, + display_name: right_shift, + rng_func: shift_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_left_rotate, + display_name: left_rotate, + rng_func: shift_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_right_rotate, + display_name: right_rotate, + rng_func: shift_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_gt, + display_name: greater_than, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_ge, + display_name: greater_or_equal, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_lt, + display_name: less_than, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_le, + display_name: less_or_equal, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_max, + display_name: max, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: unchecked_scalar_min, + display_name: min, + rng_func: default_scalar + ); + + //=========================================== + // Default + //=========================================== + + 
define_cuda_server_key_bench_clean_input_unary_fn!( + method_name: neg, + display_name: negation + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: add, + display_name: add + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: sub, + display_name: sub + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: mul, + display_name: mul + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: ne, + display_name: not_equal + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: eq, + display_name: equal + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: bitand, + display_name: bitand + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: bitor, + display_name: bitor + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: bitxor, + display_name: bitxor + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: gt, + display_name: greater_than + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: ge, + display_name: greater_or_equal + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: lt, + display_name: less_than + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: le, + display_name: less_or_equal + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: max, + display_name: max + ); + + define_cuda_server_key_bench_clean_input_fn!( + method_name: min, + display_name: min + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_sub, + display_name: sub, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_add, + display_name: add, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_left_shift, + display_name: left_shift, + rng_func: shift_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: 
scalar_right_shift, + display_name: right_shift, + rng_func: shift_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_bitand, + display_name: bitand, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_bitor, + display_name: bitor, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_bitxor, + display_name: bitxor, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_gt, + display_name: greater_than, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_ge, + display_name: greater_or_equal, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_lt, + display_name: less_than, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_le, + display_name: less_or_equal, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_max, + display_name: max, + rng_func: default_scalar + ); + + define_cuda_server_key_bench_clean_input_scalar_fn!( + method_name: scalar_min, + display_name: min, + rng_func: default_scalar + ); + + criterion_group!( + unchecked_cuda_ops, + cuda_unchecked_neg, + cuda_unchecked_bitand, + cuda_unchecked_bitor, + cuda_unchecked_bitxor, + cuda_unchecked_mul, + cuda_unchecked_sub, + cuda_unchecked_add, + cuda_unchecked_eq, + cuda_unchecked_ne, + ); + + criterion_group!( + unchecked_scalar_cuda_ops, + cuda_unchecked_scalar_bitand, + cuda_unchecked_scalar_bitor, + cuda_unchecked_scalar_bitxor, + cuda_unchecked_scalar_add, + cuda_unchecked_scalar_sub, + cuda_unchecked_scalar_left_shift, + cuda_unchecked_scalar_right_shift, + cuda_unchecked_scalar_left_rotate, + cuda_unchecked_scalar_right_rotate, + cuda_unchecked_scalar_ge, + 
cuda_unchecked_scalar_gt, + cuda_unchecked_scalar_le, + cuda_unchecked_scalar_lt, + cuda_unchecked_scalar_max, + cuda_unchecked_scalar_min, + ); + + criterion_group!( + default_cuda_ops, + cuda_neg, + cuda_sub, + cuda_add, + cuda_mul, + cuda_eq, + cuda_ne, + cuda_ge, + cuda_gt, + cuda_le, + cuda_lt, + cuda_max, + cuda_min, + cuda_bitand, + cuda_bitor, + cuda_bitxor, + cuda_default_if_then_else, + ); + + criterion_group!( + default_scalar_cuda_ops, + cuda_scalar_sub, + cuda_scalar_add, + cuda_scalar_left_shift, + cuda_scalar_right_shift, + cuda_scalar_bitand, + cuda_scalar_bitor, + cuda_scalar_bitxor, + cuda_scalar_ge, + cuda_scalar_gt, + cuda_scalar_le, + cuda_scalar_lt, + cuda_scalar_max, + cuda_scalar_min, + ); +} + +#[cfg(feature = "gpu")] +use cuda::{ + default_cuda_ops, default_scalar_cuda_ops, unchecked_cuda_ops, unchecked_scalar_cuda_ops, +}; + criterion_group!( smart_ops, smart_neg, @@ -1371,35 +2092,56 @@ criterion_group!( criterion_group!(misc, full_propagate, full_propagate_parallelized); +#[cfg(feature = "gpu")] +fn go_through_gpu_bench_groups(val: &str) { + match val.to_lowercase().as_str() { + "default" => { + default_cuda_ops(); + default_scalar_cuda_ops() + } + "unchecked" => { + unchecked_cuda_ops(); + unchecked_scalar_cuda_ops() + } + _ => panic!("unknown benchmark operations flavor"), + }; +} + +fn go_through_cpu_bench_groups(val: &str) { + match val.to_lowercase().as_str() { + "default" => { + default_parallelized_ops(); + default_parallelized_ops_comp(); + default_scalar_parallelized_ops(); + default_scalar_parallelized_ops_comp() + } + "smart" => { + smart_ops(); + smart_ops_comp(); + smart_scalar_ops(); + smart_parallelized_ops(); + smart_parallelized_ops_comp(); + smart_scalar_parallelized_ops(); + smart_scalar_parallelized_ops_comp() + } + "unchecked" => { + unchecked_ops(); + unchecked_parallelized_ops(); + unchecked_ops_comp(); + unchecked_scalar_ops(); + unchecked_scalar_ops_comp() + } + "misc" => misc(), + _ => panic!("unknown benchmark 
operations flavor"), + }; +} fn main() { match env::var("__TFHE_RS_BENCH_OP_FLAVOR") { Ok(val) => { - match val.to_lowercase().as_str() { - "default" => { - default_parallelized_ops(); - default_parallelized_ops_comp(); - default_scalar_parallelized_ops(); - default_scalar_parallelized_ops_comp() - } - "smart" => { - smart_ops(); - smart_ops_comp(); - smart_scalar_ops(); - smart_parallelized_ops(); - smart_parallelized_ops_comp(); - smart_scalar_parallelized_ops(); - smart_scalar_parallelized_ops_comp() - } - "unchecked" => { - unchecked_ops(); - unchecked_parallelized_ops(); - unchecked_ops_comp(); - unchecked_scalar_ops(); - unchecked_scalar_ops_comp() - } - "misc" => misc(), - _ => panic!("unknown benchmark operations flavor"), - }; + #[cfg(feature = "gpu")] + go_through_gpu_bench_groups(&val); + #[cfg(not(feature = "gpu"))] + go_through_cpu_bench_groups(&val); } Err(_) => { default_parallelized_ops(); diff --git a/tfhe/src/c_api/core_crypto/mod.rs b/tfhe/src/c_api/core_crypto/mod.rs index 6018454a8..90d88445c 100644 --- a/tfhe/src/c_api/core_crypto/mod.rs +++ b/tfhe/src/c_api/core_crypto/mod.rs @@ -1,6 +1,194 @@ use super::utils::*; use std::os::raw::c_int; +#[no_mangle] +pub unsafe extern "C" fn core_crypto_lwe_secret_key( + output_lwe_sk_ptr: *mut u64, + lwe_sk_dim: usize, + seed_low_bytes: u64, + seed_high_bytes: u64, +) -> c_int { + catch_panic(|| { + use crate::core_crypto::commons::math::random::Seed; + use crate::core_crypto::prelude::*; + + let seed_low_bytes: u128 = seed_low_bytes.into(); + let seed_high_bytes: u128 = seed_high_bytes.into(); + let seed = (seed_high_bytes << 64) | seed_low_bytes; + + let mut secret_generator = + SecretRandomGenerator::::new(Seed(seed)); + + // Create the LweSecretKey + let output_lwe_sk_slice = std::slice::from_raw_parts_mut(output_lwe_sk_ptr, lwe_sk_dim); + + let mut lwe_sk = LweSecretKey::from_container(output_lwe_sk_slice); + + generate_binary_lwe_secret_key(&mut lwe_sk, &mut secret_generator); + }) +} + 
+#[no_mangle] +pub unsafe extern "C" fn core_crypto_lwe_encrypt( + output_ct_ptr: *mut u64, + pt: u64, + lwe_sk_ptr: *const u64, + lwe_sk_dim: usize, + lwe_encryption_std_dev: f64, + seed_low_bytes: u64, + seed_high_bytes: u64, +) -> c_int { + catch_panic(|| { + use crate::core_crypto::commons::generators::DeterministicSeeder; + use crate::core_crypto::commons::math::random::Seed; + use crate::core_crypto::prelude::*; + + let lwe_sk_slice = std::slice::from_raw_parts(lwe_sk_ptr, lwe_sk_dim); + let lwe_sk = LweSecretKey::from_container(lwe_sk_slice); + + let seed_low_bytes: u128 = seed_low_bytes.into(); + let seed_high_bytes: u128 = seed_high_bytes.into(); + let seed = (seed_high_bytes << 64) | seed_low_bytes; + + let seed = Seed(seed); + let mut determinisitic_seeder = DeterministicSeeder::::new(seed); + let mut encryption_generator = EncryptionRandomGenerator::::new( + determinisitic_seeder.seed(), + &mut determinisitic_seeder, + ); + + let plaintext = Plaintext(pt); + let output_ct = std::slice::from_raw_parts_mut(output_ct_ptr, lwe_sk_dim + 1); + let mut ct = LweCiphertext::from_container(output_ct, CiphertextModulus::new_native()); + + let lwe_encryption_std_dev = StandardDev(lwe_encryption_std_dev); + + encrypt_lwe_ciphertext( + &lwe_sk, + &mut ct, + plaintext, + lwe_encryption_std_dev, + &mut encryption_generator, + ); + }) +} + +#[no_mangle] +pub unsafe extern "C" fn core_crypto_ggsw_encrypt( + output_ct_ptr: *mut u64, + pt: u64, + glwe_sk_ptr: *const u64, + glwe_sk_dim: usize, + poly_size: usize, + level_count: usize, + base_log: usize, + glwe_modular_variance: f64, + seed_low_bytes: u64, + seed_high_bytes: u64, +) -> c_int { + catch_panic(|| { + use crate::core_crypto::commons::generators::DeterministicSeeder; + use crate::core_crypto::commons::math::random::Seed; + use crate::core_crypto::prelude::*; + + let glwe_sk_slice = std::slice::from_raw_parts(glwe_sk_ptr, glwe_sk_dim * poly_size); + let glwe_sk = GlweSecretKey::from_container(glwe_sk_slice, 
PolynomialSize(poly_size)); + + let seed_low_bytes: u128 = seed_low_bytes.into(); + let seed_high_bytes: u128 = seed_high_bytes.into(); + let seed = (seed_high_bytes << 64) | seed_low_bytes; + + let seed = Seed(seed); + let mut determinisitic_seeder = DeterministicSeeder::::new(seed); + let mut encryption_generator = EncryptionRandomGenerator::::new( + determinisitic_seeder.seed(), + &mut determinisitic_seeder, + ); + + let plaintext = Plaintext(pt); + let output_ct = std::slice::from_raw_parts_mut( + output_ct_ptr, + ggsw_ciphertext_size( + GlweDimension(glwe_sk_dim).to_glwe_size(), + PolynomialSize(poly_size), + DecompositionLevelCount(level_count), + ), + ); + let mut ct = GgswCiphertext::from_container( + output_ct, + GlweDimension(glwe_sk_dim).to_glwe_size(), + PolynomialSize(poly_size), + DecompositionBaseLog(base_log), + CiphertextModulus::new_native(), + ); + + let glwe_encryption_std_dev = StandardDev(glwe_modular_variance); + + encrypt_constant_ggsw_ciphertext( + &glwe_sk, + &mut ct, + plaintext, + glwe_encryption_std_dev, + &mut encryption_generator, + ); + }) +} + +#[no_mangle] +pub unsafe extern "C" fn core_crypto_lwe_decrypt( + output_pt: *mut u64, + input_ct_ptr: *const u64, + lwe_sk_ptr: *const u64, + lwe_sk_dim: usize, +) -> c_int { + catch_panic(|| { + use crate::core_crypto::prelude::*; + + let lwe_sk_slice = std::slice::from_raw_parts(lwe_sk_ptr, lwe_sk_dim); + let lwe_sk = LweSecretKey::from_container(lwe_sk_slice); + + let input_ct = std::slice::from_raw_parts(input_ct_ptr, lwe_sk_dim + 1); + let ct = LweCiphertext::from_container(input_ct, CiphertextModulus::new_native()); + + let plaintext = decrypt_lwe_ciphertext(&lwe_sk, &ct); + + *output_pt = plaintext.0; + }) +} + +#[no_mangle] +pub unsafe extern "C" fn core_crypto_glwe_decrypt( + output_pt: *mut u64, + input_ct_ptr: *const u64, + glwe_sk_ptr: *const u64, + glwe_sk_dim: usize, + glwe_poly_size: usize, +) -> c_int { + catch_panic(|| { + use crate::core_crypto::prelude::*; + + let 
glwe_sk_slice = std::slice::from_raw_parts(glwe_sk_ptr, glwe_sk_dim * glwe_poly_size); + let glwe_sk = GlweSecretKey::from_container(glwe_sk_slice, PolynomialSize(glwe_poly_size)); + + let input_ct = std::slice::from_raw_parts( + input_ct_ptr, + glwe_ciphertext_size( + GlweDimension(glwe_sk_dim).to_glwe_size(), + PolynomialSize(glwe_poly_size), + ), + ); + let ct = GlweCiphertext::from_container( + input_ct, + PolynomialSize(glwe_poly_size), + CiphertextModulus::new_native(), + ); + let output = std::slice::from_raw_parts_mut(output_pt, glwe_poly_size); + let mut plaintext_list = PlaintextList::from_container(output); + + decrypt_glwe_ciphertext(&glwe_sk, &ct, &mut plaintext_list); + }) +} + #[no_mangle] pub unsafe extern "C" fn core_crypto_lwe_multi_bit_bootstrapping_key_element_size( input_lwe_sk_dim: usize, @@ -34,6 +222,88 @@ pub unsafe extern "C" fn core_crypto_lwe_multi_bit_bootstrapping_key_element_siz }) } +#[no_mangle] +pub unsafe extern "C" fn core_crypto_par_generate_lwe_bootstrapping_key( + output_bsk_ptr: *mut u64, + bsk_base_log: usize, + bsk_level_count: usize, + input_lwe_sk_ptr: *const u64, + input_lwe_sk_dim: usize, + output_glwe_sk_ptr: *const u64, + output_glwe_sk_dim: usize, + output_glwe_sk_poly_size: usize, + glwe_encryption_std_dev: f64, + seed_low_bytes: u64, + seed_high_bytes: u64, +) -> c_int { + catch_panic(|| { + use crate::core_crypto::commons::generators::DeterministicSeeder; + use crate::core_crypto::commons::math::random::Seed; + use crate::core_crypto::prelude::*; + + let input_lwe_sk_slice = std::slice::from_raw_parts(input_lwe_sk_ptr, input_lwe_sk_dim); + let input_lwe_sk = LweSecretKey::from_container(input_lwe_sk_slice); + + let output_glwe_sk_dim = GlweDimension(output_glwe_sk_dim); + let output_glwe_sk_poly_size = PolynomialSize(output_glwe_sk_poly_size); + let output_glwe_sk_size = + glwe_ciphertext_mask_size(output_glwe_sk_dim, output_glwe_sk_poly_size); + let output_glwe_sk_slice = + 
std::slice::from_raw_parts(output_glwe_sk_ptr, output_glwe_sk_size); + let output_glwe_sk = + GlweSecretKey::from_container(output_glwe_sk_slice, output_glwe_sk_poly_size); + + let seed_low_bytes: u128 = seed_low_bytes.into(); + let seed_high_bytes: u128 = seed_high_bytes.into(); + let seed = (seed_high_bytes << 64) | seed_low_bytes; + + let mut deterministic_seeder = + DeterministicSeeder::::new(Seed(seed)); + let mut encryption_random_generator = + EncryptionRandomGenerator::::new( + deterministic_seeder.seed(), + &mut deterministic_seeder, + ); + + let lwe_base_log = DecompositionBaseLog(bsk_base_log); + let lwe_level_count = DecompositionLevelCount(bsk_level_count); + + let lwe_slice_len = { + let bsk = LweBootstrapKeyOwned::new( + 0u64, + output_glwe_sk.glwe_dimension().to_glwe_size(), + output_glwe_sk.polynomial_size(), + lwe_base_log, + lwe_level_count, + input_lwe_sk.lwe_dimension(), + CiphertextModulus::new_native(), + ); + bsk.into_container().len() + }; + + let bsk_slice = std::slice::from_raw_parts_mut(output_bsk_ptr, lwe_slice_len); + + let mut bsk = LweBootstrapKey::from_container( + bsk_slice, + output_glwe_sk.glwe_dimension().to_glwe_size(), + output_glwe_sk.polynomial_size(), + lwe_base_log, + lwe_level_count, + CiphertextModulus::new_native(), + ); + + let glwe_encryption_std_dev = StandardDev(glwe_encryption_std_dev); + + par_generate_lwe_bootstrap_key( + &input_lwe_sk, + &output_glwe_sk, + &mut bsk, + glwe_encryption_std_dev, + &mut encryption_random_generator, + ) + }) +} + #[no_mangle] pub unsafe extern "C" fn core_crypto_par_generate_lwe_multi_bit_bootstrapping_key( input_lwe_sk_ptr: *const u64, @@ -120,3 +390,151 @@ pub unsafe extern "C" fn core_crypto_par_generate_lwe_multi_bit_bootstrapping_ke ); }) } + +#[no_mangle] +pub unsafe extern "C" fn core_crypto_par_generate_lwe_keyswitch_key( + output_ksk_ptr: *mut u64, + ksk_base_log: usize, + ksk_level_count: usize, + input_lwe_sk_ptr: *const u64, + input_lwe_sk_dim: usize, + output_lwe_sk_ptr: 
*const u64, + output_lwe_sk_dim: usize, + lwe_encryption_std_dev: f64, + seed_low_bytes: u64, + seed_high_bytes: u64, +) -> c_int { + catch_panic(|| { + use crate::core_crypto::commons::generators::DeterministicSeeder; + use crate::core_crypto::commons::math::random::Seed; + use crate::core_crypto::prelude::*; + + let input_lwe_sk_slice = std::slice::from_raw_parts(input_lwe_sk_ptr, input_lwe_sk_dim); + let input_lwe_sk = LweSecretKey::from_container(input_lwe_sk_slice); + let output_lwe_sk_slice = std::slice::from_raw_parts(output_lwe_sk_ptr, output_lwe_sk_dim); + let output_lwe_sk = LweSecretKey::from_container(output_lwe_sk_slice); + + let seed_low_bytes: u128 = seed_low_bytes.into(); + let seed_high_bytes: u128 = seed_high_bytes.into(); + let seed = (seed_high_bytes << 64) | seed_low_bytes; + + let mut deterministic_seeder = + DeterministicSeeder::::new(Seed(seed)); + let mut encryption_random_generator = + EncryptionRandomGenerator::::new( + deterministic_seeder.seed(), + &mut deterministic_seeder, + ); + + let lwe_base_log = DecompositionBaseLog(ksk_base_log); + let lwe_level_count = DecompositionLevelCount(ksk_level_count); + + let lwe_slice_len = { + let bsk = LweKeyswitchKeyOwned::new( + 0u64, + lwe_base_log, + lwe_level_count, + LweDimension(input_lwe_sk_dim), + LweDimension(output_lwe_sk_dim), + CiphertextModulus::new_native(), + ); + bsk.into_container().len() + }; + + let ksk_slice = std::slice::from_raw_parts_mut(output_ksk_ptr, lwe_slice_len); + + let mut ksk = LweKeyswitchKey::from_container( + ksk_slice, + lwe_base_log, + lwe_level_count, + LweDimension(output_lwe_sk_dim).to_lwe_size(), + CiphertextModulus::new_native(), + ); + + let lwe_encryption_std_dev = StandardDev(lwe_encryption_std_dev); + + generate_lwe_keyswitch_key( + &input_lwe_sk, + &output_lwe_sk, + &mut ksk, + lwe_encryption_std_dev, + &mut encryption_random_generator, + ) + }) +} + +#[no_mangle] +pub unsafe extern "C" fn core_crypto_par_generate_lwe_private_functional_keyswitch_key( 
+ output_pksk_ptr: *mut u64, + pksk_base_log: usize, + pksk_level_count: usize, + input_lwe_sk_ptr: *const u64, + input_lwe_sk_dim: usize, + output_glwe_sk_ptr: *const u64, + poly_size: usize, + glwe_dim: usize, + lwe_encryption_std_dev: f64, + seed_low_bytes: u64, + seed_high_bytes: u64, +) -> c_int { + catch_panic(|| { + use crate::core_crypto::commons::generators::DeterministicSeeder; + use crate::core_crypto::commons::math::random::Seed; + use crate::core_crypto::prelude::*; + + let input_lwe_sk_slice = std::slice::from_raw_parts(input_lwe_sk_ptr, input_lwe_sk_dim); + let input_lwe_sk = LweSecretKey::from_container(input_lwe_sk_slice); + let output_glwe_sk_slice = + std::slice::from_raw_parts(output_glwe_sk_ptr, glwe_dim * poly_size); + let output_glwe_sk = + GlweSecretKey::from_container(output_glwe_sk_slice, PolynomialSize(poly_size)); + + let seed_low_bytes: u128 = seed_low_bytes.into(); + let seed_high_bytes: u128 = seed_high_bytes.into(); + let seed = (seed_high_bytes << 64) | seed_low_bytes; + + let mut deterministic_seeder = + DeterministicSeeder::::new(Seed(seed)); + let mut encryption_random_generator = + EncryptionRandomGenerator::::new( + deterministic_seeder.seed(), + &mut deterministic_seeder, + ); + + let pksk_len = { + let ksk = LwePrivateFunctionalPackingKeyswitchKeyList::new( + 0u64, + DecompositionBaseLog(pksk_base_log), + DecompositionLevelCount(pksk_level_count), + LweDimension(input_lwe_sk_dim), + GlweDimension(glwe_dim).to_glwe_size(), + PolynomialSize(poly_size), + FunctionalPackingKeyswitchKeyCount(glwe_dim + 1), + CiphertextModulus::new_native(), + ); + ksk.into_container().len() + }; + + let ksk_slice = std::slice::from_raw_parts_mut(output_pksk_ptr, pksk_len); + + let mut fp_ksk = LwePrivateFunctionalPackingKeyswitchKeyList::from_container( + ksk_slice, + DecompositionBaseLog(pksk_base_log), + DecompositionLevelCount(pksk_level_count), + LweDimension(input_lwe_sk_dim).to_lwe_size(), + GlweDimension(glwe_dim).to_glwe_size(), + 
PolynomialSize(poly_size), + CiphertextModulus::new_native(), + ); + + let lwe_encryption_std_dev = StandardDev(lwe_encryption_std_dev); + + generate_circuit_bootstrap_lwe_pfpksk_list( + &mut fp_ksk, + &input_lwe_sk, + &output_glwe_sk, + lwe_encryption_std_dev, + &mut encryption_random_generator, + ) + }) +} diff --git a/tfhe/src/core_crypto/algorithms/test/mod.rs b/tfhe/src/core_crypto/algorithms/test/mod.rs index e8ade4645..e20054c2a 100644 --- a/tfhe/src/core_crypto/algorithms/test/mod.rs +++ b/tfhe/src/core_crypto/algorithms/test/mod.rs @@ -332,7 +332,7 @@ pub fn round_decode(decrypted: Scalar, delta: Scalar) - } // Here we will define a helper function to generate an accumulator for a PBS -fn generate_accumulator>( +pub(crate) fn generate_accumulator>( polynomial_size: PolynomialSize, glwe_size: GlweSize, message_modulus: usize, diff --git a/tfhe/src/core_crypto/commons/parameters.rs b/tfhe/src/core_crypto/commons/parameters.rs index d945c06ba..5ba50f9cf 100644 --- a/tfhe/src/core_crypto/commons/parameters.rs +++ b/tfhe/src/core_crypto/commons/parameters.rs @@ -23,6 +23,11 @@ pub struct CiphertextCount(pub usize); #[derive(Copy, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] pub struct LweCiphertextCount(pub usize); +/// The index of a ciphertext in an lwe ciphertext list. +#[cfg(feature = "gpu")] +#[derive(Copy, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] +pub struct LweCiphertextIndex(pub usize); + /// The number of ciphertexts in a glwe ciphertext list. 
#[derive(Copy, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] pub struct GlweCiphertextCount(pub usize); diff --git a/tfhe/src/core_crypto/gpu/algorithms/lwe_keyswitch.rs b/tfhe/src/core_crypto/gpu/algorithms/lwe_keyswitch.rs new file mode 100644 index 000000000..d14b8f40e --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/lwe_keyswitch.rs @@ -0,0 +1,73 @@ +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::lwe_keyswitch_key::CudaLweKeyswitchKey; +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::UnsignedInteger; + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +pub unsafe fn cuda_keyswitch_lwe_ciphertext_async( + lwe_keyswitch_key: &CudaLweKeyswitchKey, + input_lwe_ciphertext: &CudaLweCiphertextList, + output_lwe_ciphertext: &mut CudaLweCiphertextList, + input_indexes: &CudaVec, + output_indexes: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + assert!( + lwe_keyswitch_key.input_key_lwe_size().to_lwe_dimension() + == input_lwe_ciphertext.lwe_dimension(), + "Mismatched input LweDimension. \ + LweKeyswitchKey input LweDimension: {:?}, input LweCiphertext LweDimension {:?}.", + lwe_keyswitch_key.input_key_lwe_size().to_lwe_dimension(), + input_lwe_ciphertext.lwe_dimension(), + ); + assert!( + lwe_keyswitch_key.output_key_lwe_size().to_lwe_dimension() + == output_lwe_ciphertext.lwe_dimension(), + "Mismatched output LweDimension. 
\ + LweKeyswitchKey output LweDimension: {:?}, output LweCiphertext LweDimension {:?}.", + lwe_keyswitch_key.output_key_lwe_size().to_lwe_dimension(), + output_lwe_ciphertext.lwe_dimension(), + ); + + stream.keyswitch_async( + &mut output_lwe_ciphertext.0.d_vec, + output_indexes, + &input_lwe_ciphertext.0.d_vec, + input_indexes, + lwe_keyswitch_key.input_key_lwe_size().to_lwe_dimension(), + lwe_keyswitch_key.output_key_lwe_size().to_lwe_dimension(), + &lwe_keyswitch_key.d_vec, + lwe_keyswitch_key.decomposition_base_log(), + lwe_keyswitch_key.decomposition_level_count(), + input_lwe_ciphertext.lwe_ciphertext_count().0 as u32, + ); +} + +pub fn cuda_keyswitch_lwe_ciphertext( + lwe_keyswitch_key: &CudaLweKeyswitchKey, + input_lwe_ciphertext: &CudaLweCiphertextList, + output_lwe_ciphertext: &mut CudaLweCiphertextList, + input_indexes: &CudaVec, + output_indexes: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + unsafe { + cuda_keyswitch_lwe_ciphertext_async( + lwe_keyswitch_key, + input_lwe_ciphertext, + output_lwe_ciphertext, + input_indexes, + output_indexes, + stream, + ); + } + stream.synchronize(); +} diff --git a/tfhe/src/core_crypto/gpu/algorithms/lwe_linear_algebra.rs b/tfhe/src/core_crypto/gpu/algorithms/lwe_linear_algebra.rs new file mode 100644 index 000000000..43ac540b0 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/lwe_linear_algebra.rs @@ -0,0 +1,361 @@ +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::UnsignedInteger; + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +pub unsafe fn cuda_lwe_ciphertext_add_async( + output: &mut CudaLweCiphertextList, + lhs: &CudaLweCiphertextList, + rhs: &CudaLweCiphertextList, + stream: &CudaStream, +) where + Scalar: 
UnsignedInteger, +{ + let num_samples = output.lwe_ciphertext_count().0 as u32; + + assert_eq!( + lhs.lwe_ciphertext_count(), + rhs.lwe_ciphertext_count(), + "Mismatched number of ciphertexts between lhs ({:?}) and rhs ({:?})", + lhs.lwe_ciphertext_count(), + rhs.lwe_ciphertext_count() + ); + + assert_eq!( + output.lwe_ciphertext_count(), + rhs.lwe_ciphertext_count(), + "Mismatched number of ciphertexts between output ({:?}) and rhs ({:?})", + output.lwe_ciphertext_count(), + rhs.lwe_ciphertext_count() + ); + + assert_eq!( + lhs.ciphertext_modulus(), + rhs.ciphertext_modulus(), + "Mismatched moduli between lhs ({:?}) and rhs ({:?}) LweCiphertext", + lhs.ciphertext_modulus(), + rhs.ciphertext_modulus() + ); + + assert_eq!( + output.ciphertext_modulus(), + rhs.ciphertext_modulus(), + "Mismatched moduli between output ({:?}) and rhs ({:?}) LweCiphertext", + output.ciphertext_modulus(), + rhs.ciphertext_modulus() + ); + + stream.add_lwe_ciphertext_vector_async( + &mut output.0.d_vec, + &lhs.0.d_vec, + &rhs.0.d_vec, + lhs.lwe_dimension(), + num_samples, + ); +} + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +pub unsafe fn cuda_lwe_ciphertext_add_assign_async( + lhs: &mut CudaLweCiphertextList, + rhs: &CudaLweCiphertextList, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + let num_samples = lhs.lwe_ciphertext_count().0 as u32; + + assert_eq!( + lhs.lwe_ciphertext_count(), + rhs.lwe_ciphertext_count(), + "Mismatched number of ciphertexts between lhs ({:?}) and rhs ({:?})", + lhs.lwe_ciphertext_count(), + rhs.lwe_ciphertext_count() + ); + + assert_eq!( + lhs.ciphertext_modulus(), + rhs.ciphertext_modulus(), + "Mismatched moduli between lhs ({:?}) and rhs ({:?}) LweCiphertext", + lhs.ciphertext_modulus(), + rhs.ciphertext_modulus() + ); + + stream.add_lwe_ciphertext_vector_assign_async( + &mut lhs.0.d_vec, + &rhs.0.d_vec, + 
rhs.lwe_dimension(), + num_samples, + ); +} + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +pub unsafe fn cuda_lwe_ciphertext_plaintext_add_async( + output: &mut CudaLweCiphertextList, + lhs: &CudaLweCiphertextList, + rhs: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + let num_samples = output.lwe_ciphertext_count().0 as u32; + + assert_eq!( + output.lwe_ciphertext_count(), + lhs.lwe_ciphertext_count(), + "Mismatched number of ciphertexts between output ({:?}) and lhs ({:?})", + output.lwe_ciphertext_count(), + lhs.lwe_ciphertext_count() + ); + + assert_eq!( + output.ciphertext_modulus(), + lhs.ciphertext_modulus(), + "Mismatched moduli between output ({:?}) and lhs ({:?}) LweCiphertext", + output.ciphertext_modulus(), + lhs.ciphertext_modulus() + ); + + stream.add_lwe_ciphertext_vector_plaintext_vector_async( + &mut output.0.d_vec, + &lhs.0.d_vec, + rhs, + lhs.lwe_dimension(), + num_samples, + ); +} + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +pub unsafe fn cuda_lwe_ciphertext_plaintext_add_assign_async( + lhs: &mut CudaLweCiphertextList, + rhs: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + let num_samples = lhs.lwe_ciphertext_count().0 as u32; + let lwe_dimension = &lhs.lwe_dimension(); + + stream.add_lwe_ciphertext_vector_plaintext_vector_assign_async( + &mut lhs.0.d_vec, + rhs, + *lwe_dimension, + num_samples, + ); +} + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +pub unsafe fn cuda_lwe_ciphertext_negate_async( + output: &mut CudaLweCiphertextList, + input: &CudaLweCiphertextList, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + 
assert_eq!( + input.lwe_ciphertext_count(), + output.lwe_ciphertext_count(), + "Mismatched number of ciphertexts between input ({:?}) and output ({:?})", + input.lwe_ciphertext_count(), + output.lwe_ciphertext_count() + ); + let num_samples = output.lwe_ciphertext_count().0 as u32; + let lwe_dimension = &output.lwe_dimension(); + + stream.negate_lwe_ciphertext_vector_async( + &mut output.0.d_vec, + &input.0.d_vec, + *lwe_dimension, + num_samples, + ); +} + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +pub unsafe fn cuda_lwe_ciphertext_negate_assign_async( + ct: &mut CudaLweCiphertextList, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + let num_samples = ct.lwe_ciphertext_count().0 as u32; + let lwe_dimension = &ct.lwe_dimension(); + + stream.negate_lwe_ciphertext_vector_assign_async(&mut ct.0.d_vec, *lwe_dimension, num_samples); +} + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +pub unsafe fn cuda_lwe_ciphertext_cleartext_mul_async( + output: &mut CudaLweCiphertextList, + input: &CudaLweCiphertextList, + cleartext: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + assert_eq!( + input.lwe_ciphertext_count(), + output.lwe_ciphertext_count(), + "Mismatched number of ciphertexts between input ({:?}) and output ({:?})", + input.lwe_ciphertext_count(), + output.lwe_ciphertext_count() + ); + let num_samples = output.lwe_ciphertext_count().0 as u32; + let lwe_dimension = &output.lwe_dimension(); + + stream.mult_lwe_ciphertext_vector_cleartext_vector( + &mut output.0.d_vec, + &input.0.d_vec, + cleartext, + *lwe_dimension, + num_samples, + ); +} + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is 
synchronised +pub unsafe fn cuda_lwe_ciphertext_cleartext_mul_assign_async( + ct: &mut CudaLweCiphertextList, + cleartext: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + let num_samples = ct.lwe_ciphertext_count().0 as u32; + let lwe_dimension = ct.lwe_dimension(); + + stream.mult_lwe_ciphertext_vector_cleartext_vector_assign_async( + &mut ct.0.d_vec, + cleartext, + lwe_dimension, + num_samples, + ); +} + +pub fn cuda_lwe_ciphertext_add( + output: &mut CudaLweCiphertextList, + lhs: &CudaLweCiphertextList, + rhs: &CudaLweCiphertextList, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + unsafe { + cuda_lwe_ciphertext_add_async(output, lhs, rhs, stream); + } + stream.synchronize(); +} + +pub fn cuda_lwe_ciphertext_add_assign( + lhs: &mut CudaLweCiphertextList, + rhs: &CudaLweCiphertextList, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + unsafe { + cuda_lwe_ciphertext_add_assign_async(lhs, rhs, stream); + } + stream.synchronize(); +} + +pub fn cuda_lwe_ciphertext_plaintext_add( + output: &mut CudaLweCiphertextList, + lhs: &CudaLweCiphertextList, + rhs: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + unsafe { + cuda_lwe_ciphertext_plaintext_add_async(output, lhs, rhs, stream); + } + stream.synchronize(); +} + +pub fn cuda_lwe_ciphertext_plaintext_add_assign( + lhs: &mut CudaLweCiphertextList, + rhs: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + unsafe { + cuda_lwe_ciphertext_plaintext_add_assign_async(lhs, rhs, stream); + } + stream.synchronize(); +} + +pub fn cuda_lwe_ciphertext_negate( + output: &mut CudaLweCiphertextList, + input: &CudaLweCiphertextList, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + unsafe { + cuda_lwe_ciphertext_negate_async(output, input, stream); + } + stream.synchronize(); +} + +pub fn cuda_lwe_ciphertext_negate_assign( + ct: &mut CudaLweCiphertextList, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + 
unsafe { + cuda_lwe_ciphertext_negate_assign_async(ct, stream); + } + stream.synchronize(); +} + +pub fn cuda_lwe_ciphertext_cleartext_mul( + output: &mut CudaLweCiphertextList, + input: &CudaLweCiphertextList, + cleartext: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + unsafe { + cuda_lwe_ciphertext_cleartext_mul_async(output, input, cleartext, stream); + } + stream.synchronize(); +} + +pub fn cuda_lwe_ciphertext_cleartext_mul_assign( + ct: &mut CudaLweCiphertextList, + cleartext: &CudaVec, + stream: &CudaStream, +) where + Scalar: UnsignedInteger, +{ + unsafe { + cuda_lwe_ciphertext_cleartext_mul_assign_async(ct, cleartext, stream); + } + stream.synchronize(); +} diff --git a/tfhe/src/core_crypto/gpu/algorithms/lwe_multi_bit_programmable_bootstrapping.rs b/tfhe/src/core_crypto/gpu/algorithms/lwe_multi_bit_programmable_bootstrapping.rs new file mode 100644 index 000000000..ecc91ee87 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/lwe_multi_bit_programmable_bootstrapping.rs @@ -0,0 +1,123 @@ +use crate::core_crypto::gpu::entities::glwe_ciphertext_list::CudaGlweCiphertextList; +use crate::core_crypto::gpu::entities::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::entities::lwe_multi_bit_bootstrap_key::CudaLweMultiBitBootstrapKey; +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::{CastInto, LweCiphertextIndex, UnsignedTorus}; + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +#[allow(clippy::too_many_arguments)] +pub unsafe fn cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_async( + input: &CudaLweCiphertextList, + output: &mut CudaLweCiphertextList, + accumulator: &CudaGlweCiphertextList, + lut_indexes: &CudaVec, + output_indexes: &CudaVec, + input_indexes: &CudaVec, + multi_bit_bsk: &CudaLweMultiBitBootstrapKey, + 
stream: &CudaStream, +) where + // CastInto required for PBS modulus switch which returns a usize + Scalar: UnsignedTorus + CastInto, +{ + assert_eq!( + input.lwe_dimension(), + multi_bit_bsk.input_lwe_dimension(), + "Mimatched input LweDimension. LweCiphertext input LweDimension {:?}. \ + FourierLweMultiBitBootstrapKey input LweDimension {:?}.", + input.lwe_dimension(), + multi_bit_bsk.input_lwe_dimension(), + ); + + assert_eq!( + output.lwe_dimension(), + multi_bit_bsk.output_lwe_dimension(), + "Mimatched output LweDimension. LweCiphertext output LweDimension {:?}. \ + FourierLweMultiBitBootstrapKey output LweDimension {:?}.", + output.lwe_dimension(), + multi_bit_bsk.output_lwe_dimension(), + ); + + assert_eq!( + accumulator.glwe_dimension(), + multi_bit_bsk.glwe_dimension(), + "Mimatched GlweSize. Accumulator GlweSize {:?}. \ + FourierLweMultiBitBootstrapKey GlweSize {:?}.", + accumulator.glwe_dimension(), + multi_bit_bsk.glwe_dimension(), + ); + + assert_eq!( + accumulator.polynomial_size(), + multi_bit_bsk.polynomial_size(), + "Mimatched PolynomialSize. Accumulator PolynomialSize {:?}. 
\ + FourierLweMultiBitBootstrapKey PolynomialSize {:?}.", + accumulator.polynomial_size(), + multi_bit_bsk.polynomial_size(), + ); + + assert_eq!( + input.ciphertext_modulus(), + output.ciphertext_modulus(), + "Mismatched CiphertextModulus between input ({:?}) and output ({:?})", + input.ciphertext_modulus(), + output.ciphertext_modulus(), + ); + + assert_eq!( + input.ciphertext_modulus(), + accumulator.ciphertext_modulus(), + "Mismatched CiphertextModulus between input ({:?}) and accumulator ({:?})", + input.ciphertext_modulus(), + accumulator.ciphertext_modulus(), + ); + + stream.bootstrap_multi_bit_async( + &mut output.0.d_vec, + output_indexes, + &accumulator.0.d_vec, + lut_indexes, + &input.0.d_vec, + input_indexes, + &multi_bit_bsk.d_vec, + input.lwe_dimension(), + multi_bit_bsk.glwe_dimension(), + multi_bit_bsk.polynomial_size(), + multi_bit_bsk.decomp_base_log(), + multi_bit_bsk.decomp_level_count(), + multi_bit_bsk.grouping_factor(), + input.lwe_ciphertext_count().0 as u32, + LweCiphertextIndex(0), + ); +} + +#[allow(clippy::too_many_arguments)] +pub fn cuda_multi_bit_programmable_bootstrap_lwe_ciphertext( + input: &CudaLweCiphertextList, + output: &mut CudaLweCiphertextList, + accumulator: &CudaGlweCiphertextList, + lut_indexes: &CudaVec, + output_indexes: &CudaVec, + input_indexes: &CudaVec, + multi_bit_bsk: &CudaLweMultiBitBootstrapKey, + stream: &CudaStream, +) where + // CastInto required for PBS modulus switch which returns a usize + Scalar: UnsignedTorus + CastInto, +{ + unsafe { + cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_async( + input, + output, + accumulator, + lut_indexes, + output_indexes, + input_indexes, + multi_bit_bsk, + stream, + ); + } +} diff --git a/tfhe/src/core_crypto/gpu/algorithms/lwe_programmable_bootstrapping.rs b/tfhe/src/core_crypto/gpu/algorithms/lwe_programmable_bootstrapping.rs new file mode 100644 index 000000000..51929b919 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/lwe_programmable_bootstrapping.rs 
@@ -0,0 +1,81 @@ +use crate::core_crypto::gpu::entities::glwe_ciphertext_list::CudaGlweCiphertextList; +use crate::core_crypto::gpu::entities::lwe_bootstrap_key::CudaLweBootstrapKey; +use crate::core_crypto::gpu::entities::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::{ + CastInto, LweCiphertextCount, LweCiphertextIndex, UnsignedTorus, +}; + +/// # Safety +/// +/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not +/// be dropped until stream is synchronised +#[allow(clippy::too_many_arguments)] +pub unsafe fn cuda_programmable_bootstrap_lwe_ciphertext_async( + input: &CudaLweCiphertextList, + output: &mut CudaLweCiphertextList, + accumulator: &CudaGlweCiphertextList, + lut_indexes: &CudaVec, + output_indexes: &CudaVec, + input_indexes: &CudaVec, + num_samples: LweCiphertextCount, + bsk: &CudaLweBootstrapKey, + stream: &CudaStream, +) where + // CastInto required for PBS modulus switch which returns a usize + Scalar: UnsignedTorus + CastInto, +{ + assert_eq!(input.ciphertext_modulus(), output.ciphertext_modulus()); + assert_eq!( + output.ciphertext_modulus(), + accumulator.ciphertext_modulus() + ); + + stream.bootstrap_low_latency_async( + &mut output.0.d_vec, + output_indexes, + &accumulator.0.d_vec, + lut_indexes, + &input.0.d_vec, + input_indexes, + &bsk.d_vec, + input.lwe_dimension(), + bsk.glwe_dimension(), + bsk.polynomial_size(), + bsk.decomp_base_log(), + bsk.decomp_level_count(), + num_samples.0 as u32, + LweCiphertextIndex(0), + ); +} + +#[allow(clippy::too_many_arguments)] +pub fn cuda_programmable_bootstrap_lwe_ciphertext( + input: &CudaLweCiphertextList, + output: &mut CudaLweCiphertextList, + accumulator: &CudaGlweCiphertextList, + lut_indexes: &CudaVec, + output_indexes: &CudaVec, + input_indexes: &CudaVec, + num_samples: LweCiphertextCount, + bsk: &CudaLweBootstrapKey, + stream: 
&CudaStream, +) where + // CastInto required for PBS modulus switch which returns a usize + Scalar: UnsignedTorus + CastInto, +{ + unsafe { + cuda_programmable_bootstrap_lwe_ciphertext_async( + input, + output, + accumulator, + lut_indexes, + output_indexes, + input_indexes, + num_samples, + bsk, + stream, + ); + } +} diff --git a/tfhe/src/core_crypto/gpu/algorithms/mod.rs b/tfhe/src/core_crypto/gpu/algorithms/mod.rs new file mode 100644 index 000000000..9dafafcbf --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/mod.rs @@ -0,0 +1,12 @@ +pub mod lwe_linear_algebra; +pub mod lwe_multi_bit_programmable_bootstrapping; +pub mod lwe_programmable_bootstrapping; + +mod lwe_keyswitch; +#[cfg(test)] +mod test; + +pub use lwe_keyswitch::*; +pub use lwe_linear_algebra::*; +pub use lwe_multi_bit_programmable_bootstrapping::*; +pub use lwe_programmable_bootstrapping::*; diff --git a/tfhe/src/core_crypto/gpu/algorithms/test/lwe_keyswitch.rs b/tfhe/src/core_crypto/gpu/algorithms/test/lwe_keyswitch.rs new file mode 100644 index 000000000..484a4e4bb --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/test/lwe_keyswitch.rs @@ -0,0 +1,123 @@ +use super::*; +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::lwe_keyswitch_key::CudaLweKeyswitchKey; +use crate::core_crypto::gpu::{cuda_keyswitch_lwe_ciphertext, CudaDevice, CudaStream}; +use itertools::Itertools; + +fn lwe_encrypt_ks_decrypt_custom_mod>( + params: ClassicTestParams, +) { + let lwe_dimension = params.lwe_dimension; + let lwe_modular_std_dev = params.lwe_modular_std_dev; + let ciphertext_modulus = params.ciphertext_modulus; + let message_modulus_log = params.message_modulus_log; + let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus); + let glwe_dimension = params.glwe_dimension; + let polynomial_size = params.polynomial_size; + let ks_decomp_base_log = params.ks_base_log; + let ks_decomp_level_count = params.ks_level; + + let gpu_index = 0; + 
let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + let mut rsc = TestResources::new(); + + const NB_TESTS: usize = 10; + let msg_modulus = Scalar::ONE.shl(message_modulus_log.0); + let mut msg = msg_modulus; + let delta: Scalar = encoding_with_padding / msg_modulus; + + while msg != Scalar::ZERO { + msg = msg.wrapping_sub(Scalar::ONE); + for _ in 0..NB_TESTS { + let lwe_sk = allocate_and_generate_new_binary_lwe_secret_key( + lwe_dimension, + &mut rsc.secret_random_generator, + ); + + let glwe_sk = allocate_and_generate_new_binary_glwe_secret_key( + glwe_dimension, + polynomial_size, + &mut rsc.secret_random_generator, + ); + + let big_lwe_sk = glwe_sk.into_lwe_secret_key(); + + let ksk_big_to_small = allocate_and_generate_new_lwe_keyswitch_key( + &big_lwe_sk, + &lwe_sk, + ks_decomp_base_log, + ks_decomp_level_count, + lwe_modular_std_dev, + ciphertext_modulus, + &mut rsc.encryption_random_generator, + ); + + assert!(check_encrypted_content_respects_mod( + &ksk_big_to_small, + ciphertext_modulus + )); + + let d_ksk_big_to_small = + CudaLweKeyswitchKey::from_lwe_keyswitch_key(&ksk_big_to_small, &stream); + + let plaintext = Plaintext(msg * delta); + + let ct = allocate_and_encrypt_new_lwe_ciphertext( + &big_lwe_sk, + plaintext, + lwe_modular_std_dev, + ciphertext_modulus, + &mut rsc.encryption_random_generator, + ); + + assert!(check_encrypted_content_respects_mod( + &ct, + ciphertext_modulus + )); + + let d_ct = CudaLweCiphertextList::from_lwe_ciphertext(&ct, &stream); + let mut d_output_ct = CudaLweCiphertextList::new( + ksk_big_to_small.output_key_lwe_dimension(), + LweCiphertextCount(1), + ciphertext_modulus, + &stream, + ); + let num_blocks = d_ct.0.lwe_ciphertext_count.0; + let lwe_indexes_usize = (0..num_blocks).collect_vec(); + let lwe_indexes = lwe_indexes_usize + .iter() + .map(|&x| >::cast_into(x)) + .collect_vec(); + let mut d_input_indexes = stream.malloc_async::(num_blocks as u32); + let mut d_output_indexes = 
stream.malloc_async::(num_blocks as u32); + stream.copy_to_gpu_async(&mut d_input_indexes, &lwe_indexes); + stream.copy_to_gpu_async(&mut d_output_indexes, &lwe_indexes); + + cuda_keyswitch_lwe_ciphertext( + &d_ksk_big_to_small, + &d_ct, + &mut d_output_ct, + &d_input_indexes, + &d_output_indexes, + &stream, + ); + + let output_ct = d_output_ct.into_lwe_ciphertext(&stream); + + assert!(check_encrypted_content_respects_mod( + &output_ct, + ciphertext_modulus + )); + + let decrypted = decrypt_lwe_ciphertext(&lwe_sk, &output_ct); + + let decoded = round_decode(decrypted.0, delta) % msg_modulus; + + assert_eq!(msg, decoded); + } + } +} + +create_gpu_parametrized_test!(lwe_encrypt_ks_decrypt_custom_mod); diff --git a/tfhe/src/core_crypto/gpu/algorithms/test/lwe_linear_algebra.rs b/tfhe/src/core_crypto/gpu/algorithms/test/lwe_linear_algebra.rs new file mode 100644 index 000000000..9f39dcff4 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/test/lwe_linear_algebra.rs @@ -0,0 +1,78 @@ +use super::*; +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::{cuda_lwe_ciphertext_add_assign, CudaDevice, CudaStream}; + +fn lwe_encrypt_add_assign_decrypt_custom_mod( + params: ClassicTestParams, +) { + let lwe_dimension = params.lwe_dimension; + let lwe_modular_std_dev = params.lwe_modular_std_dev; + let ciphertext_modulus = params.ciphertext_modulus; + let message_modulus_log = params.message_modulus_log; + let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus); + + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + let mut rsc = TestResources::new(); + + const NB_TESTS: usize = 10; + let msg_modulus = Scalar::ONE.shl(message_modulus_log.0); + let mut msg = msg_modulus; + let delta: Scalar = encoding_with_padding / msg_modulus; + + while msg != Scalar::ZERO { + msg = msg.wrapping_sub(Scalar::ONE); + for _ in 0..NB_TESTS { + let lwe_sk = 
allocate_and_generate_new_binary_lwe_secret_key( + lwe_dimension, + &mut rsc.secret_random_generator, + ); + + let mut ct = LweCiphertext::new( + Scalar::ZERO, + lwe_dimension.to_lwe_size(), + ciphertext_modulus, + ); + + let plaintext = Plaintext(msg * delta); + + encrypt_lwe_ciphertext( + &lwe_sk, + &mut ct, + plaintext, + lwe_modular_std_dev, + &mut rsc.encryption_random_generator, + ); + + assert!(check_encrypted_content_respects_mod( + &ct, + ciphertext_modulus + )); + + let rhs = ct.clone(); + + // Convert to CUDA objects + let mut d_ct = CudaLweCiphertextList::from_lwe_ciphertext(&ct, &stream); + let d_rhs = CudaLweCiphertextList::from_lwe_ciphertext(&rhs, &stream); + + cuda_lwe_ciphertext_add_assign(&mut d_ct, &d_rhs, &stream); + + let output = d_ct.into_lwe_ciphertext(&stream); + + assert!(check_encrypted_content_respects_mod( + &output, + ciphertext_modulus + )); + + let decrypted = decrypt_lwe_ciphertext(&lwe_sk, &output); + + let decoded = round_decode(decrypted.0, delta) % msg_modulus; + + assert_eq!((msg + msg) % msg_modulus, decoded); + } + } +} + +create_gpu_parametrized_test!(lwe_encrypt_add_assign_decrypt_custom_mod); diff --git a/tfhe/src/core_crypto/gpu/algorithms/test/lwe_multi_bit_programmable_bootstrapping.rs b/tfhe/src/core_crypto/gpu/algorithms/test/lwe_multi_bit_programmable_bootstrapping.rs new file mode 100644 index 000000000..e2b20a3f9 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/test/lwe_multi_bit_programmable_bootstrapping.rs @@ -0,0 +1,216 @@ +use super::*; +use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList; +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::lwe_multi_bit_bootstrap_key::CudaLweMultiBitBootstrapKey; +use crate::core_crypto::gpu::{ + cuda_multi_bit_programmable_bootstrap_lwe_ciphertext, CudaDevice, CudaStream, +}; +use itertools::Itertools; + +pub struct MultiBitParams { + pub input_lwe_dimension: LweDimension, + pub 
lwe_modular_std_dev: StandardDev, + pub decomp_base_log: DecompositionBaseLog, + pub decomp_level_count: DecompositionLevelCount, + pub glwe_dimension: GlweDimension, + pub polynomial_size: PolynomialSize, + pub glwe_modular_std_dev: StandardDev, + pub message_modulus_log: CiphertextModulusLog, + pub ciphertext_modulus: CiphertextModulus, + pub grouping_factor: LweBskGroupingFactor, +} + +#[allow(clippy::too_many_arguments)] +fn lwe_encrypt_multi_bit_pbs_decrypt_custom_mod< + Scalar: UnsignedTorus + Sync + Send + CastFrom + CastInto, +>( + params: MultiBitParams, +) { + assert!(Scalar::BITS <= 64); + + let input_lwe_dimension = params.input_lwe_dimension; + let lwe_modular_std_dev = params.lwe_modular_std_dev; + let glwe_modular_std_dev = params.glwe_modular_std_dev; + let ciphertext_modulus = params.ciphertext_modulus; + let message_modulus_log = params.message_modulus_log; + let msg_modulus = Scalar::ONE.shl(message_modulus_log.0); + let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus); + let glwe_dimension = params.glwe_dimension; + let polynomial_size = params.polynomial_size; + let decomp_base_log = params.decomp_base_log; + let decomp_level_count = params.decomp_level_count; + let grouping_factor = params.grouping_factor; + + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + let mut rsc = TestResources::new(); + + let f = |x: Scalar| { + x.wrapping_mul(Scalar::TWO) + .wrapping_sub(Scalar::ONE) + .wrapping_rem(msg_modulus) + }; + + let delta: Scalar = encoding_with_padding / msg_modulus; + let mut msg = msg_modulus; + const NB_TESTS: usize = 10; + let number_of_messages = 1; + + let accumulator = generate_accumulator( + polynomial_size, + glwe_dimension.to_glwe_size(), + msg_modulus.cast_into(), + ciphertext_modulus, + delta, + f, + ); + + assert!(check_encrypted_content_respects_mod( + &accumulator, + ciphertext_modulus + )); + + while msg != Scalar::ZERO { + msg = 
msg.wrapping_sub(Scalar::ONE); + // Create the LweSecretKey + let input_lwe_secret_key = allocate_and_generate_new_binary_lwe_secret_key( + input_lwe_dimension, + &mut rsc.secret_random_generator, + ); + let output_glwe_secret_key = allocate_and_generate_new_binary_glwe_secret_key( + glwe_dimension, + polynomial_size, + &mut rsc.secret_random_generator, + ); + let output_lwe_secret_key = output_glwe_secret_key.clone().into_lwe_secret_key(); + let output_lwe_dimension = output_lwe_secret_key.lwe_dimension(); + + let mut bsk = LweMultiBitBootstrapKey::new( + Scalar::ZERO, + glwe_dimension.to_glwe_size(), + polynomial_size, + decomp_base_log, + decomp_level_count, + input_lwe_dimension, + grouping_factor, + ciphertext_modulus, + ); + + par_generate_lwe_multi_bit_bootstrap_key( + &input_lwe_secret_key, + &output_glwe_secret_key, + &mut bsk, + glwe_modular_std_dev, + &mut rsc.encryption_random_generator, + ); + + assert!(check_encrypted_content_respects_mod( + &*bsk, + ciphertext_modulus + )); + + let d_bsk = CudaLweMultiBitBootstrapKey::from_lwe_multi_bit_bootstrap_key(&bsk, &stream); + + for _ in 0..NB_TESTS { + let plaintext = Plaintext(msg * delta); + + let lwe_ciphertext_in = allocate_and_encrypt_new_lwe_ciphertext( + &input_lwe_secret_key, + plaintext, + lwe_modular_std_dev, + ciphertext_modulus, + &mut rsc.encryption_random_generator, + ); + + assert!(check_encrypted_content_respects_mod( + &lwe_ciphertext_in, + ciphertext_modulus + )); + + let d_lwe_ciphertext_in = + CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &stream); + let mut d_out_pbs_ct = CudaLweCiphertextList::new( + output_lwe_dimension, + LweCiphertextCount(1), + ciphertext_modulus, + &stream, + ); + let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &stream); + + let mut test_vector_indexes: Vec = vec![Scalar::ZERO; number_of_messages]; + for (i, ind) in test_vector_indexes.iter_mut().enumerate() { + *ind = >::cast_into(i); + } + + let mut 
d_test_vector_indexes = + stream.malloc_async::(number_of_messages as u32); + stream.copy_to_gpu_async(&mut d_test_vector_indexes, &test_vector_indexes); + + let num_blocks = d_lwe_ciphertext_in.0.lwe_ciphertext_count.0; + let lwe_indexes_usize: Vec = (0..num_blocks).collect_vec(); + let lwe_indexes = lwe_indexes_usize + .iter() + .map(|&x| >::cast_into(x)) + .collect_vec(); + let mut d_output_indexes = stream.malloc_async::(num_blocks as u32); + let mut d_input_indexes = stream.malloc_async::(num_blocks as u32); + stream.copy_to_gpu_async(&mut d_output_indexes, &lwe_indexes); + stream.copy_to_gpu_async(&mut d_input_indexes, &lwe_indexes); + + cuda_multi_bit_programmable_bootstrap_lwe_ciphertext( + &d_lwe_ciphertext_in, + &mut d_out_pbs_ct, + &d_accumulator, + &d_test_vector_indexes, + &d_output_indexes, + &d_input_indexes, + &d_bsk, + &stream, + ); + + let out_pbs_ct = d_out_pbs_ct.into_lwe_ciphertext(&stream); + assert!(check_encrypted_content_respects_mod( + &out_pbs_ct, + ciphertext_modulus + )); + + let decrypted = decrypt_lwe_ciphertext(&output_lwe_secret_key, &out_pbs_ct); + + let decoded = round_decode(decrypted.0, delta) % msg_modulus; + + assert_eq!(decoded, f(msg)); + } + } +} + +create_gpu_multi_bit_parametrized_test!(lwe_encrypt_multi_bit_pbs_decrypt_custom_mod); + +// DISCLAIMER: these toy example parameters are not guaranteed to be secure or yield +// correct computations +const TEST_PARAMS_MULTI_BIT_2_2_2: MultiBitParams = MultiBitParams { + input_lwe_dimension: LweDimension(818), + lwe_modular_std_dev: StandardDev(1.3880686109937e-11), + decomp_base_log: DecompositionBaseLog(22), + decomp_level_count: DecompositionLevelCount(1), + glwe_dimension: GlweDimension(1), + polynomial_size: PolynomialSize(2048), + glwe_modular_std_dev: StandardDev(1.1919984450689246e-23), + message_modulus_log: CiphertextModulusLog(4), + ciphertext_modulus: CiphertextModulus::new_native(), + grouping_factor: LweBskGroupingFactor(2), +}; + +const 
TEST_PARAMS_MULTI_BIT_2_2_3: MultiBitParams = MultiBitParams { + input_lwe_dimension: LweDimension(888), + lwe_modular_std_dev: StandardDev(0.0000006125031601933181), + decomp_base_log: DecompositionBaseLog(21), + decomp_level_count: DecompositionLevelCount(1), + glwe_dimension: GlweDimension(1), + polynomial_size: PolynomialSize(2048), + glwe_modular_std_dev: StandardDev(0.0000000000000003152931493498455), + message_modulus_log: CiphertextModulusLog(4), + ciphertext_modulus: CiphertextModulus::new_native(), + grouping_factor: LweBskGroupingFactor(3), +}; diff --git a/tfhe/src/core_crypto/gpu/algorithms/test/lwe_programmable_bootstrapping.rs b/tfhe/src/core_crypto/gpu/algorithms/test/lwe_programmable_bootstrapping.rs new file mode 100644 index 000000000..7e046d990 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/test/lwe_programmable_bootstrapping.rs @@ -0,0 +1,191 @@ +use super::*; +use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList; +use crate::core_crypto::gpu::lwe_bootstrap_key::CudaLweBootstrapKey; +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::{cuda_programmable_bootstrap_lwe_ciphertext, CudaDevice, CudaStream}; +use itertools::Itertools; + +fn lwe_encrypt_pbs_decrypt< + Scalar: UnsignedTorus + Sync + Send + CastFrom + CastInto, +>( + params: ClassicTestParams, +) { + assert!(Scalar::BITS <= 64); + + let input_lwe_dimension = params.lwe_dimension; + let lwe_modular_std_dev = params.lwe_modular_std_dev; + let glwe_modular_std_dev = params.glwe_modular_std_dev; + let ciphertext_modulus = params.ciphertext_modulus; + let message_modulus_log = params.message_modulus_log; + let msg_modulus = Scalar::ONE.shl(message_modulus_log.0); + let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus); + let glwe_dimension = params.glwe_dimension; + let polynomial_size = params.polynomial_size; + let decomp_base_log = params.pbs_base_log; + let decomp_level_count = 
params.pbs_level; + + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + let mut rsc = TestResources::new(); + + let f = |x: Scalar| { + x.wrapping_mul(Scalar::TWO) + .wrapping_sub(Scalar::ONE) + .wrapping_rem(msg_modulus) + }; + + let delta: Scalar = encoding_with_padding / msg_modulus; + let mut msg = msg_modulus; + const NB_TESTS: usize = 10; + let number_of_messages = 1; + + let accumulator = generate_accumulator( + polynomial_size, + glwe_dimension.to_glwe_size(), + msg_modulus.cast_into(), + ciphertext_modulus, + delta, + f, + ); + + assert!(check_encrypted_content_respects_mod( + &accumulator, + ciphertext_modulus + )); + + while msg != Scalar::ZERO { + msg = msg.wrapping_sub(Scalar::ONE); + // Create the LweSecretKey + let input_lwe_secret_key = allocate_and_generate_new_binary_lwe_secret_key( + input_lwe_dimension, + &mut rsc.secret_random_generator, + ); + let output_glwe_secret_key = allocate_and_generate_new_binary_glwe_secret_key( + glwe_dimension, + polynomial_size, + &mut rsc.secret_random_generator, + ); + let output_lwe_secret_key = output_glwe_secret_key.clone().into_lwe_secret_key(); + let output_lwe_dimension = output_lwe_secret_key.lwe_dimension(); + + let mut bsk = LweBootstrapKey::new( + Scalar::ZERO, + glwe_dimension.to_glwe_size(), + polynomial_size, + decomp_base_log, + decomp_level_count, + input_lwe_dimension, + ciphertext_modulus, + ); + + par_generate_lwe_bootstrap_key( + &input_lwe_secret_key, + &output_glwe_secret_key, + &mut bsk, + glwe_modular_std_dev, + &mut rsc.encryption_random_generator, + ); + + assert!(check_encrypted_content_respects_mod( + &*bsk, + ciphertext_modulus + )); + + let d_bsk = CudaLweBootstrapKey::from_lwe_bootstrap_key(&bsk, &stream); + + for _ in 0..NB_TESTS { + let plaintext = Plaintext(msg * delta); + + let lwe_ciphertext_in = allocate_and_encrypt_new_lwe_ciphertext( + &input_lwe_secret_key, + plaintext, + lwe_modular_std_dev, + 
ciphertext_modulus, + &mut rsc.encryption_random_generator, + ); + + assert!(check_encrypted_content_respects_mod( + &lwe_ciphertext_in, + ciphertext_modulus + )); + + let d_lwe_ciphertext_in = + CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &stream); + let mut d_out_pbs_ct = CudaLweCiphertextList::new( + output_lwe_dimension, + LweCiphertextCount(1), + ciphertext_modulus, + &stream, + ); + let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &stream); + + let mut test_vector_indexes: Vec = vec![Scalar::ZERO; number_of_messages]; + for (i, ind) in test_vector_indexes.iter_mut().enumerate() { + *ind = >::cast_into(i); + } + + let mut d_test_vector_indexes = + stream.malloc_async::(number_of_messages as u32); + stream.copy_to_gpu_async(&mut d_test_vector_indexes, &test_vector_indexes); + + let num_blocks = d_lwe_ciphertext_in.0.lwe_ciphertext_count.0; + let lwe_indexes_usize: Vec = (0..num_blocks).collect_vec(); + let lwe_indexes = lwe_indexes_usize + .iter() + .map(|&x| >::cast_into(x)) + .collect_vec(); + let mut d_output_indexes = stream.malloc_async::(num_blocks as u32); + let mut d_input_indexes = stream.malloc_async::(num_blocks as u32); + stream.copy_to_gpu_async(&mut d_output_indexes, &lwe_indexes); + stream.copy_to_gpu_async(&mut d_input_indexes, &lwe_indexes); + + cuda_programmable_bootstrap_lwe_ciphertext( + &d_lwe_ciphertext_in, + &mut d_out_pbs_ct, + &d_accumulator, + &d_test_vector_indexes, + &d_output_indexes, + &d_input_indexes, + LweCiphertextCount(num_blocks), + &d_bsk, + &stream, + ); + + let out_pbs_ct = d_out_pbs_ct.into_lwe_ciphertext(&stream); + assert!(check_encrypted_content_respects_mod( + &out_pbs_ct, + ciphertext_modulus + )); + + let decrypted = decrypt_lwe_ciphertext(&output_lwe_secret_key, &out_pbs_ct); + + let decoded = round_decode(decrypted.0, delta) % msg_modulus; + + assert_eq!(decoded, f(msg)); + } + } +} + +create_gpu_parametrized_test!(lwe_encrypt_pbs_decrypt); + +// DISCLAIMER: all 
parameters here are not guaranteed to be secure or yield correct computations +pub const TEST_PARAMS_4_BITS_NATIVE_U64: ClassicTestParams = ClassicTestParams { + lwe_dimension: LweDimension(742), + glwe_dimension: GlweDimension(1), + polynomial_size: PolynomialSize(2048), + lwe_modular_std_dev: StandardDev(4.9982771e-11), + glwe_modular_std_dev: StandardDev(8.6457178e-32), + pbs_base_log: DecompositionBaseLog(23), + pbs_level: DecompositionLevelCount(1), + ks_level: DecompositionLevelCount(5), + ks_base_log: DecompositionBaseLog(3), + pfks_level: DecompositionLevelCount(1), + pfks_base_log: DecompositionBaseLog(23), + pfks_modular_std_dev: StandardDev(0.00000000000000029403601535432533), + cbs_level: DecompositionLevelCount(0), + cbs_base_log: DecompositionBaseLog(0), + message_modulus_log: CiphertextModulusLog(4), + ciphertext_modulus: CiphertextModulus::new_native(), +}; diff --git a/tfhe/src/core_crypto/gpu/algorithms/test/mod.rs b/tfhe/src/core_crypto/gpu/algorithms/test/mod.rs new file mode 100644 index 000000000..f5ffe7d79 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/algorithms/test/mod.rs @@ -0,0 +1,49 @@ +use crate::core_crypto::algorithms::test::*; +use crate::core_crypto::prelude::*; +use paste::paste; + +mod lwe_keyswitch; +mod lwe_linear_algebra; +mod lwe_multi_bit_programmable_bootstrapping; +mod lwe_programmable_bootstrapping; + +// Macro to generate tests for all parameter sets +macro_rules! create_gpu_parametrized_test{ + ($name:ident { $($param:ident),* }) => { + paste! { + $( + #[test] + fn []() { + $name($param) + } + )* + } + }; + ($name:ident)=> { + create_gpu_parametrized_test!($name + { + TEST_PARAMS_4_BITS_NATIVE_U64 + }); + }; +} +macro_rules! create_gpu_multi_bit_parametrized_test{ + ($name:ident { $($param:ident),* }) => { + paste! 
{ + $( + #[test] + fn []() { + $name($param) + } + )* + } + }; + ($name:ident)=> { + create_gpu_multi_bit_parametrized_test!($name + { + TEST_PARAMS_MULTI_BIT_2_2_2, + TEST_PARAMS_MULTI_BIT_2_2_3 + }); + }; +} + +use {create_gpu_multi_bit_parametrized_test, create_gpu_parametrized_test}; diff --git a/tfhe/src/core_crypto/gpu/entities/glwe_ciphertext_list.rs b/tfhe/src/core_crypto/gpu/entities/glwe_ciphertext_list.rs new file mode 100644 index 000000000..7b68b6710 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/entities/glwe_ciphertext_list.rs @@ -0,0 +1,128 @@ +use crate::core_crypto::gpu::{CudaGlweList, CudaStream}; +use crate::core_crypto::prelude::{ + glwe_ciphertext_size, CiphertextModulus, Container, GlweCiphertext, GlweCiphertextCount, + GlweCiphertextList, GlweDimension, PolynomialSize, UnsignedInteger, +}; + +/// A structure representing a vector of GLWE ciphertexts with 64 bits of precision on the GPU. +#[derive(Debug)] +pub struct CudaGlweCiphertextList(pub(crate) CudaGlweList); + +#[allow(dead_code)] +impl CudaGlweCiphertextList { + pub fn new( + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + glwe_ciphertext_count: GlweCiphertextCount, + ciphertext_modulus: CiphertextModulus, + stream: &CudaStream, + ) -> Self { + // Allocate memory in the device + let d_vec = stream.malloc_async( + (glwe_ciphertext_size(glwe_dimension.to_glwe_size(), polynomial_size) + * glwe_ciphertext_count.0) as u32, + ); + + let cuda_glwe_list = CudaGlweList { + d_vec, + glwe_ciphertext_count, + glwe_dimension, + polynomial_size, + ciphertext_modulus, + }; + + Self(cuda_glwe_list) + } + + pub fn from_glwe_ciphertext_list>( + h_ct: &GlweCiphertextList, + stream: &CudaStream, + ) -> Self { + let glwe_dimension = h_ct.glwe_size().to_glwe_dimension(); + let glwe_ciphertext_count = h_ct.glwe_ciphertext_count(); + let polynomial_size = h_ct.polynomial_size(); + let ciphertext_modulus = h_ct.ciphertext_modulus(); + + let mut d_vec = stream.malloc_async( + 
(glwe_ciphertext_size(glwe_dimension.to_glwe_size(), polynomial_size) + * glwe_ciphertext_count.0) as u32, + ); + + // Copy to the GPU + stream.copy_to_gpu_async(&mut d_vec, h_ct.as_ref()); + + let cuda_glwe_list = CudaGlweList { + d_vec, + glwe_ciphertext_count, + glwe_dimension, + polynomial_size, + ciphertext_modulus, + }; + + Self(cuda_glwe_list) + } + + pub(crate) fn to_glwe_ciphertext_list( + &self, + stream: &CudaStream, + ) -> GlweCiphertextList> { + let glwe_ct_size = self.0.glwe_ciphertext_count.0 + * glwe_ciphertext_size(self.0.glwe_dimension.to_glwe_size(), self.0.polynomial_size); + let mut container: Vec = vec![T::ZERO; glwe_ct_size]; + + stream.copy_to_cpu_async(container.as_mut_slice(), &self.0.d_vec); + stream.synchronize(); + + GlweCiphertextList::from_container( + container, + self.glwe_dimension().to_glwe_size(), + self.polynomial_size(), + self.ciphertext_modulus(), + ) + } + + pub fn from_glwe_ciphertext>( + h_ct: &GlweCiphertext, + stream: &CudaStream, + ) -> Self { + let glwe_dimension = h_ct.glwe_size().to_glwe_dimension(); + let glwe_ciphertext_count = GlweCiphertextCount(1); + let polynomial_size = h_ct.polynomial_size(); + let ciphertext_modulus = h_ct.ciphertext_modulus(); + + let mut d_vec = stream.malloc_async( + (glwe_ciphertext_size(glwe_dimension.to_glwe_size(), polynomial_size) + * glwe_ciphertext_count.0) as u32, + ); + + // Copy to the GPU + let h_input = h_ct.as_view().into_container(); + stream.copy_to_gpu_async(&mut d_vec, h_input.as_ref()); + + let cuda_glwe_list = CudaGlweList { + d_vec, + glwe_ciphertext_count, + glwe_dimension, + polynomial_size, + ciphertext_modulus, + }; + + Self(cuda_glwe_list) + } + + pub(crate) fn glwe_dimension(&self) -> GlweDimension { + self.0.glwe_dimension + } + + pub(crate) fn polynomial_size(&self) -> PolynomialSize { + self.0.polynomial_size + } + + pub(crate) fn glwe_ciphertext_count(&self) -> GlweCiphertextCount { + self.0.glwe_ciphertext_count + } + + pub(crate) fn 
ciphertext_modulus(&self) -> CiphertextModulus { + self.0.ciphertext_modulus + } +} diff --git a/tfhe/src/core_crypto/gpu/entities/lwe_bootstrap_key.rs b/tfhe/src/core_crypto/gpu/entities/lwe_bootstrap_key.rs new file mode 100644 index 000000000..1dc9bf3cd --- /dev/null +++ b/tfhe/src/core_crypto/gpu/entities/lwe_bootstrap_key.rs @@ -0,0 +1,90 @@ +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::{ + lwe_bootstrap_key_size, Container, DecompositionBaseLog, DecompositionLevelCount, + GlweDimension, LweBootstrapKey, LweDimension, PolynomialSize, UnsignedInteger, +}; + +/// A structure representing an LWE bootstrap key held on the GPU, together with its parameters. +#[derive(Debug)] +#[allow(dead_code)] +pub struct CudaLweBootstrapKey { + // Pointers to GPU data + pub(crate) d_vec: CudaVec, + // Lwe dimension + pub(crate) input_lwe_dimension: LweDimension, + // Glwe dimension + pub(crate) glwe_dimension: GlweDimension, + // Polynomial size + pub(crate) polynomial_size: PolynomialSize, + // Base log + pub(crate) decomp_base_log: DecompositionBaseLog, + // Decomposition level count + pub(crate) decomp_level_count: DecompositionLevelCount, +} + +#[allow(dead_code)] +impl CudaLweBootstrapKey { + pub fn from_lwe_bootstrap_key( + bsk: &LweBootstrapKey, + stream: &CudaStream, + ) -> Self + where + InputBskCont::Element: UnsignedInteger, + { + let input_lwe_dimension = bsk.input_lwe_dimension(); + let polynomial_size = bsk.polynomial_size(); + let decomp_level_count = bsk.decomposition_level_count(); + let decomp_base_log = bsk.decomposition_base_log(); + let glwe_dimension = bsk.glwe_size().to_glwe_dimension(); + + // Allocate memory + let mut d_vec = stream.malloc_async::(lwe_bootstrap_key_size( + input_lwe_dimension, + glwe_dimension.to_glwe_size(), + polynomial_size, + decomp_level_count, + ) as u32); + // Copy to the GPU + stream.convert_lwe_bootstrap_key_async( + &mut d_vec, + bsk.as_ref(), + 
input_lwe_dimension, + glwe_dimension, + decomp_level_count, + polynomial_size, + ); + stream.synchronize(); + Self { + d_vec, + input_lwe_dimension, + glwe_dimension, + polynomial_size, + decomp_base_log, + decomp_level_count, + } + } + + pub(crate) fn input_lwe_dimension(&self) -> LweDimension { + self.input_lwe_dimension + } + + pub(crate) fn output_lwe_dimension(&self) -> LweDimension { + LweDimension(self.glwe_dimension.0 * self.polynomial_size.0) + } + + pub(crate) fn glwe_dimension(&self) -> GlweDimension { + self.glwe_dimension + } + + pub(crate) fn polynomial_size(&self) -> PolynomialSize { + self.polynomial_size + } + + pub(crate) fn decomp_base_log(&self) -> DecompositionBaseLog { + self.decomp_base_log + } + pub(crate) fn decomp_level_count(&self) -> DecompositionLevelCount { + self.decomp_level_count + } +} diff --git a/tfhe/src/core_crypto/gpu/entities/lwe_ciphertext_list.rs b/tfhe/src/core_crypto/gpu/entities/lwe_ciphertext_list.rs new file mode 100644 index 000000000..bd5e39d7a --- /dev/null +++ b/tfhe/src/core_crypto/gpu/entities/lwe_ciphertext_list.rs @@ -0,0 +1,174 @@ +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::{CudaLweList, CudaStream}; +use crate::core_crypto::prelude::{ + CiphertextModulus, Container, LweCiphertext, LweCiphertextCount, LweCiphertextList, + LweDimension, LweSize, UnsignedInteger, +}; + +/// A structure representing a vector of LWE ciphertexts with 64 bits of precision on the GPU. 
+#[derive(Debug)] +pub struct CudaLweCiphertextList(pub(crate) CudaLweList); + +#[allow(dead_code)] +impl CudaLweCiphertextList { + pub fn new( + lwe_dimension: LweDimension, + lwe_ciphertext_count: LweCiphertextCount, + ciphertext_modulus: CiphertextModulus, + stream: &CudaStream, + ) -> Self { + // Allocate memory in the device + let d_vec = + stream.malloc_async((lwe_dimension.to_lwe_size().0 * lwe_ciphertext_count.0) as u32); + + let cuda_lwe_list = CudaLweList { + d_vec, + lwe_ciphertext_count, + lwe_dimension, + ciphertext_modulus, + }; + + Self(cuda_lwe_list) + } + + pub fn from_lwe_ciphertext_list>( + h_ct: &LweCiphertextList, + stream: &CudaStream, + ) -> Self { + let lwe_dimension = h_ct.lwe_size().to_lwe_dimension(); + let lwe_ciphertext_count = h_ct.lwe_ciphertext_count(); + let ciphertext_modulus = h_ct.ciphertext_modulus(); + + // Copy to the GPU + let h_input = h_ct.as_view().into_container(); + let mut d_vec = + stream.malloc_async((lwe_dimension.to_lwe_size().0 * lwe_ciphertext_count.0) as u32); + stream.copy_to_gpu_async(&mut d_vec, h_input.as_ref()); + stream.synchronize(); + let cuda_lwe_list = CudaLweList { + d_vec, + lwe_ciphertext_count, + lwe_dimension, + ciphertext_modulus, + }; + Self(cuda_lwe_list) + } + + pub fn from_cuda_vec( + d_vec: CudaVec, + lwe_ciphertext_count: LweCiphertextCount, + ciphertext_modulus: CiphertextModulus, + ) -> Self { + let lwe_dimension = LweSize(d_vec.len() / lwe_ciphertext_count.0).to_lwe_dimension(); + let cuda_lwe_list = CudaLweList { + d_vec, + lwe_ciphertext_count, + lwe_dimension, + ciphertext_modulus, + }; + Self(cuda_lwe_list) + } + + pub fn to_lwe_ciphertext_list(&self, stream: &CudaStream) -> LweCiphertextList> { + let lwe_ct_size = self.0.lwe_ciphertext_count.0 * self.0.lwe_dimension.to_lwe_size().0; + let mut container: Vec = vec![T::ZERO; lwe_ct_size]; + + stream.copy_to_cpu_async(container.as_mut_slice(), &self.0.d_vec); + stream.synchronize(); + + LweCiphertextList::from_container( + container, + 
self.lwe_dimension().to_lwe_size(), + self.ciphertext_modulus(), + ) + } + + pub fn from_lwe_ciphertext>( + h_ct: &LweCiphertext, + stream: &CudaStream, + ) -> Self { + let lwe_dimension = h_ct.lwe_size().to_lwe_dimension(); + let lwe_ciphertext_count = LweCiphertextCount(1); + let ciphertext_modulus = h_ct.ciphertext_modulus(); + + // Copy to the GPU + let mut d_vec = stream.malloc_async((lwe_dimension.to_lwe_size().0) as u32); + stream.copy_to_gpu_async(&mut d_vec, h_ct.as_ref()); + + let cuda_lwe_list = CudaLweList { + d_vec, + lwe_ciphertext_count, + lwe_dimension, + ciphertext_modulus, + }; + Self(cuda_lwe_list) + } + + pub fn into_lwe_ciphertext(&self, stream: &CudaStream) -> LweCiphertext> { + let lwe_ct_size = self.0.lwe_dimension.to_lwe_size().0; + let mut container: Vec = vec![T::ZERO; lwe_ct_size]; + + stream.copy_to_cpu_async(container.as_mut_slice(), &self.0.d_vec); + stream.synchronize(); + + LweCiphertext::from_container(container, self.ciphertext_modulus()) + } + + /// ```rust + /// use tfhe::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::core_crypto::prelude::{LweCiphertextCount, LweCiphertextList}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// use tfhe::shortint::CiphertextModulus; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let lwe_size = PARAM_MESSAGE_2_CARRY_2_KS_PBS.lwe_dimension.to_lwe_size(); + /// let ciphertext_modulus = PARAM_MESSAGE_2_CARRY_2_KS_PBS.ciphertext_modulus; + /// let lwe_ciphertext_count = LweCiphertextCount(2); + /// + /// // Create a new LweCiphertextList + /// let lwe_list = LweCiphertextList::new(0u64, lwe_size, lwe_ciphertext_count, ciphertext_modulus); + /// + /// // Copy to GPU + /// let 
d_lwe_list = CudaLweCiphertextList::from_lwe_ciphertext_list(&lwe_list, &mut stream); + /// let d_lwe_list_copied = d_lwe_list.duplicate(&mut stream); + /// + /// let lwe_list_copied = d_lwe_list_copied.to_lwe_ciphertext_list(&mut stream); + /// + /// assert_eq!(lwe_list, lwe_list_copied); + /// ``` + pub fn duplicate(&self, stream: &CudaStream) -> Self { + let lwe_dimension = self.lwe_dimension(); + let lwe_ciphertext_count = self.lwe_ciphertext_count(); + let ciphertext_modulus = self.ciphertext_modulus(); + + // Copy to the GPU + let mut d_vec = stream.malloc_async(self.0.d_vec.len() as u32); + stream.copy_gpu_to_gpu_async(&mut d_vec, &self.0.d_vec); + + let cuda_lwe_list = CudaLweList { + d_vec, + lwe_ciphertext_count, + lwe_dimension, + ciphertext_modulus, + }; + Self(cuda_lwe_list) + } + + pub(crate) fn lwe_dimension(&self) -> LweDimension { + self.0.lwe_dimension + } + + pub(crate) fn lwe_ciphertext_count(&self) -> LweCiphertextCount { + self.0.lwe_ciphertext_count + } + + pub(crate) fn ciphertext_modulus(&self) -> CiphertextModulus { + self.0.ciphertext_modulus + } +} diff --git a/tfhe/src/core_crypto/gpu/entities/lwe_keyswitch_key.rs b/tfhe/src/core_crypto/gpu/entities/lwe_keyswitch_key.rs new file mode 100644 index 000000000..d6bb2640c --- /dev/null +++ b/tfhe/src/core_crypto/gpu/entities/lwe_keyswitch_key.rs @@ -0,0 +1,67 @@ +//! Module containing the definition of the [`CudaLweKeyswitchKey`]. 
+ +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::{ + CiphertextModulus, CudaStream, DecompositionBaseLog, DecompositionLevelCount, +}; +use crate::core_crypto::prelude::{ + lwe_keyswitch_key_input_key_element_encrypted_size, LweKeyswitchKeyOwned, LweSize, + UnsignedInteger, +}; + +#[allow(dead_code)] +pub struct CudaLweKeyswitchKey { + pub(crate) d_vec: CudaVec, + input_lwe_size: LweSize, + output_lwe_size: LweSize, + decomp_base_log: DecompositionBaseLog, + decomp_level_count: DecompositionLevelCount, + ciphertext_modulus: CiphertextModulus, +} + +impl CudaLweKeyswitchKey { + pub fn from_lwe_keyswitch_key(h_ksk: &LweKeyswitchKeyOwned, stream: &CudaStream) -> Self { + let decomp_base_log = h_ksk.decomposition_base_log(); + let decomp_level_count = h_ksk.decomposition_level_count(); + let input_lwe_size = h_ksk.input_key_lwe_dimension().to_lwe_size(); + let output_lwe_size = h_ksk.output_key_lwe_dimension().to_lwe_size(); + let ciphertext_modulus = h_ksk.ciphertext_modulus(); + + // Allocate memory + let mut d_vec = stream.malloc_async::( + (input_lwe_size.to_lwe_dimension().0 + * lwe_keyswitch_key_input_key_element_encrypted_size( + decomp_level_count, + output_lwe_size, + )) as u32, + ); + + stream.convert_lwe_keyswitch_key_async(&mut d_vec, h_ksk.as_ref()); + + stream.synchronize(); + + Self { + d_vec, + input_lwe_size, + output_lwe_size, + decomp_base_log, + decomp_level_count, + ciphertext_modulus, + } + } + + pub(crate) fn input_key_lwe_size(&self) -> LweSize { + self.input_lwe_size + } + + pub(crate) fn output_key_lwe_size(&self) -> LweSize { + self.output_lwe_size + } + + pub(crate) fn decomposition_base_log(&self) -> DecompositionBaseLog { + self.decomp_base_log + } + pub(crate) fn decomposition_level_count(&self) -> DecompositionLevelCount { + self.decomp_level_count + } +} diff --git a/tfhe/src/core_crypto/gpu/entities/lwe_multi_bit_bootstrap_key.rs b/tfhe/src/core_crypto/gpu/entities/lwe_multi_bit_bootstrap_key.rs new file mode 
100644 index 000000000..f23ed1db3 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/entities/lwe_multi_bit_bootstrap_key.rs @@ -0,0 +1,103 @@ +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::{ + lwe_multi_bit_bootstrap_key_size, Container, DecompositionBaseLog, DecompositionLevelCount, + GlweDimension, LweBskGroupingFactor, LweDimension, LweMultiBitBootstrapKey, PolynomialSize, + UnsignedInteger, +}; + +/// A structure representing an LWE multi-bit bootstrap key held on the GPU, together with its parameters. +#[derive(Debug)] +pub struct CudaLweMultiBitBootstrapKey { + // Pointers to GPU data + pub(crate) d_vec: CudaVec, + // Lwe dimension + pub(crate) input_lwe_dimension: LweDimension, + // Glwe dimension + pub(crate) glwe_dimension: GlweDimension, + // Polynomial size + pub(crate) polynomial_size: PolynomialSize, + // Base log + pub(crate) decomp_base_log: DecompositionBaseLog, + // Decomposition level count + pub(crate) decomp_level_count: DecompositionLevelCount, + // Grouping factor + pub(crate) grouping_factor: LweBskGroupingFactor, +} + +impl CudaLweMultiBitBootstrapKey { + pub fn from_lwe_multi_bit_bootstrap_key( + bsk: &LweMultiBitBootstrapKey, + stream: &CudaStream, + ) -> Self + where + InputBskCont::Element: UnsignedInteger, + { + let input_lwe_dimension = bsk.input_lwe_dimension(); + let polynomial_size = bsk.polynomial_size(); + let decomp_level_count = bsk.decomposition_level_count(); + let decomp_base_log = bsk.decomposition_base_log(); + let glwe_dimension = bsk.glwe_size().to_glwe_dimension(); + let grouping_factor = bsk.grouping_factor(); + + // Allocate memory + let mut d_vec = stream.malloc_async::( + lwe_multi_bit_bootstrap_key_size( + input_lwe_dimension, + glwe_dimension.to_glwe_size(), + polynomial_size, + decomp_level_count, + grouping_factor, + ) + .unwrap() as u32, + ); + // Copy to the GPU + stream.convert_lwe_multi_bit_bootstrap_key_async( + &mut d_vec, + bsk.as_ref(), + 
input_lwe_dimension, + glwe_dimension, + decomp_level_count, + polynomial_size, + grouping_factor, + ); + stream.synchronize(); + Self { + d_vec, + input_lwe_dimension, + glwe_dimension, + polynomial_size, + decomp_base_log, + decomp_level_count, + grouping_factor, + } + } + + pub(crate) fn input_lwe_dimension(&self) -> LweDimension { + self.input_lwe_dimension + } + + pub(crate) fn output_lwe_dimension(&self) -> LweDimension { + LweDimension(self.glwe_dimension.0 * self.polynomial_size.0) + } + + pub(crate) fn glwe_dimension(&self) -> GlweDimension { + self.glwe_dimension + } + + pub(crate) fn polynomial_size(&self) -> PolynomialSize { + self.polynomial_size + } + + pub(crate) fn decomp_base_log(&self) -> DecompositionBaseLog { + self.decomp_base_log + } + + pub(crate) fn decomp_level_count(&self) -> DecompositionLevelCount { + self.decomp_level_count + } + + pub(crate) fn grouping_factor(&self) -> LweBskGroupingFactor { + self.grouping_factor + } +} diff --git a/tfhe/src/core_crypto/gpu/entities/mod.rs b/tfhe/src/core_crypto/gpu/entities/mod.rs new file mode 100644 index 000000000..34dcb5f03 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/entities/mod.rs @@ -0,0 +1,5 @@ +pub mod glwe_ciphertext_list; +pub mod lwe_bootstrap_key; +pub mod lwe_ciphertext_list; +pub mod lwe_keyswitch_key; +pub mod lwe_multi_bit_bootstrap_key; diff --git a/tfhe/src/core_crypto/gpu/mod.rs b/tfhe/src/core_crypto/gpu/mod.rs new file mode 100644 index 000000000..ce65cabd4 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/mod.rs @@ -0,0 +1,678 @@ +pub mod algorithms; +pub mod entities; +pub mod vec; + +pub use algorithms::*; +pub use entities::*; + +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::prelude::{ + CiphertextModulus, DecompositionBaseLog, DecompositionLevelCount, GlweCiphertextCount, + GlweDimension, LweBskGroupingFactor, LweCiphertextCount, LweCiphertextIndex, LweDimension, + Numeric, PolynomialSize, UnsignedInteger, +}; +use std::ffi::c_void; +use 
tfhe_cuda_backend::cuda_bind::*; + +#[derive(Debug, Clone)] +pub struct CudaPtr { + ptr: *mut c_void, + device: CudaDevice, +} + +#[derive(Debug, Clone)] +pub struct CudaStream { + ptr: *mut c_void, + device: CudaDevice, +} + +impl CudaStream { + pub fn new_unchecked(device: CudaDevice) -> Self { + let gpu_index = device.gpu_index(); + unsafe { + let ptr = cuda_create_stream(gpu_index); + + Self { ptr, device } + } + } + + /// # Safety + /// + /// - `stream` __must__ be a valid pointer + pub unsafe fn as_mut_c_ptr(&mut self) -> *mut c_void { + self.ptr + } + + /// # Safety + /// + /// - `stream` __must__ be a valid pointer + pub unsafe fn as_c_ptr(&self) -> *const c_void { + self.ptr.cast_const() + } + + pub fn device(&self) -> CudaDevice { + self.device + } + + /// Synchronizes the stream + pub fn synchronize(&self) { + unsafe { cuda_synchronize_stream(self.as_c_ptr()) }; + } + + /// Allocates `elements` on the GPU asynchronously + pub fn malloc_async(&self, elements: u32) -> CudaVec + where + T: Numeric, + { + let size = elements as u64 * std::mem::size_of::() as u64; + unsafe { + let ptr = CudaPtr { + ptr: cuda_malloc_async(size, self.as_c_ptr()), + device: self.device(), + }; + + CudaVec::new(ptr, elements as usize, self.device()) + } + } + + pub fn memset_async(&self, dest: &mut CudaVec, value: T) + where + T: Numeric + Into, + { + let dest_size = dest.len() * std::mem::size_of::(); + unsafe { + cuda_memset_async( + dest.as_mut_c_ptr(), + value.into(), + dest_size as u64, + self.as_c_ptr(), + ); + } + } + + /// Copies data from slice into GPU pointer + /// + /// # Safety + /// + /// - `dest` __must__ be a valid pointer to the GPU global memory + /// - [CudaDevice::cuda_synchronize_device] __must__ be called after the copy + /// as soon as synchronization is required + pub fn copy_to_gpu_async(&self, dest: &mut CudaVec, src: &[T]) + where + T: Numeric, + { + let src_size = std::mem::size_of_val(src); + assert!(dest.len() * std::mem::size_of::() >= src_size); + 
+ + unsafe { + cuda_memcpy_async_to_gpu( + dest.as_mut_c_ptr(), + src.as_ptr().cast(), + src_size as u64, + self.as_c_ptr(), + ); + } + } + + /// Copies the `src.len()` elements of `src` into the start of `dest` on the GPU (`dest` may be longer; only `src`'s bytes are read) + /// + /// # Safety + /// + /// - `src` __must__ be a valid pointer to the GPU global memory + /// - `dest` __must__ be a valid pointer to the GPU global memory + /// - [CudaDevice::cuda_synchronize_device] __must__ be called after the copy + /// as soon as synchronization is required + pub fn copy_gpu_to_gpu_async(&self, dest: &mut CudaVec, src: &CudaVec) + where + T: Numeric, + { + assert!(dest.len() >= src.len()); + let size = src.len() * std::mem::size_of::(); + + unsafe { + cuda_memcpy_async_gpu_to_gpu( + dest.as_mut_c_ptr(), + src.as_c_ptr(), + size as u64, + self.as_c_ptr(), + ); + } + } + + /// Copies the `src.len()` elements of `src` from the GPU into the start of `dest` (`dest` may be longer; only `src`'s bytes are read) + /// + /// # Safety + /// + /// - `src` __must__ be a valid pointer to the GPU global memory + /// - [CudaDevice::cuda_synchronize_device] __must__ be called as soon as synchronization is + /// required + pub fn copy_to_cpu_async(&self, dest: &mut [T], src: &CudaVec) + where + T: Numeric, + { + let src_size = src.len() * std::mem::size_of::(); + assert!(std::mem::size_of_val(dest) >= src_size); + + unsafe { + cuda_memcpy_async_to_cpu( + dest.as_mut_ptr().cast(), + src.as_c_ptr(), + src_size as u64, + self.as_c_ptr(), + ); + } + } + + /// Discarding bootstrap on a vector of LWE ciphertexts + #[allow(clippy::too_many_arguments)] + pub fn bootstrap_low_latency_async( + &self, + lwe_array_out: &mut CudaVec, + lwe_out_indexes: &CudaVec, + test_vector: &CudaVec, + test_vector_indexes: &CudaVec, + lwe_array_in: &CudaVec, + lwe_in_indexes: &CudaVec, + bootstrapping_key: &CudaVec, + lwe_dimension: LweDimension, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + base_log: DecompositionBaseLog, + level: DecompositionLevelCount, + num_samples: u32, + lwe_idx: LweCiphertextIndex, + ) { + let mut pbs_buffer: *mut i8 = 
std::ptr::null_mut(); + unsafe { + scratch_cuda_bootstrap_low_latency_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(pbs_buffer), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + level.0 as u32, + num_samples, + self.device().get_max_shared_memory() as u32, + true, + ); + cuda_bootstrap_low_latency_lwe_ciphertext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + lwe_out_indexes.as_c_ptr(), + test_vector.as_c_ptr(), + test_vector_indexes.as_c_ptr(), + lwe_array_in.as_c_ptr(), + lwe_in_indexes.as_c_ptr(), + bootstrapping_key.as_c_ptr(), + pbs_buffer, + lwe_dimension.0 as u32, + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + base_log.0 as u32, + level.0 as u32, + num_samples, + num_samples, + lwe_idx.0 as u32, + self.device().get_max_shared_memory() as u32, + ); + cleanup_cuda_bootstrap_low_latency(self.as_c_ptr(), std::ptr::addr_of_mut!(pbs_buffer)); + } + } + + /// Discarding bootstrap on a vector of LWE ciphertexts + #[allow(clippy::too_many_arguments)] + pub fn bootstrap_multi_bit_async( + &self, + lwe_array_out: &mut CudaVec, + output_indexes: &CudaVec, + test_vector: &CudaVec, + test_vector_indexes: &CudaVec, + lwe_array_in: &CudaVec, + input_indexes: &CudaVec, + bootstrapping_key: &CudaVec, + lwe_dimension: LweDimension, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + base_log: DecompositionBaseLog, + level: DecompositionLevelCount, + grouping_factor: LweBskGroupingFactor, + num_samples: u32, + lwe_idx: LweCiphertextIndex, + ) { + let mut pbs_buffer: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_multi_bit_pbs_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(pbs_buffer), + lwe_dimension.0 as u32, + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + level.0 as u32, + grouping_factor.0 as u32, + num_samples, + self.device().get_max_shared_memory() as u32, + true, + 0u32, + ); + cuda_multi_bit_pbs_lwe_ciphertext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + 
output_indexes.as_c_ptr(), + test_vector.as_c_ptr(), + test_vector_indexes.as_c_ptr(), + lwe_array_in.as_c_ptr(), + input_indexes.as_c_ptr(), + bootstrapping_key.as_c_ptr(), + pbs_buffer, + lwe_dimension.0 as u32, + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + grouping_factor.0 as u32, + base_log.0 as u32, + level.0 as u32, + num_samples, + num_samples, + lwe_idx.0 as u32, + self.device().get_max_shared_memory() as u32, + 0u32, + ); + cleanup_cuda_multi_bit_pbs(self.as_c_ptr(), std::ptr::addr_of_mut!(pbs_buffer)); + } + } + /// Discarding keyswitch on a vector of LWE ciphertexts + #[allow(clippy::too_many_arguments)] + pub fn keyswitch_async( + &self, + lwe_array_out: &mut CudaVec, + lwe_out_indexes: &CudaVec, + lwe_array_in: &CudaVec, + lwe_in_indexes: &CudaVec, + input_lwe_dimension: LweDimension, + output_lwe_dimension: LweDimension, + keyswitch_key: &CudaVec, + base_log: DecompositionBaseLog, + l_gadget: DecompositionLevelCount, + num_samples: u32, + ) { + unsafe { + cuda_keyswitch_lwe_ciphertext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + lwe_out_indexes.as_c_ptr(), + lwe_array_in.as_c_ptr(), + lwe_in_indexes.as_c_ptr(), + keyswitch_key.as_c_ptr(), + input_lwe_dimension.0 as u32, + output_lwe_dimension.0 as u32, + base_log.0 as u32, + l_gadget.0 as u32, + num_samples, + ); + } + } + + /// Convert keyswitch key: copies `src` as-is into the GPU vector `dest` + #[allow(clippy::too_many_arguments)] + pub fn convert_lwe_keyswitch_key_async( + &self, + dest: &mut CudaVec, + src: &[T], + ) { + self.copy_to_gpu_async(dest, src); + } + + /// Convert bootstrap key + #[allow(clippy::too_many_arguments)] + pub fn convert_lwe_bootstrap_key_async( + &self, + dest: &mut CudaVec, + src: &[T], + input_lwe_dim: LweDimension, + glwe_dim: GlweDimension, + l_gadget: DecompositionLevelCount, + polynomial_size: PolynomialSize, + ) { + let size = std::mem::size_of_val(src); + assert_eq!(dest.len() * std::mem::size_of::(), size); + + unsafe { + cuda_convert_lwe_bootstrap_key_64( + dest.as_mut_c_ptr(), 
+ src.as_ptr().cast(), + self.as_c_ptr(), + input_lwe_dim.0 as u32, + glwe_dim.0 as u32, + l_gadget.0 as u32, + polynomial_size.0 as u32, + ); + }; + } + + /// Convert multi-bit bootstrap key + #[allow(clippy::too_many_arguments)] + pub fn convert_lwe_multi_bit_bootstrap_key_async( + &self, + dest: &mut CudaVec, + src: &[T], + input_lwe_dim: LweDimension, + glwe_dim: GlweDimension, + l_gadget: DecompositionLevelCount, + polynomial_size: PolynomialSize, + grouping_factor: LweBskGroupingFactor, + ) { + let size = std::mem::size_of_val(src); + assert_eq!(dest.len() * std::mem::size_of::(), size); + + unsafe { + cuda_convert_lwe_multi_bit_bootstrap_key_64( + dest.as_mut_c_ptr(), + src.as_ptr().cast(), + self.as_c_ptr(), + input_lwe_dim.0 as u32, + glwe_dim.0 as u32, + l_gadget.0 as u32, + polynomial_size.0 as u32, + grouping_factor.0 as u32, + ) + }; + } + + /// Discarding addition of a vector of LWE ciphertexts + pub fn add_lwe_ciphertext_vector_async( + &self, + lwe_array_out: &mut CudaVec, + lwe_array_in_1: &CudaVec, + lwe_array_in_2: &CudaVec, + lwe_dimension: LweDimension, + num_samples: u32, + ) { + unsafe { + cuda_add_lwe_ciphertext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + lwe_array_in_1.as_c_ptr(), + lwe_array_in_2.as_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + ); + } + } + + /// Discarding addition of a vector of LWE ciphertexts + pub fn add_lwe_ciphertext_vector_assign_async( + &self, + lwe_array_out: &mut CudaVec, + lwe_array_in: &CudaVec, + lwe_dimension: LweDimension, + num_samples: u32, + ) { + unsafe { + cuda_add_lwe_ciphertext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + lwe_array_out.as_c_ptr(), + lwe_array_in.as_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + ); + } + } + + /// Discarding addition of a vector of LWE ciphertexts + pub fn add_lwe_ciphertext_vector_plaintext_vector_async( + &self, + lwe_array_out: &mut CudaVec, + lwe_array_in: &CudaVec, + plaintext_in: &CudaVec, + lwe_dimension: 
LweDimension, + num_samples: u32, + ) { + unsafe { + cuda_add_lwe_ciphertext_vector_plaintext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + lwe_array_in.as_c_ptr(), + plaintext_in.as_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + ); + } + } + + /// Discarding addition of a vector of LWE ciphertexts + pub fn add_lwe_ciphertext_vector_plaintext_vector_assign_async( + &self, + lwe_array_out: &mut CudaVec, + plaintext_in: &CudaVec, + lwe_dimension: LweDimension, + num_samples: u32, + ) { + unsafe { + cuda_add_lwe_ciphertext_vector_plaintext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + lwe_array_out.as_c_ptr(), + plaintext_in.as_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + ); + } + } + + /// Discarding negation of a vector of LWE ciphertexts + pub fn negate_lwe_ciphertext_vector_async( + &self, + lwe_array_out: &mut CudaVec, + lwe_array_in: &CudaVec, + lwe_dimension: LweDimension, + num_samples: u32, + ) { + unsafe { + cuda_negate_lwe_ciphertext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + lwe_array_in.as_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + ); + } + } + + /// Discarding negation of a vector of LWE ciphertexts + pub fn negate_lwe_ciphertext_vector_assign_async( + &self, + lwe_array_out: &mut CudaVec, + lwe_dimension: LweDimension, + num_samples: u32, + ) { + unsafe { + cuda_negate_lwe_ciphertext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + lwe_array_out.as_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn negate_integer_radix_assign_async( + &self, + lwe_array: &mut CudaVec, + lwe_dimension: LweDimension, + num_samples: u32, + message_modulus: u32, + carry_modulus: u32, + ) { + unsafe { + cuda_negate_integer_radix_ciphertext_64_inplace( + self.as_c_ptr(), + lwe_array.as_mut_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + message_modulus, + carry_modulus, + ); + } + } + + /// Discarding cleartext multiplication of a 
vector of LWE ciphertexts + pub fn mult_lwe_ciphertext_vector_cleartext_vector_assign_async( + &self, + lwe_array: &mut CudaVec, + cleartext_array_in: &CudaVec, + lwe_dimension: LweDimension, + num_samples: u32, + ) { + unsafe { + cuda_mult_lwe_ciphertext_vector_cleartext_vector_64( + self.as_c_ptr(), + lwe_array.as_mut_c_ptr(), + lwe_array.as_c_ptr(), + cleartext_array_in.as_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + ); + } + } + + /// Discarding multiplication of a vector of LWE ciphertexts by a vector of cleartexts + pub fn mult_lwe_ciphertext_vector_cleartext_vector( + &self, + lwe_array_out: &mut CudaVec, + lwe_array_in: &CudaVec, + cleartext_array_in: &CudaVec, + lwe_dimension: LweDimension, + num_samples: u32, + ) { + unsafe { + cuda_mult_lwe_ciphertext_vector_cleartext_vector_64( + self.as_c_ptr(), + lwe_array_out.as_mut_c_ptr(), + lwe_array_in.as_c_ptr(), + cleartext_array_in.as_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + ); + } + } +} + +impl Drop for CudaStream { + fn drop(&mut self) { + self.synchronize(); + unsafe { + cuda_destroy_stream(self.as_mut_c_ptr()); + } + } +} + +impl CudaPtr { + /// Returns a raw pointer to the vector’s buffer. + pub fn as_c_ptr(&self) -> *const c_void { + self.ptr.cast_const() + } + + /// Returns an unsafe mutable pointer to the vector’s buffer. 
+ pub fn as_mut_c_ptr(&mut self) -> *mut c_void { + self.ptr + } +} + +impl Drop for CudaPtr { + /// Free memory for pointer `ptr` synchronously + fn drop(&mut self) { + // Synchronizes the device to be sure no stream is still using this pointer + let device = self.device; + device.synchronize_device(); + + // The asynchronous release (kept commented out below) is disabled; `cuda_drop` is used instead + // let stream = CudaStream::new_unchecked(device); + // unsafe { cuda_drop_async(self.ptr, stream.as_c_ptr(), device.gpu_index()) }; + unsafe { cuda_drop(self.as_mut_c_ptr()) }; + } +} + +#[derive(Debug)] +pub struct CudaLweList { + // Pointer to GPU data + pub d_vec: CudaVec, + // Number of ciphertexts in the array + pub lwe_ciphertext_count: LweCiphertextCount, + // Lwe dimension + pub lwe_dimension: LweDimension, + // Ciphertext Modulus + pub ciphertext_modulus: CiphertextModulus, +} + +#[derive(Debug)] +pub struct CudaGlweList { + // Pointer to GPU data + pub d_vec: CudaVec, + // Number of ciphertexts in the array + pub glwe_ciphertext_count: GlweCiphertextCount, + // Glwe dimension + pub glwe_dimension: GlweDimension, + // Polynomial size + pub polynomial_size: PolynomialSize, + // Ciphertext Modulus + pub ciphertext_modulus: CiphertextModulus, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct CudaDevice { + gpu_index: u32, +} + +impl CudaDevice { + /// Creates a CudaDevice related to the GPU with index gpu_index + pub fn new(gpu_index: u32) -> Self { + Self { gpu_index } + } + + pub fn gpu_index(&self) -> u32 { + self.gpu_index + } + + /// Synchronizes the device + #[allow(dead_code)] + pub fn synchronize_device(&self) { + unsafe { cuda_synchronize_device(self.gpu_index()) }; + } + + /// Get the maximum amount of shared memory + pub fn get_max_shared_memory(&self) -> i32 { + unsafe { cuda_get_max_shared_memory(self.gpu_index()) } + } + + /// Get the number of GPUs, as returned by `cuda_get_number_of_gpus` + pub fn get_number_of_gpus(&self) -> i32 { + unsafe { cuda_get_number_of_gpus() } + } +} + +#[cfg(test)] +mod 
tests { + use super::*; + + #[test] + fn print_gpu_info() { + println!("Number of GPUs: {}", unsafe { cuda_get_number_of_gpus() }); + let gpu_index: u32 = 0; + let device = CudaDevice::new(gpu_index); + println!("Max shared memory: {}", device.get_max_shared_memory()) + } + #[test] + fn allocate_and_copy() { + let vec = vec![1_u64, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let gpu_index: u32 = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + let mut d_vec: CudaVec = stream.malloc_async::(vec.len() as u32); + stream.copy_to_gpu_async(&mut d_vec, &vec); + let mut empty = vec![0_u64; vec.len()]; + stream.copy_to_cpu_async(&mut empty, &d_vec); + stream.synchronize(); + assert_eq!(vec, empty); + } +} diff --git a/tfhe/src/core_crypto/gpu/vec.rs b/tfhe/src/core_crypto/gpu/vec.rs new file mode 100644 index 000000000..50ad3a0c5 --- /dev/null +++ b/tfhe/src/core_crypto/gpu/vec.rs @@ -0,0 +1,82 @@ +use crate::core_crypto::gpu::{CudaDevice, CudaPtr, CudaStream}; +use crate::core_crypto::prelude::Numeric; +use std::ffi::c_void; +use std::marker::PhantomData; + +/// A contiguous array type stored in the gpu memory. +/// +/// Note: +/// ----- +/// +/// Such a structure: +/// + can be created via the `CudaStream::malloc` function +/// + can not be copied or cloned but can be (mutably) borrowed +/// + frees the gpu memory on drop. +/// +/// Put differently, it owns a region of the gpu memory at a given time. For this reason, regarding +/// memory, it is pretty close to a `Vec`. That being said, it only present a very very limited api. 
+#[derive(Debug)] +pub struct CudaVec { + ptr: CudaPtr, + len: usize, + device: CudaDevice, + _phantom: PhantomData, +} + +impl CudaVec { + /// # Safety + /// + /// - `ptr` __must__ be a valid device pointer to an array of `len` elements of type `T` + pub unsafe fn new(ptr: CudaPtr, len: usize, device: CudaDevice) -> Self { + Self { + ptr, + len, + device, + _phantom: PhantomData, + } + } + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn from_async(h_data: &Vec, stream: &CudaStream) -> Self { + let mut d_data = stream.malloc_async(h_data.len() as u32); + stream.copy_to_gpu_async(&mut d_data, h_data.as_slice()); + d_data + } + + pub(crate) fn as_mut_c_ptr(&mut self) -> *mut c_void { + self.ptr.as_mut_c_ptr() + } + + pub(crate) fn as_c_ptr(&self) -> *const c_void { + self.ptr.as_c_ptr() + } + + pub fn gpu_index(&self) -> u32 { + self.device.gpu_index() + } + + /// Returns the number of elements in the vector, also referred to as its ‘length’. + pub fn len(&self) -> usize { + self.len + } + + /// Returns `true` if the CudaVec contains no elements. + pub fn is_empty(&self) -> bool { + self.len == 0 + } +} + +// SAFETY +// +// Behind the void* that is in the CudaPtr, the data is a contiguous +// chunk of T on the GPU, so as long as T is Send/Sync CudaVec is. +// +// clippy complains that we impl Send on CudaVec while CudaPtr is non Send. +// This is ok for us, as CudaPtr is meant to be a wrapper type that serves +// as distinguishing ptr that points to cuda memory from pointers pointing to +// CPU memory. 
+#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for CudaVec where T: Send + Numeric {} +unsafe impl Sync for CudaVec where T: Sync + Numeric {} diff --git a/tfhe/src/core_crypto/mod.rs b/tfhe/src/core_crypto/mod.rs index d2f0b1160..b79c0c5dc 100644 --- a/tfhe/src/core_crypto/mod.rs +++ b/tfhe/src/core_crypto/mod.rs @@ -18,5 +18,7 @@ pub mod seeders; pub mod fft_impl; +#[cfg(feature = "gpu")] +pub mod gpu; #[cfg(test)] pub mod keycache; diff --git a/tfhe/src/high_level_api/design.md b/tfhe/src/high_level_api/design.md index 165699370..b7129c547 100644 --- a/tfhe/src/high_level_api/design.md +++ b/tfhe/src/high_level_api/design.md @@ -56,8 +56,8 @@ and FheUint4Parameters only contains `PARAM_MESSAGE_4_CARRY_4_KS_PBS`. This way, we can specialize our wrapper types: * `type FheUint2 = GenericWrapperStruct` * `type FheUint4 = GenericWrapperStruct` -and now we have two disctint types, that have specific crypto parameters associated with them. -Also, they are type safe (can't `+` a FheUint2 to a FheUint4 without a compilatation error +and now we have two distinct types, that have specific crypto parameters associated with them. 
+Also, they are type safe (can't `+` a FheUint2 to a FheUint4 without a compilation error unless the implementation explicitly allows it since the type are different, which is not the case if you use the 'raw' shortint api) diff --git a/tfhe/src/integer/bigint/static_unsigned.rs b/tfhe/src/integer/bigint/static_unsigned.rs index f56bb6019..cfe83afd3 100644 --- a/tfhe/src/integer/bigint/static_unsigned.rs +++ b/tfhe/src/integer/bigint/static_unsigned.rs @@ -360,6 +360,12 @@ impl From for StaticUnsignedBigInt { } } +impl CastFrom> for u32 { + fn cast_from(input: StaticUnsignedBigInt) -> Self { + input.0[0] as Self + } +} + impl CastFrom> for u64 { fn cast_from(input: StaticUnsignedBigInt) -> Self { input.0[0] diff --git a/tfhe/src/integer/gpu/ciphertext/mod.rs b/tfhe/src/integer/gpu/ciphertext/mod.rs new file mode 100644 index 000000000..d6def4e4f --- /dev/null +++ b/tfhe/src/integer/gpu/ciphertext/mod.rs @@ -0,0 +1,605 @@ +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::{LweCiphertextList, LweCiphertextOwned}; +use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto}; +use crate::integer::server_key::TwosComplementNegation; +use crate::integer::RadixCiphertext; +use crate::shortint::ciphertext::{Degree, NoiseLevel}; +use crate::shortint::{CarryModulus, Ciphertext, MessageModulus, PBSOrder}; +use itertools::Itertools; + +#[derive(Clone, Copy)] +pub struct CudaBlockInfo { + pub degree: Degree, + pub message_modulus: MessageModulus, + pub carry_modulus: CarryModulus, + pub pbs_order: PBSOrder, + pub noise_level: NoiseLevel, +} + +impl CudaBlockInfo { + pub fn carry_is_empty(&self) -> bool { + self.degree.get() < self.message_modulus.0 + } +} + +#[derive(Clone)] +pub struct CudaRadixCiphertextInfo { + pub blocks: Vec, +} + +impl CudaRadixCiphertextInfo { + // Creates an iterator that return decomposed blocks of the negated + // value of `scalar` + // 
+ // Returns + // - `None` if scalar is zero + // - `Some` if scalar is non-zero + // + fn create_negated_block_decomposer(&self, scalar: T) -> Option> + where + T: TwosComplementNegation + DecomposableInto, + { + if scalar == T::ZERO { + return None; + } + let message_modulus = self.blocks.first().unwrap().message_modulus; + let bits_in_message = message_modulus.0.ilog2(); + assert!(bits_in_message <= u8::BITS); + + // The whole idea behind this iterator we construct is: + // - to support combos of parameters and num blocks for which the total number of bits is + // not a multiple of T::BITS + // + // - Support subtraction in the case the T::BITS is lower than the target ciphertext bits. + // In clear rust this would require an upcast, to support that we have to do a few things + + let neg_scalar = scalar.twos_complement_negation(); + + // If we had upcasted the scalar, its msb would be zeros (0) + // then they would become ones (1) after the bitwise_not (!). + // The only case where these msb could become 0 after the addition + // is if scalar == T::ZERO (=> !T::ZERO == T::MAX => T::MAX + 1 == overflow), + // but this case has been handled earlier. 
+ let padding_bit = 1u32; // To handle when bits is not a multiple of T::BITS + // All bits of message set to one + let pad_block = (1 << bits_in_message as u8) - 1; + + let decomposer = BlockDecomposer::with_padding_bit( + neg_scalar, + bits_in_message, + T::cast_from(padding_bit), + ) + .iter_as::() + .chain(std::iter::repeat(pad_block)); + Some(decomposer) + } + + pub(crate) fn after_add(&self, other: &Self) -> Self { + Self { + blocks: self + .blocks + .iter() + .zip(&other.blocks) + .map(|(left, right)| CudaBlockInfo { + degree: left.degree + right.degree, + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: left.noise_level + right.noise_level, + }) + .collect(), + } + } + + pub(crate) fn after_neg(&self) -> Self { + let mut z; + let mut z_b: u8 = 0; + + let mut new_degrees: Vec = vec![]; + new_degrees.resize(self.blocks.len(), Degree::new(0)); + for (i, block) in self.blocks.iter().enumerate() { + let mut degree = block.degree.get(); + let msg_mod = block.message_modulus.0; + if z_b != 0 { + // scalar_add degree + degree += z_b as usize; + } + // neg_assign_with_correcting_term degree + z = ((degree + msg_mod - 1) / msg_mod) as u64; + z *= msg_mod as u64; + + new_degrees[i] = Degree::new(z as usize - z_b as usize); + z_b = (z / msg_mod as u64) as u8; + } + + Self { + blocks: self + .blocks + .iter() + .zip(new_degrees.iter()) + .map(|(left, d)| CudaBlockInfo { + degree: *d, + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: left.noise_level, + }) + .collect(), + } + } + + pub(crate) fn after_mul(&self) -> Self { + Self { + blocks: self + .blocks + .iter() + .map(|left| CudaBlockInfo { + degree: Degree::new(left.message_modulus.0 - 1), + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: left.noise_level + NoiseLevel::NOMINAL, + }) + .collect(), + } + } 
+ + pub(crate) fn after_scalar_add(&self, scalar: T) -> Self + where + T: DecomposableInto, + { + let message_modulus = self.blocks.first().unwrap().message_modulus; + let bits_in_message = message_modulus.0.ilog2(); + let decomposer = + BlockDecomposer::with_early_stop_at_zero(scalar, bits_in_message).iter_as::(); + let mut scalar_composed = decomposer.collect_vec(); + scalar_composed.resize(self.blocks.len(), 0); + + Self { + blocks: self + .blocks + .iter() + .zip(scalar_composed) + .map(|(left, scalar_block)| CudaBlockInfo { + degree: Degree::new(left.degree.get() + scalar_block as usize), + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: left.noise_level, + }) + .collect(), + } + } + + pub(crate) fn after_small_scalar_mul(&self, scalar: u8) -> Self { + Self { + blocks: self + .blocks + .iter() + .map(|left| CudaBlockInfo { + degree: Degree::new(left.degree.get() * scalar as usize), + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: left.noise_level, + }) + .collect(), + } + } + + pub(crate) fn after_scalar_sub(&self, scalar: T) -> Self + where + T: TwosComplementNegation + DecomposableInto, + { + let Some(decomposer) = self.create_negated_block_decomposer(scalar) else { + // subtraction by zero + return self.clone(); + }; + + Self { + blocks: self + .blocks + .iter() + .zip(decomposer) + .map(|(left, scalar_block)| CudaBlockInfo { + degree: Degree::new(left.degree.get() + scalar_block as usize), + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: left.noise_level, + }) + .collect(), + } + } + + pub(crate) fn after_bitand(&self, other: &Self) -> Self { + Self { + blocks: self + .blocks + .iter() + .zip(&other.blocks) + .map(|(left, right)| CudaBlockInfo { + degree: left.degree.after_bitand(right.degree), + message_modulus: left.message_modulus, + 
carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: NoiseLevel::NOMINAL, + }) + .collect(), + } + } + + pub(crate) fn after_bitor(&self, other: &Self) -> Self { + Self { + blocks: self + .blocks + .iter() + .zip(&other.blocks) + .map(|(left, right)| CudaBlockInfo { + degree: left.degree.after_bitor(right.degree), + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: NoiseLevel::NOMINAL, + }) + .collect(), + } + } + + pub(crate) fn after_bitxor(&self, other: &Self) -> Self { + Self { + blocks: self + .blocks + .iter() + .zip(&other.blocks) + .map(|(left, right)| CudaBlockInfo { + degree: left.degree.after_bitxor(right.degree), + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: NoiseLevel::NOMINAL, + }) + .collect(), + } + } + + pub(crate) fn after_scalar_bitand(&self, scalar: T) -> Self + where + T: DecomposableInto, + { + let message_modulus = self.blocks.first().unwrap().message_modulus; + let bits_in_message = message_modulus.0.ilog2(); + let decomposer = + BlockDecomposer::with_early_stop_at_zero(scalar, bits_in_message).iter_as::(); + let mut scalar_composed = decomposer.collect_vec(); + scalar_composed.resize(self.blocks.len(), 0); + + Self { + blocks: self + .blocks + .iter() + .zip(scalar_composed) + .map(|(left, scalar_block)| CudaBlockInfo { + degree: left.degree.after_bitand(Degree::new(scalar_block as usize)), + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: left.noise_level, + }) + .collect(), + } + } + + pub(crate) fn after_scalar_bitor(&self, scalar: T) -> Self + where + T: DecomposableInto, + { + let message_modulus = self.blocks.first().unwrap().message_modulus; + let bits_in_message = message_modulus.0.ilog2(); + let decomposer = + BlockDecomposer::with_early_stop_at_zero(scalar, bits_in_message).iter_as::(); + 
let mut scalar_composed = decomposer.collect_vec(); + scalar_composed.resize(self.blocks.len(), 0); + + Self { + blocks: self + .blocks + .iter() + .zip(scalar_composed) + .map(|(left, scalar_block)| CudaBlockInfo { + degree: left.degree.after_bitor(Degree::new(scalar_block as usize)), + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: left.noise_level, + }) + .collect(), + } + } + + pub(crate) fn after_scalar_bitxor(&self, scalar: T) -> Self + where + T: DecomposableInto, + { + let message_modulus = self.blocks.first().unwrap().message_modulus; + let bits_in_message = message_modulus.0.ilog2(); + let decomposer = + BlockDecomposer::with_early_stop_at_zero(scalar, bits_in_message).iter_as::(); + let mut scalar_composed = decomposer.collect_vec(); + scalar_composed.resize(self.blocks.len(), 0); + + Self { + blocks: self + .blocks + .iter() + .zip(scalar_composed) + .map(|(left, scalar_block)| CudaBlockInfo { + degree: left.degree.after_bitxor(Degree::new(scalar_block as usize)), + message_modulus: left.message_modulus, + carry_modulus: left.carry_modulus, + pbs_order: left.pbs_order, + noise_level: left.noise_level, + }) + .collect(), + } + } + + // eq/ne, and comparisons retuns a ciphertext that encrypts a 0 or 1, so the first block + // (least significant) has a degree of 1, the other blocks should be trivial lwe encrypting 0, + // so degree 0 + pub(crate) fn after_eq(&self) -> Self { + Self { + blocks: self + .blocks + .iter() + .enumerate() + .map(|(i, block)| CudaBlockInfo { + degree: if i == 0 { + Degree::new(1) + } else { + Degree::new(0) + }, + message_modulus: block.message_modulus, + carry_modulus: block.carry_modulus, + pbs_order: block.pbs_order, + noise_level: NoiseLevel::NOMINAL, + }) + .collect(), + } + } + + pub(crate) fn after_ne(&self) -> Self { + Self { + blocks: self + .blocks + .iter() + .enumerate() + .map(|(i, block)| CudaBlockInfo { + degree: if i == 0 { + Degree::new(1) + 
} else { + Degree::new(0) + }, + message_modulus: block.message_modulus, + carry_modulus: block.carry_modulus, + pbs_order: block.pbs_order, + noise_level: NoiseLevel::NOMINAL, + }) + .collect(), + } + } +} + +// #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] +// #[must_use] +pub struct CudaRadixCiphertext { + pub d_blocks: CudaLweCiphertextList, + pub info: CudaRadixCiphertextInfo, +} + +impl CudaRadixCiphertext { + /// Copies a RadixCiphertext to the GPU memory + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// let size = 4; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let clear: u64 = 255; + /// + /// // Encrypt two messages + /// let ctxt = cks.encrypt(clear); + /// + /// let mut d_ctxt = CudaRadixCiphertext::from_radix_ciphertext(&ctxt, &mut stream); + /// let mut h_ctxt = d_ctxt.to_radix_ciphertext(&mut stream); + /// + /// assert_eq!(h_ctxt, ctxt); + /// ``` + pub fn from_radix_ciphertext(radix: &RadixCiphertext, stream: &CudaStream) -> Self { + let mut h_radix_ciphertext = radix + .blocks + .iter() + .flat_map(|block| block.ct.clone().into_container()) + .collect::>(); + + let lwe_size = radix.blocks.first().unwrap().ct.lwe_size(); + let ciphertext_modulus = radix.blocks.first().unwrap().ct.ciphertext_modulus(); + + let h_ct = LweCiphertextList::from_container( + h_radix_ciphertext.as_mut_slice(), + lwe_size, + ciphertext_modulus, + ); + let d_blocks = CudaLweCiphertextList::from_lwe_ciphertext_list(&h_ct, stream); + + let info = CudaRadixCiphertextInfo { + 
blocks: radix + .blocks + .iter() + .map(|block| CudaBlockInfo { + degree: block.degree, + message_modulus: block.message_modulus, + carry_modulus: block.carry_modulus, + pbs_order: block.pbs_order, + noise_level: block.noise_level(), + }) + .collect(), + }; + + Self { d_blocks, info } + } + + pub fn copy_from_radix_ciphertext(&mut self, radix: &RadixCiphertext, stream: &CudaStream) { + let mut h_radix_ciphertext = radix + .blocks + .iter() + .flat_map(|block| block.ct.clone().into_container()) + .collect::>(); + + stream.copy_to_gpu_async( + &mut self.d_blocks.0.d_vec, + h_radix_ciphertext.as_mut_slice(), + ); + + self.info = CudaRadixCiphertextInfo { + blocks: radix + .blocks + .iter() + .map(|block| CudaBlockInfo { + degree: block.degree, + message_modulus: block.message_modulus, + carry_modulus: block.carry_modulus, + pbs_order: block.pbs_order, + noise_level: block.noise_level(), + }) + .collect(), + }; + } + + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // Generate the client key and the server key: + /// let num_blocks = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); + /// + /// let msg1 = 10u32; + /// let ct1 = cks.encrypt(msg1); + /// + /// // Copy to GPU + /// let d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let ct2 = d_ct1.to_radix_ciphertext(&mut stream); + /// let msg2 = cks.decrypt(&ct2); + /// + /// assert_eq!(msg1, msg2); + /// ``` + pub fn to_radix_ciphertext(&self, stream: &CudaStream) -> RadixCiphertext { + let h_lwe_ciphertext_list = self.d_blocks.to_lwe_ciphertext_list(stream); + let ciphertext_modulus = 
h_lwe_ciphertext_list.ciphertext_modulus(); + let lwe_size = h_lwe_ciphertext_list.lwe_size().0; + + let h_blocks: Vec = h_lwe_ciphertext_list + .into_container() + .chunks(lwe_size) + .zip(&self.info.blocks) + .map(|(data, i)| Ciphertext { + ct: LweCiphertextOwned::from_container(data.to_vec(), ciphertext_modulus), + degree: i.degree, + noise_level: i.noise_level, + message_modulus: i.message_modulus, + carry_modulus: i.carry_modulus, + pbs_order: i.pbs_order, + }) + .collect(); + + RadixCiphertext::from(h_blocks) + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn duplicate_async(&self, stream: &CudaStream) -> Self { + let lwe_ciphertext_count = self.d_blocks.lwe_ciphertext_count(); + let ciphertext_modulus = self.d_blocks.ciphertext_modulus(); + + let mut d_ct = stream.malloc_async(self.d_blocks.0.d_vec.len() as u32); + stream.copy_gpu_to_gpu_async(&mut d_ct, &self.d_blocks.0.d_vec); + + let d_blocks = + CudaLweCiphertextList::from_cuda_vec(d_ct, lwe_ciphertext_count, ciphertext_modulus); + + Self { + d_blocks, + info: self.info.clone(), + } + } + + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // Generate the client key and the server key: + /// let num_blocks = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); + /// + /// let msg = 10u32; + /// let ct = cks.encrypt(msg); + /// + /// // Copy to GPU + /// let d_ct = CudaRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// let d_ct_copied = d_ct.duplicate(&mut 
stream); + /// + /// let ct_copied = d_ct_copied.to_radix_ciphertext(&mut stream); + /// let msg_copied = cks.decrypt(&ct_copied); + /// + /// assert_eq!(msg, msg_copied); + /// ``` + pub fn duplicate(&self, stream: &CudaStream) -> Self { + let ct = unsafe { self.duplicate_async(stream) }; + stream.synchronize(); + ct + } + + pub fn is_equal(&self, other: &Self, stream: &CudaStream) -> bool { + let self_size = self.d_blocks.0.d_vec.len(); + let other_size = other.d_blocks.0.d_vec.len(); + let mut self_container: Vec = vec![0; self_size]; + let mut other_container: Vec = vec![0; other_size]; + + stream.copy_to_cpu_async(self_container.as_mut_slice(), &self.d_blocks.0.d_vec); + stream.copy_to_cpu_async(other_container.as_mut_slice(), &other.d_blocks.0.d_vec); + stream.synchronize(); + + self_container == other_container + } + + pub(crate) fn block_carries_are_empty(&self) -> bool { + self.info.blocks.iter().all(CudaBlockInfo::carry_is_empty) + } +} diff --git a/tfhe/src/integer/gpu/mod.rs b/tfhe/src/integer/gpu/mod.rs new file mode 100644 index 000000000..df501166e --- /dev/null +++ b/tfhe/src/integer/gpu/mod.rs @@ -0,0 +1,1935 @@ +pub mod ciphertext; +pub mod server_key; + +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::{ + DecompositionBaseLog, DecompositionLevelCount, GlweDimension, LweBskGroupingFactor, + LweDimension, PolynomialSize, UnsignedInteger, +}; +use crate::integer::{ClientKey, RadixClientKey}; +use crate::shortint::{CarryModulus, MessageModulus}; +pub use server_key::CudaServerKey; +use tfhe_cuda_backend::cuda_bind::*; + +#[repr(u32)] +#[derive(Clone, Copy)] +pub enum BitOpType { + And = 0, + Or = 1, + Xor = 2, + Not = 3, + ScalarAnd = 4, + ScalarOr = 5, + ScalarXor = 6, +} + +#[allow(dead_code)] +#[repr(u32)] +enum PBSType { + MultiBit = 0, + ClassicalLowLat = 1, + ClassicalAmortized = 2, +} + +#[repr(u32)] +enum ShiftType { + Left = 0, + Right = 1, +} + +#[repr(u32)] +pub enum 
ComparisonType { + EQ = 0, + NE = 1, + GT = 2, + GE = 3, + LT = 4, + LE = 5, + MAX = 6, + MIN = 7, +} + +pub fn gen_keys_gpu

(parameters_set: P, stream: &CudaStream) -> (ClientKey, CudaServerKey) +where + P: TryInto, +

>::Error: std::fmt::Debug, +{ + let shortint_parameters_set: crate::shortint::parameters::ShortintParameterSet = + parameters_set.try_into().unwrap(); + + let is_wopbs_only_params = shortint_parameters_set.wopbs_only(); + + // TODO + // Manually manage the wopbs only case as a workaround pending wopbs rework + let shortint_parameters_set = if is_wopbs_only_params { + let wopbs_params = shortint_parameters_set.wopbs_parameters().unwrap(); + let pbs_params = crate::shortint::parameters::ClassicPBSParameters { + lwe_dimension: wopbs_params.lwe_dimension, + glwe_dimension: wopbs_params.glwe_dimension, + polynomial_size: wopbs_params.polynomial_size, + lwe_modular_std_dev: wopbs_params.lwe_modular_std_dev, + glwe_modular_std_dev: wopbs_params.glwe_modular_std_dev, + pbs_base_log: wopbs_params.pbs_base_log, + pbs_level: wopbs_params.pbs_level, + ks_base_log: wopbs_params.ks_base_log, + ks_level: wopbs_params.ks_level, + message_modulus: wopbs_params.message_modulus, + carry_modulus: wopbs_params.carry_modulus, + ciphertext_modulus: wopbs_params.ciphertext_modulus, + encryption_key_choice: wopbs_params.encryption_key_choice, + }; + + crate::shortint::parameters::ShortintParameterSet::try_new_pbs_and_wopbs_param_set(( + pbs_params, + wopbs_params, + )) + .unwrap() + } else { + shortint_parameters_set + }; + + let gen_keys_inner = |parameters_set, stream: &CudaStream| { + let cks = ClientKey::new(parameters_set); + let sks = CudaServerKey::new(&cks, stream); + + (cks, sks) + }; + + // #[cfg(any(test, feature = "internal-keycache"))] + // { + // if is_wopbs_only_params { + // // TODO + // // Keycache is broken for the wopbs only case, so generate keys instead + // gen_keys_inner(shortint_parameters_set) + // } else { + // keycache::KEY_CACHE.get_from_params(shortint_parameters_set.pbs_parameters(). 
+ // unwrap()) } + // } + // #[cfg(all(not(test), not(feature = "internal-keycache")))] + // { + gen_keys_inner(shortint_parameters_set, stream) + // } +} + +/// Generate a couple of client and server keys with given parameters +/// +/// Contrary to [gen_keys], this returns a [RadixClientKey] +/// +/// ```rust +/// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; +/// use tfhe::integer::gpu::gen_keys_radix_gpu; +/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; +/// +/// let gpu_index = 0; +/// let device = CudaDevice::new(gpu_index); +/// let mut stream = CudaStream::new_unchecked(device); +/// // generate the client key and the server key: +/// let num_blocks = 4; +/// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); +/// ``` +pub fn gen_keys_radix_gpu

( + parameters_set: P, + num_blocks: usize, + stream: &CudaStream, +) -> (RadixClientKey, CudaServerKey) +where + P: TryInto, +

>::Error: std::fmt::Debug, +{ + let (cks, sks) = gen_keys_gpu(parameters_set, stream); + + (RadixClientKey::from((cks, num_blocks)), sks) +} + +impl CudaStream { + #[allow(clippy::too_many_arguments)] + pub fn scalar_addition_integer_radix_assign_async( + &self, + lwe_array: &mut CudaVec, + scalar_input: &CudaVec, + lwe_dimension: LweDimension, + num_samples: u32, + message_modulus: u32, + carry_modulus: u32, + ) { + unsafe { + cuda_scalar_addition_integer_radix_ciphertext_64_inplace( + self.as_c_ptr(), + lwe_array.as_mut_c_ptr(), + scalar_input.as_c_ptr(), + lwe_dimension.0 as u32, + num_samples, + message_modulus, + carry_modulus, + ); + } + } + + pub fn small_scalar_mult_integer_radix_assign_async( + &self, + lwe_array: &mut CudaVec, + scalar: u64, + lwe_dimension: LweDimension, + num_blocks: u32, + ) { + unsafe { + cuda_small_scalar_multiplication_integer_radix_ciphertext_64_inplace( + self.as_c_ptr(), + lwe_array.as_mut_c_ptr(), + scalar, + lwe_dimension.0 as u32, + num_blocks, + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_add_integer_radix_assign_async( + &self, + radix_lwe_left: &mut CudaVec, + radix_lwe_right: &CudaVec, + lwe_dimension: LweDimension, + num_blocks: u32, + ) { + unsafe { + cuda_add_lwe_ciphertext_vector_64( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + lwe_dimension.0 as u32, + num_blocks, + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_mul_integer_radix_classic_kb_async( + &self, + radix_lwe_out: &mut CudaVec, + radix_lwe_left: &CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + lwe_dimension: LweDimension, + polynomial_size: PolynomialSize, + pbs_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + ks_level: 
DecompositionLevelCount, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_mult_radix_ciphertext_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + message_modulus.0 as u32, + carry_modulus.0 as u32, + glwe_dimension.0 as u32, + lwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + ks_base_log.0 as u32, + ks_level.0 as u32, + 0, + num_blocks, + PBSType::ClassicalLowLat as u32, + self.device().get_max_shared_memory() as u32, + true, + ); + cuda_integer_mult_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_out.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + mem_ptr, + message_modulus.0 as u32, + carry_modulus.0 as u32, + glwe_dimension.0 as u32, + lwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + ks_base_log.0 as u32, + ks_level.0 as u32, + 0, + num_blocks, + PBSType::ClassicalLowLat as u32, + self.device().get_max_shared_memory() as u32, + ); + cleanup_cuda_integer_mult(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_mul_integer_radix_multibit_kb_async( + &self, + radix_lwe_out: &mut CudaVec, + radix_lwe_left: &CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + lwe_dimension: LweDimension, + polynomial_size: PolynomialSize, + pbs_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + ks_level: DecompositionLevelCount, + grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_mult_radix_ciphertext_kb_64( + self.as_c_ptr(), + 
std::ptr::addr_of_mut!(mem_ptr), + message_modulus.0 as u32, + carry_modulus.0 as u32, + glwe_dimension.0 as u32, + lwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + ks_base_log.0 as u32, + ks_level.0 as u32, + grouping_factor.0 as u32, + num_blocks, + PBSType::MultiBit as u32, + self.device().get_max_shared_memory() as u32, + true, + ); + cuda_integer_mult_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_out.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + mem_ptr, + message_modulus.0 as u32, + carry_modulus.0 as u32, + glwe_dimension.0 as u32, + lwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + ks_base_log.0 as u32, + ks_level.0 as u32, + grouping_factor.0 as u32, + num_blocks, + PBSType::MultiBit as u32, + self.device().get_max_shared_memory() as u32, + ); + cleanup_cuda_integer_mult(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_mul_integer_radix_classic_kb_assign_async( + &self, + radix_lwe_left: &mut CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + lwe_dimension: LweDimension, + polynomial_size: PolynomialSize, + pbs_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + ks_level: DecompositionLevelCount, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_mult_radix_ciphertext_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + message_modulus.0 as u32, + carry_modulus.0 as u32, + glwe_dimension.0 as u32, + lwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + ks_base_log.0 as u32, + 
ks_level.0 as u32, + 0, + num_blocks, + PBSType::ClassicalLowLat as u32, + self.device().get_max_shared_memory() as u32, + true, + ); + cuda_integer_mult_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + mem_ptr, + message_modulus.0 as u32, + carry_modulus.0 as u32, + glwe_dimension.0 as u32, + lwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + ks_base_log.0 as u32, + ks_level.0 as u32, + 0, + num_blocks, + PBSType::ClassicalLowLat as u32, + self.device().get_max_shared_memory() as u32, + ); + cleanup_cuda_integer_mult(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_mul_integer_radix_multibit_kb_assign_async( + &self, + radix_lwe_left: &mut CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + lwe_dimension: LweDimension, + polynomial_size: PolynomialSize, + pbs_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + ks_level: DecompositionLevelCount, + grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_mult_radix_ciphertext_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + message_modulus.0 as u32, + carry_modulus.0 as u32, + glwe_dimension.0 as u32, + lwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + ks_base_log.0 as u32, + ks_level.0 as u32, + grouping_factor.0 as u32, + num_blocks, + PBSType::MultiBit as u32, + self.device().get_max_shared_memory() as u32, + true, + ); + cuda_integer_mult_radix_ciphertext_kb_64( + self.as_c_ptr(), + 
radix_lwe_left.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + mem_ptr, + message_modulus.0 as u32, + carry_modulus.0 as u32, + glwe_dimension.0 as u32, + lwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + ks_base_log.0 as u32, + ks_level.0 as u32, + grouping_factor.0 as u32, + num_blocks, + PBSType::MultiBit as u32, + self.device().get_max_shared_memory() as u32, + ); + cleanup_cuda_integer_mult(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_bitop_integer_radix_classic_kb_async( + &self, + radix_lwe_out: &mut CudaVec, + radix_lwe_left: &CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + polynomial_size: PolynomialSize, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + op: BitOpType, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_bitop_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + op as u32, + true, + ); + cuda_bitop_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_out.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + 
cleanup_cuda_integer_bitop(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_bitop_integer_radix_classic_kb_assign_async( + &self, + radix_lwe_left: &mut CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + op: BitOpType, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_bitop_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + op as u32, + true, + ); + cuda_bitop_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_bitop(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_bitnot_integer_radix_classic_kb_assign_async( + &self, + radix_lwe_left: &mut CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, 
+ ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_bitop_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + BitOpType::Not as u32, + true, + ); + cuda_bitnot_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_bitop(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_bitnot_integer_radix_multibit_kb_assign_async( + &self, + radix_lwe_left: &mut CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_bitop_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + 
num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + BitOpType::Not as u32, + true, + ); + cuda_bitnot_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_bitop(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_bitop_integer_radix_multibit_kb_assign_async( + &self, + radix_lwe: &mut CudaVec, + clear_blocks: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + grouping_factor: LweBskGroupingFactor, + op: BitOpType, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_bitop_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + op as u32, + true, + ); + cuda_scalar_bitop_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe.as_mut_c_ptr(), + radix_lwe.as_mut_c_ptr(), + clear_blocks.as_c_ptr(), + clear_blocks.len() as u32, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + op as u32, + ); + cleanup_cuda_integer_bitop(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } 
+ + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_bitop_integer_radix_classic_kb_assign_async( + &self, + radix_lwe: &mut CudaVec, + clear_blocks: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + op: BitOpType, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_bitop_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + op as u32, + true, + ); + cuda_scalar_bitop_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe.as_mut_c_ptr(), + radix_lwe.as_mut_c_ptr(), + clear_blocks.as_c_ptr(), + clear_blocks.len() as u32, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + op as u32, + ); + cleanup_cuda_integer_bitop(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_bitop_integer_radix_multibit_kb_assign_async( + &self, + radix_lwe_left: &mut CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: 
DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + op: BitOpType, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_bitop_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + op as u32, + true, + ); + cuda_bitop_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_bitop(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_comparison_integer_radix_classic_kb_async( + &self, + radix_lwe_out: &mut CudaVec, + radix_lwe_left: &CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + op: ComparisonType, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_comparison_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as 
u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + op as u32, + true, + ); + + cuda_comparison_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_out.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + + cleanup_cuda_integer_comparison(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_comparison_integer_radix_classic_kb_assign_async( + &self, + radix_lwe_left: &mut CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + op: ComparisonType, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_comparison_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + op as u32, + true, + ); + cuda_comparison_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + 
cleanup_cuda_integer_comparison(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_comparison_integer_radix_multibit_kb_async( + &self, + radix_lwe_out: &mut CudaVec, + radix_lwe_left: &CudaVec, + radix_lwe_right: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + op: ComparisonType, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_comparison_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + op as u32, + true, + ); + cuda_comparison_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_out.as_mut_c_ptr(), + radix_lwe_left.as_c_ptr(), + radix_lwe_right.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_comparison(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_comparison_integer_radix_classic_kb_async( + &self, + radix_lwe_out: &mut CudaVec, + radix_lwe_in: &CudaVec, + scalar_blocks: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + 
glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + num_scalar_blocks: u32, + op: ComparisonType, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_comparison_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + op as u32, + true, + ); + + cuda_scalar_comparison_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_out.as_mut_c_ptr(), + radix_lwe_in.as_c_ptr(), + scalar_blocks.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + num_scalar_blocks, + ); + + cleanup_cuda_integer_comparison(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_comparison_integer_radix_multibit_kb_async( + &self, + radix_lwe_out: &mut CudaVec, + radix_lwe_in: &CudaVec, + scalar_blocks: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + num_scalar_blocks: u32, + op: ComparisonType, + ) { + let mut mem_ptr: *mut 
i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_comparison_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + op as u32, + true, + ); + cuda_scalar_comparison_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_out.as_mut_c_ptr(), + radix_lwe_in.as_c_ptr(), + scalar_blocks.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + num_scalar_blocks, + ); + cleanup_cuda_integer_comparison(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn full_propagate_classic_assign_async( + &self, + radix_lwe_input: &mut CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + lwe_dimension: LweDimension, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_full_propagation_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + lwe_dimension.0 as u32, + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_level.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + true, + ); + cuda_full_propagation_64_inplace( + self.as_c_ptr(), + radix_lwe_input.as_mut_c_ptr(), + mem_ptr, + keyswitch_key.as_c_ptr(), + bootstrapping_key.as_c_ptr(), + lwe_dimension.0 as u32, + glwe_dimension.0 as u32, + 
polynomial_size.0 as u32, + ks_base_log.0 as u32, + ks_level.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + 0, + num_blocks, + ); + cleanup_cuda_full_propagation(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn full_propagate_multibit_assign_async( + &self, + radix_lwe_input: &mut CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + lwe_dimension: LweDimension, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_full_propagation_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + lwe_dimension.0 as u32, + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + pbs_level.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + true, + ); + cuda_full_propagation_64_inplace( + self.as_c_ptr(), + radix_lwe_input.as_mut_c_ptr(), + mem_ptr, + keyswitch_key.as_c_ptr(), + bootstrapping_key.as_c_ptr(), + lwe_dimension.0 as u32, + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + ks_base_log.0 as u32, + ks_level.0 as u32, + pbs_base_log.0 as u32, + pbs_level.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + ); + cleanup_cuda_full_propagation(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn propagate_single_carry_classic_assign_async( + &self, + radix_lwe_input: &mut CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + lwe_dimension: LweDimension, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + ks_level: 
DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + let big_lwe_dimension: u32 = glwe_dimension.0 as u32 * polynomial_size.0 as u32; + scratch_cuda_propagate_single_carry_low_latency_kb_64_inplace( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension, + lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + true, + ); + cuda_propagate_single_carry_low_latency_kb_64_inplace( + self.as_c_ptr(), + radix_lwe_input.as_mut_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_propagate_single_carry_low_latency( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn propagate_single_carry_multibit_assign_async( + &self, + radix_lwe_input: &mut CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + lwe_dimension: LweDimension, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + let big_lwe_dimension: u32 = glwe_dimension.0 as u32 * polynomial_size.0 as u32; + scratch_cuda_propagate_single_carry_low_latency_kb_64_inplace( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + 
polynomial_size.0 as u32, + big_lwe_dimension, + lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + true, + ); + cuda_propagate_single_carry_low_latency_kb_64_inplace( + self.as_c_ptr(), + radix_lwe_input.as_mut_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_propagate_single_carry_low_latency( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_shift_left_integer_radix_classic_kb_assign_async( + &self, + radix_lwe_left: &mut CudaVec, + shift: u32, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_scalar_shift_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + ShiftType::Left as u32, + true, + ); + cuda_integer_radix_scalar_shift_kb_64_inplace( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + shift, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_scalar_shift( 
+ self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_shift_left_integer_radix_multibit_kb_assign_async< + T: UnsignedInteger, + >( + &self, + radix_lwe_left: &mut CudaVec, + shift: u32, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_scalar_shift_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + ShiftType::Left as u32, + true, + ); + cuda_integer_radix_scalar_shift_kb_64_inplace( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + shift, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_scalar_shift( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_shift_right_integer_radix_classic_kb_assign_async< + T: UnsignedInteger, + >( + &self, + radix_lwe_left: &mut CudaVec, + shift: u32, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + 
big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_scalar_shift_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + ShiftType::Right as u32, + true, + ); + cuda_integer_radix_scalar_shift_kb_64_inplace( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + shift, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_scalar_shift( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_shift_right_integer_radix_multibit_kb_assign_async< + T: UnsignedInteger, + >( + &self, + radix_lwe_left: &mut CudaVec, + shift: u32, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_scalar_shift_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + 
big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + ShiftType::Right as u32, + true, + ); + cuda_integer_radix_scalar_shift_kb_64_inplace( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + shift, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_scalar_shift( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_cmux_integer_radix_classic_kb_async( + &self, + radix_lwe_out: &mut CudaVec, + radix_lwe_condition: &CudaVec, + radix_lwe_true: &CudaVec, + radix_lwe_false: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_cmux_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + true, + ); + cuda_cmux_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_out.as_mut_c_ptr(), + radix_lwe_condition.as_c_ptr(), + radix_lwe_true.as_c_ptr(), + radix_lwe_false.as_c_ptr(), + mem_ptr, + 
bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_cmux(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_cmux_integer_radix_multibit_kb_async( + &self, + radix_lwe_out: &mut CudaVec, + radix_lwe_condition: &CudaVec, + radix_lwe_true: &CudaVec, + radix_lwe_false: &CudaVec, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_cmux_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + true, + ); + cuda_cmux_integer_radix_ciphertext_kb_64( + self.as_c_ptr(), + radix_lwe_out.as_mut_c_ptr(), + radix_lwe_condition.as_c_ptr(), + radix_lwe_true.as_c_ptr(), + radix_lwe_false.as_c_ptr(), + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_cmux(self.as_c_ptr(), std::ptr::addr_of_mut!(mem_ptr)); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_rotate_left_integer_radix_classic_kb_assign_async< + T: UnsignedInteger, + >( + &self, + radix_lwe_left: &mut CudaVec, + n: u32, + bootstrapping_key: &CudaVec, + 
keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_scalar_rotate_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + ShiftType::Left as u32, + true, + ); + cuda_integer_radix_scalar_rotate_kb_64_inplace( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + n, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_scalar_rotate( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_rotate_left_integer_radix_multibit_kb_assign_async< + T: UnsignedInteger, + >( + &self, + radix_lwe_left: &mut CudaVec, + n: u32, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + 
scratch_cuda_integer_radix_scalar_rotate_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + ShiftType::Left as u32, + true, + ); + cuda_integer_radix_scalar_rotate_kb_64_inplace( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + n, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_scalar_rotate( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_rotate_right_integer_radix_classic_kb_assign_async< + T: UnsignedInteger, + >( + &self, + radix_lwe_left: &mut CudaVec, + n: u32, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_scalar_rotate_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + 0, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::ClassicalLowLat as u32, + ShiftType::Right as u32, + true, + ); + cuda_integer_radix_scalar_rotate_kb_64_inplace( + 
self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + n, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_scalar_rotate( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } + + #[allow(clippy::too_many_arguments)] + pub fn unchecked_scalar_rotate_right_integer_radix_multibit_kb_assign_async< + T: UnsignedInteger, + >( + &self, + radix_lwe_left: &mut CudaVec, + n: u32, + bootstrapping_key: &CudaVec, + keyswitch_key: &CudaVec, + message_modulus: MessageModulus, + carry_modulus: CarryModulus, + glwe_dimension: GlweDimension, + polynomial_size: PolynomialSize, + big_lwe_dimension: LweDimension, + small_lwe_dimension: LweDimension, + ks_level: DecompositionLevelCount, + ks_base_log: DecompositionBaseLog, + pbs_level: DecompositionLevelCount, + pbs_base_log: DecompositionBaseLog, + pbs_grouping_factor: LweBskGroupingFactor, + num_blocks: u32, + ) { + let mut mem_ptr: *mut i8 = std::ptr::null_mut(); + unsafe { + scratch_cuda_integer_radix_scalar_rotate_kb_64( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + glwe_dimension.0 as u32, + polynomial_size.0 as u32, + big_lwe_dimension.0 as u32, + small_lwe_dimension.0 as u32, + ks_level.0 as u32, + ks_base_log.0 as u32, + pbs_level.0 as u32, + pbs_base_log.0 as u32, + pbs_grouping_factor.0 as u32, + num_blocks, + message_modulus.0 as u32, + carry_modulus.0 as u32, + PBSType::MultiBit as u32, + ShiftType::Right as u32, + true, + ); + cuda_integer_radix_scalar_rotate_kb_64_inplace( + self.as_c_ptr(), + radix_lwe_left.as_mut_c_ptr(), + n, + mem_ptr, + bootstrapping_key.as_c_ptr(), + keyswitch_key.as_c_ptr(), + num_blocks, + ); + cleanup_cuda_integer_radix_scalar_rotate( + self.as_c_ptr(), + std::ptr::addr_of_mut!(mem_ptr), + ); + } + } +} diff --git a/tfhe/src/integer/gpu/server_key/mod.rs b/tfhe/src/integer/gpu/server_key/mod.rs new file mode 100644 index 000000000..407741c97 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/mod.rs @@ -0,0 
+1,363 @@ +use crate::core_crypto::commons::traits::contiguous_entity_container::ContiguousEntityContainerMut; +use crate::core_crypto::gpu::lwe_bootstrap_key::CudaLweBootstrapKey; +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::core_crypto::gpu::lwe_keyswitch_key::CudaLweKeyswitchKey; +use crate::core_crypto::gpu::lwe_multi_bit_bootstrap_key::CudaLweMultiBitBootstrapKey; +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::{ + allocate_and_generate_new_lwe_keyswitch_key, par_allocate_and_generate_new_lwe_bootstrap_key, + par_allocate_and_generate_new_lwe_multi_bit_bootstrap_key, LweBootstrapKeyOwned, + LweCiphertextCount, LweCiphertextList, LweMultiBitBootstrapKeyOwned, +}; +use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto}; +use crate::integer::gpu::ciphertext::{ + CudaBlockInfo, CudaRadixCiphertext, CudaRadixCiphertextInfo, +}; +use crate::integer::ClientKey; +use crate::shortint::ciphertext::{Degree, MaxDegree, NoiseLevel}; +use crate::shortint::engine::ShortintEngine; +use crate::shortint::{CarryModulus, CiphertextModulus, MessageModulus, PBSOrder}; + +mod radix; + +pub enum CudaBootstrappingKey { + Classic(CudaLweBootstrapKey), + MultiBit(CudaLweMultiBitBootstrapKey), +} + +/// A structure containing the server public key. +/// +/// The server key is generated by the client and is meant to be published: the client +/// sends it to the server so it can compute homomorphic circuits. 
+// #[derive(PartialEq, Serialize, Deserialize)] +pub struct CudaServerKey { + pub key_switching_key: CudaLweKeyswitchKey, + pub bootstrapping_key: CudaBootstrappingKey, + // Size of the message buffer + pub message_modulus: MessageModulus, + // Size of the carry buffer + pub carry_modulus: CarryModulus, + // Maximum number of operations that can be done before emptying the operation buffer + pub max_degree: MaxDegree, + // Modulus used for computations on the ciphertext + pub ciphertext_modulus: CiphertextModulus, + pub pbs_order: PBSOrder, +} + +impl CudaServerKey { + /// Generates a server key that stores keys in the device memory. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::CudaServerKey; + /// use tfhe::integer::ClientKey; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // Generate the client key: + /// let cks = ClientKey::new(PARAM_MESSAGE_2_CARRY_2_KS_PBS); + /// + /// // Generate the server key: + /// let sks = CudaServerKey::new(&cks, &mut stream); + /// ``` + pub fn new(cks: C, stream: &CudaStream) -> Self + where + C: AsRef, + { + // There should remain just enough space to add a carry + let client_key = cks.as_ref(); + let max_degree = MaxDegree::integer_radix_server_key( + client_key.key.parameters.message_modulus(), + client_key.key.parameters.carry_modulus(), + ); + Self::new_server_key_with_max_degree(client_key, max_degree, stream) + } + + pub(crate) fn new_server_key_with_max_degree( + cks: &ClientKey, + max_degree: MaxDegree, + stream: &CudaStream, + ) -> Self { + let mut engine = ShortintEngine::new(); + + // Generate a regular keyset and convert to the GPU + let pbs_params_base = &cks.parameters(); + let d_bootstrapping_key = match pbs_params_base { +
crate::shortint::PBSParameters::PBS(pbs_params) => { + let h_bootstrap_key: LweBootstrapKeyOwned = + par_allocate_and_generate_new_lwe_bootstrap_key( + &cks.key.small_lwe_secret_key(), + &cks.key.glwe_secret_key, + pbs_params.pbs_base_log, + pbs_params.pbs_level, + pbs_params.glwe_modular_std_dev, + pbs_params.ciphertext_modulus, + &mut engine.encryption_generator, + ); + + let d_bootstrap_key = + CudaLweBootstrapKey::from_lwe_bootstrap_key(&h_bootstrap_key, stream); + + CudaBootstrappingKey::Classic(d_bootstrap_key) + } + crate::shortint::PBSParameters::MultiBitPBS(pbs_params) => { + let h_bootstrap_key: LweMultiBitBootstrapKeyOwned = + par_allocate_and_generate_new_lwe_multi_bit_bootstrap_key( + &cks.key.small_lwe_secret_key(), + &cks.key.glwe_secret_key, + pbs_params.pbs_base_log, + pbs_params.pbs_level, + pbs_params.grouping_factor, + pbs_params.glwe_modular_std_dev, + pbs_params.ciphertext_modulus, + &mut engine.encryption_generator, + ); + + let d_bootstrap_key = CudaLweMultiBitBootstrapKey::from_lwe_multi_bit_bootstrap_key( + &h_bootstrap_key, + stream, + ); + + CudaBootstrappingKey::MultiBit(d_bootstrap_key) + } + }; + + // Creation of the key switching key + let h_key_switching_key = allocate_and_generate_new_lwe_keyswitch_key( + &cks.key.large_lwe_secret_key(), + &cks.key.small_lwe_secret_key(), + cks.parameters().ks_base_log(), + cks.parameters().ks_level(), + cks.parameters().lwe_modular_std_dev(), + cks.parameters().ciphertext_modulus(), + &mut engine.encryption_generator, + ); + + let d_key_switching_key = + CudaLweKeyswitchKey::from_lwe_keyswitch_key(&h_key_switching_key, stream); + + assert!(matches!( + cks.parameters().encryption_key_choice().into(), + PBSOrder::KeyswitchBootstrap + )); + + // Pack the keys in the server key set: + Self { + key_switching_key: d_key_switching_key, + bootstrapping_key: d_bootstrapping_key, + message_modulus: cks.parameters().message_modulus(), + carry_modulus: cks.parameters().carry_modulus(), + max_degree, + 
ciphertext_modulus: cks.parameters().ciphertext_modulus(), + pbs_order: cks.parameters().encryption_key_choice().into(), + } + } + + // pub(crate) fn from_server_key(key: ServerKey, cks: &ClientKey, stream: &CudaStream) -> + // Self { + // + // let bootstrapping_key = key.bootstrapping_key; + // + // let bootstrapping_key = match bootstrapping_key { + // ShortintBootstrappingKey::Classic(fourier_key) => { + // // Handle the Classic variant + // + // CudaBootstrappingKey::Classic(CudaLweBootstrapKey::from_lwe_bootstrap_key(fourier_key, + // stream)); } + // ShortintBootstrappingKey::MultiBit { fourier_bsk, thread_count, + // deterministic_execution } => { // Handle the MultiBit variant + // CudaBootstrappingKey::MultiBit + // (CudaLweMultiBitBootstrapKey::from_lwe_multi_bit_bootstrap_key + // (fourier_bsk, stream)); + // } + // }; + // } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub(crate) unsafe fn propagate_single_carry_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + stream: &CudaStream, + ) { + let num_blocks = ct.d_blocks.lwe_ciphertext_count().0 as u32; + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.propagate_single_carry_classic_assign_async( + &mut ct.d_blocks.0.d_vec, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + d_bsk.input_lwe_dimension(), + d_bsk.glwe_dimension(), + d_bsk.polynomial_size(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count(), + d_bsk.decomp_base_log(), + num_blocks, + ct.info.blocks.first().unwrap().message_modulus, + ct.info.blocks.first().unwrap().carry_modulus, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.propagate_single_carry_multibit_assign_async( + &mut ct.d_blocks.0.d_vec, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + 
d_multibit_bsk.input_lwe_dimension(), + d_multibit_bsk.glwe_dimension(), + d_multibit_bsk.polynomial_size(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count(), + d_multibit_bsk.decomp_base_log(), + d_multibit_bsk.grouping_factor, + num_blocks, + ct.info.blocks.first().unwrap().message_modulus, + ct.info.blocks.first().unwrap().carry_modulus, + ); + } + }; + ct.info + .blocks + .iter_mut() + .for_each(|b| b.degree = Degree::new(b.message_modulus.0 - 1)); + } + + pub(crate) unsafe fn full_propagate_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + stream: &CudaStream, + ) { + let num_blocks = ct.d_blocks.lwe_ciphertext_count().0 as u32; + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.full_propagate_classic_assign_async( + &mut ct.d_blocks.0.d_vec, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + d_bsk.input_lwe_dimension(), + d_bsk.glwe_dimension(), + d_bsk.polynomial_size(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count(), + d_bsk.decomp_base_log(), + num_blocks, + ct.info.blocks.first().unwrap().message_modulus, + ct.info.blocks.first().unwrap().carry_modulus, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.full_propagate_multibit_assign_async( + &mut ct.d_blocks.0.d_vec, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + d_multibit_bsk.input_lwe_dimension(), + d_multibit_bsk.glwe_dimension(), + d_multibit_bsk.polynomial_size(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count(), + d_multibit_bsk.decomp_base_log(), + d_multibit_bsk.grouping_factor, + num_blocks, + ct.info.blocks.first().unwrap().message_modulus, + ct.info.blocks.first().unwrap().carry_modulus, + ); + } + }; + ct.info + .blocks + .iter_mut() + 
.for_each(|b| b.degree = Degree::new(b.message_modulus.0 - 1)); + } + + /// Create a ciphertext filled with zeros + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::integer::{gen_keys_radix, RadixCiphertext}; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let num_blocks = 4; + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); + /// + /// let d_ctxt: CudaRadixCiphertext = sks.create_trivial_zero_radix(num_blocks, &mut stream); + /// let ctxt = d_ctxt.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ctxt); + /// assert_eq!(0, dec); + /// ``` + pub fn create_trivial_zero_radix( + &self, + num_blocks: usize, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + self.create_trivial_radix(0, num_blocks, stream) + } + + pub fn create_trivial_radix<T>( + &self, + scalar: T, + num_blocks: usize, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto<u64>, + { + let lwe_size = match self.pbs_order { + PBSOrder::KeyswitchBootstrap => self.key_switching_key.input_key_lwe_size(), + PBSOrder::BootstrapKeyswitch => self.key_switching_key.output_key_lwe_size(), + }; + + let delta = (1_u64 << 63) / (self.message_modulus.0 * self.carry_modulus.0) as u64; + + let decomposer = BlockDecomposer::new(scalar, self.message_modulus.0.ilog2()) + .iter_as::<u64>() + .chain(std::iter::repeat(0)) + .take(num_blocks); + let mut cpu_lwe_list = LweCiphertextList::new( + 0, + lwe_size, + LweCiphertextCount(num_blocks), + self.ciphertext_modulus, + ); + let mut info = 
Vec::with_capacity(num_blocks); + for (block_value, mut lwe) in decomposer.zip(cpu_lwe_list.iter_mut()) { + *lwe.get_mut_body().data = block_value * delta; + info.push(CudaBlockInfo { + degree: Degree::new(block_value as usize), + message_modulus: self.message_modulus, + carry_modulus: self.carry_modulus, + pbs_order: self.pbs_order, + noise_level: NoiseLevel::ZERO, + }); + } + + let d_blocks = CudaLweCiphertextList::from_lwe_ciphertext_list(&cpu_lwe_list, stream); + + CudaRadixCiphertext { + d_blocks, + info: CudaRadixCiphertextInfo { blocks: info }, + } + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/add.rs b/tfhe/src/integer/gpu/server_key/radix/add.rs new file mode 100644 index 000000000..8033f25be --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/add.rs @@ -0,0 +1,234 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaServerKey; + +impl CudaServerKey { + /// Computes homomorphically an addition between two ciphertexts encrypting integer values. + /// + /// This function, like all "default" operations (i.e. not smart, checked or unchecked), will + /// check that the input ciphertexts block carries are empty and clears them if it's not the + /// case and the operation requires it. It outputs a ciphertext whose block carries are always + /// empty. + /// + /// This means that when using only "default" operations, a given operation (like add for + /// example) has always the same performance characteristics from one call to another and + /// guarantees correctness by pre-emptively clearing carries of output ciphertexts. 
+ /// + /// # Warning + /// + /// - Multithreaded + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // Generate the client key and the server key: + /// let num_blocks = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); + /// + /// let msg1 = 14; + /// let msg2 = 97; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// // Compute homomorphically an addition: + /// let d_ct_res = sks.add(&d_ct1, &d_ct2, &mut stream); + /// + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, msg1 + msg2); + /// ``` + pub fn add( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.add_assign(&mut result, ct_right, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn add_assign_async( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) 
=> (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + self.full_propagate_assign_async(ct_left, stream); + (ct_left, ct_right) + } + (false, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(ct_left, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + }; + self.unchecked_add_assign_async(lhs, rhs, stream); + self.propagate_single_carry_assign_async(lhs, stream); + } + + /// In-place version of [`Self::add`]: the result is stored in `ct_left`, and `stream` is + /// synchronized before returning. + pub fn add_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.add_assign_async(ct_left, ct_right, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically an addition between two ciphertexts encrypting integer values. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // Generate the client key and the server key: + /// let num_blocks = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); + /// + /// let msg1 = 10; + /// let msg2 = 127; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// // Compute homomorphically an addition: + /// let d_ct_res = sks.unchecked_add(&d_ct1, &d_ct2, &mut stream); + /// + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// 
assert_eq!(dec_result, msg1 + msg2); + /// ``` + pub fn unchecked_add( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.unchecked_add_assign(&mut result, ct_right, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_add_assign_async( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + assert_eq!( + ct_left.d_blocks.lwe_dimension(), + ct_right.d_blocks.lwe_dimension(), + "Mismatched lwe dimension between ct_left ({:?}) and ct_right ({:?})", + ct_left.d_blocks.lwe_dimension(), + ct_right.d_blocks.lwe_dimension() + ); + + assert_eq!( + ct_left.d_blocks.ciphertext_modulus(), + ct_right.d_blocks.ciphertext_modulus(), + "Mismatched moduli between ct_left ({:?}) and ct_right ({:?})", + ct_left.d_blocks.ciphertext_modulus(), + ct_right.d_blocks.ciphertext_modulus() + ); + + // The kernel below is launched with `ct_left`'s block count for both operands, so both + // ciphertexts must hold the same number of blocks. + assert_eq!( + ct_left.d_blocks.lwe_ciphertext_count(), + ct_right.d_blocks.lwe_ciphertext_count(), + "Mismatched number of blocks between ct_left ({:?}) and ct_right ({:?})", + ct_left.d_blocks.lwe_ciphertext_count(), + ct_right.d_blocks.lwe_ciphertext_count() + ); + + let lwe_dimension = ct_left.d_blocks.lwe_dimension(); + let lwe_ciphertext_count = ct_left.d_blocks.lwe_ciphertext_count(); + + stream.unchecked_add_integer_radix_assign_async( + &mut ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + lwe_dimension, + lwe_ciphertext_count.0 as u32, + ); + + ct_left.info = ct_left.info.after_add(&ct_right.info); + } + + /// In-place version of [`Self::unchecked_add`]: the result is stored in `ct_left`, and `stream` + /// is synchronized before returning. + pub fn unchecked_add_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.unchecked_add_assign_async(ct_left, ct_right, stream); + } + stream.synchronize(); + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs b/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs new file mode 100644 index 000000000..f9ae0d068 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/bitwise_op.rs 
@@ -0,0 +1,793 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaBootstrappingKey; +use crate::integer::gpu::{BitOpType, CudaServerKey}; + +impl CudaServerKey { + /// Computes homomorphically bitnot for an encrypted integer value. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// use std::ops::Not; + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 1u64; + /// + /// let ct = cks.encrypt(msg); + /// + /// // Copy to GPU + /// let d_ct = CudaRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// + /// // Compute homomorphically a bitwise not: + /// let d_ct_res = sks.unchecked_bitnot(&d_ct, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec, !msg % 256); + /// ``` + pub fn unchecked_bitnot( + &self, + ct: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.unchecked_bitnot_assign(&mut result, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and 
inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_bitnot_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + stream: &CudaStream, + ) { + let lwe_ciphertext_count = ct.d_blocks.lwe_ciphertext_count(); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_bitnot_integer_radix_classic_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_bitnot_integer_radix_multibit_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + ); + } + } + // NOTE(review): unlike the and/or/xor `_assign` wrappers, nothing here refreshes `ct.info` + // after the kernel ran - confirm the recorded degrees remain valid after a bitnot. + } + + pub fn unchecked_bitnot_assign(&self, ct: &mut CudaRadixCiphertext, stream: &CudaStream) { + unsafe { + self.unchecked_bitnot_assign_async(ct, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically bitand between two ciphertexts encrypting integer values. 
+ /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 201u64; + /// let msg2 = 1u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// // Compute homomorphically a bitwise and: + /// let d_ct_res = sks.unchecked_bitand(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec, msg1 & msg2); + /// ``` + pub fn unchecked_bitand( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.unchecked_bitand_assign(&mut result, ct_right, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_bitop_assign_async( + &self, + ct_left: &mut 
CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + op: BitOpType, + stream: &CudaStream, + ) { + assert_eq!( + ct_left.d_blocks.lwe_dimension(), + ct_right.d_blocks.lwe_dimension() + ); + assert_eq!( + ct_left.d_blocks.lwe_ciphertext_count(), + ct_right.d_blocks.lwe_ciphertext_count() + ); + + let lwe_ciphertext_count = ct_left.d_blocks.lwe_ciphertext_count(); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_bitop_integer_radix_classic_kb_assign_async( + &mut ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + op, + lwe_ciphertext_count.0 as u32, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_bitop_integer_radix_multibit_kb_assign_async( + &mut ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + op, + lwe_ciphertext_count.0 as u32, + ); + } + } + } + + pub fn unchecked_bitand_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + 
self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::And, stream); + ct_left.info = ct_left.info.after_bitand(&ct_right.info); + } + stream.synchronize(); + } + + /// Computes homomorphically bitor between two ciphertexts encrypting integer values. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 200u64; + /// let msg2 = 1u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// // Compute homomorphically a bitwise or: + /// let d_ct_res = sks.unchecked_bitor(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec, msg1 | msg2); + /// ``` + pub fn unchecked_bitor( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.unchecked_bitor_assign(&mut result, ct_right, stream); + 
result + } + + pub fn unchecked_bitor_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Or, stream); + ct_left.info = ct_left.info.after_bitor(&ct_right.info); + } + stream.synchronize(); + } + + /// Computes homomorphically bitxor between two ciphertexts encrypting integer values. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 49; + /// let msg2 = 64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// // Compute homomorphically a bitwise xor: + /// let d_ct_res = sks.unchecked_bitxor(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec, msg1 ^ msg2); + /// ``` + pub fn unchecked_bitxor( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: 
&CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.unchecked_bitxor_assign(&mut result, ct_right, stream); + result + } + + pub fn unchecked_bitxor_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.unchecked_bitop_assign_async(ct_left, ct_right, BitOpType::Xor, stream); + ct_left.info = ct_left.info.after_bitxor(&ct_right.info); + } + stream.synchronize(); + } + + /// Computes homomorphically bitand between two ciphertexts encrypting integer values. + /// + /// Unlike `unchecked_bitand`, operands whose block carries are not empty are first fully + /// propagated, on copies so that the inputs are left untouched. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 201u64; + /// let msg2 = 1u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// // Compute homomorphically a bitwise and: + /// let d_ct_res = sks.bitand(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); 
+ /// assert_eq!(dec, msg1 & msg2); + /// ``` + pub fn bitand( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.bitand_assign(&mut result, ct_right, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn bitand_assign_async( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + self.full_propagate_assign_async(ct_left, stream); + (ct_left, ct_right) + } + (false, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(ct_left, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + }; + self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::And, stream); + // Refresh the block metadata, as `unchecked_bitand_assign` does after the same kernel. + lhs.info = lhs.info.after_bitand(&rhs.info); + } + + pub fn bitand_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.bitand_assign_async(ct_left, ct_right, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically bitor between two ciphertexts encrypting integer values. + /// + /// Unlike `unchecked_bitor`, operands whose block carries are not empty are first fully + /// propagated, on copies so that the inputs are left untouched. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 201u64; + /// let msg2 = 1u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// // Compute homomorphically a bitwise or: + /// let d_ct_res = sks.bitor(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec, msg1 | msg2); + /// ``` + pub fn bitor( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.bitor_assign(&mut result, ct_right, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn bitor_assign_async( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, 
true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + self.full_propagate_assign_async(ct_left, stream); + (ct_left, ct_right) + } + (false, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(ct_left, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + }; + + self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Or, stream); + // Refresh the block metadata, as `unchecked_bitor_assign` does after the same kernel. + lhs.info = lhs.info.after_bitor(&rhs.info); + } + + pub fn bitor_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.bitor_assign_async(ct_left, ct_right, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically bitxor between two ciphertexts encrypting integer values. + /// + /// Unlike `unchecked_bitxor`, operands whose block carries are not empty are first fully + /// propagated, on copies so that the inputs are left untouched. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 201u64; + /// let msg2 = 1u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// // Compute homomorphically a bitwise xor: + /// let d_ct_res = sks.bitxor(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec, msg1 ^ msg2); + /// ``` + pub fn bitxor( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.bitxor_assign(&mut result, ct_right, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn bitxor_assign_async( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, 
true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + self.full_propagate_assign_async(ct_left, stream); + (ct_left, ct_right) + } + (false, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(ct_left, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + }; + + self.unchecked_bitop_assign_async(lhs, rhs, BitOpType::Xor, stream); + // Refresh the block metadata, as `unchecked_bitxor_assign` does after the same kernel. + lhs.info = lhs.info.after_bitxor(&rhs.info); + } + + pub fn bitxor_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.bitxor_assign_async(ct_left, ct_right, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically bitnot for an encrypted integer value. + /// + /// Unlike `unchecked_bitnot`, an input whose block carries are not empty is first fully + /// propagated, on a copy so that the input is left untouched. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use std::ops::Not; + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 1u64; + /// + /// let ct = cks.encrypt(msg); + /// + /// // Copy to GPU + /// let d_ct = CudaRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// + /// // Compute homomorphically a bitwise not: + /// let d_ct_res = sks.bitnot(&d_ct, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec, !msg % 256); + /// ``` + pub fn bitnot(&self, ct: &CudaRadixCiphertext, stream: &CudaStream) -> CudaRadixCiphertext { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.bitnot_assign(&mut result, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn bitnot_assign_async(&self, ct: &mut CudaRadixCiphertext, stream: &CudaStream) { + if !ct.block_carries_are_empty() { + self.full_propagate_assign_async(ct, stream); + } + + self.unchecked_bitnot_assign_async(ct, stream); + } + + pub fn bitnot_assign(&self, ct: &mut CudaRadixCiphertext, stream: &CudaStream) { + unsafe { + self.bitnot_assign_async(ct, stream); + } + stream.synchronize(); + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/cmux.rs 
b/tfhe/src/integer/gpu/server_key/radix/cmux.rs new file mode 100644 index 000000000..89c77061b --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/cmux.rs @@ -0,0 +1,151 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaBootstrappingKey; +use crate::integer::gpu::CudaServerKey; + +impl CudaServerKey { + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_if_then_else_async( + &self, + condition: &CudaRadixCiphertext, + true_ct: &CudaRadixCiphertext, + false_ct: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + // The kernel is launched with `true_ct`'s block count for every operand, so both branches + // must have the same shape. + assert_eq!( + true_ct.d_blocks.lwe_dimension(), + false_ct.d_blocks.lwe_dimension() + ); + assert_eq!( + true_ct.d_blocks.lwe_ciphertext_count(), + false_ct.d_blocks.lwe_ciphertext_count() + ); + + let lwe_ciphertext_count = true_ct.d_blocks.lwe_ciphertext_count(); + let mut result = + self.create_trivial_zero_radix(true_ct.d_blocks.lwe_ciphertext_count().0, stream); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_cmux_integer_radix_classic_kb_async( + &mut result.d_blocks.0.d_vec, + &condition.d_blocks.0.d_vec, + &true_ct.d_blocks.0.d_vec, + &false_ct.d_blocks.0.d_vec, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_cmux_integer_radix_multibit_kb_async( + &mut result.d_blocks.0.d_vec, + &condition.d_blocks.0.d_vec, + &true_ct.d_blocks.0.d_vec, + &false_ct.d_blocks.0.d_vec, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + 
self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + ); + } + } + + // NOTE(review): `result` still carries the metadata of the trivial zero ciphertext created + // above; nothing here refreshes `result.info` - confirm this is intended. + result + } + + /// Synchronous version of [`Self::unchecked_if_then_else_async`]: enqueues the cmux kernel, then + /// synchronizes `stream` before returning the result. + pub fn unchecked_if_then_else( + &self, + condition: &CudaRadixCiphertext, + true_ct: &CudaRadixCiphertext, + false_ct: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = + unsafe { self.unchecked_if_then_else_async(condition, true_ct, false_ct, stream) }; + stream.synchronize(); + result + } + + /// Same as [`Self::unchecked_if_then_else`], except that every input whose block carries are not + /// empty is first duplicated and fully propagated. The inputs themselves are never modified, and + /// `stream` is synchronized before returning. + pub fn if_then_else( + &self, + condition: &CudaRadixCiphertext, + true_ct: &CudaRadixCiphertext, + false_ct: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut tmp_condition; + let mut tmp_true_ct; + let mut tmp_false_ct; + + let result = unsafe { + let condition = if condition.block_carries_are_empty() { + condition + } else { + tmp_condition = condition.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_condition, stream); + &tmp_condition + }; + + let true_ct = if true_ct.block_carries_are_empty() { + true_ct + } else { + tmp_true_ct = true_ct.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_true_ct, stream); + &tmp_true_ct + }; + + let false_ct = if false_ct.block_carries_are_empty() { + false_ct + } else { + tmp_false_ct = false_ct.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_false_ct, stream); + &tmp_false_ct + }; + + self.unchecked_if_then_else_async(condition, true_ct, false_ct, stream) + }; + stream.synchronize(); + result + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/comparison.rs 
b/tfhe/src/integer/gpu/server_key/radix/comparison.rs new file mode 100644 index 000000000..a7e5eb06a --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/comparison.rs @@ -0,0 +1,1159 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaBootstrappingKey; +use crate::integer::gpu::{ComparisonType, CudaServerKey}; + +impl CudaServerKey { + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_comparison_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + op: ComparisonType, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + assert_eq!( + ct_left.d_blocks.lwe_dimension(), + ct_right.d_blocks.lwe_dimension() + ); + assert_eq!( + ct_left.d_blocks.lwe_ciphertext_count(), + ct_right.d_blocks.lwe_ciphertext_count() + ); + + let mut result = ct_left.duplicate_async(stream); + + let lwe_ciphertext_count = ct_left.d_blocks.lwe_ciphertext_count(); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_comparison_integer_radix_classic_kb_async( + &mut result.d_blocks.0.d_vec, + &ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + op, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_comparison_integer_radix_multibit_kb_async( + &mut result.d_blocks.0.d_vec, + 
&ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + op, + ); + } + } + + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_eq_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = + self.unchecked_comparison_async(ct_left, ct_right, ComparisonType::EQ, stream); + result.info = result.info.after_eq(); + result + } + + /// Compares for equality 2 ciphertexts + /// + /// Returns a ciphertext containing 1 if lhs == rhs, otherwise 0 + /// + /// Requires carry bits to be empty + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 14u64; + /// let msg2 = 97u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let 
ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// let d_ct_res = sks.unchecked_eq(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, u64::from(msg1 == msg2)); + /// ``` + pub fn unchecked_eq( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_eq_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_ne_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = + self.unchecked_comparison_async(ct_left, ct_right, ComparisonType::NE, stream); + result.info = result.info.after_ne(); + result + } + + /// Compares 2 ciphertexts for inequality + /// + /// Returns a ciphertext containing 1 if lhs != rhs, otherwise 0 + /// + /// Requires carry bits to be empty + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) =
gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 14u64; + /// let msg2 = 97u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// let d_ct_res = sks.unchecked_ne(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, u64::from(msg1 != msg2)); + /// ``` + pub fn unchecked_ne( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_ne_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn eq_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut tmp_lhs; + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + tmp_lhs = ct_left.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + (&tmp_lhs, ct_right) + } + (false, false) => { + tmp_lhs = ct_left.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(&mut tmp_lhs, stream); + self.full_propagate_assign_async(&mut
tmp_rhs, stream); + (&tmp_lhs, &tmp_rhs) + } + }; + + self.unchecked_eq_async(lhs, rhs, stream) + } + + /// Compares for equality 2 ciphertexts + /// + /// Returns a ciphertext containing 1 if lhs == rhs, otherwise 0 + /// + /// Inputs with non-empty carries are propagated on temporary copies first + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 14u64; + /// let msg2 = 97u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// let d_ct_res = sks.eq(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, u64::from(msg1 == msg2)); + /// ``` + pub fn eq( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.eq_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn ne_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, +
stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut tmp_lhs; + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + tmp_lhs = ct_left.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + (&tmp_lhs, ct_right) + } + (false, false) => { + tmp_lhs = ct_left.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(&mut tmp_lhs, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (&tmp_lhs, &tmp_rhs) + } + }; + + self.unchecked_ne_async(lhs, rhs, stream) + } + + /// Compares 2 ciphertexts for inequality + /// + /// Returns a ciphertext containing 1 if lhs != rhs, otherwise 0 + /// + /// Inputs with non-empty carries are propagated on temporary copies first + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 14u64; + /// let msg2 = 97u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// let d_ct_res = sks.ne(&d_ct1, &d_ct2, &mut stream); +
/// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, u64::from(msg1 != msg2)); + /// ``` + pub fn ne( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.ne_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_gt_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + self.unchecked_comparison_async(ct_left, ct_right, ComparisonType::GT, stream) + } + + /// Compares if lhs is strictly greater than rhs + /// + /// Returns a ciphertext containing 1 if lhs > rhs, otherwise 0 + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 14u64; + /// let msg2 = 97u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// let d_ct_res = sks.unchecked_gt(&d_ct1, &d_ct2,
&mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, u64::from(msg1 > msg2)); + /// ``` + pub fn unchecked_gt( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_gt_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_ge_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + self.unchecked_comparison_async(ct_left, ct_right, ComparisonType::GE, stream) + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn gt_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut tmp_lhs; + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + tmp_lhs = ct_left.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + (&tmp_lhs, ct_right) + } + (false, false) => { + tmp_lhs = ct_left.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(&mut tmp_lhs, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (&tmp_lhs, &tmp_rhs)
+ } + }; + + self.unchecked_gt_async(lhs, rhs, stream) + } + + pub fn gt( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.gt_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// Compares if lhs is greater or equal than rhs + /// + /// Returns a ciphertext containing 1 if lhs >= rhs, otherwise 0 + /// + /// Requires carry bits to be empty + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 97u64; + /// let msg2 = 97u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// let d_ct_res = sks.unchecked_ge(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, u64::from(msg1 >= msg2)); + /// ``` + pub fn unchecked_ge( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_ge_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - 
`stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn ge_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut tmp_lhs; + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + tmp_lhs = ct_left.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + (&tmp_lhs, ct_right) + } + (false, false) => { + tmp_lhs = ct_left.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(&mut tmp_lhs, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (&tmp_lhs, &tmp_rhs) + } + }; + + self.unchecked_ge_async(lhs, rhs, stream) + } + + pub fn ge( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.ge_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_lt_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + self.unchecked_comparison_async(ct_left, ct_right, ComparisonType::LT, stream) + } + + /// Compares if lhs is lower than rhs + /// + /// Returns a ciphertext containing 1 if lhs < rhs, otherwise 0 + /// + /// Requires carry bits to be empty + /// + /// # Example + /// + /// ```rust + /// use
tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg1 = 237u64; + /// let msg2 = 23u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// let d_ct_res = sks.unchecked_lt(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, u64::from(msg1 < msg2)); + /// ``` + pub fn unchecked_lt( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_lt_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn lt_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut tmp_lhs; + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = 
ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + tmp_lhs = ct_left.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + (&tmp_lhs, ct_right) + } + (false, false) => { + tmp_lhs = ct_left.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(&mut tmp_lhs, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (&tmp_lhs, &tmp_rhs) + } + }; + + self.unchecked_lt_async(lhs, rhs, stream) + } + + pub fn lt( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.lt_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_le_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + self.unchecked_comparison_async(ct_left, ct_right, ComparisonType::LE, stream) + } + + /// Compares if lhs is lower or equal than rhs + /// + /// Returns a ciphertext containing 1 if lhs <= rhs, otherwise 0 + /// + /// Requires carry bits to be empty + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size,
&mut stream); + /// + /// let msg1 = 237u64; + /// let msg2 = 23u64; + /// + /// let ct1 = cks.encrypt(msg1); + /// let ct2 = cks.encrypt(msg2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// let d_ct_res = sks.unchecked_le(&d_ct1, &d_ct2, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, u64::from(msg1 <= msg2)); + /// ``` + pub fn unchecked_le( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_le_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn le_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut tmp_lhs; + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + tmp_lhs = ct_left.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + (&tmp_lhs, ct_right) + } + (false, false) => { + tmp_lhs = ct_left.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(&mut tmp_lhs, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (&tmp_lhs, &tmp_rhs) + } + }; +
self.unchecked_le_async(lhs, rhs, stream) + } + + pub fn le( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.le_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_max_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + assert_eq!( + ct_left.d_blocks.lwe_dimension(), + ct_right.d_blocks.lwe_dimension() + ); + assert_eq!( + ct_left.d_blocks.lwe_ciphertext_count(), + ct_right.d_blocks.lwe_ciphertext_count() + ); + + let mut result = ct_left.duplicate_async(stream); + + let lwe_ciphertext_count = ct_left.d_blocks.lwe_ciphertext_count(); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_comparison_integer_radix_classic_kb_async( + &mut result.d_blocks.0.d_vec, + &ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + ComparisonType::MAX, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_comparison_integer_radix_multibit_kb_async( + &mut result.d_blocks.0.d_vec, + &ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + 
self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + ComparisonType::MAX, + ); + } + } + + result + } + + pub fn unchecked_max( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_max_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_min_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + assert_eq!( + ct_left.d_blocks.lwe_dimension(), + ct_right.d_blocks.lwe_dimension() + ); + assert_eq!( + ct_left.d_blocks.lwe_ciphertext_count(), + ct_right.d_blocks.lwe_ciphertext_count() + ); + + let mut result = ct_left.duplicate_async(stream); + + let lwe_ciphertext_count = ct_left.d_blocks.lwe_ciphertext_count(); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_comparison_integer_radix_classic_kb_async( + &mut result.d_blocks.0.d_vec, + &ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + 
self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + ComparisonType::MIN, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_comparison_integer_radix_multibit_kb_async( + &mut result.d_blocks.0.d_vec, + &ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + ComparisonType::MIN, + ); + } + } + + result + } + + pub fn unchecked_min( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_min_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn max_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut tmp_lhs; + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + 
tmp_lhs = ct_left.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + (&tmp_lhs, ct_right) + } + (false, false) => { + tmp_lhs = ct_left.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(&mut tmp_lhs, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (&tmp_lhs, &tmp_rhs) + } + }; + self.unchecked_max_async(lhs, rhs, stream) + } + + pub fn max( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.max_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn min_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut tmp_lhs; + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + tmp_lhs = ct_left.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + (&tmp_lhs, ct_right) + } + (false, false) => { + tmp_lhs = ct_left.duplicate_async(stream); + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(&mut tmp_lhs, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (&tmp_lhs, &tmp_rhs) + } + }; + self.unchecked_min_async(lhs, rhs, stream) + } + + pub fn min( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe {
self.min_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/mod.rs b/tfhe/src/integer/gpu/server_key/radix/mod.rs new file mode 100644 index 000000000..4358e261f --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/mod.rs @@ -0,0 +1,17 @@ +mod add; +mod bitwise_op; +mod cmux; +mod comparison; +mod mul; +mod neg; +mod scalar_add; +mod scalar_bitwise_op; +mod scalar_comparison; +mod scalar_mul; +mod scalar_sub; +mod shift; +mod sub; + +mod scalar_rotate; +#[cfg(test)] +mod tests; diff --git a/tfhe/src/integer/gpu/server_key/radix/mul.rs b/tfhe/src/integer/gpu/server_key/radix/mul.rs new file mode 100644 index 000000000..126a5237f --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/mul.rs @@ -0,0 +1,235 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey}; + +impl CudaServerKey { + /// Computes homomorphically a multiplication between two ciphertexts encrypting integer values. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext; `ct_left` is not modified. 
+ /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let number_of_blocks = 2; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, &mut stream); + /// + /// let modulus = PARAM_MESSAGE_2_CARRY_2_KS_PBS + /// .message_modulus + /// .0 + /// .pow(number_of_blocks as u32) as u64; + /// let clear_1: u64 = 13 % modulus; + /// let clear_2: u64 = 4 % modulus; + /// + /// // Encrypt two messages + /// let ctxt_1 = cks.encrypt_radix(clear_1, number_of_blocks); + /// let ctxt_2 = cks.encrypt_radix(clear_2, number_of_blocks); + /// + /// let mut d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + /// let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &mut stream); + /// + /// // Compute homomorphically a multiplication + /// let mut d_ct_res = sks.unchecked_mul(&mut d_ctxt_1, &d_ctxt_2, &mut stream); + /// + /// // Decrypt + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let res: u64 = cks.decrypt_radix(&ct_res); + /// assert_eq!((clear_1 * clear_2) % modulus, res); + /// ``` + pub fn unchecked_mul( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.unchecked_mul_assign(&mut result, ct_right, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_mul_assign_async( + &self, + 
ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + let num_blocks = ct_left.d_blocks.lwe_ciphertext_count().0 as u32; + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_mul_integer_radix_classic_kb_assign_async( + &mut ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension(), + d_bsk.input_lwe_dimension(), + d_bsk.polynomial_size(), + d_bsk.decomp_base_log(), + d_bsk.decomp_level_count(), + self.key_switching_key.decomposition_base_log(), + self.key_switching_key.decomposition_level_count(), + num_blocks, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_mul_integer_radix_multibit_kb_assign_async( + &mut ct_left.d_blocks.0.d_vec, + &ct_right.d_blocks.0.d_vec, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension(), + d_multibit_bsk.input_lwe_dimension(), + d_multibit_bsk.polynomial_size(), + d_multibit_bsk.decomp_base_log(), + d_multibit_bsk.decomp_level_count(), + self.key_switching_key.decomposition_base_log(), + self.key_switching_key.decomposition_level_count(), + d_multibit_bsk.grouping_factor, + num_blocks, + ); + } + }; + + ct_left.info = ct_left.info.after_mul(); + } + + pub fn unchecked_mul_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.unchecked_mul_assign_async(ct_left, ct_right, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically a multiplication between two ciphertexts encrypting integer values. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is assigned to the `ct_left` ciphertext. 
+ /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let number_of_blocks = 2; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, &mut stream); + /// + /// let modulus = PARAM_MESSAGE_2_CARRY_2_KS_PBS + /// .message_modulus + /// .0 + /// .pow(number_of_blocks as u32) as u64; + /// let clear_1: u64 = 13 % modulus; + /// let clear_2: u64 = 4 % modulus; + /// + /// // Encrypt two messages + /// let ctxt_1 = cks.encrypt_radix(clear_1, number_of_blocks); + /// let ctxt_2 = cks.encrypt_radix(clear_2, number_of_blocks); + /// + /// let mut d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + /// let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &mut stream); + /// + /// // Compute homomorphically a multiplication + /// let mut d_ct_res = sks.mul(&mut d_ctxt_1, &d_ctxt_2, &mut stream); + /// + /// // Decrypt + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// let res: u64 = cks.decrypt_radix(&ct_res); + /// assert_eq!((clear_1 * clear_2) % modulus, res); + /// ``` + pub fn mul( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct_left.duplicate_async(stream) }; + self.mul_assign(&mut result, ct_right, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn mul_assign_async( + &self, + ct_left: &mut CudaRadixCiphertext, + 
ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + self.full_propagate_assign_async(ct_left, stream); + (ct_left, ct_right) + } + (false, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(ct_left, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + }; + + self.unchecked_mul_assign_async(lhs, rhs, stream); + // Carries are cleaned internally in the mul algorithm + } + + pub fn mul_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.mul_assign_async(ct_left, ct_right, stream); + } + stream.synchronize(); + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/neg.rs b/tfhe/src/integer/gpu/server_key/radix/neg.rs new file mode 100644 index 000000000..641e5e9bf --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/neg.rs @@ -0,0 +1,171 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaServerKey; + +impl CudaServerKey { + /// Homomorphically computes the opposite of a ciphertext encrypting an integer message. + /// + /// This function computes the opposite of a message without checking if it exceeds the + /// capacity of the ciphertext. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// // Encrypt two messages: + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let modulus = 1 << 8; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 159u64; + /// + /// // Encrypt a message + /// let mut ctxt = cks.encrypt(msg); + /// let mut d_ctxt = CudaRadixCiphertext::from_radix_ciphertext(&ctxt, &mut stream); + /// + /// // Compute homomorphically a negation + /// let d_res = sks.unchecked_neg(&mut d_ctxt, &mut stream); + /// let res = d_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt + /// let dec: u64 = cks.decrypt(&res); + /// assert_eq!(modulus - msg, dec); + /// ``` + pub fn unchecked_neg( + &self, + ctxt: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_neg_async(ctxt, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_neg_async( + &self, + ctxt: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = ctxt.duplicate_async(stream); + self.unchecked_neg_assign_async(&mut result, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_neg_assign_async( + &self, + ctxt: &mut 
CudaRadixCiphertext, + stream: &CudaStream, + ) { + let lwe_dimension = ctxt.d_blocks.lwe_dimension(); + let lwe_ciphertext_count = ctxt.d_blocks.lwe_ciphertext_count(); + + let info = ctxt.info.blocks.first().unwrap(); + + stream.negate_integer_radix_assign_async( + &mut ctxt.d_blocks.0.d_vec, + lwe_dimension, + lwe_ciphertext_count.0 as u32, + info.message_modulus.0 as u32, + info.carry_modulus.0 as u32, + ); + + ctxt.info = ctxt.info.after_neg(); + } + + pub fn unchecked_neg_assign(&self, ctxt: &mut CudaRadixCiphertext, stream: &CudaStream) { + unsafe { + self.unchecked_neg_assign_async(ctxt, stream); + } + stream.synchronize(); + } + + /// Homomorphically computes the opposite of a ciphertext encrypting an integer message. + /// + /// This function computes the opposite of a message without checking if it exceeds the + /// capacity of the ciphertext. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// // Encrypt two messages: + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let modulus = 1 << 8; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 159u64; + /// + /// // Encrypt a message + /// let mut ctxt = cks.encrypt(msg); + /// let mut d_ctxt = CudaRadixCiphertext::from_radix_ciphertext(&ctxt, &mut stream); + /// + /// // Compute homomorphically a negation + /// let d_res = sks.neg(&mut d_ctxt, &mut stream); + /// let res = d_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt + /// let dec: u64 = cks.decrypt(&res); + /// 
assert_eq!(modulus - msg, dec); + /// ``` + pub fn neg(&self, ctxt: &CudaRadixCiphertext, stream: &CudaStream) -> CudaRadixCiphertext { + let mut result = unsafe { ctxt.duplicate_async(stream) }; + self.neg_assign(&mut result, stream); + result + } + + /// Negates `ctxt` in place: pending carries are propagated first when its block carries are + /// not empty, then the blocks are negated and a single carry propagation is applied. + /// + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn neg_assign_async(&self, ctxt: &mut CudaRadixCiphertext, stream: &CudaStream) { + // Propagate directly on `ctxt`: working on a temporary duplicate would drop the negated + // value on return and leave the caller's ciphertext unchanged. + if !ctxt.block_carries_are_empty() { + self.full_propagate_assign_async(ctxt, stream); + } + + self.unchecked_neg_assign_async(ctxt, stream); + self.propagate_single_carry_assign_async(ctxt, stream); + } + + /// Synchronous counterpart of `neg_assign_async`: runs it, then synchronizes `stream`. + pub fn neg_assign(&self, ctxt: &mut CudaRadixCiphertext, stream: &CudaStream) { + unsafe { + self.neg_assign_async(ctxt, stream); + } + stream.synchronize(); + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_add.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_add.rs new file mode 100644 index 000000000..50c3df3d7 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/scalar_add.rs @@ -0,0 +1,196 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto}; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaServerKey; +use itertools::Itertools; + +impl CudaServerKey { + /// Computes homomorphically an addition between a scalar and a ciphertext. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 4; + /// let scalar = 40; + /// + /// let ct = cks.encrypt(msg); + /// let mut d_ct = CudaRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// + /// // Compute homomorphically an addition: + /// let d_ct_res = sks.unchecked_scalar_add(&d_ct, scalar, &mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(msg + scalar, dec); + /// ``` + pub fn unchecked_scalar_add( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.unchecked_scalar_add_assign(&mut result, scalar, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_add_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) where + T: DecomposableInto, + { + if scalar > T::ZERO { + let bits_in_message = self.message_modulus.0.ilog2(); + let decomposer = + BlockDecomposer::with_early_stop_at_zero(scalar, bits_in_message).iter_as::(); + + let mut d_decomposed_scalar = + stream.malloc_async::(ct.d_blocks.lwe_ciphertext_count().0 as u32); + let scalar64 = 
decomposer + .collect_vec() + .iter() + .map(|&x| x as u64) + .take(d_decomposed_scalar.len()) + .collect_vec(); + stream.copy_to_gpu_async(&mut d_decomposed_scalar, scalar64.as_slice()); + + let lwe_dimension = ct.d_blocks.lwe_dimension(); + // If the scalar is decomposed using less than the number of blocks our ciphertext + // has, we just don't touch ciphertext's last blocks + stream.scalar_addition_integer_radix_assign_async( + &mut ct.d_blocks.0.d_vec, + &d_decomposed_scalar, + lwe_dimension, + scalar64.len() as u32, + self.message_modulus.0 as u32, + self.carry_modulus.0 as u32, + ); + } + + ct.info = ct.info.after_scalar_add(scalar); + } + + pub fn unchecked_scalar_add_assign( + &self, + ct: &mut CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) where + T: DecomposableInto, + { + unsafe { + self.unchecked_scalar_add_assign_async(ct, scalar, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically an addition between a scalar and a ciphertext. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 4; + /// let scalar = 40; + /// + /// let ct = cks.encrypt(msg); + /// let mut d_ct = CudaRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// + /// // Compute homomorphically an addition: + /// let d_ct_res = sks.scalar_add(&d_ct, scalar, &mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(msg + scalar, dec); + /// ``` + pub fn scalar_add( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.scalar_add_assign(&mut result, scalar, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_add_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) where + T: DecomposableInto, + { + if !ct.block_carries_are_empty() { + self.full_propagate_assign_async(ct, stream); + }; + + self.unchecked_scalar_add_assign_async(ct, scalar, stream); + self.full_propagate_assign_async(ct, stream); + } + + pub fn scalar_add_assign(&self, ct: &mut CudaRadixCiphertext, scalar: T, stream: &CudaStream) + where + T: DecomposableInto, + { + 
unsafe { + self.scalar_add_assign_async(ct, scalar, stream); + } + stream.synchronize(); + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs new file mode 100644 index 000000000..64f013f29 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/scalar_bitwise_op.rs @@ -0,0 +1,312 @@ +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::CudaStream; +use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto}; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaBootstrappingKey; +use crate::integer::gpu::{BitOpType, CudaServerKey}; + +impl CudaServerKey { + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_bitop_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + op: BitOpType, + stream: &CudaStream, + ) where + Scalar: DecomposableInto, + { + let lwe_ciphertext_count = ct.d_blocks.lwe_ciphertext_count(); + let message_modulus = self.message_modulus.0; + + let h_clear_blocks = BlockDecomposer::with_early_stop_at_zero(rhs, message_modulus.ilog2()) + .iter_as::() + .map(|x| x as u64) + .collect::>(); + + let clear_blocks = CudaVec::from_async(&h_clear_blocks, stream); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_scalar_bitop_integer_radix_classic_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + &clear_blocks, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + 
self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + op, + lwe_ciphertext_count.0 as u32, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_scalar_bitop_integer_radix_multibit_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + &clear_blocks, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + op, + lwe_ciphertext_count.0 as u32, + ); + } + } + } + + pub fn unchecked_scalar_bitand( + &self, + ct: &CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + Scalar: DecomposableInto, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.unchecked_scalar_bitand_assign(&mut result, rhs, stream); + result + } + + pub fn unchecked_scalar_bitand_assign( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) where + Scalar: DecomposableInto, + { + unsafe { + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, stream); + ct.info = ct.info.after_scalar_bitand(rhs); + } + stream.synchronize(); + } + + pub fn unchecked_scalar_bitor( + &self, + ct: &CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + Scalar: DecomposableInto, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.unchecked_scalar_bitor_assign(&mut result, rhs, stream); + result + } + + pub fn unchecked_scalar_bitor_assign( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) where + 
Scalar: DecomposableInto, + { + unsafe { + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, stream); + ct.info = ct.info.after_scalar_bitor(rhs); + } + stream.synchronize(); + } + + pub fn unchecked_scalar_bitxor( + &self, + ct: &CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + Scalar: DecomposableInto, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.unchecked_scalar_bitxor_assign(&mut result, rhs, stream); + result + } + + pub fn unchecked_scalar_bitxor_assign( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) where + Scalar: DecomposableInto, + { + unsafe { + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, stream); + ct.info = ct.info.after_scalar_bitxor(rhs); + } + stream.synchronize(); + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_bitand_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) where + Scalar: DecomposableInto, + { + if !ct.block_carries_are_empty() { + self.full_propagate_assign_async(ct, stream); + } + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarAnd, stream); + ct.info = ct.info.after_scalar_bitand(rhs); + } + + pub fn scalar_bitand_assign( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) where + Scalar: DecomposableInto, + { + unsafe { + self.scalar_bitand_assign_async(ct, rhs, stream); + } + stream.synchronize(); + } + + pub fn scalar_bitand( + &self, + ct: &CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + Scalar: DecomposableInto, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.scalar_bitand_assign(&mut result, rhs, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ 
be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_bitor_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) where + Scalar: DecomposableInto, + { + if !ct.block_carries_are_empty() { + self.full_propagate_assign_async(ct, stream); + } + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarOr, stream); + ct.info = ct.info.after_scalar_bitor(rhs); + } + + pub fn scalar_bitor_assign( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) where + Scalar: DecomposableInto, + { + unsafe { + self.scalar_bitor_assign_async(ct, rhs, stream); + } + stream.synchronize(); + } + + pub fn scalar_bitor( + &self, + ct: &CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + Scalar: DecomposableInto, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.scalar_bitor_assign(&mut result, rhs, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_bitxor_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) where + Scalar: DecomposableInto, + { + if !ct.block_carries_are_empty() { + self.full_propagate_assign_async(ct, stream); + } + self.unchecked_scalar_bitop_assign_async(ct, rhs, BitOpType::ScalarXor, stream); + ct.info = ct.info.after_scalar_bitxor(rhs); + } + + pub fn scalar_bitxor_assign( + &self, + ct: &mut CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) where + Scalar: DecomposableInto, + { + unsafe { + self.scalar_bitxor_assign_async(ct, rhs, stream); + } + stream.synchronize(); + } + + pub fn scalar_bitxor( + &self, + ct: &CudaRadixCiphertext, + rhs: Scalar, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + Scalar: 
DecomposableInto, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.scalar_bitxor_assign(&mut result, rhs, stream); + result + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_comparison.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_comparison.rs new file mode 100644 index 000000000..b571df01b --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/scalar_comparison.rs @@ -0,0 +1,532 @@ +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::gpu::CudaStream; +use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto}; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey}; +use crate::integer::gpu::ComparisonType; +use crate::integer::server_key::comparator::Comparator; + +impl CudaServerKey { + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_comparison_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + op: ComparisonType, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + if scalar < T::ZERO { + // ct represents an unsigned (always >= 0) + return self.create_trivial_radix( + Comparator::IS_SUPERIOR, + ct.d_blocks.lwe_ciphertext_count().0, + stream, + ); + } + + let message_modulus = self.message_modulus.0; + + let mut scalar_blocks = + BlockDecomposer::with_early_stop_at_zero(scalar, message_modulus.ilog2()) + .iter_as::() + .collect::>(); + + // scalar is obviously bigger if it has non-zero + // blocks after lhs's last block + let is_scalar_obviously_bigger = scalar_blocks + .get(ct.d_blocks.lwe_ciphertext_count().0..) 
+ .is_some_and(|sub_slice| sub_slice.iter().any(|&scalar_block| scalar_block != 0)); + if is_scalar_obviously_bigger { + return self.create_trivial_radix( + Comparator::IS_INFERIOR, + ct.d_blocks.lwe_ciphertext_count().0, + stream, + ); + } + + // If we are still here, that means scalar_blocks above + // num_blocks are 0s, we can remove them + // as we will handle them separately. + scalar_blocks.truncate(ct.d_blocks.lwe_ciphertext_count().0); + + let d_scalar_blocks: CudaVec = CudaVec::from_async(&scalar_blocks, stream); + + let lwe_ciphertext_count = ct.d_blocks.lwe_ciphertext_count(); + + let mut result = ct.duplicate_async(stream); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_scalar_comparison_integer_radix_classic_kb_async( + &mut result.d_blocks.0.d_vec, + &ct.d_blocks.0.d_vec, + &d_scalar_blocks, + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + scalar_blocks.len() as u32, + op, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_scalar_comparison_integer_radix_multibit_kb_async( + &mut result.d_blocks.0.d_vec, + &ct.d_blocks.0.d_vec, + &d_scalar_blocks, + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + 
self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + scalar_blocks.len() as u32, + op, + ); + } + } + + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_gt_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GT, stream) + } + + pub fn unchecked_scalar_gt( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.unchecked_scalar_gt_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_ge_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::GE, stream) + } + + pub fn unchecked_scalar_ge( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.unchecked_scalar_ge_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_lt_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + 
where + T: DecomposableInto, + { + self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LT, stream) + } + + pub fn unchecked_scalar_lt( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.unchecked_scalar_lt_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_le_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::LE, stream) + } + + pub fn unchecked_scalar_le( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.unchecked_scalar_le_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_gt_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let mut tmp_lhs; + let lhs = if ct.block_carries_are_empty() { + ct + } else { + tmp_lhs = ct.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + &tmp_lhs + }; + + self.unchecked_scalar_gt_async(lhs, scalar, stream) + } + + pub fn scalar_gt( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.scalar_gt_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + /// # Safety + /// + /// - 
`stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_ge_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let mut tmp_lhs; + let lhs = if ct.block_carries_are_empty() { + ct + } else { + tmp_lhs = ct.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + &tmp_lhs + }; + + self.unchecked_scalar_ge_async(lhs, scalar, stream) + } + + pub fn scalar_ge( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.scalar_ge_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_lt_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let mut tmp_lhs; + let lhs = if ct.block_carries_are_empty() { + ct + } else { + tmp_lhs = ct.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + &tmp_lhs + }; + + self.unchecked_scalar_lt_async(lhs, scalar, stream) + } + + pub fn scalar_lt( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.scalar_lt_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_le_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: 
DecomposableInto, + { + let mut tmp_lhs; + let lhs = if ct.block_carries_are_empty() { + ct + } else { + tmp_lhs = ct.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + &tmp_lhs + }; + + self.unchecked_scalar_le_async(lhs, scalar, stream) + } + + pub fn scalar_le( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.scalar_le_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_max_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::MAX, stream) + } + + pub fn unchecked_scalar_max( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.unchecked_scalar_max_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_min_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + self.unchecked_scalar_comparison_async(ct, scalar, ComparisonType::MIN, stream) + } + + pub fn unchecked_scalar_min( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.unchecked_scalar_min_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// 
+ /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_max_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let mut tmp_lhs; + let lhs = if ct.block_carries_are_empty() { + ct + } else { + tmp_lhs = ct.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + &tmp_lhs + }; + + self.unchecked_scalar_max_async(lhs, scalar, stream) + } + + pub fn scalar_max( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.scalar_max_async(ct, scalar, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_min_async( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let mut tmp_lhs; + let lhs = if ct.block_carries_are_empty() { + ct + } else { + tmp_lhs = ct.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_lhs, stream); + &tmp_lhs + }; + + self.unchecked_scalar_min_async(lhs, scalar, stream) + } + + pub fn scalar_min( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto, + { + let result = unsafe { self.scalar_min_async(ct, scalar, stream) }; + stream.synchronize(); + result + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_mul.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_mul.rs new file mode 100644 index 000000000..7efe1fb6e --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/scalar_mul.rs @@ -0,0 +1,176 @@ +use crate::core_crypto::gpu::CudaStream; 
+use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaServerKey; + +impl CudaServerKey { + /// Computes homomorphically a multiplication between a scalar and a ciphertext. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 30; + /// let scalar = 3; + /// + /// let ct = cks.encrypt(msg); + /// let mut d_ct = CudaRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// + /// // Compute homomorphically a scalar multiplication: + /// let d_ct_res = sks.unchecked_small_scalar_mul(&d_ct, scalar, &mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// let clear: u64 = cks.decrypt(&ct_res); + /// assert_eq!(scalar * msg, clear); + /// ``` + pub fn unchecked_small_scalar_mul( + &self, + ct: &CudaRadixCiphertext, + scalar: u64, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.unchecked_small_scalar_mul_assign(&mut result, scalar, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn 
unchecked_small_scalar_mul_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + scalar: u64, + stream: &CudaStream, + ) { + match scalar { + 0 => { + stream.memset_async(&mut ct.d_blocks.0.d_vec, 0); + } + 1 => { + // Multiplication by one is the identity + } + _ => { + let lwe_dimension = ct.d_blocks.lwe_dimension(); + let lwe_ciphertext_count = ct.d_blocks.lwe_ciphertext_count(); + + stream.small_scalar_mult_integer_radix_assign_async( + &mut ct.d_blocks.0.d_vec, + scalar, + lwe_dimension, + lwe_ciphertext_count.0 as u32, + ); + } + } + + ct.info = ct.info.after_small_scalar_mul(scalar as u8); + } + + pub fn unchecked_small_scalar_mul_assign( + &self, + ct: &mut CudaRadixCiphertext, + scalar: u64, + stream: &CudaStream, + ) { + unsafe { + self.unchecked_small_scalar_mul_assign_async(ct, scalar, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically a multiplication between a scalar and a ciphertext. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 30; + /// let scalar = 3; + /// + /// let ct = cks.encrypt(msg); + /// let mut d_ct = CudaRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// + /// // Compute homomorphically a scalar multiplication: + /// let d_ct_res = sks.small_scalar_mul(&d_ct, scalar, &mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// let clear: u64 = cks.decrypt(&ct_res); + /// assert_eq!(scalar * msg, clear); + /// ``` + pub fn small_scalar_mul( + &self, + ct: &CudaRadixCiphertext, + scalar: u64, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.small_scalar_mul_assign(&mut result, scalar, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn small_scalar_mul_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + scalar: u64, + stream: &CudaStream, + ) { + if !ct.block_carries_are_empty() { + self.full_propagate_assign_async(ct, stream); + }; + + self.unchecked_small_scalar_mul_assign_async(ct, scalar, stream); + self.full_propagate_assign_async(ct, stream); + } + + pub fn small_scalar_mul_assign( + &self, + ct: &mut CudaRadixCiphertext, + scalar: u64, + stream: &CudaStream, + ) { + unsafe 
{ + self.small_scalar_mul_assign_async(ct, scalar, stream); + } + stream.synchronize(); + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_rotate.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_rotate.rs new file mode 100644 index 000000000..2d920f20b --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/scalar_rotate.rs @@ -0,0 +1,206 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::CastFrom; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaBootstrappingKey; +use crate::integer::gpu::CudaServerKey; +use std::ops::Rem; + +impl CudaServerKey { + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_rotate_left_async( + &self, + ct: &CudaRadixCiphertext, + n: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let mut result = ct.duplicate_async(stream); + self.unchecked_scalar_rotate_left_assign_async(&mut result, n, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_rotate_left_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + n: T, + stream: &CudaStream, + ) where + T: Rem + CastFrom, + u32: CastFrom, + { + let lwe_ciphertext_count = ct.d_blocks.lwe_ciphertext_count(); + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_scalar_rotate_left_integer_radix_classic_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + u32::cast_from(n), + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + 
self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_scalar_rotate_left_integer_radix_multibit_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + u32::cast_from(n), + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + ); + } + } + } + + pub fn unchecked_scalar_left_rotate( + &self, + ct: &CudaRadixCiphertext, + n: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let result = unsafe { self.unchecked_scalar_rotate_left_async(ct, n, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_rotate_right_async( + &self, + ct: &CudaRadixCiphertext, + n: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let mut result = ct.duplicate_async(stream); + self.unchecked_scalar_rotate_right_assign_async(&mut result, n, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is 
synchronised + pub unsafe fn unchecked_scalar_rotate_right_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + n: T, + stream: &CudaStream, + ) where + T: Rem + CastFrom, + u32: CastFrom, + { + let lwe_ciphertext_count = ct.d_blocks.lwe_ciphertext_count(); + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_scalar_rotate_right_integer_radix_classic_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + u32::cast_from(n), + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_scalar_rotate_right_integer_radix_multibit_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + u32::cast_from(n), + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + ); + } + } + } + + pub fn unchecked_scalar_right_rotate( + &self, + ct: &CudaRadixCiphertext, + n: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let result = unsafe { self.unchecked_scalar_rotate_right_async(ct, n, stream) }; + 
stream.synchronize(); + result + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/scalar_sub.rs b/tfhe/src/integer/gpu/server_key/radix/scalar_sub.rs new file mode 100644 index 000000000..196ca96e6 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/scalar_sub.rs @@ -0,0 +1,171 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::UnsignedNumeric; +use crate::integer::block_decomposition::DecomposableInto; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaServerKey; +use crate::integer::server_key::TwosComplementNegation; + +impl CudaServerKey { + /// Computes homomorphically a subtraction between a ciphertext and a scalar. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let num_blocks = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); + /// + /// let msg = 40; + /// let scalar = 3; + /// + /// let ct = cks.encrypt(msg); + /// let mut d_ct = CudaRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// + /// // Compute homomorphically an addition: + /// let d_ct_res = sks.unchecked_scalar_sub(&d_ct, scalar, &mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(msg - scalar, dec); + /// ``` + pub fn unchecked_scalar_sub( + 
&self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto + UnsignedNumeric + TwosComplementNegation, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.unchecked_scalar_sub_assign(&mut result, scalar, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_sub_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) where + T: DecomposableInto + UnsignedNumeric + TwosComplementNegation, + { + let negated_scalar = scalar.twos_complement_negation(); + self.unchecked_scalar_add_assign_async(ct, negated_scalar, stream); + ct.info = ct.info.after_scalar_sub(scalar); + } + + pub fn unchecked_scalar_sub_assign( + &self, + ct: &mut CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) where + T: DecomposableInto + UnsignedNumeric + TwosComplementNegation, + { + unsafe { + self.unchecked_scalar_sub_assign_async(ct, scalar, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically a subtraction between a ciphertext and a scalar. + /// + /// This function computes the operation without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let num_blocks = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); + /// + /// let msg = 40; + /// let scalar = 3; + /// + /// let ct = cks.encrypt(msg); + /// let mut d_ct = CudaRadixCiphertext::from_radix_ciphertext(&ct, &mut stream); + /// + /// // Compute homomorphically an addition: + /// let d_ct_res = sks.scalar_sub(&d_ct, scalar, &mut stream); + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec: u64 = cks.decrypt(&ct_res); + /// assert_eq!(msg - scalar, dec); + /// ``` + pub fn scalar_sub( + &self, + ct: &CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: DecomposableInto + UnsignedNumeric + TwosComplementNegation, + { + let mut result = unsafe { ct.duplicate_async(stream) }; + self.scalar_sub_assign(&mut result, scalar, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_sub_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + scalar: T, + stream: &CudaStream, + ) where + T: DecomposableInto + UnsignedNumeric + TwosComplementNegation, + { + if !ct.block_carries_are_empty() { + self.full_propagate_assign_async(ct, stream); + }; + + self.unchecked_scalar_sub_assign_async(ct, scalar, stream); + self.full_propagate_assign_async(ct, stream); + } + + pub fn scalar_sub_assign(&self, ct: 
&mut CudaRadixCiphertext, scalar: T, stream: &CudaStream) + where + T: DecomposableInto + UnsignedNumeric + TwosComplementNegation, + { + unsafe { + self.scalar_sub_assign_async(ct, scalar, stream); + } + stream.synchronize(); + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/shift.rs b/tfhe/src/integer/gpu/server_key/radix/shift.rs new file mode 100644 index 000000000..780d4decc --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/shift.rs @@ -0,0 +1,460 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::core_crypto::prelude::CastFrom; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaBootstrappingKey; +use crate::integer::gpu::CudaServerKey; +use std::ops::Rem; + +impl CudaServerKey { + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_left_shift_async( + &self, + ct: &CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let mut result = ct.duplicate_async(stream); + self.unchecked_scalar_left_shift_assign_async(&mut result, shift, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_left_shift_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) where + T: Rem + CastFrom, + u32: CastFrom, + { + let lwe_ciphertext_count = ct.d_blocks.lwe_ciphertext_count(); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_scalar_shift_left_integer_radix_classic_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + u32::cast_from(shift), + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + 
d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_scalar_shift_left_integer_radix_multibit_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + u32::cast_from(shift), + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + ); + } + } + } + + /// Computes homomorphically a left shift by a scalar. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 21u64; + /// let shift = 2; + /// + /// let ct1 = cks.encrypt(msg); + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// + /// let d_ct_res = sks.unchecked_scalar_left_shift(&d_ct1, shift, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, msg << shift); + /// ``` + pub fn unchecked_scalar_left_shift( + &self, + ct: &CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let result = unsafe { self.unchecked_scalar_left_shift_async(ct, shift, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_right_shift_async( + &self, + ct: &CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let mut result = ct.duplicate_async(stream); + self.unchecked_scalar_right_shift_assign_async(&mut result, shift, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee 
computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_scalar_right_shift_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) where + T: Rem + CastFrom, + u32: CastFrom, + { + let lwe_ciphertext_count = ct.d_blocks.lwe_ciphertext_count(); + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + stream.unchecked_scalar_shift_right_integer_radix_classic_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + u32::cast_from(shift), + &d_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_bsk.glwe_dimension, + d_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_bsk.decomp_level_count, + d_bsk.decomp_base_log, + lwe_ciphertext_count.0 as u32, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + stream.unchecked_scalar_shift_right_integer_radix_multibit_kb_assign_async( + &mut ct.d_blocks.0.d_vec, + u32::cast_from(shift), + &d_multibit_bsk.d_vec, + &self.key_switching_key.d_vec, + self.message_modulus, + self.carry_modulus, + d_multibit_bsk.glwe_dimension, + d_multibit_bsk.polynomial_size, + self.key_switching_key + .input_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key + .output_key_lwe_size() + .to_lwe_dimension(), + self.key_switching_key.decomposition_level_count(), + self.key_switching_key.decomposition_base_log(), + d_multibit_bsk.decomp_level_count, + d_multibit_bsk.decomp_base_log, + d_multibit_bsk.grouping_factor, + lwe_ciphertext_count.0 as u32, + ); + } + } + } + + /// Computes homomorphically a right shift by a scalar. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 21u64; + /// let shift = 2; + /// + /// let ct1 = cks.encrypt(msg); + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// + /// let d_ct_res = sks.unchecked_scalar_right_shift(&d_ct1, shift, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, msg >> shift); + /// ``` + pub fn unchecked_scalar_right_shift( + &self, + ct: &CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let result = unsafe { self.unchecked_scalar_right_shift_async(ct, shift, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_right_shift_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) where + T: Rem + CastFrom, + u32: CastFrom, + { + if !ct.block_carries_are_empty() { + self.full_propagate_assign_async(ct, stream); + } + + self.unchecked_scalar_right_shift_assign_async(ct, shift, stream); + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to 
guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_right_shift_async( + &self, + ct: &CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let mut result = ct.duplicate_async(stream); + self.scalar_right_shift_assign_async(&mut result, shift, stream); + result + } + + /// Computes homomorphically a right shift by a scalar. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 21u64; + /// let shift = 2; + /// + /// let ct1 = cks.encrypt(msg); + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// + /// let d_ct_res = sks.scalar_right_shift(&d_ct1, shift, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, msg >> shift); + /// ``` + pub fn scalar_right_shift( + &self, + ct: &CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let result = unsafe { self.scalar_right_shift_async(ct, shift, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee 
computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_left_shift_assign_async( + &self, + ct: &mut CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) where + T: Rem + CastFrom, + u32: CastFrom, + { + if !ct.block_carries_are_empty() { + self.full_propagate_assign_async(ct, stream); + } + + self.unchecked_scalar_left_shift_assign_async(ct, shift, stream); + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn scalar_left_shift_async( + &self, + ct: &CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let mut result = ct.duplicate_async(stream); + self.scalar_left_shift_assign_async(&mut result, shift, stream); + result + } + + /// Computes homomorphically a left shift by a scalar. + /// + /// The result is returned as a new ciphertext. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let size = 4; + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg = 21u64; + /// let shift = 2; + /// + /// let ct1 = cks.encrypt(msg); + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// + /// let d_ct_res = sks.scalar_left_shift(&d_ct1, shift, &mut stream); + /// + /// // Copy back to CPU + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, msg << shift); + /// ``` + pub fn scalar_left_shift( + &self, + ct: &CudaRadixCiphertext, + shift: T, + stream: &CudaStream, + ) -> CudaRadixCiphertext + where + T: Rem + CastFrom, + u32: CastFrom, + { + let result = unsafe { self.scalar_left_shift_async(ct, shift, stream) }; + stream.synchronize(); + result + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/sub.rs b/tfhe/src/integer/gpu/server_key/radix/sub.rs new file mode 100644 index 000000000..54e058d39 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/sub.rs @@ -0,0 +1,268 @@ +use crate::core_crypto::gpu::CudaStream; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::server_key::CudaServerKey; + +impl CudaServerKey { + /// Computes homomorphically a subtraction between two ciphertexts encrypting integer values. 
+ /// + /// This function computes the subtraction without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is returned as a new ciphertext. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// let num_blocks = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); + /// + /// let msg_1 = 12; + /// let msg_2 = 10; + /// + /// // Encrypt two messages: + /// let ctxt_1 = cks.encrypt(msg_1); + /// let ctxt_2 = cks.encrypt(msg_2); + /// + /// // Copy to GPU + /// let d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &mut stream); + /// + /// // Compute homomorphically a subtraction: + /// let d_ct_res = sks.unchecked_sub(&d_ct1, &d_ct2, &mut stream); + /// + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, msg_1 - msg_2); + /// ``` + pub fn unchecked_sub( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.unchecked_sub_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_sub_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, 
+ stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = ct_left.duplicate_async(stream); + self.unchecked_sub_assign_async(&mut result, ct_right, stream); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn unchecked_sub_assign_async( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + let neg = self.unchecked_neg_async(ct_right, stream); + self.unchecked_add_assign_async(ct_left, &neg, stream); + } + + /// Computes homomorphically a subtraction between two ciphertexts encrypting integer values. + /// + /// This function computes the subtraction without checking if it exceeds the capacity of the + /// ciphertext. + /// + /// The result is assigned to the `ct_left` ciphertext. + /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gen_keys_radix; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let num_blocks = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, num_blocks, &mut stream); + /// + /// let msg_1 = 128; + /// let msg_2 = 99; + /// + /// // Encrypt two messages: + /// let ctxt_1 = cks.encrypt(msg_1); + /// let ctxt_2 = cks.encrypt(msg_2); + /// + /// // Copy to GPU + /// let mut d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &mut stream); + /// + /// // Compute homomorphically a subtraction: + /// sks.unchecked_sub_assign(&mut 
d_ct1, &d_ct2, &mut stream); + /// + /// let ct_res = d_ct1.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert_eq!(dec_result, msg_1 - msg_2); + /// ``` + pub fn unchecked_sub_assign( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.unchecked_sub_assign_async(ct_left, ct_right, stream); + } + stream.synchronize(); + } + + /// Computes homomorphically the subtraction between ct_left and ct_right. + /// + /// This function, like all "default" operations (i.e. not smart, checked or unchecked), will + /// check that the input ciphertexts block carries are empty and clears them if it's not the + /// case and the operation requires it. It outputs a ciphertext whose block carries are always + /// empty. + /// + /// This means that when using only "default" operations, a given operation (like add for + /// example) has always the same performance characteristics from one call to another and + /// guarantees correctness by pre-emptively clearing carries of output ciphertexts. 
+ /// + /// # Example + /// + /// ```rust + /// use tfhe::core_crypto::gpu::{CudaDevice, CudaStream}; + /// use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext; + /// use tfhe::integer::gpu::gen_keys_radix_gpu; + /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS; + /// + /// let gpu_index = 0; + /// let device = CudaDevice::new(gpu_index); + /// let mut stream = CudaStream::new_unchecked(device); + /// + /// // We have 4 * 2 = 8 bits of message + /// let size = 4; + /// let (cks, sks) = gen_keys_radix_gpu(PARAM_MESSAGE_2_CARRY_2_KS_PBS, size, &mut stream); + /// + /// let msg_1 = 120u8; + /// let msg_2 = 181u8; + /// + /// // Encrypt two messages: + /// let mut ct1 = cks.encrypt(msg_1 as u64); + /// let ct2 = cks.encrypt(msg_2 as u64); + /// + /// // Copy to GPU + /// let d_ct1 = CudaRadixCiphertext::from_radix_ciphertext(&ct1, &mut stream); + /// let d_ct2 = CudaRadixCiphertext::from_radix_ciphertext(&ct2, &mut stream); + /// + /// // Compute homomorphically a subtraction: + /// let d_ct_res = sks.sub(&d_ct1, &d_ct2, &mut stream); + /// + /// let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + /// + /// // Decrypt: + /// let res: u64 = cks.decrypt(&ct_res); + /// assert_eq!(msg_1.wrapping_sub(msg_2) as u64, res); + /// ``` + pub fn sub( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let result = unsafe { self.sub_async(ct_left, ct_right, stream) }; + stream.synchronize(); + result + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn sub_async( + &self, + ct_left: &CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) -> CudaRadixCiphertext { + let mut result = ct_left.duplicate_async(stream); + self.sub_assign_async(&mut result, ct_right, stream); + result + } + + pub fn sub_assign( + &self, + 
ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + unsafe { + self.sub_assign_async(ct_left, ct_right, stream); + } + stream.synchronize(); + } + + /// # Safety + /// + /// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must + /// not be dropped until stream is synchronised + pub unsafe fn sub_assign_async( + &self, + ct_left: &mut CudaRadixCiphertext, + ct_right: &CudaRadixCiphertext, + stream: &CudaStream, + ) { + let mut tmp_rhs; + + let (lhs, rhs) = match ( + ct_left.block_carries_are_empty(), + ct_right.block_carries_are_empty(), + ) { + (true, true) => (ct_left, ct_right), + (true, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + (false, true) => { + self.full_propagate_assign_async(ct_left, stream); + (ct_left, ct_right) + } + (false, false) => { + tmp_rhs = ct_right.duplicate_async(stream); + + self.full_propagate_assign_async(ct_left, stream); + self.full_propagate_assign_async(&mut tmp_rhs, stream); + (ct_left, &tmp_rhs) + } + }; + + self.unchecked_sub_assign_async(lhs, rhs, stream); + self.propagate_single_carry_assign_async(lhs, stream); + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/tests.rs b/tfhe/src/integer/gpu/server_key/radix/tests.rs new file mode 100644 index 000000000..a33d2c8af --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/tests.rs @@ -0,0 +1,2241 @@ +use crate::core_crypto::gpu::{CudaDevice, CudaStream}; +use crate::integer::gpu::ciphertext::CudaRadixCiphertext; +use crate::integer::gpu::{gen_keys_gpu, CudaServerKey}; +use crate::integer::{RadixCiphertext, RadixClientKey, ServerKey}; +use crate::shortint::parameters::*; +use rand::Rng; +use std::cmp::{max, min}; +use std::sync::Arc; + +// Macro to generate tests for all parameter sets +macro_rules! create_gpu_parametrized_test{ + ($name:ident { $($param:ident),* $(,)? 
}) => { + ::paste::paste! { + $( + #[test] + fn []() { + $name($param) + } + )* + } + }; + ($name:ident)=> { + create_gpu_parametrized_test!($name + { + // PARAM_MESSAGE_1_CARRY_1_KS_PBS, + PARAM_MESSAGE_2_CARRY_2_KS_PBS, + // PARAM_MESSAGE_3_CARRY_3_KS_PBS, + // PARAM_MESSAGE_4_CARRY_4_KS_PBS, + PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS + }); + }; +} + +// Unchecked operations +create_gpu_parametrized_test!(integer_unchecked_mul); +create_gpu_parametrized_test!(integer_unchecked_add); +create_gpu_parametrized_test!(integer_unchecked_add_assign); +create_gpu_parametrized_test!(integer_unchecked_sub); +create_gpu_parametrized_test!(integer_unchecked_neg); +create_gpu_parametrized_test!(integer_unchecked_scalar_add); +create_gpu_parametrized_test!(integer_unchecked_scalar_sub); +create_gpu_parametrized_test!(integer_unchecked_small_scalar_mul); +create_gpu_parametrized_test!(integer_unchecked_bitnot); +create_gpu_parametrized_test!(integer_unchecked_bitand); +create_gpu_parametrized_test!(integer_unchecked_bitor); +create_gpu_parametrized_test!(integer_unchecked_bitxor); +create_gpu_parametrized_test!(integer_unchecked_scalar_bitand); +create_gpu_parametrized_test!(integer_unchecked_scalar_bitor); +create_gpu_parametrized_test!(integer_unchecked_scalar_bitxor); +create_gpu_parametrized_test!(integer_unchecked_eq); +create_gpu_parametrized_test!(integer_unchecked_ne); +create_gpu_parametrized_test!(integer_unchecked_gt); +create_gpu_parametrized_test!(integer_unchecked_ge); +create_gpu_parametrized_test!(integer_unchecked_lt); +create_gpu_parametrized_test!(integer_unchecked_le); +create_gpu_parametrized_test!(integer_unchecked_scalar_gt); +create_gpu_parametrized_test!(integer_unchecked_scalar_ge); +create_gpu_parametrized_test!(integer_unchecked_scalar_lt); +create_gpu_parametrized_test!(integer_unchecked_scalar_le); +create_gpu_parametrized_test!(integer_unchecked_scalar_left_shift); +create_gpu_parametrized_test!(integer_unchecked_scalar_right_shift); 
+create_gpu_parametrized_test!(integer_unchecked_if_then_else); +create_gpu_parametrized_test!(integer_unchecked_max); +create_gpu_parametrized_test!(integer_unchecked_min); +create_gpu_parametrized_test!(integer_unchecked_scalar_max); +create_gpu_parametrized_test!(integer_unchecked_scalar_min); +create_gpu_parametrized_test!(integer_unchecked_scalar_rotate_left); +create_gpu_parametrized_test!(integer_unchecked_scalar_rotate_right); + +// Default operations +create_gpu_parametrized_test!(integer_mul); +create_gpu_parametrized_test!(integer_add); +create_gpu_parametrized_test!(integer_sub); +create_gpu_parametrized_test!(integer_neg); +create_gpu_parametrized_test!(integer_scalar_add); +create_gpu_parametrized_test!(integer_scalar_sub); +create_gpu_parametrized_test!(integer_small_scalar_mul); +create_gpu_parametrized_test!(integer_scalar_right_shift); +create_gpu_parametrized_test!(integer_scalar_left_shift); +create_gpu_parametrized_test!(integer_bitnot); +create_gpu_parametrized_test!(integer_bitand); +create_gpu_parametrized_test!(integer_bitor); +create_gpu_parametrized_test!(integer_bitxor); +create_gpu_parametrized_test!(integer_scalar_bitand); +create_gpu_parametrized_test!(integer_scalar_bitor); +create_gpu_parametrized_test!(integer_scalar_bitxor); +create_gpu_parametrized_test!(integer_eq); +create_gpu_parametrized_test!(integer_ne); +create_gpu_parametrized_test!(integer_gt); +create_gpu_parametrized_test!(integer_ge); +create_gpu_parametrized_test!(integer_lt); +create_gpu_parametrized_test!(integer_le); +create_gpu_parametrized_test!(integer_scalar_gt); +create_gpu_parametrized_test!(integer_scalar_ge); +create_gpu_parametrized_test!(integer_scalar_lt); +create_gpu_parametrized_test!(integer_scalar_le); +create_gpu_parametrized_test!(integer_if_then_else); +create_gpu_parametrized_test!(integer_max); +create_gpu_parametrized_test!(integer_min); +create_gpu_parametrized_test!(integer_scalar_max); +create_gpu_parametrized_test!(integer_scalar_min); + 
+/// Number of loop iteration within randomized tests +const NB_TEST: usize = 1000; + +/// Smaller number of loop iteration within randomized test, +/// meant for test where the function tested is more expensive +// const NB_TEST_SMALLER: usize = 10; +const NB_CTXT: usize = 4; +use crate::integer::server_key::radix_parallel::tests_cases_unsigned::*; + +struct GpuContext { + _device: CudaDevice, + stream: CudaStream, + sks: CudaServerKey, +} +struct GpuUncheckedFnExecutor { + context: Option, + func: F, +} + +impl GpuUncheckedFnExecutor { + fn new(func: F) -> Self { + Self { + context: None, + func, + } + } +} + +impl GpuUncheckedFnExecutor { + fn setup_from_keys(&mut self, cks: &RadixClientKey, _sks: Arc) { + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + let sks = CudaServerKey::new(cks.as_ref(), &stream); + stream.synchronize(); + let context = GpuContext { + _device: device, + stream, + sks, + }; + self.context = Some(context); + } +} + +/// For default/unchecked binary functions +impl<'a, F> FunctionExecutor<(&'a RadixCiphertext, &'a RadixCiphertext), RadixCiphertext> + for GpuUncheckedFnExecutor +where + F: Fn( + &CudaServerKey, + &CudaRadixCiphertext, + &CudaRadixCiphertext, + &CudaStream, + ) -> CudaRadixCiphertext, +{ + fn setup(&mut self, cks: &RadixClientKey, sks: Arc) { + self.setup_from_keys(cks, sks); + } + + fn execute(&mut self, input: (&'a RadixCiphertext, &'a RadixCiphertext)) -> RadixCiphertext { + let context = self + .context + .as_ref() + .expect("setup was not properly called"); + + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(input.0, &context.stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(input.1, &context.stream); + + let gpu_result = (self.func)(&context.sks, &d_ctxt_1, &d_ctxt_2, &context.stream); + + gpu_result.to_radix_ciphertext(&context.stream) + } +} + +/// For unchecked/default assign binary functions +impl<'a, F> 
FunctionExecutor<(&'a mut RadixCiphertext, &'a RadixCiphertext), ()> + for GpuUncheckedFnExecutor +where + F: Fn(&CudaServerKey, &mut CudaRadixCiphertext, &CudaRadixCiphertext, &CudaStream), +{ + fn setup(&mut self, cks: &RadixClientKey, sks: Arc) { + self.setup_from_keys(cks, sks); + } + + fn execute(&mut self, input: (&'a mut RadixCiphertext, &'a RadixCiphertext)) { + let context = self + .context + .as_ref() + .expect("setup was not properly called"); + + let mut d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(input.0, &context.stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(input.1, &context.stream); + + (self.func)(&context.sks, &mut d_ctxt_1, &d_ctxt_2, &context.stream); + + *input.0 = d_ctxt_1.to_radix_ciphertext(&context.stream); + } +} + +/// For unchecked/default binary functions with one scalar input +impl<'a, F> FunctionExecutor<(&'a RadixCiphertext, u64), RadixCiphertext> + for GpuUncheckedFnExecutor +where + F: Fn(&CudaServerKey, &CudaRadixCiphertext, u64, &CudaStream) -> CudaRadixCiphertext, +{ + fn setup(&mut self, cks: &RadixClientKey, sks: Arc) { + self.setup_from_keys(cks, sks); + } + + fn execute(&mut self, input: (&'a RadixCiphertext, u64)) -> RadixCiphertext { + let context = self + .context + .as_ref() + .expect("setup was not properly called"); + + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(input.0, &context.stream); + + let gpu_result = (self.func)(&context.sks, &d_ctxt_1, input.1, &context.stream); + + gpu_result.to_radix_ciphertext(&context.stream) + } +} + +/// For unchecked/default binary functions with one scalar input +impl FunctionExecutor<(RadixCiphertext, u64), RadixCiphertext> for GpuUncheckedFnExecutor +where + F: Fn(&CudaServerKey, &CudaRadixCiphertext, u64, &CudaStream) -> CudaRadixCiphertext, +{ + fn setup(&mut self, cks: &RadixClientKey, sks: Arc) { + self.setup_from_keys(cks, sks); + } + + fn execute(&mut self, input: (RadixCiphertext, u64)) -> RadixCiphertext { + let context = self 
+ .context + .as_ref() + .expect("setup was not properly called"); + + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&input.0, &context.stream); + + let gpu_result = (self.func)(&context.sks, &d_ctxt_1, input.1, &context.stream); + + gpu_result.to_radix_ciphertext(&context.stream) + } +} + +// Unary Function +impl<'a, F> FunctionExecutor<&'a RadixCiphertext, RadixCiphertext> for GpuUncheckedFnExecutor +where + F: Fn(&CudaServerKey, &CudaRadixCiphertext, &CudaStream) -> CudaRadixCiphertext, +{ + fn setup(&mut self, cks: &RadixClientKey, sks: Arc) { + self.setup_from_keys(cks, sks); + } + + fn execute(&mut self, input: &'a RadixCiphertext) -> RadixCiphertext { + let context = self + .context + .as_ref() + .expect("setup was not properly called"); + + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(input, &context.stream); + + let gpu_result = (self.func)(&context.sks, &d_ctxt_1, &context.stream); + + gpu_result.to_radix_ciphertext(&context.stream) + } +} + +// Unary assign Function +impl<'a, F> FunctionExecutor<&'a mut RadixCiphertext, ()> for GpuUncheckedFnExecutor +where + F: Fn(&CudaServerKey, &mut CudaRadixCiphertext, &CudaStream), +{ + fn setup(&mut self, cks: &RadixClientKey, sks: Arc) { + self.setup_from_keys(cks, sks); + } + + fn execute(&mut self, input: &'a mut RadixCiphertext) { + let context = self + .context + .as_ref() + .expect("setup was not properly called"); + + let mut d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(input, &context.stream); + + (self.func)(&context.sks, &mut d_ctxt_1, &context.stream); + + *input = d_ctxt_1.to_radix_ciphertext(&context.stream) + } +} + +fn integer_unchecked_mul

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_mul); + unchecked_mul_test(param, executor); +} + +fn integer_unchecked_add

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_add); + unchecked_add_test(param, executor); +} + +fn integer_unchecked_add_assign

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_add_assign); + unchecked_add_assign_test(param, executor); +} + +fn integer_unchecked_scalar_add

(param: P) +where + P: Into + Copy, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_scalar_add); + unchecked_scalar_add_test(param, executor); +} + +fn integer_unchecked_small_scalar_mul

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_small_scalar_mul); + unchecked_small_scalar_mul_test(param, executor); +} + +fn integer_unchecked_sub

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_sub); + unchecked_sub_test(param, executor); +} + +fn integer_unchecked_scalar_sub

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_scalar_sub); + unchecked_scalar_sub_test(param, executor); +} + +fn integer_unchecked_neg

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_neg); + unchecked_neg_test(param, executor); +} + +fn integer_unchecked_bitnot

(param: P) +where + P: Into, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (cks, sks) = KEY_CACHE.get_from_params(param); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + let clear = rng.gen::() % modulus; + println!("clear {clear}"); + // encryption of the integer + let ctxt = cks.encrypt_radix(clear, NB_CTXT); + + // Copy to the GPU + let d_ctxt = CudaRadixCiphertext::from_radix_ciphertext(&ctxt, &mut stream); + + // bitwise NOT of the ciphertext + let d_ct_res = sks.unchecked_bitnot(&d_ctxt, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + + // decryption of ct_res + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // assert: the clear NOT is reduced modulo `modulus` before comparing + let clear_result = (!clear) % modulus; + println!("not {} = {}", clear, clear_result); + assert_eq!(clear_result, dec_res); + } +} + +fn integer_unchecked_bitand

(param: P) +where + P: Into, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (cks, sks) = KEY_CACHE.get_from_params(param); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + let clear_0 = rng.gen::() % modulus; + + let clear_1 = rng.gen::() % modulus; + + // encryption of integers + let ctxt_0 = cks.encrypt_radix(clear_0, NB_CTXT); + let ctxt_1 = cks.encrypt_radix(clear_1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_0 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_0, &mut stream); + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + + // bitwise AND of the two ciphertexts + let d_ct_res = sks.unchecked_bitand(&d_ctxt_0, &d_ctxt_1, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + + // decryption of ct_res + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // assert + assert_eq!(clear_0 & clear_1, dec_res); + } +} + +fn integer_unchecked_bitor

(param: P) +where + P: Into, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (cks, sks) = KEY_CACHE.get_from_params(param); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + let clear_0 = rng.gen::() % modulus; + + let clear_1 = rng.gen::() % modulus; + + // encryption of integers + let ctxt_0 = cks.encrypt_radix(clear_0, NB_CTXT); + let ctxt_1 = cks.encrypt_radix(clear_1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_0 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_0, &stream); + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + // bitwise OR of the two ciphertexts + let d_ct_res = sks.unchecked_bitor(&d_ctxt_0, &d_ctxt_1, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + + // decryption of ct_res + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // assert + assert_eq!(clear_0 | clear_1, dec_res); + } +} + +fn integer_unchecked_bitxor

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + let clear_0 = rng.gen::() % modulus; + + let clear_1 = rng.gen::() % modulus; + + // encryption of integers + let ctxt_0 = cks.encrypt_radix(clear_0, NB_CTXT); + let ctxt_1 = cks.encrypt_radix(clear_1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_0 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_0, &stream); + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + // bitwise XOR of the two ciphertexts + let d_ct_res = sks.unchecked_bitxor(&d_ctxt_0, &d_ctxt_1, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + + // decryption of ct_res + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // assert + assert_eq!(clear_0 ^ clear_1, dec_res); + } +} + +fn integer_unchecked_scalar_bitand

(param: P) +where + P: Into, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (cks, sks) = KEY_CACHE.get_from_params(param); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + let clear_0 = rng.gen::() % modulus; + let clear_1 = rng.gen::() % modulus; + + // encryption of the first integer only; clear_1 stays a clear scalar + let ctxt_0 = cks.encrypt_radix(clear_0, NB_CTXT); + + // Copy to the GPU + let d_ctxt_0 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_0, &mut stream); + + // bitwise AND of the ciphertext with the clear scalar + let d_ct_res = sks.unchecked_scalar_bitand(&d_ctxt_0, clear_1, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + + // decryption of ct_res + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // assert + assert_eq!(clear_0 & clear_1, dec_res); + } +} + +fn integer_unchecked_scalar_bitor

(param: P) +where + P: Into, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (cks, sks) = KEY_CACHE.get_from_params(param); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + let clear_0 = rng.gen::() % modulus; + let clear_1 = rng.gen::() % modulus; + // encryption of the first integer only; clear_1 stays a clear scalar + let ctxt_0 = cks.encrypt_radix(clear_0, NB_CTXT); + + // Copy to the GPU + let d_ctxt_0 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_0, &mut stream); + + // bitwise OR of the ciphertext with the clear scalar + let d_ct_res = sks.unchecked_scalar_bitor(&d_ctxt_0, clear_1, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + + // decryption of ct_res + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // assert + assert_eq!(clear_0 | clear_1, dec_res); + } +} + +fn integer_unchecked_scalar_bitxor

(param: P) +where + P: Into, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (cks, sks) = KEY_CACHE.get_from_params(param); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + let clear_0 = rng.gen::() % modulus; + let clear_1 = rng.gen::() % modulus; + + // encryption of the first integer only; clear_1 stays a clear scalar + let ctxt_0 = cks.encrypt_radix(clear_0, NB_CTXT); + + // Copy to the GPU + let d_ctxt_0 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_0, &mut stream); + + // bitwise XOR of the ciphertext with the clear scalar + let d_ct_res = sks.unchecked_scalar_bitxor(&d_ctxt_0, clear_1, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + + // decryption of ct_res + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // assert + assert_eq!(clear_0 ^ clear_1, dec_res); + } +} + +fn integer_unchecked_eq

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + // let h_ct_res = h_sks.unchecked_eq(&ctxt_1, &ctxt_2); + let d_ct_res = sks.unchecked_eq(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 == clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + + let d_ctxt_2 = d_ctxt_1.duplicate(&stream); + let d_ct_res = sks.unchecked_eq(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // Check the correctness + assert_eq!(1, dec_res); + } +} + +fn integer_unchecked_ne

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + // let h_ct_res = h_sks.unchecked_eq(&ctxt_1, &ctxt_2); + let d_ct_res = sks.unchecked_ne(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 != clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + + let d_ctxt_2 = d_ctxt_1.duplicate(&stream); + let d_ct_res = sks.unchecked_ne(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // Check the correctness + assert_eq!(0, dec_res); + } +} + +fn integer_unchecked_gt

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + let d_ct_res = sks.unchecked_gt(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 > clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_ge

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + let d_ct_res = sks.unchecked_ge(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 >= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_lt

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + // let _ = h_sks.unchecked_lt(&ctxt_1, &ctxt_2); + let d_ct_res = sks.unchecked_lt(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 < clear2) as u64; + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_le

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + let d_ct_res = sks.unchecked_le(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 <= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_scalar_gt

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + // Assert we are testing for 0 + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = 0; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.unchecked_scalar_gt(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + let expected: u64 = (clear1 > clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.unchecked_scalar_gt(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + let expected: u64 = (clear1 > clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_scalar_ge

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + // Assert we are testing for 0 + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = 0; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.unchecked_scalar_ge(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + let expected: u64 = (clear1 >= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.unchecked_scalar_ge(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 >= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_scalar_lt

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + // Assert we are testing for 0 + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = 0; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.unchecked_scalar_lt(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + let expected: u64 = (clear1 < clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.unchecked_scalar_lt(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 < clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_scalar_le

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + // Assert we are testing for 0 + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = 0; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.unchecked_scalar_le(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + let expected: u64 = (clear1 <= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.unchecked_scalar_le(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 <= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_max

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &mut stream); + + let d_ct_res = sks.unchecked_max(&d_ctxt_1, &d_ctxt_2, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = max(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_min

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &mut stream); + + let d_ct_res = sks.unchecked_min(&d_ctxt_1, &d_ctxt_2, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = min(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_scalar_max

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + // Assert we are testing for 0 + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = 0; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + + let d_ct_res = sks.unchecked_scalar_max(&d_ctxt_1, clear2, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = max(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + + // Define the cleartexts + let clear1 = 0; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + + let d_ct_res = sks.unchecked_scalar_max(&d_ctxt_1, clear2, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = max(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + + let d_ct_res = sks.unchecked_scalar_max(&d_ctxt_1, clear2, &mut 
stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = max(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_scalar_min

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = 0; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + + let d_ct_res = sks.unchecked_scalar_min(&d_ctxt_1, clear2, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = min(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + + // Define the cleartexts + let clear1 = 0; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + + let d_ct_res = sks.unchecked_scalar_min(&d_ctxt_1, clear2, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = min(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + + let d_ct_res = sks.unchecked_scalar_min(&d_ctxt_1, clear2, &mut stream); + + let ct_res = 
d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = min(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_if_then_else

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + let clear_condition = rng.gen_range(0u64..1); + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + let ctxt_condition = cks.encrypt_radix(clear_condition, 1); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + let d_ctxt_condition = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_condition, &stream); + + let d_ct_res = sks.unchecked_if_then_else(&d_ctxt_condition, &d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // Check the correctness + assert_eq!(dec_res, if clear_condition == 1 { clear1 } else { clear2 }); + } +} + +fn integer_mul

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::mul); + default_mul_test(param, executor); +} + +fn integer_add

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::add); + default_add_test(param, executor); +} + +fn integer_scalar_add

(param: P) +where + P: Into + Copy, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::scalar_add); + default_scalar_add_test(param, executor); +} + +fn integer_small_scalar_mul

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::small_scalar_mul); + default_small_scalar_mul_test(param, executor); +} + +fn integer_sub

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::sub); + default_sub_test(param, executor); +} + +fn integer_scalar_sub

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::scalar_sub); + default_scalar_sub_test(param, executor); +} + +fn integer_neg

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::neg); + default_neg_test(param, executor); +} + +fn integer_bitnot

(param: P) +where + P: Into + Copy, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::bitnot); + default_bitnot_test(param, executor); +} + +fn integer_bitand

(param: P) +where + P: Into + Copy, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::bitand); + default_bitand_test(param, executor); +} + +fn integer_bitor

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::bitor); + default_bitor_test(param, executor); +} + +fn integer_bitxor

(param: P) +where + P: Into + Copy, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::bitxor); + default_bitxor_test(param, executor); +} + +fn integer_scalar_bitand

(param: P) +where + P: Into + Copy, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::scalar_bitand); + default_scalar_bitand_test(param, executor); +} + +fn integer_scalar_bitor

(param: P) +where + P: Into, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::scalar_bitor); + default_scalar_bitor_test(param, executor); +} + +fn integer_scalar_bitxor

(param: P) +where + P: Into + Copy, +{ + let executor = GpuUncheckedFnExecutor::new(&CudaServerKey::scalar_bitxor); + default_scalar_bitxor_test(param, executor); +} + +fn integer_eq

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + // let h_ct_res = h_sks.eq(&ctxt_1, &ctxt_2); + let d_ct_res = sks.eq(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 == clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + + let d_ctxt_2 = d_ctxt_1.duplicate(&stream); + let d_ct_res = sks.eq(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // Check the correctness + assert_eq!(1, dec_res); + } +} + +fn integer_ne

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + // let h_ct_res = h_sks.eq(&ctxt_1, &ctxt_2); + let d_ct_res = sks.ne(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 != clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + + let d_ctxt_2 = d_ctxt_1.duplicate(&stream); + let d_ct_res = sks.ne(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // Check the correctness + assert_eq!(0, dec_res); + } +} + +fn integer_gt

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + let d_ct_res = sks.gt(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 > clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_ge

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + let d_ct_res = sks.ge(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 >= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_lt

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + // let _ = h_sks.lt(&ctxt_1, &ctxt_2); + let d_ct_res = sks.lt(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 < clear2) as u64; + assert_eq!(expected, dec_res); + } +} + +fn integer_le

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + + let d_ct_res = sks.le(&d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 <= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_scalar_gt

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.scalar_gt(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 > clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_scalar_ge

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.scalar_ge(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 >= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_scalar_lt

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.scalar_lt(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 < clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_scalar_le

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.scalar_le(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = (clear1 <= clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_unchecked_scalar_left_shift

(param: P)
where
    // NOTE(review): `Into` carries no target type here and no `<P>` list precedes `(param: P)`;
    // both look lost before this chunk - restore them before compiling.
    P: Into + Copy,
{
    // Delegates to `unchecked_scalar_left_shift_test`, handing it an executor built from
    // `CudaServerKey::unchecked_scalar_left_shift`.
    unchecked_scalar_left_shift_test(
        param,
        GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_scalar_left_shift),
    );
}

fn integer_unchecked_scalar_right_shift

(param: P)
where
    // NOTE(review): `Into` carries no target type here and no `<P>` list precedes `(param: P)`;
    // both look lost before this chunk - restore them before compiling.
    P: Into + Copy,
{
    // Delegates to `unchecked_scalar_right_shift_test`, handing it an executor built from
    // `CudaServerKey::unchecked_scalar_right_shift`.
    unchecked_scalar_right_shift_test(
        param,
        GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_scalar_right_shift),
    );
}

fn integer_scalar_right_shift

(param: P)
where
    // NOTE(review): `Into` carries no target type here and no `<P>` list precedes `(param: P)`;
    // both look lost before this chunk - restore them before compiling.
    P: Into + Copy,
{
    // Delegates to `default_scalar_right_shift_test`. The executor type is named "Unchecked" but
    // is built here from the default operation `CudaServerKey::scalar_right_shift`.
    default_scalar_right_shift_test(
        param,
        GpuUncheckedFnExecutor::new(&CudaServerKey::scalar_right_shift),
    );
}

fn integer_scalar_left_shift

(param: P)
where
    // NOTE(review): `Into` carries no target type here and no `<P>` list precedes `(param: P)`;
    // both look lost before this chunk - restore them before compiling.
    P: Into + Copy,
{
    // Delegates to `default_scalar_left_shift_test`. The executor type is named "Unchecked" but
    // is built here from the default operation `CudaServerKey::scalar_left_shift`.
    default_scalar_left_shift_test(
        param,
        GpuUncheckedFnExecutor::new(&CudaServerKey::scalar_left_shift),
    );
}

fn integer_unchecked_scalar_rotate_left

(param: P)
where
    // NOTE(review): `Into` carries no target type here and no `<P>` list precedes `(param: P)`;
    // both look lost before this chunk - restore them before compiling.
    P: Into + Copy,
{
    // Delegates to `unchecked_scalar_rotate_left_test`. Note the word order differs on the key
    // method, which is spelled `unchecked_scalar_left_rotate`.
    unchecked_scalar_rotate_left_test(
        param,
        GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_scalar_left_rotate),
    );
}

fn integer_unchecked_scalar_rotate_right

(param: P)
where
    // NOTE(review): `Into` carries no target type here and no `<P>` list precedes `(param: P)`;
    // both look lost before this chunk - restore them before compiling.
    P: Into + Copy,
{
    // Delegates to `unchecked_scalar_rotate_right_test`. Note the word order differs on the key
    // method, which is spelled `unchecked_scalar_right_rotate`.
    unchecked_scalar_rotate_right_test(
        param,
        GpuUncheckedFnExecutor::new(&CudaServerKey::unchecked_scalar_right_rotate),
    );
}

fn integer_if_then_else

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + let clear_condition = rng.gen_range(0u64..1); + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + let ctxt_condition = cks.encrypt_radix(clear_condition, 1); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &stream); + let d_ctxt_condition = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_condition, &stream); + + let d_ct_res = sks.if_then_else(&d_ctxt_condition, &d_ctxt_1, &d_ctxt_2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + // Check the correctness + assert_eq!(dec_res, if clear_condition == 1 { clear1 } else { clear2 }); + } +} + +fn integer_max

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &mut stream); + + let d_ct_res = sks.max(&d_ctxt_1, &d_ctxt_2, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = max(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_min

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let mut stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &mut stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + let ctxt_2 = cks.encrypt_radix(clear2, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &mut stream); + let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_2, &mut stream); + + let d_ct_res = sks.min(&d_ctxt_1, &d_ctxt_2, &mut stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&mut stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = min(clear1, clear2); + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_scalar_max

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.scalar_max(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = max(clear1, clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} + +fn integer_scalar_min

(param: P) +where + P: Into + Copy, +{ + let gpu_index = 0; + let device = CudaDevice::new(gpu_index); + let stream = CudaStream::new_unchecked(device); + + // let (_, h_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let (cks, sks) = gen_keys_gpu(param, &stream); + + //RNG + let mut rng = rand::thread_rng(); + + // message_modulus^vec_length + let modulus = cks.parameters().message_modulus().0.pow(NB_CTXT as u32) as u64; + + for _ in 0..NB_TEST { + // Define the cleartexts + let clear1 = rng.gen::() % modulus; + let clear2 = rng.gen::() % modulus; + + // Encrypt the integers;; + let ctxt_1 = cks.encrypt_radix(clear1, NB_CTXT); + + // Copy to the GPU + let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ctxt_1, &stream); + + let d_ct_res = sks.scalar_min(&d_ctxt_1, clear2, &stream); + + let ct_res = d_ct_res.to_radix_ciphertext(&stream); + let dec_res: u64 = cks.decrypt_radix(&ct_res); + + let expected: u64 = min(clear1, clear2) as u64; + + // Check the correctness + assert_eq!(expected, dec_res); + } +} diff --git a/tfhe/src/integer/mod.rs b/tfhe/src/integer/mod.rs index d0bacb7fc..34cf6589d 100755 --- a/tfhe/src/integer/mod.rs +++ b/tfhe/src/integer/mod.rs @@ -63,6 +63,9 @@ pub mod public_key; pub mod server_key; pub mod wopbs; +#[cfg(feature = "gpu")] +pub mod gpu; + pub use bigint::i256::I256; pub use bigint::i512::I512; pub use bigint::u256::U256; diff --git a/tfhe/src/integer/server_key/comparator.rs b/tfhe/src/integer/server_key/comparator.rs index a063c77bb..715b165c5 100644 --- a/tfhe/src/integer/server_key/comparator.rs +++ b/tfhe/src/integer/server_key/comparator.rs @@ -40,9 +40,9 @@ pub struct Comparator<'a> { } impl<'a> Comparator<'a> { - const IS_INFERIOR: u64 = 0; + pub(crate) const IS_INFERIOR: u64 = 0; const IS_EQUAL: u64 = 1; - const IS_SUPERIOR: u64 = 2; + pub(crate) const IS_SUPERIOR: u64 = 2; /// Creates a new Comparator for the given ServerKey /// diff --git a/tfhe/src/integer/server_key/mod.rs 
b/tfhe/src/integer/server_key/mod.rs index a9b21527e..d72e10d82 100644 --- a/tfhe/src/integer/server_key/mod.rs +++ b/tfhe/src/integer/server_key/mod.rs @@ -40,7 +40,7 @@ impl MaxDegree { /// [`RadixCiphertext`](`crate::integer::RadixCiphertext`) (which includes adding the extracted /// carry from one shortint block to the next block), this formula provisions space to add a /// carry. - fn integer_radix_server_key( + pub(crate) fn integer_radix_server_key( message_modulus: MessageModulus, carry_modulus: CarryModulus, ) -> Self { diff --git a/tfhe/src/shortint/ciphertext/mod.rs b/tfhe/src/shortint/ciphertext/mod.rs index f3c76f42b..ea2f06fe6 100644 --- a/tfhe/src/shortint/ciphertext/mod.rs +++ b/tfhe/src/shortint/ciphertext/mod.rs @@ -263,7 +263,7 @@ impl std::ops::Mul for Degree { pub struct Ciphertext { pub ct: LweCiphertextOwned, pub degree: Degree, - noise_level: NoiseLevel, + pub(crate) noise_level: NoiseLevel, pub message_modulus: MessageModulus, pub carry_modulus: CarryModulus, pub pbs_order: PBSOrder,