Compare commits
3 Commits
mz/common_
...
bb/zk/sign
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bf4b3a2b83 | ||
|
|
b04fb4b94b | ||
|
|
e716051049 |
@@ -4,6 +4,9 @@ ignore = [
|
||||
"RUSTSEC-2024-0436",
|
||||
# Ignoring unmaintained 'bincode' crate. Getting rid of it would be too complex in the short term.
|
||||
"RUSTSEC-2025-0141",
|
||||
# Ignoring unsoundness in 'rand' with custom logger. Rand update is currently blocked by
|
||||
# arkworks and we do not use custom loggers.
|
||||
"RUSTSEC-2026-0097",
|
||||
]
|
||||
|
||||
[output]
|
||||
|
||||
4
.github/workflows/aws_data_tests.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
|
||||
- name: Retrieve data from cache
|
||||
id: retrieve-data-cache
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
with:
|
||||
path: |
|
||||
utils/tfhe-backward-compat-data/**/*.cbor
|
||||
@@ -89,7 +89,7 @@ jobs:
|
||||
- name: Store data in cache
|
||||
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
|
||||
continue-on-error: true
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
with:
|
||||
path: |
|
||||
utils/tfhe-backward-compat-data/**/*.cbor
|
||||
|
||||
6
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -69,7 +69,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
@@ -200,7 +200,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -213,7 +213,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
|
||||
2
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -56,7 +56,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
integer:
|
||||
|
||||
@@ -57,7 +57,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
integer:
|
||||
|
||||
2
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -78,7 +78,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
|
||||
6
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -45,7 +45,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
wasm:
|
||||
@@ -92,7 +92,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
backward:
|
||||
@@ -79,11 +79,19 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Post/refresh backward-compat report
|
||||
- name: Find existing comment
|
||||
if: steps.report.outputs.has_report == 'true'
|
||||
uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0
|
||||
id: find-comment
|
||||
uses: peter-evans/find-comment@b30e6a3c0ed37e7c023ccd3f1db5c6c0b0c23aad # v4.0.0
|
||||
with:
|
||||
header: backward-compat-snapshot
|
||||
hide_and_recreate: true
|
||||
hide_classify: OUTDATED
|
||||
path: report.md
|
||||
issue-number: ${{ github.event.pull_request.number }}
|
||||
body-includes: '**Backward-compat snapshot:'
|
||||
|
||||
- name: Comment on PR
|
||||
if: steps.report.outputs.has_report == 'true'
|
||||
uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0
|
||||
with:
|
||||
comment-id: ${{ steps.find-comment.outputs.comment-id }}
|
||||
issue-number: ${{ github.event.pull_request.number }}
|
||||
body-path: report.md
|
||||
edit-mode: replace
|
||||
|
||||
2
.github/workflows/benchmark_cpu_common.yml
vendored
@@ -223,7 +223,7 @@ jobs:
|
||||
results_type: ${{ inputs.additional_results_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_ct_key_sizes.yml
vendored
@@ -99,7 +99,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_ct_key_sizes
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
4
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -89,7 +89,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_multi_bit_gpu_default
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -173,7 +173,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_gpu_common.yml
vendored
@@ -270,7 +270,7 @@ jobs:
|
||||
filenames: ${{ inputs.additional_file_to_parse }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -204,7 +204,7 @@ jobs:
|
||||
uses: foundry-rs/foundry-toolchain@8789b3e21e6c11b2697f5eb56eddae542f746c10
|
||||
|
||||
- name: Cache cargo
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
@@ -232,7 +232,7 @@ jobs:
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Use Node.js
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
|
||||
with:
|
||||
node-version: 20.x
|
||||
|
||||
@@ -271,7 +271,7 @@ jobs:
|
||||
- name: Upload profile artifact
|
||||
env:
|
||||
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ env.REPORT_NAME }}
|
||||
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
|
||||
@@ -302,7 +302,7 @@ jobs:
|
||||
working-directory: fhevm/
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
|
||||
path: fhevm/$${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_hpu_common.yml
vendored
@@ -185,7 +185,7 @@ jobs:
|
||||
BENCH_TYPE: ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.bench_type }}_${{ matrix.command }}_benchmarks
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -280,7 +280,7 @@ jobs:
|
||||
BENCH_TYPE: ${{ env.__TFHE_RS_BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_regression_${{ env.RESULTS_FILE_SHA }} # RESULTS_FILE_SHA is needed to avoid collisions between matrix.command runs
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -99,7 +99,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_fft
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -99,7 +99,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_ntt
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -46,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
wasm_bench:
|
||||
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -180,7 +180,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
4
.github/workflows/cargo_build_common.yml
vendored
@@ -138,7 +138,7 @@ jobs:
|
||||
- name: Node cache restoration
|
||||
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2'
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -151,7 +151,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2' && steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
|
||||
2
.github/workflows/cargo_test_fft.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
fft:
|
||||
|
||||
2
.github/workflows/cargo_test_ntt.yml
vendored
@@ -42,7 +42,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
ntt:
|
||||
|
||||
2
.github/workflows/ci_lint.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
||||
echo "version=$(make zizmor_version)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Check workflows security
|
||||
uses: zizmorcore/zizmor-action@b1d7e1fb5de872772f31590499237e7cce841e8e # v0.5.3
|
||||
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
|
||||
with:
|
||||
advanced-security: 'false' # Print results directly in logs
|
||||
persona: pedantic
|
||||
|
||||
2
.github/workflows/code_coverage.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
tfhe:
|
||||
|
||||
@@ -46,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
csprng:
|
||||
|
||||
4
.github/workflows/generate_svg_common.yml
vendored
@@ -87,7 +87,7 @@ jobs:
|
||||
|
||||
- name: Upload tables
|
||||
if: inputs.backend_comparison == false
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_subset_${{inputs.bench_subset}}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
|
||||
# This will upload all the generated files
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
|
||||
- name: Upload comparison tables
|
||||
if: inputs.backend_comparison == true
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_backends_comparison_tables
|
||||
# This will upload all the generated files
|
||||
|
||||
2
.github/workflows/gpu_core_h100_tests.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
2
.github/workflows/gpu_fast_tests.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
@@ -48,7 +48,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
2
.github/workflows/gpu_hlapi_h100_tests.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
@@ -45,7 +45,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
2
.github/workflows/gpu_memory_sanitizer.yml
vendored
@@ -48,7 +48,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
@@ -48,7 +48,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
@@ -48,7 +48,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
@@ -49,7 +49,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
@@ -49,7 +49,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
@@ -48,7 +48,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
@@ -49,7 +49,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
@@ -49,7 +49,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
2
.github/workflows/gpu_zk_tests.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
|
||||
2
.github/workflows/hpu_hlapi_tests.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
hpu:
|
||||
|
||||
2
.github/workflows/make_release_common.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
||||
PACKAGE: ${{ inputs.package-name }}
|
||||
run: |
|
||||
cargo package -p "${PACKAGE}"
|
||||
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
with:
|
||||
name: crate-${{ inputs.package-name }}
|
||||
path: target/package/*.crate
|
||||
|
||||
19
.github/workflows/make_release_common_cuda.yml
vendored
@@ -128,7 +128,7 @@ jobs:
|
||||
run: |
|
||||
cargo package -p "${PACKAGE}"
|
||||
|
||||
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
with:
|
||||
name: crate-${{ inputs.package-name }}
|
||||
path: target/package/*.crate
|
||||
@@ -196,13 +196,6 @@ jobs:
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: "false"
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
with:
|
||||
@@ -217,12 +210,12 @@ jobs:
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
PACKAGE: ${{ inputs.package-name }}
|
||||
DRY_RUN: ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
DRY-RUN: ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# dry-run expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since dry-run is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p "${PACKAGE}" ${DRY_RUN}
|
||||
cargo publish -p "${PACKAGE}" ${DRY-RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
@@ -262,7 +255,7 @@ jobs:
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (${{ inputs.package-name }} release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
2
.github/workflows/make_release_tfhe.yml
vendored
@@ -89,7 +89,7 @@ jobs:
|
||||
make build_web_js_api_parallel
|
||||
|
||||
- name: Authenticate on NPM
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
|
||||
with:
|
||||
node-version: '24'
|
||||
registry-url: 'https://registry.npmjs.org'
|
||||
|
||||
4
.github/workflows/parameters_check.yml
vendored
@@ -53,7 +53,7 @@ jobs:
|
||||
|
||||
- name: Restore Sagemath image from cache
|
||||
id: docker-cache
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
with:
|
||||
path: /tmp/sagemath_image
|
||||
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
|
||||
@@ -76,7 +76,7 @@ jobs:
|
||||
- name: Store Sagemath image in cache
|
||||
if: steps.docker-cache.outputs.cache-hit != 'true'
|
||||
continue-on-error: true
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
with:
|
||||
path: /tmp/sagemath_image
|
||||
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
|
||||
|
||||
23
Makefile
@@ -312,7 +312,7 @@ semgrep_and_lint_gpu_code: semgrep_lint_setup_venv
|
||||
find "$(TFHECUDA_SRC)" -name '*.h' -o -name '*.cuh' -o -name '*.cu' \
|
||||
| grep -v '/cmake-build-debug/' \
|
||||
| grep -v '/build/' \
|
||||
| xargs venv/bin/semgrep --error --config "$(TFHECUDA_SRC)/.semgrep/release-ordering.yaml" --scan-unknown-extensions
|
||||
| xargs venv/bin/semgrep --config "$(TFHECUDA_SRC)/.semgrep/release-ordering.yaml" --scan-unknown-extensions
|
||||
venv/bin/python3 "scripts/check_scratch_cleanup.py"
|
||||
|
||||
.PHONY: semver_check_cuda_backend # Run semver checks on tfhe-cuda-backend
|
||||
@@ -360,7 +360,7 @@ check_fmt_toml: install_taplo
|
||||
|
||||
.PHONY: check_typos # Check for typos in codebase
|
||||
check_typos: install_typos_checker
|
||||
@git ls-files ":!*.png" ":!*.cbor" ":!*.bcode" ":!*.ico" ":!*/twiddles.cu" ":!*.hpu" | typos --file-list - && echo "No typos found"
|
||||
@git ls-files ":!*.png" ":!*.cbor" ":!*.bcode" ":!*.ico" ":!*/twiddles.cu" | typos --file-list - && echo "No typos found"
|
||||
|
||||
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
@@ -587,17 +587,6 @@ clippy_backward_compat_data: install_rs_check_toolchain # the toolchain is selec
|
||||
echo "Cannot run clippy for backward compat crate on non x86 platform for now."; \
|
||||
fi
|
||||
|
||||
.PHONY: check_backward_compat_locks_did_not_change # Check backward compat Cargo.lock files are up to date
|
||||
check_backward_compat_locks_did_not_change: install_rs_check_toolchain
|
||||
@for crate in `ls -1 $(BACKWARD_COMPAT_DATA_DIR)/crates/ | grep generate_`; do \
|
||||
echo "checking Cargo.lock for $$crate"; \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options \
|
||||
-C $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate metadata --locked --format-version 1 > /dev/null || \
|
||||
( echo "Cargo.lock for $$crate is out of date. Update it with:" && \
|
||||
echo " cd $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate && cargo metadata --format-version 1 > /dev/null" && \
|
||||
echo "then commit the updated Cargo.lock." && exit 1 ); \
|
||||
done
|
||||
|
||||
.PHONY: clippy_test_vectors # Run clippy lints on the test vectors app
|
||||
clippy_test_vectors: install_rs_check_toolchain
|
||||
cd apps/test-vectors; RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
@@ -1794,13 +1783,6 @@ bench_boolean: install_rs_check_toolchain
|
||||
--bench boolean \
|
||||
--features=boolean,internal-keycache -p tfhe-benchmark
|
||||
|
||||
.PHONY: bench_common_mask # Run benchmarks for CM-PBS
|
||||
bench_common_mask: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench cm-bench \
|
||||
--features=experimental -p tfhe-benchmark
|
||||
|
||||
|
||||
.PHONY: bench_ks # Run benchmarks for keyswitch
|
||||
bench_ks: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
@@ -2283,7 +2265,6 @@ pcc_batch_5:
|
||||
$(call run_recipe_with_details,clippy_tfhe_lints)
|
||||
$(call run_recipe_with_details,check_compile_tests)
|
||||
$(call run_recipe_with_details,clippy_backward_compat_data)
|
||||
$(call run_recipe_with_details,check_backward_compat_locks_did_not_change)
|
||||
|
||||
.PHONY: pcc_batch_6 # duration: 6'32''
|
||||
pcc_batch_6:
|
||||
|
||||
@@ -1,14 +1,5 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn get_linux_distribution_name() -> Option<String> {
|
||||
let content = std::fs::read_to_string("/etc/os-release").ok()?;
|
||||
for line in content.lines() {
|
||||
if let Some(value) = line.strip_prefix("NAME=") {
|
||||
return Some(value.trim_matches('"').to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
use std::process::Command;
|
||||
|
||||
fn main() {
|
||||
if let Ok(val) = std::env::var("DOCS_RS") {
|
||||
@@ -37,7 +28,9 @@ fn main() {
|
||||
println!("cargo::rerun-if-changed=src");
|
||||
|
||||
if std::env::consts::OS == "linux" {
|
||||
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
|
||||
let output = Command::new("./get_os_name.sh").output().unwrap();
|
||||
let distribution = String::from_utf8(output.stdout).unwrap();
|
||||
if distribution != "Ubuntu\n" {
|
||||
println!(
|
||||
"cargo:warning=This Linux distribution is not officially supported. \
|
||||
Only Ubuntu is supported by tfhe-cuda-backend at this time. Build may fail\n"
|
||||
|
||||
@@ -721,7 +721,7 @@ void cuda_integer_grouped_oprf_custom_range_64_async(
|
||||
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
|
||||
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
|
||||
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
|
||||
void *const *compute_bsks, void *const *ksks);
|
||||
void *const *ksks);
|
||||
|
||||
void cleanup_cuda_integer_grouped_oprf_custom_range_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
@@ -390,7 +390,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
XOR(&wires_a[6], &wires_a[15], &input_bits[7]);
|
||||
XOR(&wires_a[10], &wires_a[15], &wires_b[0]);
|
||||
XOR(&wires_a[11], &wires_a[20], &wires_a[9]);
|
||||
FLUSH(&wires_a[6], &wires_a[10], &wires_a[11]);
|
||||
FLUSH(&wires_a[6], &wires_a[10]);
|
||||
XOR(&wires_a[7], &input_bits[7], &wires_a[11]);
|
||||
FLUSH(&wires_a[7]);
|
||||
XOR(&wires_a[17], &wires_a[10], &wires_a[11]);
|
||||
@@ -426,7 +426,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
XOR(&wires_b[22], &wires_b[18], &wires_a[19]);
|
||||
XOR(&wires_b[23], &wires_b[19], &wires_a[21]);
|
||||
XOR(&wires_b[24], &wires_b[20], &wires_a[18]);
|
||||
FLUSH(&wires_b[21], &wires_b[22], &wires_b[23], &wires_b[24]);
|
||||
FLUSH(&wires_b[21], &wires_b[23], &wires_b[24]);
|
||||
XOR(&wires_b[25], &wires_b[21], &wires_b[22]);
|
||||
FLUSH(&wires_b[25]);
|
||||
|
||||
@@ -468,7 +468,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
|
||||
XOR(&wires_b[37], &wires_b[36], &wires_b[34]);
|
||||
XOR(&wires_b[38], &wires_b[27], &wires_b[36]);
|
||||
FLUSH(&wires_b[38], &wires_b[37]);
|
||||
FLUSH(&wires_b[38]);
|
||||
XOR(&wires_b[44], &wires_b[33], &wires_b[37]);
|
||||
|
||||
CudaRadixCiphertextFFI *and_outs_6[] = {&wires_b[39]};
|
||||
@@ -479,7 +479,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
XOR(&wires_b[40], &wires_b[25], &wires_b[39]);
|
||||
XOR(&wires_b[41], &wires_b[40], &wires_b[37]);
|
||||
XOR(&wires_b[43], &wires_b[29], &wires_b[40]);
|
||||
FLUSH(&wires_b[41], &wires_b[40], &wires_b[43], &wires_b[44]);
|
||||
FLUSH(&wires_b[41]);
|
||||
XOR(&wires_b[45], &wires_b[42], &wires_b[41]);
|
||||
FLUSH(&wires_b[45]);
|
||||
|
||||
@@ -514,7 +514,6 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
XOR(&wires_b[57], &wires_b[50], &wires_b[53]);
|
||||
XOR(&wires_b[58], &wires_c[4], &wires_b[46]);
|
||||
XOR(&wires_b[59], &wires_c[3], &wires_b[54]);
|
||||
FLUSH(&wires_b[57], &wires_b[58]);
|
||||
XOR(&wires_b[60], &wires_b[46], &wires_b[57]);
|
||||
XOR(&wires_b[61], &wires_c[14], &wires_b[57]);
|
||||
XOR(&wires_b[62], &wires_b[52], &wires_b[58]);
|
||||
@@ -590,7 +589,6 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
#undef FLUSH
|
||||
#undef AND
|
||||
#undef ADD_ONE_FLUSH
|
||||
#undef ADD_ONE
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -489,7 +489,7 @@ template <typename Torus>
|
||||
__host__ void host_modulus_switch_multi_bit(
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *array_out, Torus *array_in,
|
||||
int size, uint32_t log_modulus, uint32_t degree, uint32_t grouping_factor) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
cudaSetDevice(gpu_index);
|
||||
int multibit_size = size / grouping_factor;
|
||||
int num_threads = 0, num_blocks = 0;
|
||||
getNumBlocksAndThreads(multibit_size, 1024, num_blocks, num_threads);
|
||||
|
||||
@@ -72,13 +72,13 @@ void cuda_integer_grouped_oprf_custom_range_64_async(
|
||||
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
|
||||
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
|
||||
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
|
||||
void *const *compute_bsks, void *const *ksks) {
|
||||
void *const *ksks) {
|
||||
|
||||
host_integer_grouped_oprf_custom_range<uint64_t>(
|
||||
CudaStreams(streams), radix_lwe_out, num_blocks_intermediate,
|
||||
(const uint64_t *)seeded_lwe_input, decomposed_scalar,
|
||||
has_at_least_one_set, num_scalars, shift,
|
||||
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks, compute_bsks,
|
||||
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks,
|
||||
(uint64_t *const *)ksks);
|
||||
}
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ void host_integer_grouped_oprf_custom_range(
|
||||
const Torus *decomposed_scalar, const Torus *has_at_least_one_set,
|
||||
uint32_t num_scalars, uint32_t shift,
|
||||
int_grouped_oprf_custom_range_memory<Torus> *mem_ptr, void *const *bsks,
|
||||
void *const *compute_bsks, Torus *const *ksks) {
|
||||
Torus *const *ksks) {
|
||||
|
||||
CudaRadixCiphertextFFI *computation_buffer = mem_ptr->tmp_oprf_output;
|
||||
set_zero_radix_ciphertext_slice_async<Torus>(
|
||||
@@ -127,12 +127,12 @@ void host_integer_grouped_oprf_custom_range(
|
||||
|
||||
host_integer_scalar_mul_radix<Torus>(
|
||||
streams, computation_buffer, decomposed_scalar, has_at_least_one_set,
|
||||
mem_ptr->scalar_mul_buffer, compute_bsks, ksks,
|
||||
mem_ptr->params.message_modulus, num_scalars);
|
||||
mem_ptr->scalar_mul_buffer, bsks, ksks, mem_ptr->params.message_modulus,
|
||||
num_scalars);
|
||||
|
||||
host_logical_scalar_shift_inplace<Torus>(
|
||||
streams, computation_buffer, shift, mem_ptr->logical_scalar_shift_buffer,
|
||||
compute_bsks, ksks, num_blocks_intermediate);
|
||||
host_logical_scalar_shift_inplace<Torus>(streams, computation_buffer, shift,
|
||||
mem_ptr->logical_scalar_shift_buffer,
|
||||
bsks, ksks, num_blocks_intermediate);
|
||||
|
||||
uint32_t num_blocks_output = radix_lwe_out->num_radix_blocks;
|
||||
uint32_t blocks_to_copy =
|
||||
|
||||
@@ -308,7 +308,6 @@ void cleanup_cuda_multi_bit_programmable_bootstrap_noise_tests_128(
|
||||
void *stream, uint32_t gpu_index, int8_t **pbs_buffer) {
|
||||
cleanup_cuda_multi_bit_programmable_bootstrap_128(stream, gpu_index,
|
||||
pbs_buffer);
|
||||
cuda_synchronize_stream(static_cast<cudaStream_t>(stream), gpu_index);
|
||||
}
|
||||
|
||||
// Noise tests variant of the 128-bit multi-bit PBS, restricted to
|
||||
|
||||
3
backends/tfhe-cuda-backend/get_os_name.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash

# Print the distribution name stored in the NAME field of /etc/os-release,
# followed by a newline (for example "Ubuntu").
#
# Only the line that starts with NAME= is considered, so other lines that
# merely contain the word NAME are ignored. The key is removed whether or not
# the value is wrapped in double quotes, and any double quotes are dropped.
grep '^NAME=' /etc/os-release | sed -e 's/^NAME=//' -e 's/"//g'
|
||||
@@ -1647,7 +1647,6 @@ unsafe extern "C" {
|
||||
shift: u32,
|
||||
mem: *mut i8,
|
||||
bsks: *const *mut ffi::c_void,
|
||||
compute_bsks: *const *mut ffi::c_void,
|
||||
ksks: *const *mut ffi::c_void,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-hpu-backend"
|
||||
version = "0.5.0"
|
||||
version = "0.4.0"
|
||||
edition = "2021"
|
||||
license = "BSD-3-Clause-Clear"
|
||||
description = "HPU implementation on FPGA of TFHE-rs primitives."
|
||||
@@ -36,7 +36,7 @@ thiserror = "1.0.61"
|
||||
bytemuck = { workspace = true }
|
||||
anyhow = "1.0.82"
|
||||
lazy_static = "1.4.0"
|
||||
rand = "0.10.1"
|
||||
rand = "0.8.5"
|
||||
regex = "1.10.4"
|
||||
bitflags = { version = "2.5.0", features = ["serde"] }
|
||||
itertools = "0.11.0"
|
||||
|
||||
@@ -24,7 +24,7 @@ use mem_alloc::{MemAlloc, MemChunk};
|
||||
|
||||
mod qdma;
|
||||
use qdma::QdmaDriver;
|
||||
use rand::RngExt;
|
||||
use rand::Rng;
|
||||
|
||||
const DMA_XFER_ALIGN: usize = 4096_usize;
|
||||
|
||||
@@ -148,8 +148,8 @@ impl HpuHw {
|
||||
tracing::debug!("Load stage1 through JTAG");
|
||||
let pdi_stg1_tmp = format!(
|
||||
"hpu_stg1_{}.pdi",
|
||||
rand::rng()
|
||||
.sample_iter(rand::distr::Alphanumeric)
|
||||
rand::thread_rng()
|
||||
.sample_iter(rand::distributions::Alphanumeric)
|
||||
.take(5)
|
||||
.map(char::from)
|
||||
.collect::<String>()
|
||||
|
||||
@@ -156,7 +156,7 @@ impl HpuVarWrapped {
|
||||
{
|
||||
let mut inner = var.inner.lock().unwrap();
|
||||
|
||||
for (slot, ct) in std::iter::zip(inner.bundle.iter_mut(), ct) {
|
||||
for (slot, ct) in std::iter::zip(inner.bundle.iter_mut(), ct.into_iter()) {
|
||||
#[cfg(feature = "io-dump")]
|
||||
let params = ct.params().clone();
|
||||
for (id, cut) in ct.into_container().iter().enumerate() {
|
||||
|
||||
@@ -1,14 +1,5 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Returns the value of the `NAME` field of `/etc/os-release`.
///
/// Returns `None` when the file cannot be read or contains no line starting
/// with `NAME=`. Only the first such line is used.
///
/// Surrounding whitespace and surrounding quote characters are removed from
/// the value. Both `"` and `'` are stripped because os-release allows either
/// quoting style, so `NAME="Ubuntu"`, `NAME='Ubuntu'` and `NAME=Ubuntu` all
/// yield `Ubuntu`.
fn get_linux_distribution_name() -> Option<String> {
    let content = std::fs::read_to_string("/etc/os-release").ok()?;
    content
        .lines()
        .find_map(|line| line.strip_prefix("NAME="))
        .map(|value| {
            value
                .trim()
                .trim_matches(|c| c == '"' || c == '\'')
                .to_string()
        })
}
|
||||
use std::process::Command;
|
||||
|
||||
fn main() {
|
||||
// Handle docs.rs builds (no CUDA available)
|
||||
@@ -38,10 +29,16 @@ fn main() {
|
||||
println!("cargo:rustc-link-arg=-Wl,--allow-multiple-definition");
|
||||
println!("cargo:rustc-link-arg=-Wl,--no-as-needed");
|
||||
|
||||
// Check Linux distribution (reuse script from tfhe-cuda-backend)
|
||||
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
|
||||
.expect("CARGO_MANIFEST_DIR must be set by cargo during build");
|
||||
|
||||
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
|
||||
let script_path = PathBuf::from(&manifest_dir).join("../tfhe-cuda-backend/get_os_name.sh");
|
||||
let output = Command::new(&script_path)
|
||||
.output()
|
||||
.expect("Failed to run get_os_name.sh — is tfhe-cuda-backend present?");
|
||||
let distribution =
|
||||
String::from_utf8(output.stdout).expect("get_os_name.sh output must be valid UTF-8");
|
||||
if distribution != "Ubuntu\n" {
|
||||
println!(
|
||||
"cargo:warning=This Linux distribution is not officially supported. \
|
||||
Only Ubuntu is supported by zk-cuda-backend at this time. Build may fail\n"
|
||||
|
||||
@@ -71,9 +71,14 @@ set(CMAKE_CUDA_FLAGS_DEBUG "-g -O0 -G")
|
||||
# Additional CUDA flags (aligned with tfhe-cuda-backend)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -Xcompiler -Wextra --use_fast_math --expt-relaxed-constexpr")
|
||||
|
||||
# =============================================================================
|
||||
# Path to tfhe-cuda-backend for device utilities
|
||||
# =============================================================================
|
||||
set(TFHE_CUDA_BACKEND_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../tfhe-cuda-backend/cuda)
|
||||
|
||||
# Core source files (without device utilities) Device utilities come from tfhe-cuda-backend.
|
||||
set(FP_CORE_SOURCES src/primitives/fp.cu src/primitives/fp2.cu src/curve.cu src/msm/pippenger/msm_pippenger.cu
|
||||
src/msm/msm.cu)
|
||||
set(FP_CORE_SOURCES src/primitives/fp.cu src/primitives/fp2.cu src/primitives/xyzz.cu src/curve.cu
|
||||
src/msm/pippenger/msm_pippenger.cu src/msm/msm.cu)
|
||||
|
||||
# Headers (common.cuh is a header, not a compiled source)
|
||||
set(FP_MSM_HEADERS src/msm/common.cuh)
|
||||
@@ -107,7 +112,7 @@ endif()
|
||||
target_link_libraries(zk_cuda_backend PUBLIC cudart)
|
||||
|
||||
# Include both local headers and tfhe-cuda-backend headers (for device.h)
|
||||
target_include_directories(zk_cuda_backend PUBLIC include ../src/include)
|
||||
target_include_directories(zk_cuda_backend PUBLIC include ../src/include ${TFHE_CUDA_BACKEND_DIR}/include)
|
||||
|
||||
# =============================================================================
|
||||
# Tests and Benchmarks (optional, controlled by ZK_CUDA_BACKEND_BUILD_TESTS/BENCHMARKS)
|
||||
@@ -130,3 +135,4 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
|
||||
message(STATUS "CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||
message(STATUS "C++ standard: ${CMAKE_CXX_STANDARD}")
|
||||
message(STATUS "CUDA standard: ${CMAKE_CUDA_STANDARD}")
|
||||
message(STATUS "tfhe-cuda-backend path: ${TFHE_CUDA_BACKEND_DIR}")
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
|
||||
#include "device.h"
|
||||
|
||||
// Variadic checked multiplication of size_t values.
//
// Multiplies `first` and every value in `rest` (each converted to size_t with
// static_cast) left-to-right using __builtin_mul_overflow.
//
// Returns true as soon as an intermediate product overflows size_t; callers
// must not rely on the value of *out in that case. Returns false otherwise
// and stores the full product in *out.
//
// The operands are gathered into a plain array instead of being iterated as a
// braced-init-list, so the call is also valid with a single operand (an empty
// `rest` pack): a range-for over an empty `{}` cannot deduce its element type.
template <typename... Args>
inline bool checked_mul(size_t *out, size_t first, Args... rest) {
  const size_t factors[] = {first, static_cast<size_t>(rest)...};
  // Starting from 1 keeps the loop uniform: 1 * first can never overflow.
  size_t product = 1;
  for (size_t factor : factors) {
    if (__builtin_mul_overflow(product, factor, &product))
      return true;
  }
  *out = product;
  return false;
}
|
||||
|
||||
// Variadic safe multiplication: computes the product and panics on overflow.
|
||||
template <typename... Args> inline size_t safe_mul(size_t first, Args... rest) {
|
||||
size_t result;
|
||||
bool overflow = checked_mul(&result, first, rest...);
|
||||
PANIC_IF_FALSE(!overflow, "multiplication overflow wraps size_t");
|
||||
return result;
|
||||
}
|
||||
|
||||
// Overflow-checked size computation: returns (args... * sizeof(T)).
// The element size is appended as the last factor and the whole product is
// delegated to safe_mul.
template <typename T, typename... Args>
inline size_t safe_mul_sizeof(Args... args) {
  const size_t element_size = sizeof(T);
  return safe_mul(args..., element_size);
}
|
||||
@@ -17,7 +17,13 @@ __host__ __device__ void fp2_zero(Fp2 &a);
|
||||
|
||||
// G1 point: (x, y) coordinates in Fp
|
||||
// Curve equation: y^2 = x^3 + b (short Weierstrass form with a = 0)
|
||||
struct G1Affine {
|
||||
//
|
||||
// alignas(sizeof(uint64_t)): The bool infinity field causes the struct to be
|
||||
// padded to the largest field alignment (4 bytes in 32-bit limb mode, 8 bytes
|
||||
// in 64-bit). Forcing alignment to sizeof(uint64_t) ensures
|
||||
// sizeof(G1Affine)==120 in both modes, matching the Rust FFI bindings which
|
||||
// are always generated from the 64-bit layout regardless of LIMB_BITS_CONFIG.
|
||||
struct alignas(sizeof(uint64_t)) G1Affine {
|
||||
Fp x;
|
||||
Fp y;
|
||||
bool infinity; // true if point at infinity (identity element)
|
||||
@@ -36,7 +42,9 @@ struct G1Affine {
|
||||
|
||||
// G2 point: (x, y) coordinates in Fp2
|
||||
// Curve equation: y^2 = x^3 + b' (twisted curve over Fp2)
|
||||
struct G2Affine {
|
||||
//
|
||||
// alignas(sizeof(uint64_t)): same ABI-stability reason as G1Affine above.
|
||||
struct alignas(sizeof(uint64_t)) G2Affine {
|
||||
Fp2 x;
|
||||
Fp2 y;
|
||||
bool infinity; // true if point at infinity (identity element)
|
||||
|
||||
@@ -1,145 +0,0 @@
|
||||
#ifndef DEVICE_H
|
||||
#define DEVICE_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
extern "C" {
|
||||
|
||||
#define check_cuda_error(ans) \
|
||||
{ cuda_error((ans), __FILE__, __LINE__); }
|
||||
// Checks the result of a CUDA runtime call.
// Does nothing when `code` is cudaSuccess. Otherwise prints the CUDA error
// string together with the `file` and `line` of the call site to stderr and
// aborts the process.
inline void cuda_error(cudaError_t code, const char *file, int line) {
  if (code == cudaSuccess)
    return;

  const char *description = cudaGetErrorString(code);
  std::fprintf(stderr, "Cuda error: %s %s %d\n", description, file, line);
  std::abort();
}
|
||||
|
||||
// The PANIC macro should be used to validate user-inputs to GPU functions
|
||||
// it will execute in all targets, including production settings
|
||||
// e.g., cudaMemCopy to the device should check that the destination pointer is
|
||||
// a device pointer
|
||||
#define PANIC(format, ...) \
|
||||
{ \
|
||||
std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__, \
|
||||
__LINE__, __func__, ##__VA_ARGS__); \
|
||||
std::abort(); \
|
||||
}
|
||||
|
||||
// This is a generic assertion checking macro with user defined printf-style
|
||||
// message
|
||||
#define PANIC_IF_FALSE(cond, format, ...) \
|
||||
do { \
|
||||
if (!(cond)) { \
|
||||
PANIC(format "\n\n %s\n", ##__VA_ARGS__, #cond); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#ifndef GPU_ASSERTS_DISABLE
// The GPU assert should be used to validate assumptions in algorithms,
// for example, checking that two user-provided quantities have a certain
// relationship or that the size of the buffer provided to a function is
// sufficient when it is filled with some algorithm that depends on
// user-provided inputs e.g., OPRF corrections buffer should not have a size
// higher than the number of blocks in the datatype that is generated
#define GPU_ASSERT(cond, format, ...)                                          \
  PANIC_IF_FALSE(cond, format, ##__VA_ARGS__)
#else
// Assertions disabled: accept any argument list, including the
// (cond, format, ...) form used by the enabled variant, so call sites compile
// unchanged. Expands to an empty statement; no argument is evaluated.
#define GPU_ASSERT(...)                                                        \
  do {                                                                         \
  } while (0)
#endif
|
||||
|
||||
uint32_t cuda_get_device();
|
||||
void cuda_set_device(uint32_t gpu_index);
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index);
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
uint32_t gpu_index);
|
||||
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
|
||||
uint32_t gpu_index);
|
||||
|
||||
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index);
|
||||
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index);
|
||||
|
||||
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
uint32_t cuda_is_available();
|
||||
|
||||
void *cuda_malloc(uint64_t size, uint32_t gpu_index);
|
||||
|
||||
void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
uint64_t &size_tracker,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
bool cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
|
||||
uint64_t cuda_device_total_memory(uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_with_size_tracking_async_to_gpu(void *dest, const void *src,
|
||||
uint64_t size,
|
||||
cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated);
|
||||
|
||||
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
|
||||
void *dest, void const *src, uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index, bool gpu_memory_allocated);
|
||||
|
||||
void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
|
||||
uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated);
|
||||
|
||||
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
int cuda_get_number_of_gpus();
|
||||
|
||||
int cuda_get_number_of_sms();
|
||||
|
||||
void cuda_synchronize_device(uint32_t gpu_index);
|
||||
|
||||
void cuda_drop(void *ptr, uint32_t gpu_index);
|
||||
|
||||
void cuda_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated);
|
||||
|
||||
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
|
||||
}
|
||||
|
||||
uint32_t cuda_get_max_shared_memory(uint32_t gpu_index);
|
||||
|
||||
uint32_t cuda_get_max_shared_memory_per_block(uint32_t gpu_index);
|
||||
|
||||
bool cuda_check_support_cooperative_groups();
|
||||
|
||||
bool cuda_check_support_thread_block_clusters();
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *d_array, Torus value, Torus n);
|
||||
|
||||
#endif
|
||||
@@ -18,7 +18,7 @@
|
||||
// Supported values: 32, 64.
|
||||
// ============================================================================
|
||||
#ifndef LIMB_BITS_CONFIG
|
||||
#define LIMB_BITS_CONFIG 64
|
||||
#define LIMB_BITS_CONFIG 32
|
||||
#endif
|
||||
|
||||
#if LIMB_BITS_CONFIG == 64
|
||||
@@ -209,6 +209,17 @@ __host__ __device__ void fp_add(Fp &c, const Fp &a, const Fp &b);
|
||||
// MONTGOMERY: Both inputs and output must be in Montgomery form
|
||||
__host__ __device__ void fp_sub(Fp &c, const Fp &a, const Fp &b);
|
||||
|
||||
// Lazy addition: c = a + b, output in [0, 2p) for inputs in [0, p).
|
||||
// Skips the final conditional subtraction of fp_add.
|
||||
// Safe as input to fp_mont_mul (CIOS accepts [0, 2p)); NOT safe for final
|
||||
// results or as input to fp_sub/fp_neg which require [0, p) inputs.
|
||||
__host__ __device__ void fp_add_lazy(Fp &c, const Fp &a, const Fp &b);
|
||||
|
||||
// Lazy subtraction: c ≡ a - b (mod p), output in [0, 2p) for inputs in [0, p).
|
||||
// Adds p unconditionally, skipping the borrow-select of fp_sub.
|
||||
// Same safety concerns as fp_add_lazy.
|
||||
__host__ __device__ void fp_sub_lazy(Fp &c, const Fp &a, const Fp &b);
|
||||
|
||||
// Multiplication: c = a * b (without reduction)
|
||||
// "Raw" means the operation is performed without modular reduction modulo p.
|
||||
// The result is stored in double-width (2*FP_LIMBS limbs) and may be >= p.
|
||||
@@ -225,6 +236,11 @@ __host__ __device__ void fp_mont_reduce(Fp &c, const UNSIGNED_LIMB *a);
|
||||
// Both a and b are in Montgomery form, result is in Montgomery form
|
||||
__host__ __device__ void fp_mont_mul(Fp &c, const Fp &a, const Fp &b);
|
||||
|
||||
// Montgomery squaring: c = (a^2 * R_INV) mod p
|
||||
// Both input and output in Montgomery form.
|
||||
// On device uses a triangular MAD chain (fewer multiplications).
|
||||
__host__ __device__ void fp_mont_sqr(Fp &c, const Fp &a);
|
||||
|
||||
// CONVERSION: Input is normal form, output is Montgomery form
|
||||
__host__ __device__ void fp_to_montgomery(Fp &c, const Fp &a);
|
||||
|
||||
|
||||
@@ -72,6 +72,11 @@ __host__ __device__ void fp2_add(Fp2 &c, const Fp2 &a, const Fp2 &b);
|
||||
// Subtraction: c = a - b
|
||||
__host__ __device__ void fp2_sub(Fp2 &c, const Fp2 &a, const Fp2 &b);
|
||||
|
||||
// Lazy add/sub: each component output in [0, 2p) for inputs in [0, p).
|
||||
// Safe as input to fp2_mont_mul; same contract as fp_add_lazy / fp_sub_lazy.
|
||||
__host__ __device__ void fp2_add_lazy(Fp2 &c, const Fp2 &a, const Fp2 &b);
|
||||
__host__ __device__ void fp2_sub_lazy(Fp2 &c, const Fp2 &a, const Fp2 &b);
|
||||
|
||||
// Multiplication: c = a * b
|
||||
// (a0 + a1*i) * (b0 + b1*i) = (a0*b0 - a1*b1) + (a0*b1 + a1*b0)*i
|
||||
// NOTE: Assumes inputs are in normal form and converts to/from Montgomery
|
||||
@@ -84,7 +89,7 @@ __host__ __device__ void fp2_mont_mul(Fp2 &c, const Fp2 &a, const Fp2 &b);
|
||||
// Montgomery squaring: c = a^2 (all in Montgomery form)
|
||||
// Uses the complex-squaring identity: c0 = (a0+a1)(a0-a1), c1 = 2*a0*a1
|
||||
// Only 2 Fp multiplications vs 3 for fp2_mont_mul(c, a, a).
|
||||
// NOTE: All inputs and outputs are in Montgomery form (no conversions)
|
||||
// NOTE: All inputs should be in Montgomery form
|
||||
__host__ __device__ void fp2_mont_square(Fp2 &c, const Fp2 &a);
|
||||
|
||||
// Squaring: c = a^2
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
#ifndef HELPER_PROFILE
|
||||
#define HELPER_PROFILE
|
||||
|
||||
#ifdef USE_NVTOOLS
|
||||
#include <nvtx3/nvToolsExt.h>
|
||||
#endif
|
||||
|
||||
void cuda_nvtx_label_with_color(const char *name);
|
||||
void cuda_nvtx_pop();
|
||||
|
||||
#define PUSH_RANGE(name) \
|
||||
{ cuda_nvtx_label_with_color(name); }
|
||||
#define POP_RANGE() \
|
||||
{ cuda_nvtx_pop(); }
|
||||
|
||||
#endif
|
||||
@@ -3,6 +3,7 @@
|
||||
#include "curve.h"
|
||||
#include "fp.h"
|
||||
#include "fp2.h"
|
||||
#include "xyzz.h"
|
||||
|
||||
// ============================================================================
|
||||
// Unified Trait System for Elliptic Curve Points
|
||||
@@ -276,3 +277,65 @@ template <> struct SelectorChooser<G1Projective> {
|
||||
template <> struct SelectorChooser<G2Projective> {
|
||||
using Selection = Projective<G2Projective>;
|
||||
};
|
||||
|
||||
// XYZZ<T>: trait for XYZZ extended Jacobian operations (used in MSM)
|
||||
template <typename XYZZType> struct XYZZ;
|
||||
|
||||
template <> struct XYZZ<G1XYZZ> {
|
||||
using FieldType = Fp;
|
||||
using AffineType = G1Affine;
|
||||
using ProjectiveType = G1Projective;
|
||||
|
||||
__host__ __device__ static void point_at_infinity(G1XYZZ &p) {
|
||||
xyzz_infinity(p);
|
||||
}
|
||||
__host__ __device__ static bool is_infinity(const G1XYZZ &p) {
|
||||
return xyzz_is_infinity(p);
|
||||
}
|
||||
__host__ __device__ static void from_affine(G1XYZZ &xyzz,
|
||||
const G1Affine &affine) {
|
||||
xyzz_from_affine(xyzz, affine);
|
||||
}
|
||||
__host__ __device__ static void mixed_add(G1XYZZ &acc, const G1Affine &p) {
|
||||
xyzz_mixed_add(acc, p);
|
||||
}
|
||||
__host__ __device__ static void to_projective(G1Projective &proj,
|
||||
const G1XYZZ &xyzz) {
|
||||
xyzz_to_projective(proj, xyzz);
|
||||
}
|
||||
};
|
||||
|
||||
template <> struct XYZZ<G2XYZZ> {
|
||||
using FieldType = Fp2;
|
||||
using AffineType = G2Affine;
|
||||
using ProjectiveType = G2Projective;
|
||||
|
||||
__host__ __device__ static void point_at_infinity(G2XYZZ &p) {
|
||||
xyzz_infinity(p);
|
||||
}
|
||||
__host__ __device__ static bool is_infinity(const G2XYZZ &p) {
|
||||
return xyzz_is_infinity(p);
|
||||
}
|
||||
__host__ __device__ static void from_affine(G2XYZZ &xyzz,
|
||||
const G2Affine &affine) {
|
||||
xyzz_from_affine(xyzz, affine);
|
||||
}
|
||||
__host__ __device__ static void mixed_add(G2XYZZ &acc, const G2Affine &p) {
|
||||
xyzz_mixed_add(acc, p);
|
||||
}
|
||||
__host__ __device__ static void to_projective(G2Projective &proj,
|
||||
const G2XYZZ &xyzz) {
|
||||
xyzz_to_projective(proj, xyzz);
|
||||
}
|
||||
};
|
||||
|
||||
// XYZZFor<ProjectiveType>: maps a projective type to its XYZZ accumulator type
|
||||
template <typename ProjectiveType> struct XYZZFor;
|
||||
|
||||
template <> struct XYZZFor<G1Projective> {
|
||||
using Type = G1XYZZ;
|
||||
};
|
||||
|
||||
template <> struct XYZZFor<G2Projective> {
|
||||
using Type = G2XYZZ;
|
||||
};
|
||||
|
||||
58
backends/zk-cuda-backend/cuda/include/xyzz.h
Normal file
@@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
|
||||
#include "curve.h"
|
||||
#include "fp.h"
|
||||
#include "fp2.h"
|
||||
|
||||
// XYZZ Extended Jacobian Coordinates for BLS12-446
|
||||
|
||||
// G1 XYZZ point: (X, Y, ZZ, ZZZ) in Fp
|
||||
struct G1XYZZ {
|
||||
Fp X;
|
||||
Fp Y;
|
||||
Fp ZZ;
|
||||
Fp ZZZ;
|
||||
|
||||
// Default constructor: initializes to point at infinity (ZZ=ZZZ=0)
|
||||
__host__ __device__ G1XYZZ() {
|
||||
fp_zero(X);
|
||||
fp_zero(Y);
|
||||
fp_zero(ZZ);
|
||||
fp_zero(ZZZ);
|
||||
}
|
||||
};
|
||||
|
||||
// G2 XYZZ point: (X, Y, ZZ, ZZZ) in Fp2
|
||||
struct G2XYZZ {
|
||||
Fp2 X;
|
||||
Fp2 Y;
|
||||
Fp2 ZZ;
|
||||
Fp2 ZZZ;
|
||||
|
||||
// Default constructor: initializes to point at infinity (ZZ=ZZZ=0)
|
||||
__host__ __device__ G2XYZZ() {
|
||||
fp2_zero(X);
|
||||
fp2_zero(Y);
|
||||
fp2_zero(ZZ);
|
||||
fp2_zero(ZZZ);
|
||||
}
|
||||
};
|
||||
|
||||
// Initialize XYZZ from an affine point: X=x, Y=y, ZZ=ZZZ=1 (Montgomery form)
|
||||
__host__ __device__ void xyzz_from_affine(G1XYZZ &xyzz, const G1Affine &affine);
|
||||
__host__ __device__ void xyzz_from_affine(G2XYZZ &xyzz, const G2Affine &affine);
|
||||
|
||||
// Set XYZZ to the point at infinity: ZZ=ZZZ=0 (X,Y left undefined)
|
||||
__host__ __device__ void xyzz_infinity(G1XYZZ &p);
|
||||
__host__ __device__ void xyzz_infinity(G2XYZZ &p);
|
||||
|
||||
__host__ __device__ bool xyzz_is_infinity(const G1XYZZ &p);
|
||||
__host__ __device__ bool xyzz_is_infinity(const G2XYZZ &p);
|
||||
|
||||
__host__ __device__ void xyzz_mixed_add(G1XYZZ &acc, const G1Affine &p);
|
||||
__host__ __device__ void xyzz_mixed_add(G2XYZZ &acc, const G2Affine &p);
|
||||
|
||||
__host__ __device__ void xyzz_to_projective(G1Projective &proj,
|
||||
const G1XYZZ &xyzz);
|
||||
__host__ __device__ void xyzz_to_projective(G2Projective &proj,
|
||||
const G2XYZZ &xyzz);
|
||||
@@ -1413,7 +1413,7 @@ __host__ __device__ void projective_point_add(G1Projective &result,
|
||||
u = Y2Z1 - Y1Z2;
|
||||
|
||||
// uu = u^2
|
||||
fp_mont_mul(uu, u, u);
|
||||
fp_mont_sqr(uu, u);
|
||||
|
||||
// v = X2 * Z1 - X1 * Z2 = X2*Z1 - X1Z2
|
||||
Fp X2Z1;
|
||||
@@ -1428,7 +1428,7 @@ __host__ __device__ void projective_point_add(G1Projective &result,
|
||||
}
|
||||
|
||||
// vv = v^2
|
||||
fp_mont_mul(vv, v, v);
|
||||
fp_mont_sqr(vv, v);
|
||||
// vvv = v * vv
|
||||
fp_mont_mul(vvv, v, vv);
|
||||
|
||||
@@ -1568,9 +1568,9 @@ __host__ __device__ void projective_mixed_add(G1Projective &result,
|
||||
}
|
||||
|
||||
// uu = u^2
|
||||
fp_mont_mul(uu, u, u);
|
||||
fp_mont_sqr(uu, u);
|
||||
// vv = v^2
|
||||
fp_mont_mul(vv, v, v);
|
||||
fp_mont_sqr(vv, v);
|
||||
// vvv = v * vv
|
||||
fp_mont_mul(vvv, v, vv);
|
||||
|
||||
@@ -1692,7 +1692,7 @@ __host__ __device__ void projective_point_double(G1Projective &result,
|
||||
|
||||
// A = 3 * X^2
|
||||
Fp X_sq, A;
|
||||
fp_mont_mul(X_sq, p.X, p.X);
|
||||
fp_mont_sqr(X_sq, p.X);
|
||||
fp_mul3(A, X_sq);
|
||||
|
||||
// B = Y * Z
|
||||
@@ -1706,7 +1706,7 @@ __host__ __device__ void projective_point_double(G1Projective &result,
|
||||
|
||||
// D = A^2 - 8*C
|
||||
Fp A_sq, eight_C;
|
||||
fp_mont_mul(A_sq, A, A);
|
||||
fp_mont_sqr(A_sq, A);
|
||||
fp_mul8(eight_C, C);
|
||||
Fp D = A_sq - eight_C;
|
||||
|
||||
@@ -1716,14 +1716,16 @@ __host__ __device__ void projective_point_double(G1Projective &result,
|
||||
fp_double(result.X, BD);
|
||||
|
||||
// Y3 = A * (4*C - D) - 8 * Y^2 * B^2
|
||||
Fp four_C, A_times_diff;
|
||||
Fp four_C, four_C_minus_D, A_times_diff;
|
||||
fp_mul4(four_C, C);
|
||||
Fp four_C_minus_D = four_C - D;
|
||||
// Lazy sub: four_C_minus_D feeds fp_mont_mul, so skip the conditional
|
||||
// subtract and output in [0, 2p) instead of [0, p).
|
||||
fp_sub_lazy(four_C_minus_D, four_C, D);
|
||||
fp_mont_mul(A_times_diff, A, four_C_minus_D);
|
||||
|
||||
Fp Y_sq, B_sq, Y_sq_B_sq, eight_Y_sq_B_sq;
|
||||
fp_mont_mul(Y_sq, p.Y, p.Y);
|
||||
fp_mont_mul(B_sq, B, B);
|
||||
fp_mont_sqr(Y_sq, p.Y);
|
||||
fp_mont_sqr(B_sq, B);
|
||||
fp_mont_mul(Y_sq_B_sq, Y_sq, B_sq);
|
||||
fp_mul8(eight_Y_sq_B_sq, Y_sq_B_sq);
|
||||
result.Y = A_times_diff - eight_Y_sq_B_sq;
|
||||
@@ -1773,9 +1775,13 @@ __host__ __device__ void projective_point_double(G2Projective &result,
|
||||
fp2_double(result.X, BD);
|
||||
|
||||
// Y3 = A * (4*C - D) - 8 * Y^2 * B^2
|
||||
Fp2 four_C, A_times_diff;
|
||||
Fp2 four_C, four_C_minus_D, A_times_diff;
|
||||
fp2_mul4(four_C, C);
|
||||
Fp2 four_C_minus_D = four_C - D;
|
||||
|
||||
// we can't use lazy sub here because for fp2 with Karatsuba path we will end
|
||||
// up with values in [0, 4p) instead of [0, 2p), which would break the final
|
||||
// result
|
||||
fp2_sub(four_C_minus_D, four_C, D);
|
||||
fp2_mont_mul(A_times_diff, A, four_C_minus_D);
|
||||
|
||||
Fp2 Y_sq, B_sq, Y_sq_B_sq, eight_Y_sq_B_sq;
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
#include "helper_profile.cuh"
|
||||
#include <stdint.h>
|
||||
|
||||
// Adler-32-style checksum of a NUL-terminated byte string.
// Two running sums are kept modulo 65521; each input byte is doubled before
// being added to the low sum. The result packs the high sum into the upper
// 16 bits and the low sum into the lower 16 bits.
uint32_t adler32(const unsigned char *data) {
  const uint32_t MOD_ADLER = 65521;
  uint32_t low = 1;
  uint32_t high = 0;
  for (const unsigned char *cursor = data; *cursor != 0; ++cursor) {
    low = (low + *cursor * 2) % MOD_ADLER;
    high = (high + low) % MOD_ADLER;
  }
  return (high << 16) | low;
}
|
||||
|
||||
// Pushes an NVTX range labelled `name`. The range colour is derived from a
// hash of the label, so a given name always maps to the same colour.
// Compiles to a no-op when USE_NVTOOLS is not defined.
void cuda_nvtx_label_with_color(const char *name) {
#ifdef USE_NVTOOLS
  // Kept unsigned throughout: both the hash and the final ARGB value use the
  // top bit, which does not fit in a signed int.
  const uint32_t hash = adler32((const unsigned char *)name);
  uint32_t r = hash & 0x000000ff;
  uint32_t g = (hash & 0x000ff000) >> 12;
  uint32_t b = (hash & 0x0ff00000) >> 20;
  // All three channels are below 64: scale them up.
  if (r < 64 && g < 64 && b < 64) {
    r = r * 3;
    g = g * 3 + 64;
    b = b * 4;
  }

  // 0xff in the top byte, then R, G, B (colour type is NVTX_COLOR_ARGB).
  const uint32_t argb = 0xff000000 | (r << 16) | (g << 8) | b;
  nvtxEventAttributes_t eventAttrib = {0};
  eventAttrib.version = NVTX_VERSION;
  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  eventAttrib.colorType = NVTX_COLOR_ARGB;
  eventAttrib.color = argb;
  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
  eventAttrib.message.ascii = name;
  nvtxRangePushEx(&eventAttrib);
#else
  (void)name; // unused when NVTX support is compiled out
#endif
}
|
||||
// Pops the current NVTX range via nvtxRangePop.
// Compiles to a no-op when USE_NVTOOLS is not defined.
void cuda_nvtx_pop() {
#ifdef USE_NVTOOLS
  nvtxRangePop();
#endif
}
|
||||
@@ -22,20 +22,20 @@ template <typename AffineType> struct Phase1KernelLaunchParams {
|
||||
Phase1KernelLaunchParams(uint32_t n, uint32_t requested_threads_per_block,
|
||||
uint32_t bucket_count, uint32_t gpu_index) {
|
||||
// Shared memory layout:
|
||||
// - bucket_counts: [bucket_count] * sizeof(uint32_t)
|
||||
// - bucket_counts: [bucket_count] * sizeof(uint32_t)
|
||||
// - bucket_offsets: [bucket_count] * sizeof(uint32_t)
|
||||
// - sorted_points: [threads_per_block] * sizeof(AffineType)
|
||||
// - sorted_buckets: [threads_per_block] * sizeof(uint32_t)
|
||||
constexpr size_t per_thread_shared_mem =
|
||||
sizeof(AffineType) + sizeof(uint32_t); // sorted_points + sorted_buckets
|
||||
const size_t fixed_shared_mem =
|
||||
2 * bucket_count * sizeof(uint32_t); // bucket_counts + bucket_offsets
|
||||
// - sorted_points: [threads_per_block] * sizeof(AffineType)
|
||||
//
|
||||
// sorted_points starts at 2*bucket_count uint32_t slots. Since
|
||||
// bucket_count = 2^(c-1)+1 (always odd), 2*bucket_count is always even,
|
||||
// so the byte offset 2*bucket_count*4 is always a multiple of 8.
|
||||
// No alignment padding is needed.
|
||||
const size_t fixed_shared_mem = 2 * bucket_count * sizeof(uint32_t);
|
||||
constexpr size_t per_thread_shared_mem = sizeof(AffineType);
|
||||
|
||||
// Query the actual per-block shared memory limit from the device
|
||||
const uint32_t max_shared_mem_per_block =
|
||||
cuda_get_max_shared_memory_per_block(gpu_index);
|
||||
|
||||
// Calculate maximum threads that fit within shared memory limit
|
||||
const size_t available_shared_mem =
|
||||
(max_shared_mem_per_block > fixed_shared_mem)
|
||||
? (max_shared_mem_per_block - fixed_shared_mem)
|
||||
@@ -43,7 +43,6 @@ template <typename AffineType> struct Phase1KernelLaunchParams {
|
||||
const uint32_t max_threads_for_shared_mem =
|
||||
available_shared_mem / per_thread_shared_mem;
|
||||
|
||||
// Cap threads_per_block to respect shared memory limit
|
||||
adjusted_threads_per_block =
|
||||
std::min(requested_threads_per_block, max_threads_for_shared_mem);
|
||||
|
||||
@@ -52,10 +51,7 @@ template <typename AffineType> struct Phase1KernelLaunchParams {
|
||||
"kernel launch (max_shared=%u, fixed=%zu)",
|
||||
max_shared_mem_per_block, fixed_shared_mem);
|
||||
|
||||
// Calculate number of blocks per window
|
||||
num_blocks_per_window = CEIL_DIV(n, adjusted_threads_per_block);
|
||||
|
||||
// Calculate actual shared memory requirement
|
||||
accum_shared_mem =
|
||||
fixed_shared_mem + adjusted_threads_per_block * per_thread_shared_mem;
|
||||
}
|
||||
@@ -67,15 +63,12 @@ template <typename ProjectiveType> struct Phase2KernelLaunchParams {
|
||||
size_t shared_mem;
|
||||
|
||||
Phase2KernelLaunchParams(uint32_t requested_threads, uint32_t gpu_index) {
|
||||
// Query the actual per-block shared memory limit from the device
|
||||
const uint32_t max_shared_mem_per_block =
|
||||
cuda_get_max_shared_memory_per_block(gpu_index);
|
||||
|
||||
// Calculate maximum threads that fit within shared memory limit
|
||||
const uint32_t max_threads_for_shared =
|
||||
max_shared_mem_per_block / sizeof(ProjectiveType);
|
||||
|
||||
// Cap threads to respect shared memory limit
|
||||
uint32_t threads = std::min(requested_threads, max_threads_for_shared);
|
||||
threads = std::min(threads, static_cast<uint32_t>(KERNEL_THREADS_MAX));
|
||||
|
||||
@@ -84,15 +77,11 @@ template <typename ProjectiveType> struct Phase2KernelLaunchParams {
|
||||
while (pow2_threads < threads)
|
||||
pow2_threads *= 2;
|
||||
|
||||
// After rounding to power of 2, verify shared memory doesn't exceed device
|
||||
// limit
|
||||
if (safe_mul_sizeof<ProjectiveType>(static_cast<size_t>(pow2_threads)) >
|
||||
max_shared_mem_per_block) {
|
||||
pow2_threads /= 2;
|
||||
}
|
||||
adjusted_threads = pow2_threads;
|
||||
|
||||
// Calculate actual shared memory requirement
|
||||
shared_mem =
|
||||
safe_mul_sizeof<ProjectiveType>(static_cast<size_t>(adjusted_threads));
|
||||
}
|
||||
@@ -153,19 +142,96 @@ __device__ __forceinline__ uint32_t extract_window_bigint(
|
||||
window_size);
|
||||
}
|
||||
|
||||
// Kernel: Accumulate ALL windows in parallel using SORT-THEN-REDUCE
|
||||
// Grid: (num_windows * num_blocks_per_window) blocks
|
||||
// Each block processes points for ONE window
|
||||
// Uses counting sort by bucket, then parallel tree reduction per bucket
|
||||
// Uses mixed addition (affine + projective) to save 3 field muls per add
|
||||
// ============================================================================
|
||||
// Preprocessing kernel: scalar → signed-digit representation
|
||||
// ============================================================================
|
||||
//
|
||||
// Converts each scalar into balanced signed-digit form before the main MSM,
|
||||
// eliminating any need for a correction term.
|
||||
//
|
||||
// For each window w (LSB-first, w=0 = least significant), with carry from the
|
||||
// previous window:
|
||||
//
|
||||
// effective = raw_digit + carry
|
||||
// if effective > half: digit = effective - 2^c (negative, or zero when effective == 2^c), carry = 1
|
||||
// else: digit = effective (zero or positive), carry = 0
|
||||
//
|
||||
// where half = 2^(c-1), c = window_size.
|
||||
//
|
||||
// Result: digit ∈ {-(half-1), …, half}, so |digit| ≤ half = bucket_count - 1.
|
||||
// Positive digit → add point P to bucket[digit].
|
||||
// Negative digit → add -P (Y-negated) to bucket[|digit|].
|
||||
// Zero digit → skip.
|
||||
//
|
||||
// Output layout: d_signed_digits[window_idx * n + point_idx], where window_idx
|
||||
// is in Horner (MSB-first) order so the main kernel indexes it directly.
|
||||
// This layout is window-major (one contiguous row of n digits per window):
|
||||
// threads in a warp (consecutive point_idx) read consecutive entries of the
|
||||
// same window row → coalesced reads in the main kernel.
|
||||
//
|
||||
// num_windows is set to (scalar_bits + window_size) / window_size so there is
|
||||
// always at least one partial or empty window at the top to absorb any carry
|
||||
// propagated out of the last full window.
|
||||
// Rewrites each scalar as a sequence of balanced signed window digits.
// One thread handles one scalar; threads with index >= n exit immediately.
//
// d_signed_digits: output, indexed as [row * n + point_idx] where
//                  row = num_windows - 1 - w for LSB-first window w.
// d_scalars:       input scalars, one per point.
// n:               number of scalars.
// num_windows:     number of windows written per scalar.
// window_size:     window width c in bits; digits are stored as int8_t.
__global__ void kernel_preprocess_signed_digits(
    int8_t *__restrict__ d_signed_digits, // [num_windows * n], Horner-ordered
    const Scalar *__restrict__ d_scalars, uint32_t n, uint32_t num_windows,
    uint32_t window_size) {
  const uint32_t point_idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (point_idx >= n)
    return;

  const uint32_t full = 1u << window_size;       // 2^c
  const uint32_t half = 1u << (window_size - 1); // 2^(c-1)

  const Scalar &scalar = d_scalars[point_idx];

  // Walk windows from least to most significant so the carry produced by one
  // window is consumed by the next.
  uint32_t carry = 0;
  for (uint32_t w = 0; w < num_windows; ++w) {
    const uint32_t effective =
        extract_window_bigint(scalar, w, window_size) + carry;

    // Above half: emit effective - 2^c and carry one into the next window.
    const bool borrow = effective > half;
    int8_t digit;
    if (borrow) {
      digit = -(int8_t)(full - effective);
    } else {
      digit = (int8_t)effective;
    }
    carry = borrow ? 1u : 0u;

    // Store reversed: LSB-first window w lands in row num_windows - 1 - w.
    const uint32_t row = num_windows - 1 - w;
    d_signed_digits[row * (size_t)n + point_idx] = digit;
  }
  // The final carry is not stored; the caller is expected to choose
  // num_windows so that it is always zero here.
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 1: Accumulate all windows in parallel using sort-then-reduce
|
||||
// ============================================================================
|
||||
//
|
||||
// Grid: (num_windows * num_blocks_per_window) blocks.
|
||||
// Each block processes a slice of points for ONE window.
|
||||
//
|
||||
// The signed digit for each point has already been computed by
|
||||
// kernel_preprocess_signed_digits, which stores digits only and does not touch
|
||||
// the points. For a negative digit we negate the point's Y inline here before
|
||||
// scattering and use |digit| as the bucket index, which is cheaper than
|
||||
// reading a separate flag array.
|
||||
//
|
||||
// Shared memory layout (no sorted_buckets — not needed without correction):
|
||||
// [bucket_counts (bc)] [bucket_offsets (bc)] [sorted_points (blockDim.x)]
|
||||
template <typename AffineType, typename ProjectiveType>
|
||||
__global__ void kernel_accumulate_all_windows(
|
||||
ProjectiveType *__restrict__ all_block_buckets, // [num_windows * num_blocks
|
||||
// * bucket_count]
|
||||
const AffineType *__restrict__ points, const Scalar *__restrict__ scalars,
|
||||
const AffineType *__restrict__ points,
|
||||
const int8_t *__restrict__ d_signed_digits, // [num_windows * n]
|
||||
uint32_t num_points, uint32_t num_windows, uint32_t num_blocks_per_window,
|
||||
uint32_t window_size, uint32_t bucket_count) {
|
||||
uint32_t bucket_count) {
|
||||
using ProjectivePoint = Projective<ProjectiveType>;
|
||||
using XYZZType = typename XYZZFor<ProjectiveType>::Type;
|
||||
using XYZZPoint = XYZZ<XYZZType>;
|
||||
|
||||
const uint32_t window_idx = blockIdx.x / num_blocks_per_window;
|
||||
const uint32_t block_within_window = blockIdx.x % num_blocks_per_window;
|
||||
@@ -173,115 +239,89 @@ __global__ void kernel_accumulate_all_windows(
|
||||
if (window_idx >= num_windows)
|
||||
return;
|
||||
|
||||
// Output offset for this block's buckets
|
||||
uint32_t bucket_offset =
|
||||
(window_idx * num_blocks_per_window + block_within_window) * bucket_count;
|
||||
ProjectiveType *my_buckets = all_block_buckets + bucket_offset;
|
||||
|
||||
// Shared memory layout (register-based optimization):
|
||||
// - bucket_counts: [bucket_count] for counting sort
|
||||
// - bucket_offsets: [bucket_count] for prefix sums
|
||||
// - sorted_points: [blockDim.x] for sorted points (AFFINE - smaller!)
|
||||
// - sorted_buckets: [blockDim.x] for sorted bucket indices
|
||||
// NOTE: shared_buckets removed - using register-based accumulation instead
|
||||
// Shared memory: [bucket_counts][bucket_offsets][sorted_points]
|
||||
extern __shared__ char shared_mem[];
|
||||
auto *bucket_counts_arr = reinterpret_cast<uint32_t *>(shared_mem);
|
||||
auto *bucket_offsets = bucket_counts_arr + bucket_count;
|
||||
// Store affine points instead of projective - saves shared memory
|
||||
auto *bucket_offsets_arr = bucket_counts_arr + bucket_count;
|
||||
auto *sorted_points =
|
||||
reinterpret_cast<AffineType *>(bucket_offsets + bucket_count);
|
||||
auto *sorted_buckets =
|
||||
reinterpret_cast<uint32_t *>(sorted_points + blockDim.x);
|
||||
reinterpret_cast<AffineType *>(bucket_offsets_arr + bucket_count);
|
||||
|
||||
// Initialize bucket counts
|
||||
if (threadIdx.x < bucket_count) {
|
||||
bucket_counts_arr[threadIdx.x] = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Each thread loads its affine point and computes bucket index
|
||||
// No conversion to projective here - we keep points affine
|
||||
uint32_t point_idx = threadIdx.x + block_within_window * blockDim.x;
|
||||
// Each thread reads its signed digit and loads its affine point.
|
||||
// Negative digit: negate Y and use |digit| as bucket index.
|
||||
// Zero digit: skip (my_bucket = 0).
|
||||
const uint32_t point_idx = threadIdx.x + block_within_window * blockDim.x;
|
||||
AffineType my_point;
|
||||
uint32_t my_bucket = 0;
|
||||
bool valid = point_idx < num_points;
|
||||
const bool valid = point_idx < num_points;
|
||||
|
||||
if (valid) {
|
||||
uint32_t scalar_window = num_windows - 1 - window_idx;
|
||||
my_bucket =
|
||||
extract_window_bigint(scalars[point_idx], scalar_window, window_size);
|
||||
my_point = points[point_idx]; // Keep as affine!
|
||||
const int8_t sd =
|
||||
d_signed_digits[window_idx * (size_t)num_points + point_idx];
|
||||
my_point = points[point_idx];
|
||||
|
||||
if (sd < 0) {
|
||||
my_point.y = -my_point.y; // negate Y for negative digit
|
||||
my_bucket = (uint32_t)(-sd);
|
||||
} else {
|
||||
my_bucket = (uint32_t)sd; // 0 means skip
|
||||
}
|
||||
}
|
||||
|
||||
// Count points per bucket (atomic within block)
|
||||
if (valid && my_bucket > 0) {
|
||||
atomicAdd(&bucket_counts_arr[my_bucket], 1);
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Compute prefix sums for bucket offsets
|
||||
// Thread 0 computes prefix sums (bucket start offsets).
|
||||
if (threadIdx.x == 0) {
|
||||
uint32_t offset = 0;
|
||||
for (uint32_t b = 0; b < bucket_count; b++) {
|
||||
bucket_offsets[b] = offset;
|
||||
bucket_offsets_arr[b] = offset;
|
||||
offset += bucket_counts_arr[b];
|
||||
bucket_counts_arr[b] = 0; // Reset for scatter phase
|
||||
bucket_counts_arr[b] = 0; // reset to zero for use as a scatter counter
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Scatter affine points to sorted positions
|
||||
// Scatter: each thread writes its (possibly Y-negated) point into the sorted
|
||||
// position for its bucket.
|
||||
if (valid && my_bucket > 0) {
|
||||
uint32_t pos =
|
||||
bucket_offsets[my_bucket] + atomicAdd(&bucket_counts_arr[my_bucket], 1);
|
||||
sorted_points[pos] = my_point; // Store affine point directly
|
||||
sorted_buckets[pos] = my_bucket;
|
||||
uint32_t pos = bucket_offsets_arr[my_bucket] +
|
||||
atomicAdd(&bucket_counts_arr[my_bucket], 1);
|
||||
sorted_points[pos] = my_point;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Parallel tree reduction within each bucket using MIXED ADDITION
|
||||
// Each thread is assigned to reduce points in one bucket
|
||||
// REGISTER-BASED: Accumulate in registers, write directly to global memory
|
||||
// Bucket reduction: each thread owns one or more buckets (stride by
|
||||
// blockDim.x). Points for bucket b occupy sorted_points[start..start+count].
|
||||
for (uint32_t bucket = threadIdx.x + 1; bucket < bucket_count;
|
||||
bucket += blockDim.x) {
|
||||
uint32_t start = bucket_offsets[bucket];
|
||||
uint32_t count = bucket_counts_arr[bucket];
|
||||
const uint32_t start = bucket_offsets_arr[bucket];
|
||||
const uint32_t count = bucket_counts_arr[bucket];
|
||||
|
||||
if (count == 0) {
|
||||
// Empty bucket - write infinity point
|
||||
ProjectivePoint::point_at_infinity(my_buckets[bucket]);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Tree reduction for this bucket using mixed addition
|
||||
// Accumulate in registers (compiler will optimize this)
|
||||
ProjectiveType sum;
|
||||
// Initialize sum from first affine point
|
||||
const AffineType &first_point = sorted_points[start];
|
||||
if (first_point.infinity) {
|
||||
ProjectivePoint::point_at_infinity(sum);
|
||||
} else {
|
||||
ProjectivePoint::affine_to_projective(sum, first_point);
|
||||
XYZZType sum;
|
||||
XYZZPoint::point_at_infinity(sum);
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
XYZZPoint::mixed_add(sum, sorted_points[start + i]);
|
||||
}
|
||||
|
||||
// Use mixed addition for remaining points (saves 3 muls per add!)
|
||||
for (uint32_t i = 1; i < count; i++) {
|
||||
const AffineType &pt = sorted_points[start + i];
|
||||
if (!pt.infinity) {
|
||||
if (ProjectivePoint::is_infinity(sum)) {
|
||||
ProjectivePoint::affine_to_projective(sum, pt);
|
||||
} else {
|
||||
ProjectiveType temp;
|
||||
// MIXED ADDITION: projective + affine (saves 3 field muls)
|
||||
ProjectivePoint::mixed_add(temp, sum, pt);
|
||||
ProjectivePoint::point_copy(sum, temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write directly from registers to global memory (no shared memory
|
||||
// intermediate)
|
||||
ProjectivePoint::point_copy(my_buckets[bucket], sum);
|
||||
ProjectiveType proj;
|
||||
XYZZPoint::to_projective(proj, sum);
|
||||
ProjectivePoint::point_copy(my_buckets[bucket], proj);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -349,7 +389,6 @@ __global__ void kernel_reduce_all_windows(
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
// Thread 0 writes final bucket value
|
||||
if (threadIdx.x == 0) {
|
||||
uint32_t out_idx = window_idx * num_buckets + bucket_idx;
|
||||
ProjectivePoint::point_copy(all_final_buckets[out_idx], shared_sums[0]);
|
||||
@@ -358,7 +397,7 @@ __global__ void kernel_reduce_all_windows(
|
||||
|
||||
// Kernel: Compute window sums for ALL windows in parallel
|
||||
// Grid: num_windows blocks
|
||||
// Each block computes the window sum: sum(i * bucket[i]) for i=1..15
|
||||
// Each block computes the window sum: sum(i * bucket[i]) for i=1..n, where n = bucket_count - 1
|
||||
template <typename ProjectiveType>
|
||||
__global__ void kernel_compute_window_sums(
|
||||
ProjectiveType *__restrict__ window_sums, // [num_windows]
|
||||
@@ -427,7 +466,6 @@ __global__ void kernel_compute_window_sums(
|
||||
break;
|
||||
}
|
||||
|
||||
// Thread 0 writes window sum
|
||||
if (tid == 0) {
|
||||
ProjectivePoint::point_copy(window_sums[window_idx], work[0]);
|
||||
}
|
||||
@@ -454,7 +492,6 @@ void horner_combine_cpu(ProjectiveType &result,
|
||||
ProjectiveType acc;
|
||||
ProjectivePoint::point_at_infinity(acc);
|
||||
|
||||
// Process from MSB (window 0) to LSB (window num_windows-1)
|
||||
for (uint32_t w = 0; w < num_windows; w++) {
|
||||
const ProjectiveType &ws = window_sums[w];
|
||||
ProjectiveType temp;
|
||||
@@ -463,7 +500,6 @@ void horner_combine_cpu(ProjectiveType &result,
|
||||
if (ProjectivePoint::is_infinity(acc)) {
|
||||
ProjectivePoint::point_copy(acc, ws);
|
||||
} else {
|
||||
// acc = acc * 2^window_size + ws
|
||||
for (uint32_t i = 0; i < window_size; i++) {
|
||||
ProjectivePoint::projective_double(temp, acc);
|
||||
ProjectivePoint::point_copy(acc, temp);
|
||||
@@ -472,7 +508,6 @@ void horner_combine_cpu(ProjectiveType &result,
|
||||
ProjectivePoint::point_copy(acc, temp);
|
||||
}
|
||||
} else if (!ProjectivePoint::is_infinity(acc)) {
|
||||
// Window sum is infinity but accumulator is not -- still shift left
|
||||
for (uint32_t i = 0; i < window_size; i++) {
|
||||
ProjectivePoint::projective_double(temp, acc);
|
||||
ProjectivePoint::point_copy(acc, temp);
|
||||
@@ -492,6 +527,14 @@ void horner_combine_cpu(ProjectiveType &result,
|
||||
// d_scratch: caller-provided device buffer for intermediate bucket arrays and
|
||||
// window sums. The caller is responsible for allocating and freeing this
|
||||
// buffer.
|
||||
//
|
||||
// Scratch layout (all ProjectiveType elements):
|
||||
// d_all_block_buckets [num_windows * num_blocks * bucket_count]
|
||||
// d_all_final_buckets [num_windows * bucket_count]
|
||||
// d_window_sums [num_windows]
|
||||
//
|
||||
// d_signed_digits is allocated internally (stream-ordered) and freed before
|
||||
// the host memcpy, so it does not appear in the caller's scratch buffer.
|
||||
template <typename AffineType, typename ProjectiveType>
|
||||
void point_msm_pippenger_impl_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
ProjectiveType *h_result,
|
||||
@@ -513,35 +556,29 @@ void point_msm_pippenger_impl_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
|
||||
// Calculate number of windows based on scalar bit width
|
||||
// Compute number of windows. We use (scalar_bits + window_size) / window_size
|
||||
// instead of the usual ceil formula so that there is always at least one
|
||||
// partial window at the top. This guarantees the preprocessing kernel's carry
|
||||
// propagation never overflows the digit array, regardless of window size.
|
||||
const uint32_t total_bits = Scalar::NUM_BITS;
|
||||
const uint32_t num_windows = CEIL_DIV(total_bits, window_size);
|
||||
const uint32_t num_windows = (total_bits + window_size) / window_size;
|
||||
|
||||
// Calculate kernel launch parameters respecting shared memory limits
|
||||
Phase1KernelLaunchParams<AffineType> launch_params(n, threads_per_block,
|
||||
bucket_count, gpu_index);
|
||||
|
||||
// Scratch space layout for ALL-WINDOWS-PARALLEL:
|
||||
// - all_block_buckets: [num_windows * num_blocks * bucket_count]
|
||||
// - all_final_buckets: [num_windows * bucket_count]
|
||||
// - window_sums: [num_windows]
|
||||
// Compute element counts in size_t (64-bit) so that intermediate products
|
||||
// of uint32_t inputs don't silently wrap at 2^32 before reaching the
|
||||
// explicit overflow check below (which multiplies by sizeof(ProjectiveType))
|
||||
const size_t all_block_buckets_size = static_cast<size_t>(num_windows) *
|
||||
launch_params.num_blocks_per_window *
|
||||
bucket_count;
|
||||
// Scratch layout
|
||||
const size_t num_blocks = launch_params.num_blocks_per_window;
|
||||
const size_t all_block_buckets_size =
|
||||
static_cast<size_t>(num_windows) * num_blocks * bucket_count;
|
||||
const size_t all_final_buckets_size =
|
||||
static_cast<size_t>(num_windows) * bucket_count;
|
||||
const size_t total_scratch =
|
||||
all_block_buckets_size + all_final_buckets_size + num_windows;
|
||||
|
||||
// Partition the caller-provided scratch buffer into sub-regions
|
||||
ProjectiveType *d_all_block_buckets = d_scratch;
|
||||
ProjectiveType *d_all_final_buckets = d_scratch + all_block_buckets_size;
|
||||
ProjectiveType *d_window_sums = d_all_final_buckets + all_final_buckets_size;
|
||||
|
||||
// Clear all scratch space
|
||||
const uint32_t clear_blocks = CEIL_DIV(total_scratch, KERNEL_THREADS_MAX);
|
||||
PANIC_IF_FALSE(clear_blocks * KERNEL_THREADS_MAX >= total_scratch,
|
||||
"kernel_clear_buckets: insufficient threads (%zu) to clear "
|
||||
@@ -553,11 +590,27 @@ void point_msm_pippenger_impl_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
total_scratch);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// Phase 1: Accumulate ALL windows in parallel (SINGLE kernel launch!)
|
||||
// Preprocessing: convert scalars to signed-digit form.
|
||||
// Allocated stream-ordered; freed before the CPU phase once Phase 1 is done.
|
||||
int8_t *d_signed_digits = nullptr;
|
||||
const size_t signed_digits_bytes =
|
||||
static_cast<size_t>(num_windows) * n * sizeof(int8_t);
|
||||
cudaMallocAsync(&d_signed_digits, signed_digits_bytes, stream);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
constexpr uint32_t preprocess_threads = 128;
|
||||
const uint32_t preprocess_blocks = CEIL_DIV(n, preprocess_threads);
|
||||
kernel_preprocess_signed_digits<<<preprocess_blocks, preprocess_threads, 0,
|
||||
stream>>>(d_signed_digits, d_scalars, n,
|
||||
num_windows, window_size);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// Phase 1: Accumulate ALL windows in parallel.
|
||||
const uint32_t total_accum_blocks =
|
||||
num_windows * launch_params.num_blocks_per_window;
|
||||
PANIC_IF_FALSE(
|
||||
total_accum_blocks * bucket_count <= all_block_buckets_size,
|
||||
static_cast<size_t>(total_accum_blocks) * bucket_count <=
|
||||
all_block_buckets_size,
|
||||
"kernel_accumulate_all_windows: max write index (%zu) exceeds buffer "
|
||||
"(%zu)",
|
||||
static_cast<size_t>(total_accum_blocks) * bucket_count,
|
||||
@@ -565,11 +618,15 @@ void point_msm_pippenger_impl_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
kernel_accumulate_all_windows<AffineType, ProjectiveType>
|
||||
<<<total_accum_blocks, launch_params.adjusted_threads_per_block,
|
||||
launch_params.accum_shared_mem, stream>>>(
|
||||
d_all_block_buckets, d_points, d_scalars, n, num_windows,
|
||||
launch_params.num_blocks_per_window, window_size, bucket_count);
|
||||
d_all_block_buckets, d_points, d_signed_digits, n, num_windows,
|
||||
launch_params.num_blocks_per_window, bucket_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// Phase 2: Reduce ALL windows' buckets in parallel (SINGLE kernel launch!)
|
||||
// d_signed_digits is no longer needed after Phase 1.
|
||||
cudaFreeAsync(d_signed_digits, stream);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// Phase 2: Reduce ALL windows' buckets in parallel.
|
||||
const uint32_t total_reduce_blocks = num_windows * bucket_count;
|
||||
Phase2KernelLaunchParams<ProjectiveType> reduce_params(
|
||||
launch_params.num_blocks_per_window, gpu_index);
|
||||
@@ -584,9 +641,7 @@ void point_msm_pippenger_impl_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
launch_params.num_blocks_per_window, bucket_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// Phase 3: Compute window sums in parallel (SINGLE kernel launch!)
|
||||
// Round up to next multiple of 32 (warp size) for efficient scheduling.
|
||||
// The kernel already has `if (tid < n)` bounds checks for the excess threads.
|
||||
// Phase 3: Compute window sums in parallel.
|
||||
const uint32_t combine_threads = ((bucket_count - 1) + 31) & ~31u;
|
||||
const size_t combine_shared_mem =
|
||||
safe_mul_sizeof<ProjectiveType>(static_cast<size_t>(combine_threads));
|
||||
@@ -600,11 +655,7 @@ void point_msm_pippenger_impl_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
d_window_sums, d_all_final_buckets, num_windows, bucket_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// Phase 4: CPU Horner combine, result written directly to host pointer
|
||||
//
|
||||
// The Horner loop is inherently sequential. A single CPU core is much faster
|
||||
// than a single GPU thread for this workload, so we run Horner on the CPU
|
||||
// and write the result directly to the caller's host pointer.
|
||||
// Phase 4: CPU Horner combine, result written directly to host pointer.
|
||||
std::vector<ProjectiveType> h_window_sums(num_windows);
|
||||
cuda_memcpy_async_to_cpu(
|
||||
h_window_sums.data(), d_window_sums,
|
||||
@@ -619,32 +670,48 @@ void point_msm_pippenger_impl_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
// Dynamic Window Size Selection
|
||||
// ============================================================================
|
||||
|
||||
// Select optimal window size for G1 MSM based on input count
|
||||
// Trade-off: larger windows = fewer Horner doublings but more bucket work
|
||||
// Optimal window size grows with log(n) approximately
|
||||
// Select optimal window size for G1 MSM based on input count.
|
||||
//
|
||||
// Signed-digit preprocessing keeps the same window size as unsigned Pippenger
|
||||
// but halves the bucket count: bucket_count = 2^(c-1) + 1 instead of 2^c.
|
||||
// Fewer buckets speed up Phase 2 (cross-block reduce) and Phase 3 (window sum
|
||||
// suffix-scan), with no correction term overhead.
|
||||
//
|
||||
// n = bucket_count - 1 must be a power of 2 for kernel_compute_window_sums:
|
||||
// c=4 → half=8, bc=9, n=8=2^3 ✓
|
||||
// c=5 → half=16, bc=17, n=16=2^4 ✓
|
||||
// c=6 → half=32, bc=33, n=32=2^5 ✓
|
||||
inline void get_g1_window_params(uint32_t n, uint32_t &window_size,
|
||||
uint32_t &bucket_count) {
|
||||
// Signed-digit: same c as original unsigned Pippenger, but bucket_count
|
||||
// = 2^(c-1)+1 instead of 2^c. This halves Phase 2/3 work at no extra cost.
|
||||
if (n <= MSM_G1_SMALL_THRESHOLD) {
|
||||
window_size = 4;
|
||||
bucket_count = (1u << 4); // 2^window_size
|
||||
bucket_count = (1u << 3) + 1; // 9 = 2^3+1
|
||||
} else if (n <= MSM_G1_MEDIUM_THRESHOLD) {
|
||||
window_size = 5;
|
||||
bucket_count = (1u << 5);
|
||||
bucket_count = (1u << 4) + 1; // 17 = 2^4+1
|
||||
} else {
|
||||
window_size = 6;
|
||||
bucket_count = (1u << 6);
|
||||
bucket_count = (1u << 5) + 1; // 33 = 2^5+1
|
||||
}
|
||||
}
|
||||
|
||||
// Select optimal window size for G2 MSM based on input count
|
||||
// G2 has 2x more expensive field ops, but empirical testing shows
|
||||
// that the 5-bit fixed window size is optimal - larger windows cause
|
||||
// too much bucket overhead that exceeds the Horner doubling savings
|
||||
// Select optimal window size for G2 MSM.
|
||||
//
|
||||
// G2 Phase 1 is memory-bandwidth bound: only 1 block fits per SM (shared mem
|
||||
// limit). With c=5, bc=17 only 16 threads are active in the bucket reduce
|
||||
// (half-warp), causing regression vs the original bc=32 (31 active).
|
||||
//
|
||||
// c=6, bc=33 gives exactly 32 active threads (full first warp, same as c=6
|
||||
// G1), keeps Phase 1 cost identical to the original bc=32, and reduces windows
|
||||
// from 64 → 54 (15.6% fewer). 54×6=324 > 320 so the last window is partial
|
||||
// and carry never overflows — no extra window needed.
|
||||
inline void get_g2_window_params(uint32_t n, uint32_t &window_size,
|
||||
uint32_t &bucket_count) {
|
||||
(void)n; // Fixed window size works best for G2
|
||||
window_size = MSM_G2_WINDOW_SIZE; // 5-bit windows
|
||||
bucket_count = MSM_G2_BUCKET_COUNT; // 32 buckets
|
||||
(void)n;
|
||||
window_size = 6;
|
||||
bucket_count = (1u << 5) + 1; // 33 = 2^5+1
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
@@ -653,19 +720,18 @@ inline void get_g2_window_params(uint32_t n, uint32_t &window_size,
|
||||
// Computes the exact scratch buffer size (in bytes) needed by
|
||||
// point_msm_pippenger_impl_async for a given input count n. The formula must
|
||||
// stay in sync with the scratch partitioning inside that function:
|
||||
// all_block_buckets: num_windows * num_blocks_per_window * bucket_count
|
||||
// all_final_buckets: num_windows * bucket_count
|
||||
// window_sums: num_windows
|
||||
// Factoring this into a helper avoids duplicating the formula in every caller
|
||||
// and prevents the buffer-underallocation bug that occurs when callers use
|
||||
// ad-hoc estimates.
|
||||
// all_block_buckets: num_windows * num_blocks * bucket_count
|
||||
// all_final_buckets: num_windows * bucket_count
|
||||
// window_sums: num_windows
|
||||
//
|
||||
// d_signed_digits is allocated internally (stream-ordered) and is NOT included
|
||||
// here; callers only need to provide the ProjectiveType scratch buffer.
|
||||
template <typename AffineType, typename ProjectiveType>
|
||||
size_t pippenger_scratch_size(uint32_t n, uint32_t gpu_index) {
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
uint32_t window_size, bucket_count;
|
||||
// Use the same window parameter selection as the MSM entry points
|
||||
if constexpr (std::is_same_v<AffineType, G1Affine>) {
|
||||
get_g1_window_params(n, window_size, bucket_count);
|
||||
} else {
|
||||
@@ -673,16 +739,15 @@ size_t pippenger_scratch_size(uint32_t n, uint32_t gpu_index) {
|
||||
}
|
||||
|
||||
const uint32_t threads_per_block = msm_threads_per_block<AffineType>(n);
|
||||
const uint32_t num_windows = CEIL_DIV(Scalar::NUM_BITS, window_size);
|
||||
const uint32_t num_windows = (Scalar::NUM_BITS + window_size) / window_size;
|
||||
|
||||
// Phase1KernelLaunchParams computes the adjusted threads per block
|
||||
// respecting shared memory limits, which determines num_blocks_per_window
|
||||
Phase1KernelLaunchParams<AffineType> launch_params(n, threads_per_block,
|
||||
bucket_count, gpu_index);
|
||||
|
||||
const size_t all_block_buckets_elems = static_cast<size_t>(num_windows) *
|
||||
launch_params.num_blocks_per_window *
|
||||
bucket_count;
|
||||
const size_t num_blocks =
|
||||
static_cast<size_t>(launch_params.num_blocks_per_window);
|
||||
const size_t all_block_buckets_elems =
|
||||
static_cast<size_t>(num_windows) * num_blocks * bucket_count;
|
||||
const size_t all_final_buckets_elems =
|
||||
static_cast<size_t>(num_windows) * bucket_count;
|
||||
const size_t total_elems =
|
||||
|
||||
@@ -7,8 +7,6 @@
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
// For CUDA device code, we use __constant__ memory
|
||||
// Constants are hardcoded at compile time (like sppark) to avoid
|
||||
// cudaMemcpyToSymbol
|
||||
// Note: DEVICE_MODULUS is in normal form (not Montgomery)
|
||||
__constant__ const Fp DEVICE_MODULUS = {BLS12_446_MODULUS_LIMBS};
|
||||
|
||||
@@ -104,7 +102,7 @@ __host__ __device__ ComparisonType fp_cmp(const Fp &a, const Fp &b) {
|
||||
|
||||
__host__ __device__ bool fp_is_zero(const Fp &a) {
|
||||
// By doing this way we avoid branching
|
||||
uint64_t acc = 0;
|
||||
UNSIGNED_LIMB acc = 0;
|
||||
for (int i = 0; i < FP_LIMBS; i++) {
|
||||
acc |= a.limb[i];
|
||||
}
|
||||
@@ -114,8 +112,8 @@ __host__ __device__ bool fp_is_zero(const Fp &a) {
|
||||
__host__ __device__ bool fp_is_one(const Fp &a) {
|
||||
if (a.limb[0] != 1)
|
||||
return false;
|
||||
// By doing this way we avoid branching
|
||||
uint64_t acc = 0;
|
||||
// All higher limbs must be zero.
|
||||
UNSIGNED_LIMB acc = 0;
|
||||
for (int i = 1; i < FP_LIMBS; i++) {
|
||||
acc |= a.limb[i];
|
||||
}
|
||||
@@ -207,6 +205,40 @@ __host__ __device__ UNSIGNED_LIMB fp_add_raw(Fp &c, const Fp &a, const Fp &b) {
|
||||
"l"(b.limb[1]), "l"(b.limb[2]), "l"(b.limb[3]), "l"(b.limb[4]),
|
||||
"l"(b.limb[5]), "l"(b.limb[6]));
|
||||
return carry_out;
|
||||
#elif defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 32
|
||||
// 32-bit PTX carry chain: add.cc.u32 sets the hardware carry flag,
|
||||
// addc.cc.u32 propagates it. Eliminates software carry-detect comparisons
|
||||
// across all 14 limbs.
|
||||
// Operand map: %0..%13 = c[0..13], %14 = carry_out,
|
||||
// %15..%28 = a[0..13], %29..%42 = b[0..13].
|
||||
uint32_t carry_out;
|
||||
asm("add.cc.u32 %0, %15, %29;\n\t" // c[0] = a[0] + b[0], set CF
|
||||
"addc.cc.u32 %1, %16, %30;\n\t" // c[1] = a[1] + b[1] + CF
|
||||
"addc.cc.u32 %2, %17, %31;\n\t" // c[2] = a[2] + b[2] + CF
|
||||
"addc.cc.u32 %3, %18, %32;\n\t" // c[3] = a[3] + b[3] + CF
|
||||
"addc.cc.u32 %4, %19, %33;\n\t" // c[4] = a[4] + b[4] + CF
|
||||
"addc.cc.u32 %5, %20, %34;\n\t" // c[5] = a[5] + b[5] + CF
|
||||
"addc.cc.u32 %6, %21, %35;\n\t" // c[6] = a[6] + b[6] + CF
|
||||
"addc.cc.u32 %7, %22, %36;\n\t" // c[7] = a[7] + b[7] + CF
|
||||
"addc.cc.u32 %8, %23, %37;\n\t" // c[8] = a[8] + b[8] + CF
|
||||
"addc.cc.u32 %9, %24, %38;\n\t" // c[9] = a[9] + b[9] + CF
|
||||
"addc.cc.u32 %10, %25, %39;\n\t" // c[10] = a[10] + b[10] + CF
|
||||
"addc.cc.u32 %11, %26, %40;\n\t" // c[11] = a[11] + b[11] + CF
|
||||
"addc.cc.u32 %12, %27, %41;\n\t" // c[12] = a[12] + b[12] + CF
|
||||
"addc.cc.u32 %13, %28, %42;\n\t" // c[13] = a[13] + b[13] + CF
|
||||
"addc.u32 %14, 0, 0;\n\t" // carry_out = 0 + 0 + CF (0 or 1)
|
||||
: "=r"(c.limb[0]), "=r"(c.limb[1]), "=r"(c.limb[2]), "=r"(c.limb[3]),
|
||||
"=r"(c.limb[4]), "=r"(c.limb[5]), "=r"(c.limb[6]), "=r"(c.limb[7]),
|
||||
"=r"(c.limb[8]), "=r"(c.limb[9]), "=r"(c.limb[10]), "=r"(c.limb[11]),
|
||||
"=r"(c.limb[12]), "=r"(c.limb[13]), "=r"(carry_out)
|
||||
: "r"(a.limb[0]), "r"(a.limb[1]), "r"(a.limb[2]), "r"(a.limb[3]),
|
||||
"r"(a.limb[4]), "r"(a.limb[5]), "r"(a.limb[6]), "r"(a.limb[7]),
|
||||
"r"(a.limb[8]), "r"(a.limb[9]), "r"(a.limb[10]), "r"(a.limb[11]),
|
||||
"r"(a.limb[12]), "r"(a.limb[13]), "r"(b.limb[0]), "r"(b.limb[1]),
|
||||
"r"(b.limb[2]), "r"(b.limb[3]), "r"(b.limb[4]), "r"(b.limb[5]),
|
||||
"r"(b.limb[6]), "r"(b.limb[7]), "r"(b.limb[8]), "r"(b.limb[9]),
|
||||
"r"(b.limb[10]), "r"(b.limb[11]), "r"(b.limb[12]), "r"(b.limb[13]));
|
||||
return static_cast<UNSIGNED_LIMB>(carry_out);
|
||||
#else
|
||||
// Host path: portable software carry detection
|
||||
UNSIGNED_LIMB carry = 0;
|
||||
@@ -248,6 +280,41 @@ __host__ __device__ UNSIGNED_LIMB fp_sub_raw(Fp &c, const Fp &a, const Fp &b) {
|
||||
// subc.u64 with 0-0-CF produces 0 if no borrow, or 0xFFFFFFFFFFFFFFFF if
|
||||
// borrow. Normalize to 0/1 for callers that check (borrow != 0) or add it.
|
||||
return borrow_out & 1;
|
||||
#elif defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 32
|
||||
// 32-bit PTX borrow chain: sub.cc.u32 sets the hardware borrow flag,
|
||||
// subc.cc.u32 propagates it across all 14 limbs.
|
||||
// subc.u32 with 0-0-BF gives 0xFFFFFFFF on borrow; normalize to 0/1.
|
||||
// Operand map: %0..%13 = c[0..13], %14 = borrow_out,
|
||||
// %15..%28 = a[0..13], %29..%42 = b[0..13].
|
||||
uint32_t borrow_out;
|
||||
asm("sub.cc.u32 %0, %15, %29;\n\t" // c[0] = a[0] - b[0], set BF
|
||||
"subc.cc.u32 %1, %16, %30;\n\t" // c[1] = a[1] - b[1] - BF
|
||||
"subc.cc.u32 %2, %17, %31;\n\t" // c[2] = a[2] - b[2] - BF
|
||||
"subc.cc.u32 %3, %18, %32;\n\t" // c[3] = a[3] - b[3] - BF
|
||||
"subc.cc.u32 %4, %19, %33;\n\t" // c[4] = a[4] - b[4] - BF
|
||||
"subc.cc.u32 %5, %20, %34;\n\t" // c[5] = a[5] - b[5] - BF
|
||||
"subc.cc.u32 %6, %21, %35;\n\t" // c[6] = a[6] - b[6] - BF
|
||||
"subc.cc.u32 %7, %22, %36;\n\t" // c[7] = a[7] - b[7] - BF
|
||||
"subc.cc.u32 %8, %23, %37;\n\t" // c[8] = a[8] - b[8] - BF
|
||||
"subc.cc.u32 %9, %24, %38;\n\t" // c[9] = a[9] - b[9] - BF
|
||||
"subc.cc.u32 %10, %25, %39;\n\t" // c[10] = a[10] - b[10] - BF
|
||||
"subc.cc.u32 %11, %26, %40;\n\t" // c[11] = a[11] - b[11] - BF
|
||||
"subc.cc.u32 %12, %27, %41;\n\t" // c[12] = a[12] - b[12] - BF
|
||||
"subc.cc.u32 %13, %28, %42;\n\t" // c[13] = a[13] - b[13] - BF
|
||||
"subc.u32 %14, 0, 0;\n\t" // borrow_out = 0 - 0 - BF (0 or
|
||||
// 0xFFFFFFFF)
|
||||
: "=r"(c.limb[0]), "=r"(c.limb[1]), "=r"(c.limb[2]), "=r"(c.limb[3]),
|
||||
"=r"(c.limb[4]), "=r"(c.limb[5]), "=r"(c.limb[6]), "=r"(c.limb[7]),
|
||||
"=r"(c.limb[8]), "=r"(c.limb[9]), "=r"(c.limb[10]), "=r"(c.limb[11]),
|
||||
"=r"(c.limb[12]), "=r"(c.limb[13]), "=r"(borrow_out)
|
||||
: "r"(a.limb[0]), "r"(a.limb[1]), "r"(a.limb[2]), "r"(a.limb[3]),
|
||||
"r"(a.limb[4]), "r"(a.limb[5]), "r"(a.limb[6]), "r"(a.limb[7]),
|
||||
"r"(a.limb[8]), "r"(a.limb[9]), "r"(a.limb[10]), "r"(a.limb[11]),
|
||||
"r"(a.limb[12]), "r"(a.limb[13]), "r"(b.limb[0]), "r"(b.limb[1]),
|
||||
"r"(b.limb[2]), "r"(b.limb[3]), "r"(b.limb[4]), "r"(b.limb[5]),
|
||||
"r"(b.limb[6]), "r"(b.limb[7]), "r"(b.limb[8]), "r"(b.limb[9]),
|
||||
"r"(b.limb[10]), "r"(b.limb[11]), "r"(b.limb[12]), "r"(b.limb[13]));
|
||||
return static_cast<UNSIGNED_LIMB>(borrow_out & 1u);
|
||||
#else
|
||||
// Host path: portable software borrow detection
|
||||
UNSIGNED_LIMB borrow = 0;
|
||||
@@ -287,6 +354,17 @@ __host__ __device__ void fp_add(Fp &c, const Fp &a, const Fp &b) {
|
||||
UNSIGNED_LIMB mask =
|
||||
-use_original; // all-ones if keep sum, all-zeros if keep reduced
|
||||
|
||||
for (int i = 0; i < FP_LIMBS; i++) {
|
||||
c.limb[i] = (sum.limb[i] & mask) | (reduced.limb[i] & ~mask);
|
||||
}
|
||||
#elif defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 32
|
||||
// Same branchless logic as the 64-bit path; mask arithmetic is identical
|
||||
// since UNSIGNED_LIMB is uint32_t: -1u == 0xFFFFFFFF (all-ones).
|
||||
Fp reduced;
|
||||
UNSIGNED_LIMB borrow = fp_sub_raw(reduced, sum, fp_modulus());
|
||||
UNSIGNED_LIMB use_original = ((carry ^ 1u) & borrow);
|
||||
UNSIGNED_LIMB mask = -use_original;
|
||||
|
||||
for (int i = 0; i < FP_LIMBS; i++) {
|
||||
c.limb[i] = (sum.limb[i] & mask) | (reduced.limb[i] & ~mask);
|
||||
}
|
||||
@@ -319,6 +397,15 @@ __host__ __device__ void fp_sub(Fp &c, const Fp &a, const Fp &b) {
|
||||
UNSIGNED_LIMB mask =
|
||||
-borrow; // all-ones if borrow (use corrected), all-zeros if not
|
||||
|
||||
for (int i = 0; i < FP_LIMBS; i++) {
|
||||
c.limb[i] = (corrected.limb[i] & mask) | (diff.limb[i] & ~mask);
|
||||
}
|
||||
#elif defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 32
|
||||
// Same branchless logic as the 64-bit path; -1u == 0xFFFFFFFF for uint32_t.
|
||||
Fp corrected;
|
||||
fp_add_raw(corrected, diff, fp_modulus());
|
||||
UNSIGNED_LIMB mask = -borrow;
|
||||
|
||||
for (int i = 0; i < FP_LIMBS; i++) {
|
||||
c.limb[i] = (corrected.limb[i] & mask) | (diff.limb[i] & ~mask);
|
||||
}
|
||||
@@ -333,6 +420,26 @@ __host__ __device__ void fp_sub(Fp &c, const Fp &a, const Fp &b) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Lazy addition: c = a + b, result in [0, 2p) for inputs in [0, p).
|
||||
// Skips the conditional subtraction of fp_add; valid as input to fp_mont_mul
|
||||
// since CIOS accepts operands in [0, 2p).
|
||||
__host__ __device__ void fp_add_lazy(Fp &c, const Fp &a, const Fp &b) {
|
||||
fp_add_raw(c, a, b);
|
||||
}
|
||||
|
||||
// Lazy subtraction: c ≡ a - b (mod p), result in [0, 2p) for inputs in [0, p).
|
||||
// Adds p unconditionally, skipping the borrow-dependent select done by fp_sub.
|
||||
// Valid as input to fp_mont_mul; must NOT be used where [0, p) is
|
||||
// required (e.g. final results, inputs to fp_sub/fp_neg).
|
||||
__host__ __device__ void fp_sub_lazy(Fp &c, const Fp &a, const Fp &b) {
|
||||
Fp diff;
|
||||
fp_sub_raw(diff, a, b); // a - b, borrow absorbed into bit pattern
|
||||
fp_add_raw(c, diff, fp_modulus()); // always add p; carry discarded
|
||||
// For a >= b (no borrow): diff = a-b ∈ [0,p), result = a-b+p ∈ [p,2p) ✓
|
||||
// For a < b (borrow=1): diff wraps, result = a-b+2^N+p mod 2^N = a-b+p ∈
|
||||
// [0,p) ✓
|
||||
}
|
||||
|
||||
// Small-constant multiplication via addition chains.
|
||||
// These replace full Montgomery multiplications by 2, 3, 4, 8 with a few
|
||||
// modular additions, each ~25 instructions vs ~200+ for CIOS Montgomery mul.
|
||||
@@ -483,14 +590,32 @@ __host__ __device__ void fp_mont_reduce(Fp &c, const UNSIGNED_LIMB *a) {
|
||||
for (int i = 0; i < FP_LIMBS; i++) {
|
||||
UNSIGNED_LIMB u = t[i] * p_prime; // u = t[i] * p' mod 2^LIMB_BITS
|
||||
|
||||
// Add u * p to t, starting at position i
|
||||
// Add u * p to t, starting at position i.
|
||||
// Use uint64_t accumulator in 32-bit mode to avoid carry overflow:
|
||||
// hi + carry1 + carry2 can reach 2^32 which overflows uint32_t.
|
||||
#if LIMB_BITS_CONFIG == 32
|
||||
uint64_t carry = 0;
|
||||
for (int j = 0; j < FP_LIMBS; j++) {
|
||||
uint64_t acc =
|
||||
(uint64_t)t[i + j] + (uint64_t)u * (uint64_t)p.limb[j] + carry;
|
||||
t[i + j] = (UNSIGNED_LIMB)acc;
|
||||
carry = acc >> LIMB_BITS;
|
||||
}
|
||||
// Propagate remaining carry (carry ≤ 2^32-1 at this point)
|
||||
int idx = i + FP_LIMBS;
|
||||
while (carry != 0 && idx <= 2 * FP_LIMBS) {
|
||||
uint64_t acc = (uint64_t)t[idx] + carry;
|
||||
t[idx] = (UNSIGNED_LIMB)acc;
|
||||
carry = acc >> LIMB_BITS;
|
||||
idx++;
|
||||
}
|
||||
#else
|
||||
UNSIGNED_LIMB carry = 0;
|
||||
for (int j = 0; j < FP_LIMBS; j++) {
|
||||
UNSIGNED_LIMB hi, lo;
|
||||
mul_limbs(u, p.limb[j], hi, lo);
|
||||
|
||||
// Three-way addition: t[i+j] + lo + carry
|
||||
// Do it in two steps to handle carries properly
|
||||
UNSIGNED_LIMB temp = t[i + j] + lo;
|
||||
UNSIGNED_LIMB carry1 = (temp < t[i + j]) ? 1 : 0;
|
||||
|
||||
@@ -499,7 +624,6 @@ __host__ __device__ void fp_mont_reduce(Fp &c, const UNSIGNED_LIMB *a) {
|
||||
|
||||
t[i + j] = sum;
|
||||
|
||||
// Next carry is hi + carry1 + carry2
|
||||
carry = hi + carry1 + carry2;
|
||||
}
|
||||
|
||||
@@ -511,6 +635,7 @@ __host__ __device__ void fp_mont_reduce(Fp &c, const UNSIGNED_LIMB *a) {
|
||||
t[idx] = sum;
|
||||
idx++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Result is in t[FP_LIMBS..2*FP_LIMBS-1] (high half)
|
||||
@@ -534,29 +659,7 @@ __host__ __device__ void fp_mont_reduce(Fp &c, const UNSIGNED_LIMB *a) {
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// PTX-accelerated CIOS Montgomery multiplication (device path)
|
||||
// ============================================================================
|
||||
// The CIOS algorithm for 7 x 64-bit limbs executes 98 multiply-accumulate
|
||||
// steps across 7 outer iterations. Each step computes:
|
||||
// (carry, t[j]) = t[j] + a[j] * b_i + carry
|
||||
// which is a 64x64->128 multiply plus a three-operand addition with carry.
|
||||
//
|
||||
// The C++ path uses software carry detection: carry = (sum < old) ? 1 : 0.
|
||||
// The PTX path below uses hardware carry flags via the .cc suffix:
|
||||
// - mul.lo.u64 / mul.hi.u64 : 64x64->128 wide multiply
|
||||
// - add.cc.u64 / addc.u64 : addition chain with hardware carry flag
|
||||
//
|
||||
// Each multiply-accumulate step uses 6 PTX instructions instead of ~10+ in
|
||||
// the software-carry version. The 7 outer iterations are fully unrolled, and
|
||||
// the limb-shift loop (t[j] = t[j+1]) is eliminated by register renaming.
|
||||
//
|
||||
// REGISTER ALIASING NOTE: All PTX temporaries (_lo, _hi) are declared as
|
||||
// .reg inside the asm block. This prevents nvcc's register allocator from
|
||||
// aliasing them with C operands (t_j, carry), which was the root cause of
|
||||
// previous correctness bugs where "+l" outputs could share registers with
|
||||
// "l" inputs in the same asm statement.
|
||||
// ============================================================================
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
#if LIMB_BITS_CONFIG == 64
|
||||
@@ -735,14 +838,408 @@ __device__ __noinline__ void fp_mont_mul_cios_ptx(Fp &c, const Fp &a,
|
||||
#endif // LIMB_BITS_CONFIG == 64
|
||||
#endif // __CUDA_ARCH__
|
||||
|
||||
// 32-bit dual MAD-chain Montgomery multiplication (device path)
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
|
||||
// PTX carry-chain primitives for 32-bit Montgomery arithmetic.
|
||||
//
|
||||
// These are macros rather than __forceinline__ functions because the hardware
|
||||
// carry flag (CC register) does not survive a function-call boundary.
|
||||
// (lo, hi) = a * b : 64-bit product, no carry in or out.
|
||||
// Initialises a fresh wide accumulator slot.
|
||||
#define FP_MUL_WIDE_32(lo, hi, a, b) \
|
||||
asm("mul.lo.u32 %0, %2, %3; mul.hi.u32 %1, %2, %3;" \
|
||||
: "=r"(lo), "=r"(hi) \
|
||||
: "r"(a), "r"(b))
|
||||
|
||||
// lo += lo(a*b); hi += hi(a*b) + CC. Sets CC.
|
||||
// Opens a carry chain (mad.lo.cc / madc.hi.cc).
|
||||
#define FP_MAD_WIDE_CC_32(lo, hi, a, b) \
|
||||
asm("mad.lo.cc.u32 %0, %2, %3, %0; madc.hi.cc.u32 %1, %2, %3, %1;" \
|
||||
: "+r"(lo), "+r"(hi) \
|
||||
: "r"(a), "r"(b))
|
||||
|
||||
// lo += lo(a*b) + CC; hi += hi(a*b) + CC. Sets CC.
|
||||
// Continues a carry chain (madc.lo.cc / madc.hi.cc).
|
||||
#define FP_MADC_WIDE_CC_32(lo, hi, a, b) \
|
||||
asm("madc.lo.cc.u32 %0, %2, %3, %0; madc.hi.cc.u32 %1, %2, %3, %1;" \
|
||||
: "+r"(lo), "+r"(hi) \
|
||||
: "r"(a), "r"(b))
|
||||
|
||||
// r += CC. No carry out; terminates a carry chain.
|
||||
#define FP_ADDC_32(r) asm("addc.u32 %0, %0, 0;" : "+r"(r))
|
||||
|
||||
// dst = src + CC. No carry out.
|
||||
#define FP_ADDC_INTO_32(dst, src) \
|
||||
asm("addc.u32 %0, %1, 0;" : "=r"(dst) : "r"(src))
|
||||
|
||||
// r = CC (capture carry flag as 0 or 1). No carry out.
|
||||
#define FP_CARRY_32(r) asm("addc.u32 %0, 0, 0;" : "=r"(r))
|
||||
|
||||
// dst = src + src. Sets CC; opens a left-shift doubling chain.
|
||||
#define FP_DBL_CC_32(dst, src) \
|
||||
asm("add.cc.u32 %0, %1, %1;" : "=r"(dst) : "r"(src))
|
||||
|
||||
// r = r + r + CC. Sets CC; continues a left-shift doubling chain.
|
||||
#define FP_DBLC_CC_32(r) asm("addc.cc.u32 %0, %0, %0;" : "+r"(r))
|
||||
|
||||
// dst = lo32 | (hi32 << 32): pack two 32-bit halves into one 64-bit register.
|
||||
#define FP_PACK_U64(dst, lo32, hi32) \
|
||||
asm("mov.b64 %0, {%1, %2};" : "=l"(dst) : "r"(lo32), "r"(hi32))
|
||||
|
||||
// Initialize acc[0..n-1] with products of every other element of a and bi.
|
||||
// For each j (step 2): acc[j] = lo(a[j]*bi), acc[j+1] = hi(a[j]*bi).
|
||||
static __device__ __forceinline__ void
|
||||
fp_mul_n_32(uint32_t *acc, const uint32_t *a, uint32_t bi, int n) {
|
||||
#pragma unroll
|
||||
for (int j = 0; j < n; j += 2) {
|
||||
asm("mul.lo.u32 %0, %1, %2;" : "=r"(acc[j]) : "r"(a[j]), "r"(bi));
|
||||
asm("mul.hi.u32 %0, %1, %2;" : "=r"(acc[j + 1]) : "r"(a[j]), "r"(bi));
|
||||
}
|
||||
}
|
||||
|
||||
// Multiply-accumulate across n limbs with a hardware carry chain.
|
||||
// First pair uses mad.lo.cc + madc.hi.cc (initiates the chain).
|
||||
// Remaining pairs continue with madc.lo.cc + madc.hi.cc.
|
||||
// Carry flag exits in CC on return; caller must consume it.
|
||||
static __device__ __forceinline__ void
|
||||
fp_cmad_n_32(uint32_t *acc, const uint32_t *a, uint32_t bi, int n) {
|
||||
asm("mad.lo.cc.u32 %0, %2, %3, %0; madc.hi.cc.u32 %1, %2, %3, %1;"
|
||||
: "+r"(acc[0]), "+r"(acc[1])
|
||||
: "r"(a[0]), "r"(bi));
|
||||
#pragma unroll
|
||||
for (int j = 2; j < n; j += 2)
|
||||
asm("madc.lo.cc.u32 %0, %2, %3, %0; madc.hi.cc.u32 %1, %2, %3, %1;"
|
||||
: "+r"(acc[j]), "+r"(acc[j + 1])
|
||||
: "r"(a[j]), "r"(bi));
|
||||
// CC holds the final carry on return
|
||||
}
|
||||
|
||||
// Multiply-accumulate with implicit right-shift of odd by two positions.
|
||||
// Each pair: odd[j] = lo/hi(a[j]*bi) + old_odd[j+2] + CC.
|
||||
// Reads are always two positions ahead of writes so forward iteration is safe.
|
||||
// Final pair terminates the chain with addend=0 and no carry-out (.hi only).
|
||||
static __device__ __forceinline__ void
|
||||
fp_madc_n_rshift_32(uint32_t *odd, const uint32_t *a, uint32_t bi, int n) {
|
||||
#pragma unroll
|
||||
for (int j = 0; j < n - 2; j += 2)
|
||||
asm("madc.lo.cc.u32 %0, %2, %3, %4; madc.hi.cc.u32 %1, %2, %3, %5;"
|
||||
: "=r"(odd[j]), "=r"(odd[j + 1])
|
||||
: "r"(a[j]), "r"(bi), "r"(odd[j + 2]), "r"(odd[j + 3]));
|
||||
asm("madc.lo.cc.u32 %0, %2, %3, 0; madc.hi.u32 %1, %2, %3, 0;"
|
||||
: "=r"(odd[n - 2]), "=r"(odd[n - 1])
|
||||
: "r"(a[n - 2]), "r"(bi));
|
||||
// Note: the final madc.hi.u32 has no .cc suffix, so it produces no carry-out.
|
||||
}
|
||||
|
||||
// After the call even[0] == 0 (by the Montgomery invariant), so the next
|
||||
// iteration's right-shift effectively advances the window by one limb.
|
||||
static __device__ __forceinline__ void
|
||||
fp_mad_n_redc_32(uint32_t *even, uint32_t *odd, const uint32_t *a,
|
||||
const uint32_t *p, uint32_t bi, uint32_t M0, bool first) {
|
||||
constexpr int n = 14; // 32-bit limbs for BLS12-446 (446 bits → 14 × 32-bit)
|
||||
|
||||
if (first) {
|
||||
// Fresh initialization: no carry from previous iteration.
|
||||
// even[2j] = lo(a[2j] * bi), even[2j+1] = hi(a[2j] * bi)
|
||||
// odd[2j] = lo(a[2j+1] * bi), odd[2j+1] = hi(a[2j+1] * bi)
|
||||
fp_mul_n_32(even, a, bi, n);
|
||||
fp_mul_n_32(odd, a + 1, bi, n);
|
||||
} else {
|
||||
// Merge carry from previous iteration and advance both accumulators.
|
||||
asm("add.cc.u32 %0, %0, %1;" : "+r"(even[0]) : "r"(odd[1]));
|
||||
fp_madc_n_rshift_32(odd, a + 1, bi, n);
|
||||
fp_cmad_n_32(even, a, bi, n);
|
||||
asm("addc.u32 %0, %0, 0;" : "+r"(odd[n - 1]));
|
||||
}
|
||||
|
||||
// Montgomery reduction: choose mi so that even[0] + lo(p[0]*mi) = 0 mod 2^32
|
||||
uint32_t mi = even[0] * M0;
|
||||
fp_cmad_n_32(odd, p + 1, mi, n);
|
||||
fp_cmad_n_32(even, p, mi, n);
|
||||
asm("addc.u32 %0, %0, 0;" : "+r"(odd[n - 1]));
|
||||
}
|
||||
|
||||
// Carry-add: acc[i] += a[i] for i = 0..n-1 with PTX carry chain.
|
||||
// Starts with add.cc (initiates chain); all subsequent adds use addc.cc.
|
||||
// Carry flag is left set in CC on return for the caller to consume.
|
||||
static __device__ __forceinline__ void fp_cadd_n_32(uint32_t *acc,
|
||||
const uint32_t *a, int n) {
|
||||
asm("add.cc.u32 %0, %0, %1;" : "+r"(acc[0]) : "r"(a[0]));
|
||||
#pragma unroll
|
||||
for (int i = 1; i < n; i++)
|
||||
asm("addc.cc.u32 %0, %0, %1;" : "+r"(acc[i]) : "r"(a[i]));
|
||||
}
|
||||
|
||||
// Even row of the upper-triangle squaring pass.
|
||||
// Adds a[1..n-2]*bi into odd[0..n-3] (cmad chain), places a[n-1]*bi into
|
||||
// odd[n-2..n-1] fresh (terminates carry), then adds a[0..n-1]*bi into
|
||||
// even[0..n-1] (independent cmad chain), folding the even carry into odd[n-1].
|
||||
static __device__ __forceinline__ void fp_mad_row_32(uint32_t *odd,
|
||||
uint32_t *even,
|
||||
const uint32_t *a,
|
||||
uint32_t bi, int n) {
|
||||
fp_cmad_n_32(odd, a + 1, bi, n - 2);
|
||||
asm("madc.lo.cc.u32 %0, %2, %3, 0; madc.hi.u32 %1, %2, %3, 0;"
|
||||
: "=r"(odd[n - 2]), "=r"(odd[n - 1])
|
||||
: "r"(a[n - 1]), "r"(bi));
|
||||
fp_cmad_n_32(even, a, bi, n);
|
||||
asm("addc.u32 %0, %0, 0;" : "+r"(odd[n - 1]));
|
||||
}
|
||||
|
||||
// Odd row of the upper-triangle squaring pass.
|
||||
// Adds a[0..n-3]*bi into odd[0..n-3] (cmad chain), places a[n-2]*bi into
|
||||
// odd[n-2..n-1] fresh, then adds a[1..n-2]*bi into even[0..n-3] (n-2 terms),
|
||||
// folding the even carry into odd[n-1].
|
||||
static __device__ __forceinline__ void fp_qad_row_32(uint32_t *odd,
|
||||
uint32_t *even,
|
||||
const uint32_t *a,
|
||||
uint32_t bi, int n) {
|
||||
fp_cmad_n_32(odd, a, bi, n - 2);
|
||||
asm("madc.lo.cc.u32 %0, %2, %3, 0; madc.hi.u32 %1, %2, %3, 0;"
|
||||
: "=r"(odd[n - 2]), "=r"(odd[n - 1])
|
||||
: "r"(a[n - 2]), "r"(bi));
|
||||
fp_cmad_n_32(even, a + 1, bi, n - 2);
|
||||
asm("addc.u32 %0, %0, 0;" : "+r"(odd[n - 1]));
|
||||
}
|
||||
|
||||
// One Montgomery-reduction row without a multiply step (b_i = 0).
|
||||
// Used by fp_mont_sqr_mad32 to reduce the lower n words of the wide product.
|
||||
// Mirrors fp_mad_n_redc_32 but omits the initial product accumulation, leaving
|
||||
// only the annihilation step that drives even[0] to zero.
|
||||
static __device__ __forceinline__ void
|
||||
fp_mul_by_1_row_32(uint32_t *even, uint32_t *odd, const uint32_t *p,
|
||||
uint32_t M0, bool first) {
|
||||
constexpr int n = 14;
|
||||
// mi removes even[0]: even[0] + lo(p[0]*mi) == 0 mod 2^32.
|
||||
// IMPORTANT: mi must be computed from even[0] *after* any add.cc that
|
||||
// modifies it. Plain integer multiply does not touch CC.
|
||||
uint32_t mi;
|
||||
if (first) {
|
||||
mi = even[0] * M0;
|
||||
fp_mul_n_32(odd, p + 1, mi, n);
|
||||
fp_cmad_n_32(even, p, mi, n);
|
||||
asm("addc.u32 %0, %0, 0;" : "+r"(odd[n - 1]));
|
||||
} else {
|
||||
// Absorb the shifted carry word from the previous step, then reduce.
|
||||
asm("add.cc.u32 %0, %0, %1;" : "+r"(even[0]) : "r"(odd[1]));
|
||||
// Use PTX mul explicitly: a plain C multiply after add.cc could in theory
|
||||
// let the compiler insert an instruction that clobbers CC before
|
||||
// madc_n_rshift.
|
||||
asm("mul.lo.u32 %0, %1, %2;" : "=r"(mi) : "r"(even[0]), "r"(M0));
|
||||
fp_madc_n_rshift_32(odd, p + 1, mi, n);
|
||||
fp_cmad_n_32(even, p, mi, n);
|
||||
asm("addc.u32 %0, %0, 0;" : "+r"(odd[n - 1]));
|
||||
}
|
||||
}
|
||||
|
||||
// Montgomery squaring using CIOS with triangular 32-bit MAD chains.
|
||||
// See fp_mont_mul_mad32 for the algorithm reference (Koç et al., 1996).
|
||||
//
|
||||
// Computes c = a^2 * R^{-1} mod p (input and output in Montgomery form).
|
||||
__device__ __noinline__ void fp_mont_sqr_mad32(Fp &c, const Fp &a) {
|
||||
constexpr int n = 14;
|
||||
|
||||
const uint32_t *a32 = reinterpret_cast<const uint32_t *>(a.limb);
|
||||
const uint32_t *p32 = reinterpret_cast<const uint32_t *>(DEVICE_MODULUS.limb);
|
||||
const uint32_t M0 = static_cast<uint32_t>(DEVICE_P_PRIME);
|
||||
|
||||
uint32_t wide[2 * n], wtemp[2 * n - 2];
|
||||
// Phase 1: upper triangle a[i]*a[j] for j > i
|
||||
fp_mul_n_32(wtemp, a32 + 1, a32[0], n);
|
||||
fp_mul_n_32(wide + 2, a32 + 2, a32[0], n - 2);
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 2; i <= n - 4; i += 2) {
|
||||
fp_mad_row_32(&wide[2 * i], &wtemp[2 * i - 2], &a32[i], a32[i - 1], n - i);
|
||||
fp_qad_row_32(&wtemp[2 * i], &wide[2 * i + 2], &a32[i + 1], a32[i], n - i);
|
||||
}
|
||||
|
||||
FP_MUL_WIDE_32(wide[2 * n - 4], wide[2 * n - 3], a32[n - 1], a32[n - 3]);
|
||||
FP_MAD_WIDE_CC_32(wtemp[2 * n - 6], wtemp[2 * n - 5], a32[n - 2], a32[n - 3]);
|
||||
FP_ADDC_32(wide[2 * n - 3]);
|
||||
FP_MUL_WIDE_32(wtemp[2 * n - 4], wtemp[2 * n - 3], a32[n - 1], a32[n - 2]);
|
||||
|
||||
fp_cadd_n_32(&wide[2], &wtemp[1], 2 * n - 4);
|
||||
FP_ADDC_INTO_32(wide[2 * n - 2], wtemp[2 * n - 3]);
|
||||
|
||||
// Phase 2: double the upper-triangle sum (left-shift the 2n-bit value by 1)
|
||||
wide[0] = 0;
|
||||
FP_DBL_CC_32(wide[1], wtemp[0]);
|
||||
#pragma unroll
|
||||
for (int j = 2; j < 2 * n - 1; j++)
|
||||
FP_DBLC_CC_32(wide[j]);
|
||||
FP_CARRY_32(wide[2 * n - 1]);
|
||||
|
||||
// Phase 3: add diagonal a[i]^2 terms (squares of each limb)
|
||||
FP_MAD_WIDE_CC_32(wide[0], wide[1], a32[0], a32[0]);
|
||||
#pragma unroll
|
||||
for (int i = 1; i < n; i++)
|
||||
FP_MADC_WIDE_CC_32(wide[2 * i], wide[2 * i + 1], a32[i], a32[i]);
|
||||
|
||||
// Phase 4: Montgomery reduction
|
||||
uint32_t red_odd[n];
|
||||
#pragma unroll
|
||||
for (int i = 0; i < n; i += 2) {
|
||||
fp_mul_by_1_row_32(&wide[0], &red_odd[0], p32, M0, i == 0);
|
||||
fp_mul_by_1_row_32(&red_odd[0], &wide[0], p32, M0, false);
|
||||
}
|
||||
// Merge: wide[0..n-2] += red_odd[1..n-1], propagate final carry into wide[n-1].
|
||||
fp_cadd_n_32(&wide[0], &red_odd[1], n - 1);
|
||||
FP_ADDC_32(wide[n - 1]);
|
||||
|
||||
// Add reduced lower half into upper half wide[n..2n-1]; the result lives
|
||||
// in wide[n..2n-1] and is in [0, 2p).
|
||||
fp_cadd_n_32(&wide[n], &wide[0], n);
|
||||
FP_CARRY_32(wide[0]); // discard overflow (always 0 for p<2^446)
|
||||
|
||||
#if LIMB_BITS_CONFIG == 64
|
||||
// Pack uint32_t pairs back into uint64_t limbs.
|
||||
#pragma unroll
|
||||
for (int j = 0; j < 7; j++)
|
||||
FP_PACK_U64(c.limb[j], wide[n + 2 * j], wide[n + 2 * j + 1]);
|
||||
|
||||
const uint64_t p0 = DEVICE_MODULUS.limb[0], p1 = DEVICE_MODULUS.limb[1],
|
||||
p2 = DEVICE_MODULUS.limb[2], p3 = DEVICE_MODULUS.limb[3],
|
||||
p4 = DEVICE_MODULUS.limb[4], p5 = DEVICE_MODULUS.limb[5],
|
||||
p6 = DEVICE_MODULUS.limb[6];
|
||||
uint64_t r0, r1, r2, r3, r4, r5, r6, mask64;
|
||||
asm("sub.cc.u64 %0, %8, %15;\n\t"
|
||||
"subc.cc.u64 %1, %9, %16;\n\t"
|
||||
"subc.cc.u64 %2, %10, %17;\n\t"
|
||||
"subc.cc.u64 %3, %11, %18;\n\t"
|
||||
"subc.cc.u64 %4, %12, %19;\n\t"
|
||||
"subc.cc.u64 %5, %13, %20;\n\t"
|
||||
"subc.cc.u64 %6, %14, %21;\n\t"
|
||||
"subc.u64 %7, 0, 0;\n\t"
|
||||
"shr.s64 %7, %7, 63;\n\t"
|
||||
: "=l"(r0), "=l"(r1), "=l"(r2), "=l"(r3), "=l"(r4), "=l"(r5), "=l"(r6),
|
||||
"=l"(mask64)
|
||||
: "l"(c.limb[0]), "l"(c.limb[1]), "l"(c.limb[2]), "l"(c.limb[3]),
|
||||
"l"(c.limb[4]), "l"(c.limb[5]), "l"(c.limb[6]), "l"(p0), "l"(p1),
|
||||
"l"(p2), "l"(p3), "l"(p4), "l"(p5), "l"(p6));
|
||||
c.limb[0] = (c.limb[0] & mask64) | (r0 & ~mask64);
|
||||
c.limb[1] = (c.limb[1] & mask64) | (r1 & ~mask64);
|
||||
c.limb[2] = (c.limb[2] & mask64) | (r2 & ~mask64);
|
||||
c.limb[3] = (c.limb[3] & mask64) | (r3 & ~mask64);
|
||||
c.limb[4] = (c.limb[4] & mask64) | (r4 & ~mask64);
|
||||
c.limb[5] = (c.limb[5] & mask64) | (r5 & ~mask64);
|
||||
c.limb[6] = (c.limb[6] & mask64) | (r6 & ~mask64);
|
||||
#else
|
||||
#pragma unroll
|
||||
for (int j = 0; j < n; j++)
|
||||
c.limb[j] = wide[n + j];
|
||||
Fp reduced;
|
||||
UNSIGNED_LIMB borrow = fp_sub_raw(reduced, c, fp_modulus());
|
||||
UNSIGNED_LIMB mask32 = -borrow;
|
||||
#pragma unroll
|
||||
for (int j = 0; j < n; j++)
|
||||
c.limb[j] = (c.limb[j] & mask32) | (reduced.limb[j] & ~mask32);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Montgomery multiplication using CIOS (Coarsely Integrated Operand Scanning):
|
||||
// Computes c = a * b * R^{-1} mod p (all operands in Montgomery form).
|
||||
// With 64-bit limbs the inputs are uint64_t[7], reinterpreted as uint32_t[14]
|
||||
// (little-endian: a64[j] == a32[2j] | (a32[2j+1] << 32)).
|
||||
__device__ __noinline__ void fp_mont_mul_mad32(Fp &c, const Fp &a,
|
||||
const Fp &b) {
|
||||
constexpr int n = 14;
|
||||
|
||||
// Reinterpret 64-bit limb arrays as 32-bit on little-endian hardware.
|
||||
const uint32_t *a32 = reinterpret_cast<const uint32_t *>(a.limb);
|
||||
const uint32_t *b32 = reinterpret_cast<const uint32_t *>(b.limb);
|
||||
const uint32_t *p32 = reinterpret_cast<const uint32_t *>(DEVICE_MODULUS.limb);
|
||||
|
||||
// 32-bit Montgomery constant: low 32 bits of DEVICE_P_PRIME.
|
||||
// Correct because -p^{-1} mod 2^32 == (-p^{-1} mod 2^64) mod 2^32.
|
||||
const uint32_t M0 = static_cast<uint32_t>(DEVICE_P_PRIME);
|
||||
|
||||
uint32_t even[n], odd[n];
|
||||
|
||||
// Process every 32-bit limb of b in pairs, alternating primary accumulator.
|
||||
#pragma unroll
|
||||
for (int i = 0; i < n; i += 2) {
|
||||
fp_mad_n_redc_32(even, odd, a32, p32, b32[i], M0, i == 0);
|
||||
fp_mad_n_redc_32(odd, even, a32, p32, b32[i + 1], M0, false);
|
||||
}
|
||||
|
||||
// Merge: even[0..n-2] += odd[1..n-1], propagate final carry into even[n-1].
|
||||
fp_cadd_n_32(even, odd + 1, n - 1);
|
||||
FP_ADDC_32(even[n - 1]);
|
||||
|
||||
// Pack and final reduction layout depends on LIMB_BITS_CONFIG.
|
||||
// In both cases UNSIGNED_LIMB* and uint32_t* point to the same 56-byte block.
|
||||
#if LIMB_BITS_CONFIG == 64
|
||||
// 64-bit limbs: pack pairs into uint64_t with PTX mov.b64, then do a
|
||||
// branchless 7-limb 64-bit conditional subtraction.
|
||||
#pragma unroll
|
||||
for (int j = 0; j < 7; j++)
|
||||
FP_PACK_U64(c.limb[j], even[2 * j], even[2 * j + 1]);
|
||||
|
||||
// subc.u64 0-0-borrow gives 0xFFFF... when c<p (keep), 0 when c>=p (reduce).
|
||||
// shr.s64 sign-extends to a per-bit selection mask.
|
||||
const uint64_t p0 = DEVICE_MODULUS.limb[0], p1 = DEVICE_MODULUS.limb[1],
|
||||
p2 = DEVICE_MODULUS.limb[2], p3 = DEVICE_MODULUS.limb[3],
|
||||
p4 = DEVICE_MODULUS.limb[4], p5 = DEVICE_MODULUS.limb[5],
|
||||
p6 = DEVICE_MODULUS.limb[6];
|
||||
uint64_t r0, r1, r2, r3, r4, r5, r6, mask64;
|
||||
asm("sub.cc.u64 %0, %8, %15;\n\t"
|
||||
"subc.cc.u64 %1, %9, %16;\n\t"
|
||||
"subc.cc.u64 %2, %10, %17;\n\t"
|
||||
"subc.cc.u64 %3, %11, %18;\n\t"
|
||||
"subc.cc.u64 %4, %12, %19;\n\t"
|
||||
"subc.cc.u64 %5, %13, %20;\n\t"
|
||||
"subc.cc.u64 %6, %14, %21;\n\t"
|
||||
"subc.u64 %7, 0, 0;\n\t"
|
||||
"shr.s64 %7, %7, 63;\n\t"
|
||||
: "=l"(r0), "=l"(r1), "=l"(r2), "=l"(r3), "=l"(r4), "=l"(r5), "=l"(r6),
|
||||
"=l"(mask64)
|
||||
: "l"(c.limb[0]), "l"(c.limb[1]), "l"(c.limb[2]), "l"(c.limb[3]),
|
||||
"l"(c.limb[4]), "l"(c.limb[5]), "l"(c.limb[6]), "l"(p0), "l"(p1),
|
||||
"l"(p2), "l"(p3), "l"(p4), "l"(p5), "l"(p6));
|
||||
c.limb[0] = (c.limb[0] & mask64) | (r0 & ~mask64);
|
||||
c.limb[1] = (c.limb[1] & mask64) | (r1 & ~mask64);
|
||||
c.limb[2] = (c.limb[2] & mask64) | (r2 & ~mask64);
|
||||
c.limb[3] = (c.limb[3] & mask64) | (r3 & ~mask64);
|
||||
c.limb[4] = (c.limb[4] & mask64) | (r4 & ~mask64);
|
||||
c.limb[5] = (c.limb[5] & mask64) | (r5 & ~mask64);
|
||||
c.limb[6] = (c.limb[6] & mask64) | (r6 & ~mask64);
|
||||
#else
|
||||
#pragma unroll
|
||||
for (int j = 0; j < n; j++)
|
||||
c.limb[j] = even[j];
|
||||
|
||||
Fp reduced;
|
||||
UNSIGNED_LIMB borrow = fp_sub_raw(reduced, c, fp_modulus());
|
||||
UNSIGNED_LIMB mask32 = -borrow; // all-ones if c<p (keep), all-zeros if c>=p
|
||||
#pragma unroll
|
||||
for (int j = 0; j < n; j++)
|
||||
c.limb[j] = (c.limb[j] & mask32) | (reduced.limb[j] & ~mask32);
|
||||
#endif
|
||||
}
|
||||
|
||||
#undef FP_MUL_WIDE_32
|
||||
#undef FP_MAD_WIDE_CC_32
|
||||
#undef FP_MADC_WIDE_CC_32
|
||||
#undef FP_ADDC_32
|
||||
#undef FP_ADDC_INTO_32
|
||||
#undef FP_CARRY_32
|
||||
#undef FP_DBL_CC_32
|
||||
#undef FP_DBLC_CC_32
|
||||
#undef FP_PACK_U64
|
||||
|
||||
#endif // __CUDA_ARCH__
|
||||
|
||||
// CIOS (Coarsely Integrated Operand Scanning) Montgomery multiplication
|
||||
// Fuses multiplication and reduction in a single pass for better efficiency.
|
||||
// Uses only FP_LIMBS+1 limbs of working space instead of 2*FP_LIMBS.
|
||||
// Both a and b are in Montgomery form, result is in Montgomery form.
|
||||
__host__ __device__ void fp_mont_mul_cios(Fp &c, const Fp &a, const Fp &b) {
|
||||
#if defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 64
|
||||
// Device path: fully unrolled PTX with hardware carry flags
|
||||
fp_mont_mul_cios_ptx(c, a, b);
|
||||
#ifdef __CUDA_ARCH__
|
||||
// Device path: 32-bit dual MAD chain
|
||||
fp_mont_mul_mad32(c, a, b);
|
||||
#else
|
||||
// Host path: portable C++ implementation
|
||||
const Fp &p = fp_modulus();
|
||||
@@ -750,11 +1247,31 @@ __host__ __device__ void fp_mont_mul_cios(Fp &c, const Fp &a, const Fp &b) {
|
||||
|
||||
// Working array: only n+1 limbs needed (vs 2n for separate mul+reduce)
|
||||
UNSIGNED_LIMB t[FP_LIMBS + 1];
|
||||
// memset is not guaranteed available in all device compilation contexts;
|
||||
// use an explicit loop which the compiler will unroll anyway.
|
||||
#ifdef __CUDA_ARCH__
|
||||
for (int i = 0; i <= FP_LIMBS; i++) {
|
||||
t[i] = 0;
|
||||
}
|
||||
#else
|
||||
memset(t, 0, (FP_LIMBS + 1) * sizeof(UNSIGNED_LIMB));
|
||||
#endif
|
||||
|
||||
// Main CIOS loop: for each limb of b
|
||||
for (int i = 0; i < FP_LIMBS; i++) {
|
||||
// Step 1: Multiply-accumulate t += a * b[i]
|
||||
#if LIMB_BITS_CONFIG == 32
|
||||
uint64_t carry64 = 0;
|
||||
for (int j = 0; j < FP_LIMBS; j++) {
|
||||
uint64_t acc =
|
||||
(uint64_t)t[j] + (uint64_t)a.limb[j] * (uint64_t)b.limb[i] + carry64;
|
||||
t[j] = (UNSIGNED_LIMB)acc;
|
||||
carry64 = acc >> LIMB_BITS;
|
||||
}
|
||||
uint64_t sum64 = (uint64_t)t[FP_LIMBS] + carry64;
|
||||
UNSIGNED_LIMB overflow = (UNSIGNED_LIMB)(sum64 >> LIMB_BITS);
|
||||
t[FP_LIMBS] = (UNSIGNED_LIMB)sum64;
|
||||
#else
|
||||
UNSIGNED_LIMB carry = 0;
|
||||
for (int j = 0; j < FP_LIMBS; j++) {
|
||||
UNSIGNED_LIMB hi, lo;
|
||||
@@ -767,18 +1284,31 @@ __host__ __device__ void fp_mont_mul_cios(Fp &c, const Fp &a, const Fp &b) {
|
||||
UNSIGNED_LIMB c2 = (sum2 < sum1) ? 1 : 0;
|
||||
t[j] = sum2;
|
||||
|
||||
// carry = hi + c1 + c2
|
||||
carry = hi + c1 + c2;
|
||||
}
|
||||
// Add carry to t[n]
|
||||
UNSIGNED_LIMB sum = t[FP_LIMBS] + carry;
|
||||
UNSIGNED_LIMB overflow = (sum < t[FP_LIMBS]) ? 1 : 0;
|
||||
t[FP_LIMBS] = sum;
|
||||
#endif
|
||||
|
||||
// Step 2: Reduction - compute m = t[0] * p' mod 2^LIMB_BITS
|
||||
UNSIGNED_LIMB m = t[0] * p_prime;
|
||||
|
||||
// Add m * p to t (this zeros out t[0])
|
||||
#if LIMB_BITS_CONFIG == 32
|
||||
carry64 = 0;
|
||||
for (int j = 0; j < FP_LIMBS; j++) {
|
||||
uint64_t acc =
|
||||
(uint64_t)t[j] + (uint64_t)m * (uint64_t)p.limb[j] + carry64;
|
||||
t[j] = (UNSIGNED_LIMB)acc;
|
||||
carry64 = acc >> LIMB_BITS;
|
||||
}
|
||||
// Merge carry from reduction with the overflow from step 1.
|
||||
// sum64 ≤ (2^32-1) + (2^32-1) + 1 = 2^33-1, so the new overflow is 0 or 1.
|
||||
uint64_t s64 = (uint64_t)t[FP_LIMBS] + carry64 + (uint64_t)overflow;
|
||||
t[FP_LIMBS] = (UNSIGNED_LIMB)s64;
|
||||
overflow = (UNSIGNED_LIMB)(s64 >> LIMB_BITS);
|
||||
#else
|
||||
carry = 0;
|
||||
for (int j = 0; j < FP_LIMBS; j++) {
|
||||
UNSIGNED_LIMB hi, lo;
|
||||
@@ -800,6 +1330,7 @@ __host__ __device__ void fp_mont_mul_cios(Fp &c, const Fp &a, const Fp &b) {
|
||||
UNSIGNED_LIMB c2 = (s2 < s1) ? 1 : 0;
|
||||
t[FP_LIMBS] = s2;
|
||||
overflow = c1 + c2; // Track overflow for final reduction
|
||||
#endif
|
||||
|
||||
// Step 3: Shift right by one limb (divide by 2^LIMB_BITS)
|
||||
// t[0..n-1] = t[1..n], t[n] = overflow
|
||||
@@ -810,7 +1341,13 @@ __host__ __device__ void fp_mont_mul_cios(Fp &c, const Fp &a, const Fp &b) {
|
||||
}
|
||||
|
||||
// Copy result to output
|
||||
#ifdef __CUDA_ARCH__
|
||||
for (int i = 0; i < FP_LIMBS; i++) {
|
||||
c.limb[i] = t[i];
|
||||
}
|
||||
#else
|
||||
memcpy(&c.limb[0], t, FP_LIMBS * sizeof(UNSIGNED_LIMB));
|
||||
#endif
|
||||
|
||||
// Final reduction: if result >= p or there's overflow, subtract p
|
||||
if (t[FP_LIMBS] != 0 || fp_cmp(c, p) != ComparisonType::Less) {
|
||||
@@ -829,6 +1366,19 @@ __host__ __device__ void fp_mont_mul(Fp &c, const Fp &a, const Fp &b) {
|
||||
fp_mont_mul_cios(c, a, b);
|
||||
}
|
||||
|
||||
// Montgomery squaring: c = (a^2 * R_INV) mod p
|
||||
// Input and output in Montgomery form.
|
||||
// On device: uses fp_mont_sqr_mad32 (triangular MAD chain, ~30-40% fewer
|
||||
// multiplications than fp_mont_mul(c, a, a)).
|
||||
// On host: delegates to fp_mont_mul_cios(c, a, a).
|
||||
__host__ __device__ void fp_mont_sqr(Fp &c, const Fp &a) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
fp_mont_sqr_mad32(c, a);
|
||||
#else
|
||||
fp_mont_mul_cios(c, a, a);
|
||||
#endif
|
||||
}
|
||||
|
||||
// CONVERSION: Convert from normal form to Montgomery form
|
||||
// Input a is in normal form, output c is in Montgomery form
|
||||
// Uses CIOS: c = a * R^2 * R^-1 mod p = a * R mod p
|
||||
@@ -900,9 +1450,9 @@ __host__ __device__ static void fp_pow_internal_mont(Fp &result,
|
||||
int start_bit = (limb_idx == msb_idx) ? bit_pos : LIMB_BITS - 1;
|
||||
|
||||
for (int bit = start_bit; bit >= 0; bit--) {
|
||||
// Square result
|
||||
// Square result using the optimised squaring path
|
||||
Fp temp;
|
||||
fp_mont_mul(temp, result, result);
|
||||
fp_mont_sqr(temp, result);
|
||||
fp_copy(result, temp);
|
||||
|
||||
// Multiply by base if current bit is set
|
||||
@@ -1081,7 +1631,7 @@ __host__ __device__ bool fp_sqrt(Fp &c, const Fp &a) {
|
||||
// Verify: c^2 should equal a (mod p) - using Montgomery form
|
||||
Fp c_mont, c_squared_mont;
|
||||
fp_to_montgomery(c_mont, c);
|
||||
fp_mont_mul(c_squared_mont, c_mont, c_mont);
|
||||
fp_mont_sqr(c_squared_mont, c_mont);
|
||||
|
||||
if (fp_cmp(c_squared_mont, a_mont) == ComparisonType::Equal) {
|
||||
return true;
|
||||
@@ -1091,7 +1641,7 @@ __host__ __device__ bool fp_sqrt(Fp &c, const Fp &a) {
|
||||
Fp alt_c, alt_c_mont;
|
||||
fp_sub(alt_c, p, c);
|
||||
fp_to_montgomery(alt_c_mont, alt_c);
|
||||
fp_mont_mul(c_squared_mont, alt_c_mont, alt_c_mont);
|
||||
fp_mont_sqr(c_squared_mont, alt_c_mont);
|
||||
if (fp_cmp(c_squared_mont, a_mont) == ComparisonType::Equal) {
|
||||
fp_copy(c, alt_c);
|
||||
return true;
|
||||
@@ -1103,7 +1653,7 @@ __host__ __device__ bool fp_sqrt(Fp &c, const Fp &a) {
|
||||
fp_sub(reduced_c, c, p);
|
||||
fp_copy(c, reduced_c);
|
||||
fp_to_montgomery(reduced_c_mont, reduced_c);
|
||||
fp_mont_mul(c_squared_mont, reduced_c_mont, reduced_c_mont);
|
||||
fp_mont_sqr(c_squared_mont, reduced_c_mont);
|
||||
if (fp_cmp(c_squared_mont, a_mont) == ComparisonType::Equal) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -74,6 +74,18 @@ __host__ __device__ void fp2_sub(Fp2 &c, const Fp2 &a, const Fp2 &b) {
|
||||
fp_sub(c.c1, a.c1, b.c1);
|
||||
}
|
||||
|
||||
// Lazy add/sub for Fp2: component-wise fp_add_lazy / fp_sub_lazy.
|
||||
// Outputs each component in [0, 2p); safe as input to fp2_mont_mul.
|
||||
__host__ __device__ void fp2_add_lazy(Fp2 &c, const Fp2 &a, const Fp2 &b) {
|
||||
fp_add_lazy(c.c0, a.c0, b.c0);
|
||||
fp_add_lazy(c.c1, a.c1, b.c1);
|
||||
}
|
||||
|
||||
// Lazy Fp2 subtraction: applies fp_sub_lazy independently to the c0 and c1
// components (c = a - b, component-wise).
__host__ __device__ void fp2_sub_lazy(Fp2 &c, const Fp2 &a, const Fp2 &b) {
  // The two components are independent of each other.
  fp_sub_lazy(c.c1, a.c1, b.c1);
  fp_sub_lazy(c.c0, a.c0, b.c0);
}
|
||||
|
||||
// Small-constant multiplication via addition chains.
|
||||
// These replace full Fp2 Montgomery multiplications by 2, 3, 4, 8 with
|
||||
// modular additions on each component.
|
||||
@@ -158,8 +170,10 @@ __host__ __device__ void fp2_mont_mul(Fp2 &c, const Fp2 &a, const Fp2 &b) {
|
||||
|
||||
fp_mont_mul(t0, a.c0, b.c0);
|
||||
fp_mont_mul(t1, a.c1, b.c1);
|
||||
fp_add(t2, a.c0, a.c1);
|
||||
fp_add(t3, b.c0, b.c1);
|
||||
// Lazy add: skip the conditional subtraction since t2, t3 feed fp_mont_mul
|
||||
// which accepts inputs in [0, 2p). Saves 2 conditional subtractions.
|
||||
fp_add_lazy(t2, a.c0, a.c1);
|
||||
fp_add_lazy(t3, b.c0, b.c1);
|
||||
fp_mont_mul(t2, t2, t3);
|
||||
fp_sub(c.c0, t0, t1);
|
||||
fp_sub(c.c1, t2, t0);
|
||||
@@ -176,8 +190,10 @@ __host__ __device__ void fp2_mont_mul(Fp2 &c, const Fp2 &a, const Fp2 &b) {
|
||||
__host__ __device__ void fp2_mont_square(Fp2 &c, const Fp2 &a) {
|
||||
Fp sum, diff, c0_tmp, prod;
|
||||
|
||||
fp_add(sum, a.c0, a.c1);
|
||||
fp_sub(diff, a.c0, a.c1);
|
||||
// Lazy add/sub: sum and diff feed fp_mont_mul (accepts [0, 2p)).
|
||||
// Saves 2 conditional subtractions vs canonical fp_add + fp_sub.
|
||||
fp_add_lazy(sum, a.c0, a.c1);
|
||||
fp_sub_lazy(diff, a.c0, a.c1);
|
||||
fp_mont_mul(c0_tmp, sum, diff);
|
||||
|
||||
fp_mont_mul(prod, a.c0, a.c1);
|
||||
@@ -242,7 +258,7 @@ __host__ __device__ void fp_inv_fermat(Fp &result, const Fp &a) {
|
||||
if (found_first_bit || ((p_minus_2.limb[limb] >> bit) & 1)) {
|
||||
found_first_bit = true;
|
||||
Fp temp;
|
||||
fp_mont_mul(temp, result_mont, result_mont);
|
||||
fp_mont_sqr(temp, result_mont);
|
||||
fp_copy(result_mont, temp);
|
||||
|
||||
if ((p_minus_2.limb[limb] >> bit) & 1) {
|
||||
@@ -267,8 +283,8 @@ __host__ __device__ void fp2_inv(Fp2 &c, const Fp2 &a) {
|
||||
|
||||
// Compute norm = a0^2 + a1^2 in Montgomery form
|
||||
Fp t0, t1, norm_m;
|
||||
fp_mont_mul(t0, a0_m, a0_m);
|
||||
fp_mont_mul(t1, a1_m, a1_m);
|
||||
fp_mont_sqr(t0, a0_m);
|
||||
fp_mont_sqr(t1, a1_m);
|
||||
fp_add(norm_m, t0, t1);
|
||||
|
||||
// Convert norm to normal form for inversion, then back to Montgomery
|
||||
@@ -295,8 +311,8 @@ __host__ __device__ void fp2_inv(Fp2 &c, const Fp2 &a) {
|
||||
__host__ __device__ void fp2_mont_inv(Fp2 &c, const Fp2 &a) {
|
||||
Fp t0, t1, norm, norm_inv;
|
||||
|
||||
fp_mont_mul(t0, a.c0, a.c0);
|
||||
fp_mont_mul(t1, a.c1, a.c1);
|
||||
fp_mont_sqr(t0, a.c0);
|
||||
fp_mont_sqr(t1, a.c1);
|
||||
fp_add(norm, t0, t1);
|
||||
fp_mont_inv(norm_inv, norm);
|
||||
fp_mont_mul(c.c0, a.c0, norm_inv);
|
||||
|
||||
174
backends/zk-cuda-backend/cuda/src/primitives/xyzz.cu
Normal file
@@ -0,0 +1,174 @@
|
||||
#include "fp.h"
|
||||
#include "fp2.h"
|
||||
#include "xyzz.h"
|
||||
|
||||
// Marks a G1 XYZZ point as the point at infinity by clearing ZZ and ZZZ.
// X and Y are left untouched; xyzz_is_infinity only inspects ZZ.
__host__ __device__ void xyzz_infinity(G1XYZZ &p) {
  fp_zero(p.ZZZ);
  fp_zero(p.ZZ);
}
|
||||
|
||||
// Returns true when the G1 XYZZ point is the point at infinity.
// The encoding used in this file is ZZ == 0; ZZZ is not checked.
__host__ __device__ bool xyzz_is_infinity(const G1XYZZ &p) {
  const bool zz_is_zero = fp_is_zero(p.ZZ);
  return zz_is_zero;
}
|
||||
|
||||
// Lifts an affine G1 point into XYZZ coordinates: the coordinates are copied
// verbatim and both scale factors are set to one in Montgomery form.
// NOTE(review): affine.infinity is not consulted here; callers in this file
// filter out infinite inputs before calling.
__host__ __device__ void xyzz_from_affine(G1XYZZ &xyzz,
                                          const G1Affine &affine) {
  // ZZ = ZZZ = 1 (Montgomery form).
  fp_one_montgomery(xyzz.ZZ);
  fp_one_montgomery(xyzz.ZZZ);

  // Coordinates carry over unchanged.
  xyzz.Y = affine.y;
  xyzz.X = affine.x;
}
|
||||
|
||||
// Mixed addition on G1: acc <- acc + p, with acc in XYZZ coordinates and p
// affine. All field values are handled through the Montgomery-form helpers.
//
// Cases, in the order they are tested:
//   1. p is flagged infinite       -> acc unchanged
//   2. acc is infinite (ZZ == 0)   -> acc becomes p lifted to XYZZ
//   3. x-difference is zero        -> doubling of p if the y-difference is
//                                     also zero, otherwise acc := infinity
//   4. otherwise                   -> general mixed addition
__host__ __device__ void xyzz_mixed_add(G1XYZZ &acc, const G1Affine &p) {
  if (p.infinity) {
    return;
  }

  if (xyzz_is_infinity(acc)) {
    xyzz_from_affine(acc, p);
    return;
  }

  // Scale p to acc's representation: U2 = x2*ZZ1, S2 = y2*ZZZ1.
  Fp U2, S2;
  fp_mont_mul(U2, p.x, acc.ZZ);
  fp_mont_mul(S2, p.y, acc.ZZZ);

  Fp dx = U2 - acc.X; // P = U2 - X1
  Fp dy = S2 - acc.Y; // R = S2 - Y1

  if (fp_is_zero(dx)) {
    if (!fp_is_zero(dy)) {
      // Equal x but different y: the result is set to infinity.
      xyzz_infinity(acc);
      return;
    }

    // Both differences vanish: double the affine point p.
    //   U    = 2*y2
    //   ZZ3  = V = U^2
    //   ZZZ3 = W = V*U
    //   S    = x2*V
    //   M    = 3*x2^2
    //   X3   = M^2 - 2*S
    //   Y3   = M*(S - X3) - W*y2
    Fp two_y, S, M;
    fp_double(two_y, p.y);
    fp_mont_sqr(acc.ZZ, two_y);
    fp_mont_mul(acc.ZZZ, acc.ZZ, two_y);

    fp_mont_sqr(M, p.x);
    fp_mul3(M, M);
    fp_mont_mul(S, p.x, acc.ZZ);

    fp_mont_sqr(acc.X, M);
    acc.X = acc.X - S - S;

    Fp slope_term = S - acc.X;
    fp_mont_mul(slope_term, slope_term, M);
    fp_mont_mul(acc.Y, acc.ZZZ, p.y);
    acc.Y = slope_term - acc.Y;
    return;
  }

  // General addition (dx != 0). Together with the two scalings above this
  // costs 8 multiplications and 2 squarings.
  Fp PP, PPP, Q;
  fp_mont_sqr(PP, dx);       // PP  = P^2
  fp_mont_mul(PPP, dx, PP);  // PPP = P^3
  fp_mont_mul(Q, acc.X, PP); // Q   = X1*PP

  Fp X3;
  fp_mont_sqr(X3, dy);
  X3 = X3 - PPP - Q - Q; // X3 = R^2 - PPP - 2*Q

  Fp y_lhs = Q - X3;
  fp_mont_mul(y_lhs, y_lhs, dy);  // R*(Q - X3)
  fp_mont_mul(acc.Y, acc.Y, PPP); // Y1*PPP
  acc.Y = y_lhs - acc.Y;          // Y3 = R*(Q - X3) - Y1*PPP
  acc.X = X3;

  // Scale factors: ZZ3 = ZZ1*PP, ZZZ3 = ZZZ1*PPP.
  fp_mont_mul(acc.ZZ, acc.ZZ, PP);
  fp_mont_mul(acc.ZZZ, acc.ZZZ, PPP);
}
|
||||
|
||||
// Converts a G1 XYZZ point to projective coordinates:
//   X' = X*ZZZ,  Y' = Y*ZZ,  Z' = ZZ*ZZZ
// so that X'/Z' = X/ZZ and Y'/Z' = Y/ZZZ.
__host__ __device__ void xyzz_to_projective(G1Projective &proj,
                                            const G1XYZZ &xyzz) {
  fp_mont_mul(proj.Z, xyzz.ZZ, xyzz.ZZZ);
  fp_mont_mul(proj.Y, xyzz.Y, xyzz.ZZ);
  fp_mont_mul(proj.X, xyzz.X, xyzz.ZZZ);
}
|
||||
|
||||
// Marks a G2 XYZZ point as the point at infinity by clearing ZZ and ZZZ.
// X and Y are left untouched; xyzz_is_infinity only inspects ZZ.
__host__ __device__ void xyzz_infinity(G2XYZZ &p) {
  fp2_zero(p.ZZZ);
  fp2_zero(p.ZZ);
}
|
||||
|
||||
// Returns true when the G2 XYZZ point is the point at infinity.
// The encoding used in this file is ZZ == 0; ZZZ is not checked.
__host__ __device__ bool xyzz_is_infinity(const G2XYZZ &p) {
  const bool zz_is_zero = fp2_is_zero(p.ZZ);
  return zz_is_zero;
}
|
||||
|
||||
// Lifts an affine G2 point into XYZZ coordinates: the coordinates are copied
// verbatim and both scale factors are set to the Fp2 element (1_mont, 0).
// NOTE(review): affine.infinity is not consulted here; callers in this file
// filter out infinite inputs before calling.
__host__ __device__ void xyzz_from_affine(G2XYZZ &xyzz,
                                          const G2Affine &affine) {
  // Imaginary parts of both scale factors are zero ...
  fp_zero(xyzz.ZZ.c1);
  fp_zero(xyzz.ZZZ.c1);
  // ... and the real parts are one in Montgomery form.
  fp_one_montgomery(xyzz.ZZ.c0);
  fp_one_montgomery(xyzz.ZZZ.c0);

  // Coordinates carry over unchanged.
  xyzz.Y = affine.y;
  xyzz.X = affine.x;
}
|
||||
|
||||
// Mixed addition on G2: acc <- acc + p, with acc in XYZZ coordinates over Fp2
// and p affine. Mirrors the G1 routine, using the Fp2 Montgomery helpers.
//
// Cases, in the order they are tested:
//   1. p is flagged infinite       -> acc unchanged
//   2. acc is infinite (ZZ == 0)   -> acc becomes p lifted to XYZZ
//   3. x-difference is zero        -> doubling of p if the y-difference is
//                                     also zero, otherwise acc := infinity
//   4. otherwise                   -> general mixed addition
__host__ __device__ void xyzz_mixed_add(G2XYZZ &acc, const G2Affine &p) {
  if (p.infinity) {
    return;
  }

  if (xyzz_is_infinity(acc)) {
    xyzz_from_affine(acc, p);
    return;
  }

  // Scale p to acc's representation: U2 = x2*ZZ1, S2 = y2*ZZZ1.
  Fp2 U2, S2;
  fp2_mont_mul(U2, p.x, acc.ZZ);
  fp2_mont_mul(S2, p.y, acc.ZZZ);

  Fp2 dx = U2 - acc.X; // P = U2 - X1
  Fp2 dy = S2 - acc.Y; // R = S2 - Y1

  if (fp2_is_zero(dx)) {
    if (!fp2_is_zero(dy)) {
      // Equal x but different y: the result is set to infinity.
      xyzz_infinity(acc);
      return;
    }

    // Both differences vanish: double the affine point p.
    //   U    = 2*y2
    //   ZZ3  = V = U^2
    //   ZZZ3 = W = V*U
    //   S    = x2*V
    //   M    = 3*x2^2
    //   X3   = M^2 - 2*S
    //   Y3   = M*(S - X3) - W*y2
    Fp2 two_y, S, M;
    fp2_double(two_y, p.y);
    fp2_mont_square(acc.ZZ, two_y);
    fp2_mont_mul(acc.ZZZ, acc.ZZ, two_y);

    fp2_mont_square(M, p.x);
    fp2_mul3(M, M);
    fp2_mont_mul(S, p.x, acc.ZZ);

    fp2_mont_square(acc.X, M);
    acc.X = acc.X - S - S;

    Fp2 slope_term = S - acc.X;
    fp2_mont_mul(slope_term, slope_term, M);
    fp2_mont_mul(acc.Y, acc.ZZZ, p.y);
    acc.Y = slope_term - acc.Y;
    return;
  }

  // General addition (dx != 0). Together with the two scalings above this
  // costs 8 Fp2 multiplications and 2 Fp2 squarings.
  Fp2 PP, PPP, Q;
  fp2_mont_square(PP, dx);    // PP  = P^2
  fp2_mont_mul(PPP, dx, PP);  // PPP = P^3
  fp2_mont_mul(Q, acc.X, PP); // Q   = X1*PP

  Fp2 X3;
  fp2_mont_square(X3, dy);
  X3 = X3 - PPP - Q - Q; // X3 = R^2 - PPP - 2*Q

  Fp2 y_lhs = Q - X3;
  fp2_mont_mul(y_lhs, y_lhs, dy);  // R*(Q - X3)
  fp2_mont_mul(acc.Y, acc.Y, PPP); // Y1*PPP
  acc.Y = y_lhs - acc.Y;           // Y3 = R*(Q - X3) - Y1*PPP
  acc.X = X3;

  // Scale factors: ZZ3 = ZZ1*PP, ZZZ3 = ZZZ1*PPP.
  fp2_mont_mul(acc.ZZ, acc.ZZ, PP);
  fp2_mont_mul(acc.ZZZ, acc.ZZZ, PPP);
}
|
||||
|
||||
// Converts a G2 XYZZ point to projective coordinates:
//   X' = X*ZZZ,  Y' = Y*ZZ,  Z' = ZZ*ZZZ
// so that X'/Z' = X/ZZ and Y'/Z' = Y/ZZZ.
__host__ __device__ void xyzz_to_projective(G2Projective &proj,
                                            const G2XYZZ &xyzz) {
  fp2_mont_mul(proj.Z, xyzz.ZZ, xyzz.ZZZ);
  fp2_mont_mul(proj.Y, xyzz.Y, xyzz.ZZ);
  fp2_mont_mul(proj.X, xyzz.X, xyzz.ZZZ);
}
|
||||
@@ -11,7 +11,7 @@
|
||||
#include <stddef.h>
|
||||
#include <cstring>
|
||||
|
||||
#include "helper_profile.cuh"
|
||||
#include "../../tfhe-cuda-backend/cuda/src/utils/helper_profile.cuh"
|
||||
|
||||
// C++ helper functions (not exported, used internally)
|
||||
// These can call template functions since they have C++ linkage
|
||||
|
||||
@@ -16,14 +16,19 @@ tfhe = { path = "../../tfhe", features = ["hpu", "hpu-debug"] }
|
||||
|
||||
ipc-channel = "0.18.3"
|
||||
|
||||
strum = { version = "0.26.2", features = ["derive"] }
|
||||
strum_macros = "0.26.2"
|
||||
bytemuck = { workspace = true }
|
||||
|
||||
clap = { version = "4.4.4", features = ["derive"] }
|
||||
clap-num = "*"
|
||||
anyhow = "1.0.82"
|
||||
tracing = "0.1.40"
|
||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] }
|
||||
serde_json = "1.0"
|
||||
rand = "0.8.5"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
bitflags = "2.6.0"
|
||||
|
||||
[[bin]]
|
||||
name = "hpu_mockup"
|
||||
|
||||
@@ -1 +1 @@
|
||||
nightly-2026-04-22
|
||||
nightly-2026-01-14
|
||||
|
||||
@@ -25,7 +25,7 @@ use tfhe::{
|
||||
CompressedKVStore, CompressedPublicKey, CompressedServerKey,
|
||||
CompressedSquashedNoiseCiphertextList, CompressedSquashedNoiseCiphertextListBuilder, FheBool,
|
||||
FheInt8, FheUint32, FheUint64, FheUint8, ReRandomizationContext, ReRandomizationMode,
|
||||
ReRandomizationSupport, Seed, ServerKey, SquashedNoiseFheBool, SquashedNoiseFheInt,
|
||||
ReRandomizationSupport, ServerKey, SquashedNoiseFheBool, SquashedNoiseFheInt,
|
||||
SquashedNoiseFheUint,
|
||||
};
|
||||
use tfhe_backward_compat_data::load::{
|
||||
@@ -748,22 +748,6 @@ fn test_hl_key_features(
|
||||
}
|
||||
}
|
||||
|
||||
// OPRF: check that oblivious pseudo-random generation works with the dedicated key.
|
||||
// The decrypted values only need to be within range; the seed is deterministic but we
|
||||
// don't compare to specific bit values (those are validated in the unit tests).
|
||||
if server_key.supports_oprf() {
|
||||
let seed = Seed(42u128);
|
||||
|
||||
let rand_bool = FheBool::generate_oblivious_pseudo_random(seed);
|
||||
let _: bool = rand_bool.decrypt(client_key);
|
||||
|
||||
let rand_uint = FheUint8::generate_oblivious_pseudo_random(seed);
|
||||
let _: u8 = rand_uint.decrypt(client_key);
|
||||
|
||||
let rand_int = FheInt8::generate_oblivious_pseudo_random(seed);
|
||||
let _: i8 = rand_int.decrypt(client_key);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -54,7 +54,6 @@ internal-keycache = ["tfhe/internal-keycache"]
|
||||
avx512 = ["tfhe/avx512"]
|
||||
pbs-stats = ["tfhe/pbs-stats"]
|
||||
zk-pok = ["tfhe/zk-pok", "dep:tfhe-zk-pok"]
|
||||
experimental = ["tfhe/experimental"]
|
||||
|
||||
[[bench]]
|
||||
name = "boolean"
|
||||
@@ -231,9 +230,3 @@ required-features = ["integer", "internal-keycache"]
|
||||
name = "wasm_benchmarks_parser"
|
||||
path = "src/bin/wasm_benchmarks_parser.rs"
|
||||
required-features = ["shortint", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "cm-bench"
|
||||
path = "benches/core_crypto/cm_bench.rs"
|
||||
harness = false
|
||||
required-features = ["experimental"]
|
||||
|
||||
@@ -1,211 +0,0 @@
|
||||
use cm_fft64::programmable_bootstrap_cm_lwe_ciphertext;
|
||||
use criterion::{black_box, criterion_main, Criterion};
|
||||
use tfhe::core_crypto::experimental::prelude::cm_lwe_keyswitch_key_generation::allocate_and_generate_new_cm_lwe_keyswitch_key;
|
||||
use tfhe::core_crypto::experimental::prelude::cm_modulus_switch_noise_reduction::improve_lwe_ciphertext_modulus_switch_noise_for_binary_key_cm;
|
||||
use tfhe::core_crypto::experimental::prelude::*;
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
|
||||
fn cm_bench(c: &mut Criterion) {
|
||||
let bench_cm_params_2_minus_64: Vec<CmApParams> = vec![
|
||||
CM_PARAM_2_2_MINUS_64,
|
||||
CM_PARAM_4_2_MINUS_64,
|
||||
CM_PARAM_6_2_MINUS_64,
|
||||
CM_PARAM_8_2_MINUS_64,
|
||||
CM_PARAM_10_2_MINUS_64,
|
||||
CM_PARAM_2_4_MINUS_64,
|
||||
CM_PARAM_4_4_MINUS_64,
|
||||
CM_PARAM_6_4_MINUS_64,
|
||||
CM_PARAM_8_4_MINUS_64,
|
||||
CM_PARAM_10_4_MINUS_64,
|
||||
CM_PARAM_2_6_MINUS_64,
|
||||
CM_PARAM_4_6_MINUS_64,
|
||||
CM_PARAM_6_6_MINUS_64,
|
||||
CM_PARAM_8_6_MINUS_64,
|
||||
CM_PARAM_10_6_MINUS_64,
|
||||
CM_PARAM_2_8_MINUS_64,
|
||||
CM_PARAM_4_8_MINUS_64,
|
||||
CM_PARAM_6_8_MINUS_64,
|
||||
CM_PARAM_8_8_MINUS_64,
|
||||
CM_PARAM_10_8_MINUS_64,
|
||||
];
|
||||
|
||||
cm_bench_for_pfail(c, &bench_cm_params_2_minus_64, "2^-64");
|
||||
|
||||
let bench_cm_params_2_minus_128: Vec<CmApParams> = vec![
|
||||
CM_PARAM_2_2_MINUS_128,
|
||||
CM_PARAM_4_2_MINUS_128,
|
||||
CM_PARAM_6_2_MINUS_128,
|
||||
CM_PARAM_8_2_MINUS_128,
|
||||
CM_PARAM_10_2_MINUS_128,
|
||||
CM_PARAM_2_4_MINUS_128,
|
||||
CM_PARAM_4_4_MINUS_128,
|
||||
CM_PARAM_6_4_MINUS_128,
|
||||
CM_PARAM_8_4_MINUS_128,
|
||||
CM_PARAM_10_4_MINUS_128,
|
||||
CM_PARAM_2_6_MINUS_128,
|
||||
CM_PARAM_4_6_MINUS_128,
|
||||
CM_PARAM_6_6_MINUS_128,
|
||||
CM_PARAM_8_6_MINUS_128,
|
||||
CM_PARAM_10_6_MINUS_128,
|
||||
CM_PARAM_2_8_MINUS_128,
|
||||
CM_PARAM_4_8_MINUS_128,
|
||||
CM_PARAM_6_8_MINUS_128,
|
||||
CM_PARAM_8_8_MINUS_128,
|
||||
CM_PARAM_10_8_MINUS_128,
|
||||
];
|
||||
|
||||
cm_bench_for_pfail(c, &bench_cm_params_2_minus_128, "2^-128");
|
||||
}
|
||||
|
||||
fn cm_bench_for_pfail(c: &mut Criterion, bench_cm_params: &[CmApParams], p_fail: &str) {
|
||||
let mut bench_group = c.benchmark_group("Common Mask Benchmarks");
|
||||
bench_group.sample_size(10);
|
||||
|
||||
// Create the PRNG
|
||||
let mut seeder = new_seeder();
|
||||
let seeder = seeder.as_mut();
|
||||
let mut encryption_generator =
|
||||
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
|
||||
let mut secret_generator = SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
|
||||
|
||||
for cm_param in bench_cm_params {
|
||||
let cm_dimension = cm_param.cm_dimension;
|
||||
|
||||
let ciphertext_modulus = cm_param.ciphertext_modulus;
|
||||
|
||||
let bench_name = format!(
|
||||
"KS-CM-PBS_p={}_w={}_pfail={p_fail}",
|
||||
cm_param.precision, cm_dimension.0,
|
||||
);
|
||||
|
||||
let lwe_noise_distribution = cm_param.lwe_noise_distribution;
|
||||
|
||||
assert_eq!(
|
||||
cm_param.ciphertext_modulus,
|
||||
CiphertextModulus::<u64>::new_native()
|
||||
);
|
||||
let encoding_with_padding = 1 << 63;
|
||||
let glwe_dimension = cm_param.glwe_dimension;
|
||||
let polynomial_size = cm_param.polynomial_size;
|
||||
|
||||
let msg_modulus = 1u64 << cm_param.precision;
|
||||
let delta = encoding_with_padding / msg_modulus;
|
||||
|
||||
let f = |x| x;
|
||||
|
||||
let accumulator = cm_generate_programmable_bootstrap_glwe_lut(
|
||||
polynomial_size,
|
||||
glwe_dimension,
|
||||
cm_dimension,
|
||||
msg_modulus.cast_into(),
|
||||
cm_param.ciphertext_modulus,
|
||||
delta,
|
||||
f,
|
||||
);
|
||||
|
||||
let CmBootstrapKeys {
|
||||
small_lwe_sk,
|
||||
big_lwe_sk,
|
||||
bsk,
|
||||
fbsk,
|
||||
} = generate_cm_pbs_keys(cm_param, &mut encryption_generator, &mut secret_generator);
|
||||
drop(bsk);
|
||||
|
||||
let cm_lwe_keyswitch_key = allocate_and_generate_new_cm_lwe_keyswitch_key(
|
||||
&big_lwe_sk,
|
||||
&small_lwe_sk,
|
||||
cm_dimension,
|
||||
cm_param.base_log_ks,
|
||||
cm_param.level_ks,
|
||||
lwe_noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let plaintexts = PlaintextList::from_container(vec![0_u64; cm_dimension.0]);
|
||||
|
||||
let ct_in = allocate_and_encrypt_new_cm_lwe_ciphertext(
|
||||
&big_lwe_sk,
|
||||
&plaintexts,
|
||||
lwe_noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let mut ct_after_ks = CmLweCiphertext::new(
|
||||
0u64,
|
||||
cm_lwe_keyswitch_key.output_lwe_dimension(),
|
||||
cm_dimension,
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
let mut ct_out = CmLweCiphertext::new(
|
||||
0u64,
|
||||
fbsk.output_lwe_dimension(),
|
||||
cm_dimension,
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
let max_nb_zeros_n = cm_param.max_nb_zeros_n.ceil() as usize;
|
||||
|
||||
let mut encryptions_of_zero = CmLweCiphertextList::new(
|
||||
0,
|
||||
cm_param.lwe_dimension,
|
||||
cm_dimension,
|
||||
CmLweCiphertextCount(max_nb_zeros_n),
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
let plaintext_list = PlaintextList::new(0, PlaintextCount(cm_dimension.0));
|
||||
|
||||
let plaintext_lists: Vec<_> = (0..max_nb_zeros_n)
|
||||
.map(|_| plaintext_list.clone())
|
||||
.collect();
|
||||
|
||||
encrypt_cm_lwe_ciphertext_list(
|
||||
&small_lwe_sk,
|
||||
&mut encryptions_of_zero,
|
||||
&plaintext_lists,
|
||||
lwe_noise_distribution,
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let log_modulus = polynomial_size.to_blind_rotation_input_modulus_log();
|
||||
|
||||
{
|
||||
bench_group.bench_function(&bench_name, |b| {
|
||||
b.iter(|| {
|
||||
cm_keyswitch_lwe_ciphertext(&cm_lwe_keyswitch_key, &ct_in, &mut ct_after_ks);
|
||||
|
||||
improve_lwe_ciphertext_modulus_switch_noise_for_binary_key_cm(
|
||||
&mut ct_after_ks,
|
||||
&encryptions_of_zero,
|
||||
cm_param.r_sigma_factor_n,
|
||||
cm_param.ms_bound_n,
|
||||
cm_param.ms_input_variance_n,
|
||||
log_modulus,
|
||||
);
|
||||
|
||||
programmable_bootstrap_cm_lwe_ciphertext(
|
||||
&ct_after_ks,
|
||||
&mut ct_out,
|
||||
&accumulator.as_view(),
|
||||
&fbsk,
|
||||
);
|
||||
|
||||
black_box(&mut ct_out);
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
bench_group.finish();
|
||||
}
|
||||
|
||||
pub fn cm_group() {
|
||||
let mut criterion: Criterion<_> = (Criterion::default()).configure_from_args();
|
||||
|
||||
cm_bench(&mut criterion);
|
||||
}
|
||||
|
||||
criterion_main!(cm_group);
|
||||
@@ -8,7 +8,6 @@ use rayon::prelude::*;
|
||||
#[cfg(any(feature = "gpu", feature = "hpu"))]
|
||||
use std::cmp::max;
|
||||
use tfhe::integer::keycache::KEY_CACHE;
|
||||
use tfhe::integer::oprf::{OprfPrivateKey, OprfServerKey};
|
||||
use tfhe::integer::IntegerKeyKind;
|
||||
use tfhe::keycache::NamedParam;
|
||||
#[cfg(any(feature = "gpu", feature = "hpu"))]
|
||||
@@ -36,42 +35,32 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
format!("{bench_name}_bounded::{param_name}::{bit_size}_bits");
|
||||
|
||||
bench_group.bench_function(&bench_id_oprf, |b| {
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let oprf_sk = OprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
&sks,
|
||||
),
|
||||
);
|
||||
_ = black_box(sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
));
|
||||
})
|
||||
});
|
||||
|
||||
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let oprf_sk = OprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&sks,
|
||||
),
|
||||
);
|
||||
})
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let oprf_sk = OprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
bench_id_oprf = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
bench_id_oprf_bounded =
|
||||
@@ -82,11 +71,10 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
{
|
||||
// Execute the operation once to know its cost.
|
||||
reset_pbs_count();
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&sks,
|
||||
);
|
||||
let pbs_count = max(get_pbs_count(), 1);
|
||||
throughput_num_threads(num_block, pbs_count)
|
||||
@@ -97,13 +85,11 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
let setup = |_batch_size: usize| ();
|
||||
let run = |_: &mut (), batch_size: usize| {
|
||||
(0..batch_size).into_par_iter().for_each(|_| {
|
||||
oprf_sk
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&sks,
|
||||
);
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
);
|
||||
});
|
||||
};
|
||||
find_optimal_batch(run, setup) as u64
|
||||
@@ -114,10 +100,9 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
bench_group.bench_function(&bench_id_oprf, |b| {
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|_| {
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
&sks,
|
||||
);
|
||||
})
|
||||
})
|
||||
@@ -126,11 +111,10 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|_| {
|
||||
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&sks,
|
||||
);
|
||||
})
|
||||
})
|
||||
@@ -164,8 +148,6 @@ pub mod cuda {
|
||||
use criterion::black_box;
|
||||
use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
|
||||
use tfhe::integer::gpu::server_key::CudaServerKey;
|
||||
use tfhe::integer::gpu::CudaOprfServerKey;
|
||||
use tfhe::integer::oprf::{CompressedOprfServerKey, OprfPrivateKey};
|
||||
use tfhe::GpuIndex;
|
||||
use tfhe_csprng::seeders::Seed;
|
||||
|
||||
@@ -195,18 +177,12 @@ pub mod cuda {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &streams);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let compressed_oprf_sk =
|
||||
CompressedOprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
let cuda_oprf_sk =
|
||||
CudaOprfServerKey::decompress_from_cpu(&compressed_oprf_sk, &streams);
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(
|
||||
cuda_oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
gpu_sks.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
&gpu_sks,
|
||||
&streams,
|
||||
),
|
||||
);
|
||||
@@ -217,20 +193,14 @@ pub mod cuda {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &streams);
|
||||
let oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let compressed_oprf_sk =
|
||||
CompressedOprfServerKey::new(&oprf_pk, &cks).unwrap();
|
||||
let cuda_oprf_sk =
|
||||
CudaOprfServerKey::decompress_from_cpu(&compressed_oprf_sk, &streams);
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(
|
||||
cuda_oprf_sk
|
||||
gpu_sks
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&gpu_sks,
|
||||
&streams,
|
||||
),
|
||||
);
|
||||
@@ -240,25 +210,13 @@ pub mod cuda {
|
||||
BenchmarkType::Throughput => {
|
||||
let (cks, cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks_vec = cuda_local_keys(&cks);
|
||||
let cpu_oprf_pk = OprfPrivateKey::new(&cks);
|
||||
let cpu_oprf_sk = OprfServerKey::new(&cpu_oprf_pk, &cks).unwrap();
|
||||
let compressed_oprf_sk =
|
||||
CompressedOprfServerKey::new(&cpu_oprf_pk, &cks).unwrap();
|
||||
// One CudaOprfServerKey per GPU, matching `gpu_sks_vec`.
|
||||
let cuda_oprf_sks_vec: Vec<CudaOprfServerKey> = (0..get_number_of_gpus())
|
||||
.map(|gpu_index| {
|
||||
let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
CudaOprfServerKey::decompress_from_cpu(&compressed_oprf_sk, &stream)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Execute the operation once to know its cost.
|
||||
reset_pbs_count();
|
||||
cpu_oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
cpu_sks.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&cpu_sks,
|
||||
);
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
@@ -274,11 +232,10 @@ pub mod cuda {
|
||||
(0..elements).into_par_iter().for_each(|i| {
|
||||
let gpu_index: u32 = i as u32 % get_number_of_gpus();
|
||||
let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
cuda_oprf_sks_vec[gpu_index as usize]
|
||||
gpu_sks_vec[gpu_index as usize]
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
Seed(0),
|
||||
num_block as u64,
|
||||
&gpu_sks_vec[gpu_index as usize],
|
||||
&stream,
|
||||
);
|
||||
})
|
||||
@@ -290,12 +247,11 @@ pub mod cuda {
|
||||
(0..elements).into_par_iter().for_each(|i| {
|
||||
let gpu_index: u32 = i as u32 % get_number_of_gpus();
|
||||
let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
cuda_oprf_sks_vec[gpu_index as usize]
|
||||
gpu_sks_vec[gpu_index as usize]
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&gpu_sks_vec[gpu_index as usize],
|
||||
&stream,
|
||||
);
|
||||
})
|
||||
|
||||
@@ -2,7 +2,6 @@ use benchmark::params_aliases::*;
|
||||
use criterion::{black_box, criterion_group, Criterion};
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::keycache::KEY_CACHE;
|
||||
use tfhe::shortint::oprf::{OprfPrivateKey, OprfServerKey};
|
||||
use tfhe_csprng::seeders::Seed;
|
||||
|
||||
fn oprf(c: &mut Criterion) {
|
||||
@@ -13,15 +12,11 @@ fn oprf(c: &mut Criterion) {
|
||||
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
|
||||
let keys = KEY_CACHE.get_from_param(param);
|
||||
let cks = keys.client_key();
|
||||
let sks = keys.server_key();
|
||||
|
||||
let oprf_pk = OprfPrivateKey::new(cks);
|
||||
let oprf_sk = OprfServerKey::new(&oprf_pk, cks).unwrap();
|
||||
|
||||
bench_group.bench_function(format!("2-bits-oprf::{}", param.name()), |b| {
|
||||
b.iter(|| {
|
||||
_ = black_box(oprf_sk.generate_oblivious_pseudo_random(Seed(0), 2, sks));
|
||||
_ = black_box(sks.generate_oblivious_pseudo_random(Seed(0), 2));
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
@@ -168,7 +168,7 @@ mod generic_tests {
|
||||
fn test_xof_seed_getters() {
|
||||
let seed_bytes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let bits = u128::from_le_bytes(seed_bytes);
|
||||
let dsep = *b"tfheksps";
|
||||
let dsep = [b't', b'f', b'h', b'e', b'k', b's', b'p', b's'];
|
||||
let seed = XofSeed::new_u128(bits, dsep);
|
||||
|
||||
let s = u128::from_le_bytes(seed.seed().try_into().unwrap());
|
||||
|
||||
@@ -342,28 +342,6 @@ impl<G: Curve> Proof<G> {
|
||||
None => ComputeLoad::Verify,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_le_bytes(&self) -> Vec<u8> {
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
let Self {
|
||||
c_hat,
|
||||
c_y,
|
||||
pi,
|
||||
compute_load_proof_fields,
|
||||
} = self;
|
||||
|
||||
bytes.extend_from_slice(c_hat.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(c_y.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(pi.to_le_bytes().as_ref());
|
||||
let (c_hat_t_bytes, c_h_bytes, pi_kzg_bytes) =
|
||||
ComputeLoadProofFields::to_le_bytes(compute_load_proof_fields);
|
||||
bytes.extend_from_slice(&c_hat_t_bytes);
|
||||
bytes.extend_from_slice(&c_h_bytes);
|
||||
bytes.extend_from_slice(&pi_kzg_bytes);
|
||||
|
||||
bytes
|
||||
}
|
||||
}
|
||||
|
||||
impl<G: Curve> ParameterSetConformant for Proof<G> {
|
||||
@@ -426,26 +404,6 @@ pub(crate) struct ComputeLoadProofFields<G: Curve> {
|
||||
pub(crate) pi_kzg: G::G1,
|
||||
}
|
||||
|
||||
impl<G: Curve> ComputeLoadProofFields<G> {
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn to_le_bytes(fields: &Option<Self>) -> (Box<[u8]>, Box<[u8]>, Box<[u8]>) {
|
||||
if let Some(ComputeLoadProofFields {
|
||||
c_hat_t,
|
||||
c_h,
|
||||
pi_kzg,
|
||||
}) = fields.as_ref()
|
||||
{
|
||||
(
|
||||
Box::from(G::G2::to_le_bytes(*c_hat_t).as_ref()),
|
||||
Box::from(G::G1::to_le_bytes(*c_h).as_ref()),
|
||||
Box::from(G::G1::to_le_bytes(*pi_kzg).as_ref()),
|
||||
)
|
||||
} else {
|
||||
(Box::from([]), Box::from([]), Box::from([]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type CompressedG2<G> = <<G as Curve>::G2 as Compressible>::Compressed;
|
||||
type CompressedG1<G> = <<G as Curve>::G1 as Compressible>::Compressed;
|
||||
|
||||
|
||||
@@ -440,44 +440,6 @@ impl<G: Curve> Proof<G> {
|
||||
pub fn hash_config(&self) -> PkeV2SupportedHashConfig {
|
||||
self.hash_config
|
||||
}
|
||||
|
||||
pub fn to_le_bytes(&self) -> Vec<u8> {
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
let Self {
|
||||
C_hat_e,
|
||||
C_e,
|
||||
C_r_tilde,
|
||||
C_R,
|
||||
C_hat_bin,
|
||||
C_y,
|
||||
C_h1,
|
||||
C_h2,
|
||||
C_hat_t,
|
||||
pi,
|
||||
pi_kzg,
|
||||
compute_load_proof_fields,
|
||||
hash_config: _,
|
||||
} = self;
|
||||
|
||||
bytes.extend_from_slice(C_hat_e.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(C_e.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(C_r_tilde.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(C_R.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(C_hat_bin.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(C_y.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(C_h1.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(C_h2.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(C_hat_t.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(pi.to_le_bytes().as_ref());
|
||||
bytes.extend_from_slice(pi_kzg.to_le_bytes().as_ref());
|
||||
let (C_hat_h3_bytes, C_hat_w_bytes) =
|
||||
ComputeLoadProofFields::to_le_bytes(compute_load_proof_fields);
|
||||
bytes.extend_from_slice(&C_hat_h3_bytes);
|
||||
bytes.extend_from_slice(&C_hat_w_bytes);
|
||||
|
||||
bytes
|
||||
}
|
||||
}
|
||||
|
||||
/// These fields can be pre-computed on the prover side in the faster Verifier scheme. If that's the
|
||||
|
||||
@@ -64,7 +64,7 @@ tfhe-fft = { version = "0.10.1", path = "../tfhe-fft", features = [
|
||||
"serde",
|
||||
"fft128",
|
||||
] }
|
||||
tfhe-ntt = { version = "0.7.1", path = "../tfhe-ntt" }
|
||||
tfhe-ntt = { version = "0.7.0", path = "../tfhe-ntt" }
|
||||
pulp = { workspace = true, features = ["default"] }
|
||||
tfhe-cuda-backend = { version = "0.14.0", path = "../backends/tfhe-cuda-backend", optional = true }
|
||||
aligned-vec = { workspace = true, features = ["default", "serde"] }
|
||||
@@ -99,7 +99,7 @@ serde-wasm-bindgen = { workspace = true, optional = true }
|
||||
getrandom = { workspace = true, optional = true }
|
||||
bytemuck = { workspace = true }
|
||||
|
||||
tfhe-hpu-backend = { version = "0.5", path = "../backends/tfhe-hpu-backend", optional = true }
|
||||
tfhe-hpu-backend = { version = "0.4", path = "../backends/tfhe-hpu-backend", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["avx512"]
|
||||
|
||||
@@ -8,23 +8,23 @@
|
||||
<rect x="0" y="40" width="300" height="520" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="520" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">Negation (-)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">77.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">71.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">9.08 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">8.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">Add / Sub (+,-)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">91.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">93.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">9.07 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">8.35 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">Mul (x)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">357 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">352 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">32.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">122 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="180.0">Equal / Not Equal (eq, ne)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="180.0">72.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="180.0">70.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">7.03 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="180.0">6.77 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="220.0">Comparisons (ge, gt, le, lt)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="220.0">89.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="220.0">87.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">10.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="220.0">6.81 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="260.0">Max / Min (max, min)</text>
|
||||
@@ -32,31 +32,31 @@
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="260.0">15.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="260.0">11.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="300.0">Bitwise operations (&, |, ^)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="300.0">19.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="300.0">19.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">1.99 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="300.0">2.95 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="340.0">Div / Rem (/, %)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="340.0">4.88 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="340.0">5.04 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">514 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="340.0">912 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="380.0">Left / Right Shifts (<<, >>)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="380.0">121 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="380.0">119 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">18.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="380.0">25.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="420.0">Left / Right Rotations (left_rotate, right_rotate)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="420.0">121 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="420.0">119 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">18.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="420.0">27.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="460.0">Leading / Trailing zeros/ones</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="460.0">222 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="460.0">223 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">20.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="460.0">14.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="500.0">Log2</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="500.0">246 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="500.0">244 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="500.0">21.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="500.0">14.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="540.0">Select</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="540.0">40.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="540.0">39.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="540.0">4.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="540.0">5.53 ms</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 10 KiB |
@@ -7,13 +7,13 @@
|
||||
<rect x="0" y="40" width="300" height="120" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="120" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">whitepaper</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="60.0">253 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="60.0">25.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="60.0">276 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="60.0">23.0 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">no_cmux</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="100.0">256 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="100.0">25.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="100.0">238 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="100.0">24.0 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">overflow</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="140.0">238 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="140.0">225 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="140.0">21.3 ops/s</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 2.8 KiB After Width: | Height: | Size: 2.8 KiB |
@@ -15,83 +15,83 @@
|
||||
<rect x="0" y="40" width="300" height="520" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="520" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">Negation (-)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="60.0">50.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="60.0">55.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">57.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="60.0">77.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="60.0">96.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="60.0">52.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="60.0">55.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">54.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="60.0">76.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="60.0">96.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">Add / Sub (+,-)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="100.0">50.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="100.0">55.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">74.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="100.0">91.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="100.0">150 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="100.0">50.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="100.0">55.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">75.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="100.0">96.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="100.0">145 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">Mul (x)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="140.0">89.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="140.0">89.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="140.0">131 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">195 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="140.0">357 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="140.0">1.02 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="140.0">363 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="140.0">1.01 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="180.0">Equal / Not Equal (eq, ne)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="180.0">33.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="180.0">52.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">52.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="180.0">72.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="180.0">72.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="180.0">33.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="180.0">50.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">51.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="180.0">71.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="180.0">72.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="220.0">Comparisons (ge, gt, le, lt)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="220.0">32.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="220.0">52.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">70.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="220.0">89.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="220.0">34.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="220.0">50.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">70.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="220.0">88.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="220.0">128 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="260.0">Max / Min (max, min)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="260.0">69.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="260.0">88.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="260.0">70.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="260.0">88.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="260.0">109 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="260.0">128 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="260.0">173 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="260.0">131 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="260.0">168 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="300.0">Bitwise operations (&, |, ^)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="300.0">17.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="300.0">18.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">19.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="300.0">19.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="300.0">19.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="300.0">17.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="300.0">18.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">18.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="300.0">18.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="300.0">20.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="340.0">Div / Rem (/, %)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="340.0">460 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="340.0">1.01 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">2.22 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="340.0">4.88 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="340.0">12.6 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="340.0">457 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="340.0">1.0 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">2.2 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="340.0">4.99 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="340.0">12.5 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="380.0">Left / Right Shifts (<<, >>)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="380.0">53.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="380.0">74.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">97.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="380.0">121 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="380.0">158 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="380.0">54.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="380.0">75.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">97.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="380.0">122 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="380.0">150 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="420.0">Left / Right Rotations (left_rotate, right_rotate)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="420.0">54.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="420.0">75.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">94.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">121 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="420.0">165 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="420.0">53.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="420.0">75.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">96.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">116 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="420.0">164 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="460.0">Leading / Trailing zeros/ones</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="460.0">67.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="460.0">70.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">89.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="460.0">92.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="460.0">113 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="460.0">86.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="460.0">140 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">164 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="460.0">220 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="460.0">264 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="500.0">Log2</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="500.0">110 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="500.0">163 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="500.0">186 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="500.0">246 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="500.0">290 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="500.0">103 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="500.0">159 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="500.0">183 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="500.0">236 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="500.0">279 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="540.0">Select</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="540.0">36.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="540.0">36.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="540.0">38.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="540.0">40.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="540.0">43.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="540.0">35.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="540.0">37.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="540.0">36.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="540.0">39.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="540.0">42.0 ms</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
@@ -15,65 +15,65 @@
|
||||
<rect x="0" y="40" width="300" height="400" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="400" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">Add / Sub (+,-)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="60.0">53.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="60.0">55.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">57.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="60.0">78.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="60.0">99.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="60.0">50.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="60.0">54.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">54.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="60.0">76.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="60.0">95.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">Mul (x)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="100.0">71.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="100.0">70.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="100.0">115 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">155 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="100.0">207 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="100.0">422 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">156 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="100.0">208 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="100.0">412 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">Equal / Not Equal (eq, ne)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="140.0">34.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="140.0">33.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">52.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="140.0">52.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="140.0">72.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="140.0">33.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="140.0">33.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">52.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="140.0">53.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="140.0">71.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="180.0">Comparisons (ge, gt, le, lt)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="180.0">38.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="180.0">34.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">54.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="180.0">31.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="180.0">34.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">51.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="180.0">70.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="180.0">90.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="180.0">90.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="220.0">Max / Min (max, min)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="220.0">54.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="220.0">53.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">71.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="220.0">91.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="220.0">110 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="220.0">52.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="220.0">52.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">71.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="220.0">91.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="220.0">108 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="260.0">Bitwise operations (&, |, ^)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="260.0">17.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="260.0">17.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="260.0">18.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="260.0">19.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="260.0">19.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="260.0">19.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="260.0">19.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="260.0">20.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="300.0">Div (/)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="300.0">136 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="300.0">172 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">245 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="300.0">437 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="300.0">792 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="300.0">126 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="300.0">182 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">234 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="300.0">427 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="300.0">799 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="340.0">Rem (%)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="340.0">235 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="340.0">337 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">468 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="340.0">690 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="340.0">1.27 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="340.0">244 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="340.0">334 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">462 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="340.0">657 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="340.0">1.19 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="380.0">Left / Right Shifts (<<, >>)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="380.0">17.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="380.0">18.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">19.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="380.0">19.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="380.0">21.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="380.0">17.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="380.0">18.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">19.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="380.0">19.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="380.0">19.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="420.0">Left / Right Rotations (left_rotate, right_rotate)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="420.0">18.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="420.0">18.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">19.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">19.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="420.0">20.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="420.0">17.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="420.0">18.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">18.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">20.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="420.0">21.0 ms</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 13 KiB |
@@ -15,83 +15,83 @@
|
||||
<rect x="0" y="40" width="300" height="520" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="520" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">Negation (-)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="60.0">804 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="60.0">372 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">181 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="60.0">86.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="60.0">42.1 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="60.0">824 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="60.0">388 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">184 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="60.0">88.7 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="60.0">42.8 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">Add / Sub (+,-)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="100.0">733 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="100.0">356 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">167 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="100.0">82.6 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="100.0">40.0 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="100.0">752 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="100.0">368 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">172 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="100.0">82.1 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="100.0">39.5 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">Mul (x)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="140.0">293 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="140.0">71.9 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">18.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="140.0">4.58 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="140.0">1.19 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="140.0">283 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="140.0">65.7 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">17.7 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="140.0">4.68 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="140.0">1.17 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="180.0">Equal / Not Equal (eq, ne)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="180.0">1.6 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="180.0">740 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">392 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="180.0">200 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="180.0">101 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="180.0">1.65 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="180.0">748 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">391 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="180.0">195 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="180.0">102 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="220.0">Comparisons (ge, gt, le, lt)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="220.0">1.58 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="220.0">733 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">354 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="220.0">171 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="220.0">64.7 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="220.0">1.62 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="220.0">745 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">355 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="220.0">170 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="220.0">65.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="260.0">Max / Min (max, min)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="260.0">493 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="260.0">236 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="260.0">116 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="260.0">58.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="260.0">25.7 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="260.0">488 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="260.0">239 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="260.0">117 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="260.0">57.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="260.0">25.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="300.0">Bitwise operations (&, |, ^)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="300.0">2.1 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="300.0">981 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">490 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="300.0">262 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="300.0">130 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="300.0">2.14 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="300.0">1.06 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">537 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="300.0">270 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="300.0">136 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="340.0">Div / Rem (/, %)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="340.0">45.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="340.0">12.9 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">3.56 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="340.0">0.893 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="340.0">0.223 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="340.0">42.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="340.0">12.7 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">3.51 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="340.0">0.914 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="340.0">0.143 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="380.0">Left / Right Shifts (<<, >>)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="380.0">464 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="380.0">183 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">76.1 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="380.0">32.4 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="380.0">14.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="380.0">469 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="380.0">182 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">74.5 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="380.0">32.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="380.0">14.1 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="420.0">Left / Right Rotations (left_rotate, right_rotate)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="420.0">391 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="420.0">397 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="420.0">170 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">74.0 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">32.5 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">72.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">32.1 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="420.0">14.0 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="460.0">Leading / Trailing zeros/ones</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="460.0">824 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="460.0">487 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">222 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="460.0">119 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="460.0">57.8 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="460.0">621 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="460.0">235 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">104 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="460.0">41.8 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="460.0">17.8 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="500.0">Log2</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="500.0">542 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="500.0">220 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="500.0">102 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="500.0">42.0 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="500.0">18.6 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="500.0">536 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="500.0">207 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="500.0">96.4 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="500.0">40.4 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="500.0">17.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="540.0">Select</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="540.0">676 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="540.0">350 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="540.0">176 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="540.0">84.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="540.0">42.6 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="540.0">699 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="540.0">351 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="540.0">175 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="540.0">87.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="540.0">43.4 ops/s</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
@@ -15,65 +15,65 @@
|
||||
<rect x="0" y="40" width="300" height="400" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="400" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">Add / Sub (+,-)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="60.0">810 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="60.0">379 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">178 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="60.0">86.0 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="60.0">41.7 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="60.0">836 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="60.0">383 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">184 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="60.0">87.9 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="60.0">42.5 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">Mul (x)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="100.0">658 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="100.0">185 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">57.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="100.0">17.6 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="100.0">4.83 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="100.0">659 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="100.0">182 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">52.8 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="100.0">16.5 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="100.0">4.79 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">Equal / Not Equal (eq, ne)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="140.0">2.69 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="140.0">1.57 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">723 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="140.0">378 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="140.0">192 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="140.0">2.73 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="140.0">1.68 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">757 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="140.0">399 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="140.0">198 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="180.0">Comparisons (ge, gt, le, lt)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="180.0">2.61 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="180.0">1.63 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">717 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="180.0">348 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="180.0">172 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="180.0">2.82 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="180.0">1.64 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">747 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="180.0">356 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="180.0">173 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="220.0">Max / Min (max, min)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="220.0">1.15 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="220.0">621 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">302 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="220.0">148 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="220.0">73.6 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="220.0">1.18 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="220.0">645 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">305 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="220.0">150 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="220.0">73.2 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="260.0">Bitwise operations (&, |, ^)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="260.0">2.11 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="260.0">1.04 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="260.0">516 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="260.0">260 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="260.0">128 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="260.0">2.31 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="260.0">1.12 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="260.0">555 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="260.0">276 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="260.0">139 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="300.0">Div (/)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="300.0">203 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="300.0">73.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">24.8 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="300.0">7.38 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="300.0">2.16 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="300.0">196 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="300.0">69.6 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">23.7 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="300.0">7.63 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="300.0">2.13 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="340.0">Rem (%)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="340.0">130 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="340.0">49.1 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">17.1 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="340.0">5.65 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="340.0">1.75 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="340.0">114 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="340.0">44.5 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">16.6 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="340.0">5.78 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="340.0">1.66 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="380.0">Left / Right Shifts (<<, >>)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="380.0">2.01 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="380.0">1.02 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">510 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="380.0">247 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="380.0">124 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="380.0">2.13 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="380.0">1.07 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">546 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="380.0">270 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="380.0">138 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="420.0">Left / Right Rotations (left_rotate, right_rotate)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="420.0">2.01 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="420.0">992 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">517 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">254 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="420.0">124 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="342.0" y="420.0">2.14 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="426.0" y="420.0">1.07 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">541 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="594.0" y="420.0">270 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="678.0" y="420.0">137 ops/s</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 13 KiB |
@@ -9,25 +9,25 @@
|
||||
<rect x="0" y="40" width="300" height="160" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="160" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">PBS</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="60.0">9.57 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="60.0">12.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="60.0">112 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="60.0">1.58 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="60.0">9.54 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="60.0">12.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="60.0">111 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="60.0">1.39 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">MB-PBS</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="100.0">4.42 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="100.0">4.71 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="100.0">30.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="100.0">257 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="100.0">4.02 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="100.0">4.55 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="100.0">30.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="100.0">244 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">KS - PBS</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="140.0">11.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="140.0">15.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="140.0">126 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="140.0">1.58 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="140.0">10.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="140.0">15.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="140.0">125 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="140.0">1.51 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="180.0">KS - MB-PBS</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="180.0">6.67 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="180.0">8.49 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="180.0">46.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="180.0">388 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="180.0">5.56 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="180.0">7.29 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="180.0">61.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="180.0">418 ms</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 5.0 KiB After Width: | Height: | Size: 5.0 KiB |
@@ -9,25 +9,25 @@
|
||||
<rect x="0" y="40" width="300" height="160" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="160" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">PBS</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="60.0">8.93 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="60.0">8.94 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="60.0">11.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="60.0">102 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="60.0">654 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="60.0">104 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="60.0">670 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">MB-PBS</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="100.0">4.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="100.0">4.58 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="100.0">28.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="100.0">214 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="100.0">4.87 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="100.0">4.53 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="100.0">30.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="100.0">185 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">KS - PBS</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="140.0">10.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="140.0">14.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="140.0">119 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="140.0">865 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="140.0">10.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="140.0">15.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="140.0">120 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="140.0">871 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="180.0">KS - MB-PBS</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="180.0">6.96 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="180.0">7.59 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="180.0">47.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="180.0">247 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="352.5" y="180.0">6.83 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="457.5" y="180.0">7.13 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="562.5" y="180.0">44.7 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="667.5" y="180.0">228 ms</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 5.0 KiB After Width: | Height: | Size: 5.0 KiB |
@@ -6,11 +6,11 @@
|
||||
<rect x="0" y="40" width="300" height="120" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="120" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">1xFheUint64 (64 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">1.53 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">1.66 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">4xFheUint64 (256 bits) </text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">1.55 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">1.66 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">32xFheUint64 (2048 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">1.76 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">1.8 s</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 2.1 KiB |
@@ -8,17 +8,17 @@
|
||||
<rect x="0" y="40" width="300" height="120" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="120" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">1xFheUint64 (64 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">209 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">43.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">67.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">276 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">44.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">66.0 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">4xFheUint64 (256 bits) </text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">211 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">277 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">44.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">72.9 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">70.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">32xFheUint64 (2048 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">219 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">293 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">49.1 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">185 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">184 ms</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB |
@@ -8,17 +8,17 @@
|
||||
<rect x="0" y="40" width="300" height="120" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="120" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">1xFheUint64 (64 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">8.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">265 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">129 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">7.9 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">274 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">131 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">4xFheUint64 (256 bits) </text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">8.36 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">259 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">50.8 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">7.9 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">277 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">51.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">32xFheUint64 (2048 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">8.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">236 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">8.38 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">7.73 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">242 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">8.62 ops/s</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB |
@@ -6,11 +6,11 @@
|
||||
<rect x="0" y="40" width="300" height="120" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="120" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">1xFheUint64 (64 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">1.71 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">1.94 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">4xFheUint64 (256 bits) </text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">1.72 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">1.96 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">32xFheUint64 (2048 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">1.93 s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">2.13 s</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 2.1 KiB |
@@ -8,17 +8,17 @@
|
||||
<rect x="0" y="40" width="300" height="120" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="120" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">1xFheUint64 (64 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">214 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">31.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">52.5 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">292 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">31.4 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">51.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">4xFheUint64 (256 bits) </text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">217 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">31.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">57.3 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">294 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">31.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">56.2 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">32xFheUint64 (2048 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">225 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">33.6 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">170 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">317 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">33.8 ms</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">167 ms</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB |
@@ -8,17 +8,17 @@
|
||||
<rect x="0" y="40" width="300" height="120" fill="#fbbc04"/>
|
||||
<rect x="300" y="40" width="420" height="120" fill="#f3f3f3"/>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">1xFheUint64 (64 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">7.78 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">877 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">200 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">7.3 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">988 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">201 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">4xFheUint64 (256 bits) </text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">7.79 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">931 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">58.9 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">7.23 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">987 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">59.5 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">32xFheUint64 (2048 bits)</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">7.77 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">993 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">8.59 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">7.1 ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">1.11 k.ops/s</text>
|
||||
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">8.85 ops/s</text>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
|
||||
<line stroke="white" stroke-width="2" x1="0" y1="80" x2="720" y2="80"/>
|
||||
|
||||
|
Before Width: | Height: | Size: 3.5 KiB After Width: | Height: | Size: 3.5 KiB |