mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-08 06:13:58 -05:00
chore(tfhe): rename nightly feature flag to avx512
This commit is contained in:
committed by
Nicolas Sarlin
parent
851bd01873
commit
f8a958663b
100
Makefile
100
Makefile
@@ -377,10 +377,11 @@ clippy_core: install_rs_check_toolchain
|
||||
--features=experimental \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=nightly-avx512 \
|
||||
--no-default-features \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=experimental,nightly-avx512 \
|
||||
--no-default-features \
|
||||
--features=experimental \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=zk-pok \
|
||||
@@ -585,19 +586,19 @@ audit_dependencies: install_rs_build_toolchain install_cargo_audit
|
||||
.PHONY: build_core # Build core_crypto without experimental features
|
||||
build_core: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
-p tfhe
|
||||
--no-default-features -p tfhe
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=nightly-avx512 -p tfhe; \
|
||||
--features=avx512 -p tfhe; \
|
||||
fi
|
||||
|
||||
.PHONY: build_core_experimental # Build core_crypto with experimental features
|
||||
build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=experimental -p tfhe
|
||||
--no-default-features --features=experimental -p tfhe
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=experimental,nightly-avx512 -p tfhe; \
|
||||
--features=experimental,avx512 -p tfhe; \
|
||||
fi
|
||||
|
||||
.PHONY: build_boolean # Build with boolean enabled
|
||||
@@ -693,10 +694,10 @@ build_tfhe_csprng: install_rs_build_toolchain
|
||||
.PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
|
||||
test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=experimental,zk-pok -p tfhe -- core_crypto::
|
||||
--no-default-features --features=experimental,zk-pok -p tfhe -- core_crypto::
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=experimental,zk-pok,nightly-avx512 -p tfhe -- core_crypto::; \
|
||||
--features=experimental,zk-pok -p tfhe -- core_crypto::; \
|
||||
fi
|
||||
|
||||
.PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
|
||||
@@ -704,13 +705,14 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
|
||||
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
|
||||
--no-default-features \
|
||||
--features=experimental,internal-keycache \
|
||||
-p tfhe -- core_crypto::
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
|
||||
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=experimental,internal-keycache,nightly-avx512 \
|
||||
--features=experimental,internal-keycache,avx512 \
|
||||
-p tfhe -- -Z unstable-options --report-time core_crypto::; \
|
||||
fi
|
||||
|
||||
@@ -1009,9 +1011,9 @@ test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
|
||||
test_noise_check: install_rs_check_toolchain
|
||||
@# First run the sanity checks to make sure the atomic patterns are correct
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=boolean,shortint,integer,nightly-avx512 -p tfhe -- sanity_check
|
||||
--features=boolean,shortint,integer -p tfhe -- sanity_check
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=boolean,shortint,integer,nightly-avx512 -p tfhe -- noise_check \
|
||||
--features=boolean,shortint,integer -p tfhe -- noise_check \
|
||||
--test-threads=1 --nocapture
|
||||
|
||||
.PHONY: test_safe_serialization # Run the tests for safe serialization
|
||||
@@ -1353,13 +1355,13 @@ dieharder_csprng: install_dieharder build_tfhe_csprng
|
||||
clippy_bench: install_rs_check_toolchain
|
||||
! (grep --recursive "trivial" tfhe-benchmark && echo "trivial found in benches")
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,nightly-avx512,pbs-stats,zk-pok \
|
||||
--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
|
||||
-p tfhe-benchmark -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_bench_gpu # Run clippy lints on tfhe-benchmark
|
||||
clippy_bench_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=gpu,shortint,integer,internal-keycache,nightly-avx512,pbs-stats,zk-pok \
|
||||
--features=gpu,shortint,integer,internal-keycache,pbs-stats,zk-pok \
|
||||
-p tfhe-benchmark -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_bench_hpu # Run clippy lints on tfhe-benchmark
|
||||
@@ -1378,28 +1380,28 @@ bench_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer \
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_signed_integer # Run benchmarks for signed integer
|
||||
bench_signed_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed \
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
|
||||
bench_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_signed_integer_gpu # Run benchmarks for signed integer on GPU backend
|
||||
bench_signed_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_integer_hpu # Run benchmarks for integer on HPU backend
|
||||
bench_integer_hpu: install_rs_check_toolchain
|
||||
@@ -1415,7 +1417,7 @@ bench_integer_compression: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-glwe_packing_compression \
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_integer_compression_gpu
|
||||
bench_integer_compression_gpu: install_rs_check_toolchain
|
||||
@@ -1458,7 +1460,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer \
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
|
||||
bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1466,7 +1468,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed \
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
|
||||
bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1474,7 +1476,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_signed_integer_multi_bit_gpu # Run benchmarks for signed integer on GPU backend using multi-bit parameters
|
||||
bench_signed_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1482,14 +1484,14 @@ bench_signed_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
|
||||
bench_integer_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,zk-pok,nightly-avx512,pbs-stats \
|
||||
--features=integer,internal-keycache,zk-pok,pbs-stats \
|
||||
-p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_shortint # Run benchmarks for shortint
|
||||
@@ -1497,76 +1499,76 @@ bench_shortint: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench shortint \
|
||||
--features=shortint,internal-keycache,nightly-avx512 -p tfhe-benchmark
|
||||
--features=shortint,internal-keycache -p tfhe-benchmark
|
||||
|
||||
.PHONY: bench_shortint_oprf # Run benchmarks for shortint OPRF
|
||||
bench_shortint_oprf: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench shortint-oprf \
|
||||
--features=shortint,internal-keycache,nightly-avx512 -p tfhe-benchmark
|
||||
--features=shortint,internal-keycache -p tfhe-benchmark
|
||||
|
||||
.PHONY: bench_boolean # Run benchmarks for boolean
|
||||
bench_boolean: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench boolean \
|
||||
--features=boolean,internal-keycache,nightly-avx512 -p tfhe-benchmark
|
||||
--features=boolean,internal-keycache -p tfhe-benchmark
|
||||
|
||||
.PHONY: bench_ks # Run benchmarks for keyswitch
|
||||
bench_ks: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench core_crypto-ks \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p tfhe-benchmark
|
||||
--features=boolean,shortint,internal-keycache -p tfhe-benchmark
|
||||
|
||||
.PHONY: bench_ks_gpu # Run benchmarks for keyswitch on GPU backend
|
||||
bench_ks_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench core_crypto-ks \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p tfhe-benchmark --profile release_lto_off
|
||||
--features=boolean,shortint,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off
|
||||
|
||||
.PHONY: bench_pbs # Run benchmarks for PBS
|
||||
bench_pbs: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench core_crypto-pbs \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p tfhe-benchmark
|
||||
--features=boolean,shortint,internal-keycache -p tfhe-benchmark
|
||||
|
||||
.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
|
||||
bench_pbs_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench core_crypto-pbs \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p tfhe-benchmark --profile release_lto_off
|
||||
--features=boolean,shortint,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off
|
||||
|
||||
.PHONY: bench_ks_pbs # Run benchmarks for KS-PBS
|
||||
bench_ks_pbs: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench core_crypto-ks-pbs \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p tfhe-benchmark
|
||||
--features=boolean,shortint,internal-keycache -p tfhe-benchmark
|
||||
|
||||
.PHONY: bench_ks_pbs_gpu # Run benchmarks for KS-PBS on GPU backend
|
||||
bench_ks_pbs_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench core_crypto-ks-pbs \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p tfhe-benchmark --profile release_lto_off
|
||||
--features=boolean,shortint,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off
|
||||
|
||||
.PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
|
||||
bench_pbs128: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench core_crypto-pbs128 \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p tfhe-benchmark
|
||||
--features=boolean,shortint,internal-keycache -p tfhe-benchmark
|
||||
|
||||
.PHONY: bench_pbs128_gpu # Run benchmarks for PBS using FFT 128 bits on GPU
|
||||
bench_pbs128_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench core_crypto-pbs128 \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p tfhe-benchmark --profile release_lto_off
|
||||
--features=boolean,shortint,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off
|
||||
|
||||
bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
|
||||
bench_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
|
||||
@@ -1602,13 +1604,13 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
|
||||
bench_hlapi: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi \
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_gpu # Run benchmarks for integer operations on GPU
|
||||
bench_hlapi_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_hlapi_hpu # Run benchmarks for HLAPI operations on HPU
|
||||
bench_hlapi_hpu: install_rs_check_toolchain
|
||||
@@ -1624,28 +1626,28 @@ bench_hlapi_erc20: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ERC20 operations on GPU
|
||||
bench_hlapi_erc20_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_hlapi_dex # Run benchmarks for DEX operations
|
||||
bench_hlapi_dex: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-dex \
|
||||
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_dex_gpu # Run benchmarks for DEX operations on GPU
|
||||
bench_hlapi_dex_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-dex \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ERC20 operations on HPU
|
||||
bench_hlapi_erc20_hpu: install_rs_check_toolchain
|
||||
@@ -1666,14 +1668,14 @@ bench_hlapi_noise_squash: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-noise-squash \
|
||||
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_noise_squash_gpu # Run benchmarks for noise squash operation on GPU
|
||||
bench_hlapi_noise_squash_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-noise-squash \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
|
||||
.PHONY: bench_custom # Run benchmarks with a user-defined command
|
||||
@@ -1940,15 +1942,15 @@ test_fft_no_std:
|
||||
--no-default-features \
|
||||
--features=fft128
|
||||
|
||||
.PHONY: test_fft_no_std_nightly
|
||||
test_fft_no_std_nightly: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --release -p tfhe-fft \
|
||||
.PHONY: test_fft_no_std_avx512
|
||||
test_fft_no_std_avx512:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --release -p tfhe-fft \
|
||||
--no-default-features \
|
||||
--features=avx512,fft128
|
||||
|
||||
.PHONY: test_fft_node_js
|
||||
test_fft_node_js: install_rs_build_toolchain install_build_wasm32_target install_wasm_bindgen_cli
|
||||
RUSTFLAGS="" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release \
|
||||
test_fft_node_js: install_build_wasm32_target install_wasm_bindgen_cli
|
||||
RUSTFLAGS="" cargo test --release \
|
||||
--features=serde --target wasm32-unknown-unknown -p tfhe-fft
|
||||
|
||||
.PHONY: test_fft_node_js_ci
|
||||
@@ -1959,7 +1961,7 @@ test_fft_node_js_ci: check_nvm_installed
|
||||
"$(MAKE)" test_fft_node_js
|
||||
|
||||
.PHONY: test_fft_all
|
||||
test_fft_all: test_fft test_fft_serde test_fft_nightly test_fft_no_std test_fft_no_std_nightly \
|
||||
test_fft_all: test_fft test_fft_serde test_fft_avx512 test_fft_no_std test_fft_no_std_avx512 \
|
||||
test_fft_node_js_ci
|
||||
|
||||
##### Bench #####
|
||||
@@ -1968,7 +1970,7 @@ test_fft_node_js_ci
|
||||
bench_fft: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" bench --bench fft -p tfhe-fft \
|
||||
--features=serde \
|
||||
--features=nightly \
|
||||
--features=avx512 \
|
||||
--features=fft128
|
||||
#============================End FFT Section ==================================
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ do
|
||||
"--avx512-support" )
|
||||
shift
|
||||
if [[ "$1" == "ON" ]]; then
|
||||
avx512_feature=nightly-avx512
|
||||
avx512_feature=avx512
|
||||
fi
|
||||
;;
|
||||
|
||||
@@ -173,6 +173,7 @@ cargo "${RUST_TOOLCHAIN}" nextest run \
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--no-default-features \
|
||||
--features=integer,internal-keycache,zk-pok,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--test-threads "${test_threads}" \
|
||||
-E "$filter_expression"
|
||||
@@ -181,6 +182,7 @@ if [[ -z ${multi_bit_argument} && -z ${long_tests_argument} ]]; then
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--no-default-features \
|
||||
--features=integer,internal-keycache,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--doc \
|
||||
-- --test-threads="${doctest_threads}" integer::"${gpu_feature}"
|
||||
|
||||
@@ -27,10 +27,11 @@ serde_json = "1.0.94"
|
||||
paste = "1.0.7"
|
||||
rand = { workspace = true }
|
||||
rayon = { workspace = true }
|
||||
tfhe = { path = "../tfhe" }
|
||||
tfhe = { path = "../tfhe", default-features = false }
|
||||
tfhe-csprng = { path = "../tfhe-csprng" }
|
||||
|
||||
[features]
|
||||
default = ["avx512"]
|
||||
boolean = ["tfhe/boolean"]
|
||||
shortint = ["tfhe/shortint"]
|
||||
integer = ["shortint", "tfhe/integer"]
|
||||
@@ -38,7 +39,7 @@ gpu = ["tfhe/gpu"]
|
||||
hpu = ["tfhe/hpu"]
|
||||
hpu-v80 = ["tfhe/hpu-v80"]
|
||||
internal-keycache = ["tfhe/internal-keycache"]
|
||||
nightly-avx512 = ["tfhe/nightly-avx512"]
|
||||
avx512 = ["tfhe/avx512"]
|
||||
pbs-stats = ["tfhe/pbs-stats"]
|
||||
zk-pok = ["tfhe/zk-pok"]
|
||||
|
||||
|
||||
@@ -94,6 +94,7 @@ bytemuck = { workspace = true }
|
||||
tfhe-hpu-backend = { version = "0.3", path = "../backends/tfhe-hpu-backend", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["avx512"]
|
||||
boolean = []
|
||||
shortint = ["dep:sha3", "dep:blake3"]
|
||||
integer = ["shortint", "dep:strum"]
|
||||
@@ -150,7 +151,9 @@ high-level-client-js-wasm-api = [
|
||||
]
|
||||
parallel-wasm-api = ["dep:wasm-bindgen-rayon"]
|
||||
|
||||
nightly-avx512 = ["tfhe-fft/avx512", "tfhe-ntt/avx512", "pulp/x86-v4"]
|
||||
avx512 = ["tfhe-fft/avx512", "tfhe-ntt/avx512", "pulp/x86-v4"]
|
||||
# Kept for backward compatibility
|
||||
nightly-avx512 = ["avx512"]
|
||||
|
||||
# Private features
|
||||
__profiling = []
|
||||
|
||||
@@ -65,11 +65,3 @@ This crate provides 4 kinds of data types. Each kind is enabled by activating th
|
||||
| Strings | `strings` | ASCII strings |
|
||||
|
||||
The `Integers+` kind refers to types that have a non-standard bit-width, such as `FheUint24`. Having more granular types can improve performance. The feature is not enabled by default, to avoid very long compile times for users who don't need the extended types.
|
||||
|
||||
### AVX-512
|
||||
|
||||
While the library generally selects the best instruction set available on the host automatically, 'AVX-512' must be chosen explicitly. This requires using a nightly toolchain with the `nightly-avx512` feature.
|
||||
|
||||
```shell
|
||||
cargo +nightly build --release --features=nightly-avx512
|
||||
```
|
||||
|
||||
@@ -234,7 +234,7 @@ pub fn add_external_product_assign_split<ContOutLo, ContOutHi, ContGgsw, ContGlw
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
fn collect_next_term_split_avx512(
|
||||
simd: pulp::x86::V4,
|
||||
glwe_decomp_term_lo: &mut [u64],
|
||||
@@ -564,7 +564,7 @@ fn collect_next_term_split(
|
||||
base_log: usize,
|
||||
) {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
if let Some(simd) = pulp::x86::V4::try_new() {
|
||||
return collect_next_term_split_avx512(
|
||||
simd,
|
||||
|
||||
@@ -4,7 +4,7 @@ use dyn_stack::PodStack;
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use pulp::{f64x4, u64x4, x86::V3};
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
use pulp::{f64x8, u64x8, x86::V4};
|
||||
use tfhe_fft::fft128::f128;
|
||||
|
||||
@@ -101,7 +101,7 @@ pub fn u128_to_f64_avx2(simd: V3, (lo, hi): (u64x4, u64x4)) -> f64x4 {
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
pub fn u128_to_f64_avx512(simd: V4, (lo, hi): (u64x8, u64x8)) -> f64x8 {
|
||||
const A: f64 = (1u128 << 52) as f64;
|
||||
@@ -213,7 +213,7 @@ pub fn wrapping_neg_avx2(simd: V3, (lo, hi): (u64x4, u64x4)) -> (u64x4, u64x4) {
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
pub fn wrapping_sub_avx512(
|
||||
simd: V4,
|
||||
@@ -228,7 +228,7 @@ pub fn wrapping_sub_avx512(
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
pub fn wrapping_add_avx512(
|
||||
simd: V4,
|
||||
@@ -243,7 +243,7 @@ pub fn wrapping_add_avx512(
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
pub fn wrapping_neg_avx512(simd: V4, (lo, hi): (u64x8, u64x8)) -> (u64x8, u64x8) {
|
||||
wrapping_add_avx512(
|
||||
@@ -280,7 +280,7 @@ fn i128_to_f64_avx2(simd: V3, (lo, hi): (u64x4, u64x4)) -> f64x4 {
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
fn i128_to_f64_avx512(simd: V4, (lo, hi): (u64x8, u64x8)) -> f64x8 {
|
||||
let sign_bit = simd.splat_u64x8(1 << 63);
|
||||
@@ -399,7 +399,7 @@ fn f64_to_i128_avx2(simd: V3, f: f64x4) -> (u64x4, u64x4) {
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
fn f64_to_i128_avx512(simd: V4, f: f64x8) -> (u64x8, u64x8) {
|
||||
let sign_bit = simd.splat_u64x8(1 << 63);
|
||||
@@ -432,7 +432,7 @@ fn f64_to_i128_avx512(simd: V4, f: f64x8) -> (u64x8, u64x8) {
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
fn f64_to_u128_avx512(simd: V4, f: f64x8) -> (u64x8, u64x8) {
|
||||
let f = pulp::cast(f);
|
||||
@@ -520,7 +520,7 @@ fn to_signed_to_f128_avx2(simd: V3, (lo, hi): (u64x4, u64x4)) -> (f64x4, f64x4)
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
fn to_signed_to_f128_avx512(simd: V4, (lo, hi): (u64x8, u64x8)) -> (f64x8, f64x8) {
|
||||
// convert to signed then to float
|
||||
@@ -580,7 +580,7 @@ fn f128_floor_avx2(simd: V3, (x0, x1): (f64x4, f64x4)) -> (f64x4, f64x4) {
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
fn f128_floor_avx512(simd: V4, (x0, x1): (f64x8, f64x8)) -> (f64x8, f64x8) {
|
||||
let x0_floor = simd.floor_f64x8(x0);
|
||||
@@ -609,7 +609,7 @@ fn f128_round_avx2(simd: V3, (x0, x1): (f64x4, f64x4)) -> (f64x4, f64x4) {
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
fn f128_round_avx512(simd: V4, (x0, x1): (f64x8, f64x8)) -> (f64x8, f64x8) {
|
||||
f128_floor_avx512(simd, add_f128_f64x8(simd, x0, x1, simd.splat_f64x8(0.5)))
|
||||
@@ -680,14 +680,14 @@ pub fn add_f128_f64x4(simd: V3, a0: f64x4, a1: f64x4, b: f64x4) -> (f64x4, f64x4
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
// Computes `s = simd.add_f64x8(a, b)` and returns the pair `(s, b - (s - a))`,
// with both subtractions performed through `simd.sub_f64x8`.
// NOTE(review): the name suggests the quick-two-sum error-free transformation,
// which presumably expects |a| >= |b| for the second component to be the exact
// rounding error of the sum — confirm against the callers.
fn quick_two_sum_f64x8(simd: V4, a: f64x8, b: f64x8) -> (f64x8, f64x8) {
|
||||
let s = simd.add_f64x8(a, b);
|
||||
// Second component: `b - (s - a)`, evaluated innermost subtraction first.
(s, simd.sub_f64x8(b, simd.sub_f64x8(s, a)))
|
||||
}
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
pub(crate) fn two_sum_f64x8(simd: V4, a: f64x8, b: f64x8) -> (f64x8, f64x8) {
|
||||
let s = simd.add_f64x8(a, b);
|
||||
@@ -701,7 +701,7 @@ pub(crate) fn two_sum_f64x8(simd: V4, a: f64x8, b: f64x8) -> (f64x8, f64x8) {
|
||||
)
|
||||
}
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
fn two_diff_f64x8(simd: V4, a: f64x8, b: f64x8) -> (f64x8, f64x8) {
|
||||
let s = simd.sub_f64x8(a, b);
|
||||
@@ -715,7 +715,7 @@ fn two_diff_f64x8(simd: V4, a: f64x8, b: f64x8) -> (f64x8, f64x8) {
|
||||
)
|
||||
}
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
fn sub_estimate_f128x8(simd: V4, a0: f64x8, a1: f64x8, b0: f64x8, b1: f64x8) -> (f64x8, f64x8) {
|
||||
let (s, e) = two_diff_f64x8(simd, a0, b0);
|
||||
@@ -724,7 +724,7 @@ fn sub_estimate_f128x8(simd: V4, a0: f64x8, a1: f64x8, b0: f64x8, b1: f64x8) ->
|
||||
quick_two_sum_f64x8(simd, s, e)
|
||||
}
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
pub fn add_f128_f64x8(simd: V4, a0: f64x8, a1: f64x8, b: f64x8) -> (f64x8, f64x8) {
|
||||
let (s1, s2) = two_sum_f64x8(simd, a0, b);
|
||||
@@ -751,7 +751,7 @@ fn from_torus_f128_avx2(simd: V3, x: (f64x4, f64x4)) -> (u64x4, u64x4) {
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
fn from_torus_f128_avx512(simd: V4, x: (f64x8, f64x8)) -> (u64x8, u64x8) {
|
||||
let floor = f128_floor_avx512(simd, x);
|
||||
@@ -847,7 +847,7 @@ pub fn convert_forward_integer_avx2(
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
pub fn convert_forward_integer_avx512(
|
||||
simd: V4,
|
||||
out_re0: &mut [f64],
|
||||
@@ -961,7 +961,7 @@ pub fn convert_forward_integer(
|
||||
in_im_hi: &[u64],
|
||||
) {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
if let Some(simd) = V4::try_new() {
|
||||
return convert_forward_integer_avx512(
|
||||
simd, out_re0, out_re1, out_im0, out_im1, in_re_lo, in_re_hi, in_im_lo, in_im_hi,
|
||||
@@ -1097,7 +1097,7 @@ pub fn convert_add_backward_torus_avx2(
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
pub fn convert_add_backward_torus_avx512(
|
||||
simd: V4,
|
||||
out_re_lo: &mut [u64],
|
||||
@@ -1201,7 +1201,7 @@ pub fn convert_add_backward_torus(
|
||||
in_im1: &[f64],
|
||||
) {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
if let Some(simd) = V4::try_new() {
|
||||
return convert_add_backward_torus_avx512(
|
||||
simd, out_re_lo, out_re_hi, out_im_lo, out_im_hi, in_re0, in_re1, in_im0, in_im1,
|
||||
|
||||
@@ -17,7 +17,7 @@ use core::arch::x86::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use core::arch::x86_64::*;
|
||||
use pulp::x86::V3;
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
use pulp::x86::V4;
|
||||
|
||||
/// Convert a vector of f64 values to a vector of i64 values.
|
||||
@@ -80,7 +80,7 @@ pub fn mm256_cvtpd_epi64(simd: V3, x: __m256d) -> __m256i {
|
||||
|
||||
/// Convert a vector of f64 values to a vector of i64 values with rounding to nearest integer.
|
||||
/// [`Intel's documentation`](`https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64`)
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
pub fn mm512_cvt_round_nearest_pd_epi64(simd: V4, x: __m512d) -> __m512i {
|
||||
let _ = simd.avx512dq;
|
||||
@@ -124,7 +124,7 @@ pub fn mm256_cvtepi64_pd(simd: V3, x: __m256i) -> __m256d {
|
||||
/// `_`.
|
||||
///
|
||||
/// [`Intel's documentation`](`https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd`)
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
pub fn mm512_cvtepi64_pd(simd: V4, x: __m512i) -> __m512d {
|
||||
// SAFETY: simd contains an instance of avx512dq, that matches the target feature of
|
||||
@@ -133,7 +133,7 @@ pub fn mm512_cvtepi64_pd(simd: V4, x: __m512i) -> __m512d {
|
||||
unsafe { _mm512_cvtepi64_pd(x) }
|
||||
}
|
||||
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
pub fn convert_forward_integer_u32_v4(
|
||||
simd: V4,
|
||||
out: &mut [c64],
|
||||
@@ -226,7 +226,7 @@ pub fn convert_forward_integer_u32_v4(
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
pub fn convert_forward_integer_u64_v4(
|
||||
simd: V4,
|
||||
out: &mut [c64],
|
||||
@@ -508,7 +508,7 @@ pub fn convert_forward_integer_u64_avx2_v3(
|
||||
///
|
||||
/// This deinterleaves two vectors of c64 values into two vectors of real part and imaginary part,
|
||||
/// then returns the scaled fractional part.
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[inline(always)]
|
||||
pub fn prologue_convert_torus_v4(
|
||||
simd: V4,
|
||||
@@ -557,7 +557,7 @@ pub fn prologue_convert_torus_v4(
|
||||
}
|
||||
|
||||
/// See [`convert_add_backward_torus`].
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
pub fn convert_add_backward_torus_u32_v4(
|
||||
simd: V4,
|
||||
out_re: &mut [u32],
|
||||
@@ -644,7 +644,7 @@ pub fn convert_add_backward_torus_u32_v4(
|
||||
}
|
||||
|
||||
/// See [`convert_add_backward_torus`].
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
pub fn convert_add_backward_torus_u64_v4(
|
||||
simd: V4,
|
||||
out_re: &mut [u64],
|
||||
@@ -961,7 +961,7 @@ pub fn convert_forward_integer_u32(
|
||||
in_im: &[u32],
|
||||
twisties: TwistiesView<'_>,
|
||||
) {
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
if let Some(simd) = V4::try_new() {
|
||||
return convert_forward_integer_u32_v4(simd, out, in_re, in_im, twisties);
|
||||
}
|
||||
@@ -977,7 +977,7 @@ pub fn convert_forward_integer_u64(
|
||||
in_im: &[u64],
|
||||
twisties: TwistiesView<'_>,
|
||||
) {
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
if let Some(simd) = V4::try_new() {
|
||||
return convert_forward_integer_u64_v4(simd, out, in_re, in_im, twisties);
|
||||
}
|
||||
@@ -993,7 +993,7 @@ pub fn convert_add_backward_torus_u32(
|
||||
inp: &[c64],
|
||||
twisties: TwistiesView<'_>,
|
||||
) {
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
if let Some(simd) = V4::try_new() {
|
||||
return convert_add_backward_torus_u32_v4(simd, out_re, out_im, inp, twisties);
|
||||
}
|
||||
@@ -1009,7 +1009,7 @@ pub fn convert_add_backward_torus_u64(
|
||||
inp: &[c64],
|
||||
twisties: TwistiesView<'_>,
|
||||
) {
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
if let Some(simd) = V4::try_new() {
|
||||
return convert_add_backward_torus_u64_v4(simd, out_re, out_im, inp, twisties);
|
||||
}
|
||||
@@ -1068,7 +1068,7 @@ mod tests {
|
||||
assert_eq!(target, computed);
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
if let Some(simd) = V4::try_new() {
|
||||
for v in [
|
||||
[
|
||||
@@ -1148,7 +1148,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
#[cfg(feature = "avx512")]
|
||||
#[test]
|
||||
fn add_backward_torus_v4() {
|
||||
if let Some(simd) = V4::try_new() {
|
||||
|
||||
@@ -67,15 +67,6 @@
|
||||
#![allow(clippy::iter_with_drain)] // 2
|
||||
#![allow(clippy::large_stack_frames)] // 1
|
||||
#![cfg_attr(feature = "__wasm_api", allow(dead_code))]
|
||||
// Temporary workaround until we raise msrv to 1.89
|
||||
#![allow(stable_features)]
|
||||
#![cfg_attr(
|
||||
all(
|
||||
any(target_arch = "x86", target_arch = "x86_64"),
|
||||
feature = "nightly-avx512"
|
||||
),
|
||||
feature(avx512_target_feature, stdarch_x86_avx512)
|
||||
)]
|
||||
#![cfg_attr(all(doc, not(doctest)), feature(doc_cfg))]
|
||||
// Weird clippy lint triggering without any code location
|
||||
#![cfg_attr(test, allow(clippy::large_stack_arrays))]
|
||||
|
||||
Reference in New Issue
Block a user