Compare commits

..

3 Commits

Author SHA1 Message Date
Pedro Alves
eff405bab1 fix(gpu): define CUDA_ARCH in tfhe-cuda-common CMakeLists
device.cu uses CUDA_ARCH in preprocessor conditionals (e.g.,
cuda_check_support_thread_block_clusters relies on #if CUDA_ARCH >= 900).
Without this define, CUDA_ARCH defaults to 0, silently disabling thread
block cluster support on Hopper GPUs.
2026-04-23 20:35:27 -03:00
Pedro Alves
6932138917 chore(gpu): bump tfhe-cuda-backend to 0.14.1 and zk-cuda-backend to 0.1.1 2026-04-23 16:42:55 -03:00
Pedro Alves
a1cc8c4883 chore(gpu): extract shared CUDA device utilities into tfhe-rs-cuda-common 2026-04-23 16:35:30 -03:00
75 changed files with 450 additions and 412 deletions

View File

@@ -54,7 +54,7 @@ jobs:
- name: Retrieve data from cache
id: retrieve-data-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor
@@ -89,7 +89,7 @@ jobs:
- name: Store data in cache
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor

View File

@@ -69,7 +69,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
dependencies:
@@ -200,7 +200,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
~/.nvm
@@ -213,7 +213,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -56,7 +56,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
integer:

View File

@@ -57,7 +57,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
integer:

View File

@@ -78,7 +78,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
dependencies:

View File

@@ -45,7 +45,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
wasm:
@@ -92,7 +92,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
~/.nvm
@@ -105,7 +105,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -34,7 +34,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
backward:

View File

@@ -204,7 +204,7 @@ jobs:
uses: foundry-rs/foundry-toolchain@8789b3e21e6c11b2697f5eb56eddae542f746c10
- name: Cache cargo
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
with:
path: |
~/.cargo/registry
@@ -232,7 +232,7 @@ jobs:
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
- name: Use Node.js
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
node-version: 20.x

View File

@@ -46,7 +46,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
wasm_bench:

View File

@@ -124,7 +124,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
~/.nvm
@@ -137,7 +137,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -138,7 +138,7 @@ jobs:
- name: Node cache restoration
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2'
id: node-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
~/.nvm
@@ -151,7 +151,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2' && steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -40,7 +40,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
fft:

View File

@@ -42,7 +42,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
ntt:

View File

@@ -43,7 +43,7 @@ jobs:
echo "version=$(make zizmor_version)" >> "${GITHUB_OUTPUT}"
- name: Check workflows security
uses: zizmorcore/zizmor-action@b1d7e1fb5de872772f31590499237e7cce841e8e # v0.5.3
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
with:
advanced-security: 'false' # Print results directly in logs
persona: pedantic

View File

@@ -44,7 +44,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
tfhe:

View File

@@ -46,7 +46,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
csprng:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -45,7 +45,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -41,7 +41,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
hpu:

View File

@@ -89,7 +89,7 @@ jobs:
make build_web_js_api_parallel
- name: Authenticate on NPM
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
node-version: '24'
registry-url: 'https://registry.npmjs.org'

View File

@@ -53,7 +53,7 @@ jobs:
- name: Restore Sagemath image from cache
id: docker-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
@@ -76,7 +76,7 @@ jobs:
- name: Store Sagemath image in cache
if: steps.docker-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}

View File

@@ -2,6 +2,7 @@
resolver = "3"
members = [
"apps/test-vectors",
"backends/tfhe-cuda-common",
"backends/tfhe-cuda-backend",
"backends/tfhe-hpu-backend",
"backends/zk-cuda-backend",

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-cuda-backend"
version = "0.14.0"
version = "0.14.1"
edition = "2021"
authors = ["Zama team"]
license = "BSD-3-Clause-Clear"
@@ -11,6 +11,9 @@ repository = "https://github.com/zama-ai/tfhe-rs"
readme = "README.md"
keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
[dependencies]
tfhe-cuda-common = { version = "0.1.0", path = "../tfhe-cuda-common" }
[build-dependencies]
cmake.workspace = true
pkg-config.workspace = true
@@ -18,6 +21,6 @@ bindgen.workspace = true
[features]
experimental-multi-arch = []
profile = []
debug = []
profile = ["tfhe-cuda-common/profile"]
debug = ["tfhe-cuda-common/debug"]
debug-fake-multi-gpu = []

View File

@@ -1,15 +1,5 @@
use std::path::PathBuf;
fn get_linux_distribution_name() -> Option<String> {
let content = std::fs::read_to_string("/etc/os-release").ok()?;
for line in content.lines() {
if let Some(value) = line.strip_prefix("NAME=") {
return Some(value.trim_matches('"').to_string());
}
}
None
}
fn main() {
if let Ok(val) = std::env::var("DOCS_RS") {
if val.parse::<u32>() == Ok(1) {
@@ -36,14 +26,9 @@ fn main() {
println!("cargo::rerun-if-changed=cuda/CMakeLists.txt");
println!("cargo::rerun-if-changed=src");
// Platform/distro check is performed by tfhe-cuda-common's build.rs, which
// Cargo builds first as a dependency.
if std::env::consts::OS == "linux" {
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
println!(
"cargo:warning=This Linux distribution is not officially supported. \
Only Ubuntu is supported by tfhe-cuda-backend at this time. Build may fail\n"
);
}
let mut cmake_config = cmake::Config::new("cuda");
// Conditionally pass the "MULTI_ARCH" variable to CMake if the feature is enabled
@@ -67,6 +52,10 @@ fn main() {
cmake_config.define("FAKE_MULTI_GPU", "ON");
}
if let Ok(common_include) = std::env::var("DEP_TFHE_CUDA_COMMON_INCLUDE") {
cmake_config.define("TFHE_CUDA_COMMON_INCLUDE_DIR", &common_include);
}
// Build the CMake project
let dest = cmake_config.build();
println!("cargo:rustc-link-search=native={}", dest.display());

View File

@@ -75,9 +75,7 @@ rules:
- "*.cuh"
- "*.cpp"
- "*.h"
exclude:
- backends/tfhe-cuda-backend/cuda/check_cuda.cu # contains cuda checking functions
- backends/tfhe-cuda-backend/cuda/include/device.h # contains the cuda_check_error macro (and others)
exclude: []
patterns:
- pattern: $FUNC(...)
- metavariable-regex:

View File

@@ -18,9 +18,9 @@ endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_SUPPORTED_CUDA_VERSION})
message(FATAL_ERROR "CUDA ${MINIMUM_SUPPORTED_CUDA_VERSION} or greater is required for compilation.")
endif()
# Get CUDA compute capability
# Get CUDA compute capability (check_cuda.cu lives in tfhe-cuda-common)
set(OUTPUTFILE ${CMAKE_CURRENT_SOURCE_DIR}/cuda_script) # No suffix required
set(CUDAFILE ${CMAKE_CURRENT_SOURCE_DIR}/check_cuda.cu)
set(CUDAFILE ${CMAKE_CURRENT_SOURCE_DIR}/../../tfhe-cuda-common/cuda/check_cuda.cu)
execute_process(COMMAND nvcc -lcuda ${CUDAFILE} -o ${OUTPUTFILE})
execute_process(
COMMAND ${OUTPUTFILE}
@@ -116,10 +116,21 @@ set(CMAKE_CUDA_FLAGS
set(INCLUDE_DIR include)
set(TFHE_CUDA_COMMON_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../tfhe-cuda-common/cuda")
if(NOT TFHE_CUDA_COMMON_INCLUDE_DIR)
set(TFHE_CUDA_COMMON_INCLUDE_DIR "${TFHE_CUDA_COMMON_DIR}/include")
endif()
if(NOT TARGET tfhe_cuda_common)
add_subdirectory("${TFHE_CUDA_COMMON_DIR}" "${CMAKE_CURRENT_BINARY_DIR}/tfhe_cuda_common")
endif()
add_subdirectory(src)
enable_testing()
add_subdirectory(tests_and_benchmarks)
target_include_directories(tfhe_cuda_backend PRIVATE ${INCLUDE_DIR})
target_include_directories(tfhe_cuda_backend PRIVATE ${TFHE_CUDA_COMMON_INCLUDE_DIR})
target_link_libraries(tfhe_cuda_backend PUBLIC tfhe_cuda_common)
# This is required for rust cargo build
install(TARGETS tfhe_cuda_backend DESTINATION .)

View File

@@ -5,6 +5,7 @@
#include "crypto/keyswitch.cuh"
#include "device.h"
#include "helper_multi_gpu.h"
#include "helper_profile.cuh"
#include "integer/comparison.h"
#include "integer/integer_utilities.h"
#include "integer/scalar_addition.cuh"
@@ -14,7 +15,6 @@
#include "polynomial/functions.cuh"
#include "utils/helper.cuh"
#include "utils/helper_multi_gpu.cuh"
#include "utils/helper_profile.cuh"
#include <algorithm>
#include <functional>

View File

@@ -3,10 +3,10 @@
#include "checked_arithmetic.h"
#include "device.h"
#include "helper_profile.cuh"
#include "integer/integer.h"
#include "integer/radix_ciphertext.h"
#include "utils/helper.cuh"
#include "utils/helper_profile.cuh"
inline CudaLweCiphertextListFFI
to_lwe_ciphertext_list(CudaRadixCiphertextFFI *radix) {

View File

@@ -1,12 +1,12 @@
#pragma once
#include "device.h"
#include "helper_profile.cuh"
#include "integer/integer.h"
#include "integer/radix_ciphertext.h"
#include "integer/rerand.h"
#include "integer/rerand_utilities.h"
#include "utils/helper.cuh"
#include "utils/helper_profile.cuh"
#include "zk/zk_utilities.h"
template <typename Torus, class params>

View File

@@ -1,7 +1,7 @@
#ifndef HELPER_CUH
#define HELPER_CUH
#define CEIL_DIV(M, N) (((M) + (N)-1) / (N))
#include "device.h"
inline int nextPow2(int x) {
--x;

View File

@@ -55,6 +55,7 @@ endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include)
include_directories(${CONCRETE_CUDA_SOURCE_DIR}/include)
include_directories(${CONCRETE_CUDA_SOURCE_DIR}/src)
include_directories(${TFHE_CUDA_COMMON_INCLUDE_DIR})
include_directories(${TFHE_RS_BINARY_DIR})
include_directories(${TFHE_RS_BINARY_DIR}/deps)
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")

View File

@@ -46,6 +46,7 @@ endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include)
include_directories(${CONCRETE_CUDA_SOURCE_DIR}/include)
include_directories(${TFHE_CUDA_COMMON_INCLUDE_DIR})
include_directories(${TFHE_RS_BINARY_DIR})
include_directories(${TFHE_RS_BINARY_DIR}/deps)
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")

View File

@@ -1,6 +1,9 @@
use std::ffi::c_void;
#[link(name = "tfhe_cuda_backend", kind = "static")]
// Declarations forwarded from tfhe-cuda-common. cargo-semver-checks cannot trace
// `pub use` re-exports of extern "C" functions, so the signatures must be repeated
// here to keep the public API stable across patch versions.
#[link(name = "tfhe_cuda_common", kind = "static")]
extern "C" {
pub fn cuda_create_stream(gpu_index: u32) -> *mut c_void;

View File

@@ -0,0 +1,21 @@
[package]
name = "tfhe-cuda-common"
version = "0.1.0"
edition = "2021"
authors = ["Zama team"]
license = "BSD-3-Clause-Clear"
description = "Shared CUDA device utilities for tfhe-rs backends."
homepage = "https://www.zama.org/"
documentation = "https://docs.zama.org/tfhe-rs"
repository = "https://github.com/zama-ai/tfhe-rs"
keywords = ["cuda", "gpu", "fhe", "cryptography"]
links = "tfhe_cuda_common"
rust-version.workspace = true
[build-dependencies]
cmake.workspace = true
pkg-config.workspace = true
[features]
profile = []
debug = []

View File

@@ -0,0 +1,79 @@
fn get_linux_distribution_name() -> Option<String> {
let content = std::fs::read_to_string("/etc/os-release").ok()?;
for line in content.lines() {
if let Some(value) = line.strip_prefix("NAME=") {
return Some(value.trim_matches('"').to_string());
}
}
None
}
fn main() {
if let Ok(val) = std::env::var("DOCS_RS") {
if val.parse::<u32>() == Ok(1) {
return;
}
}
if std::env::var("_CBINDGEN_IS_RUNNING").is_ok() {
return;
}
println!("cargo::rerun-if-changed=cuda/include");
println!("cargo::rerun-if-changed=cuda/src");
println!("cargo::rerun-if-changed=cuda/CMakeLists.txt");
println!("cargo::rerun-if-changed=src");
if std::env::consts::OS == "linux" {
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
.expect("CARGO_MANIFEST_DIR must be set by cargo during build");
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
println!(
"cargo:warning=This Linux distribution is not officially supported. \
Only Ubuntu is supported by tfhe-cuda-common at this time. Build may fail\n"
);
}
let mut cmake_config = cmake::Config::new("cuda");
if cfg!(feature = "profile") {
cmake_config.define("USE_NVTOOLS", "ON");
} else {
cmake_config.define("USE_NVTOOLS", "OFF");
}
if cfg!(feature = "debug") {
cmake_config.define("CMAKE_BUILD_TYPE", "Debug");
}
let dest = cmake_config.build();
println!(
"cargo:rustc-link-search=native={}",
dest.join("lib").display()
);
println!("cargo:rustc-link-lib=static=tfhe_cuda_common");
if pkg_config::Config::new()
.atleast_version("10")
.probe("cuda")
.is_err()
{
println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
}
println!("cargo:rustc-link-lib=cudart");
println!("cargo:rustc-link-search=native=/usr/lib/x86_64-linux-gnu/");
println!("cargo:rustc-link-lib=stdc++");
// Expose the include path so dependent crates can access headers via
// DEP_TFHE_CUDA_COMMON_INCLUDE
let include_dir = std::path::PathBuf::from(&manifest_dir).join("cuda/include");
println!("cargo:include={}", include_dir.display());
} else {
panic!(
"Error: platform not supported, tfhe-cuda-common not built (only Linux is supported)"
);
}
}

View File

@@ -0,0 +1,93 @@
cmake_minimum_required(VERSION 3.18)
project(
CommonCudaBackend
VERSION 1.0.0
LANGUAGES CXX)
set(MINIMUM_SUPPORTED_CUDA_VERSION 10.0)
include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
else()
message(FATAL_ERROR "CUDA compiler not found.")
endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_SUPPORTED_CUDA_VERSION})
message(FATAL_ERROR "CUDA ${MINIMUM_SUPPORTED_CUDA_VERSION} or greater is required for compilation.")
endif()
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
# Auto-detect CUDA compute capability
set(OUTPUTFILE ${CMAKE_CURRENT_SOURCE_DIR}/cuda_script)
set(CUDAFILE ${CMAKE_CURRENT_SOURCE_DIR}/check_cuda.cu)
execute_process(COMMAND nvcc -lcuda ${CUDAFILE} -o ${OUTPUTFILE})
execute_process(
COMMAND ${OUTPUTFILE}
RESULT_VARIABLE CUDA_RETURN_CODE
OUTPUT_VARIABLE ARCH)
file(REMOVE ${OUTPUTFILE})
if(${CUDA_RETURN_CODE} EQUAL 0)
set(CUDA_SUCCESS "TRUE")
else()
set(CUDA_SUCCESS "FALSE")
endif()
if(${CUDA_SUCCESS})
set(CMAKE_CUDA_ARCHITECTURES native)
string(REPLACE "-arch=sm_" "" CUDA_ARCH "${ARCH}")
set(CUDA_ARCH "${CUDA_ARCH}0")
else()
set(CMAKE_CUDA_ARCHITECTURES 70)
set(CUDA_ARCH "700")
endif()
add_compile_definitions(CUDA_ARCH=${CUDA_ARCH})
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE
Release
CACHE STRING "Build type" FORCE)
endif()
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
set(CMAKE_CUDA_FLAGS_DEBUG "-g -O0 -G")
if(${USE_NVTOOLS})
add_definitions(-DUSE_NVTOOLS)
endif()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -Xcompiler -Wextra --use_fast_math --expt-relaxed-constexpr")
set(COMMON_SOURCES src/device.cu src/helper_profile.cu)
add_library(tfhe_cuda_common STATIC ${COMMON_SOURCES})
set_target_properties(
tfhe_cuda_common
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON
CUDA_RESOLVE_DEVICE_SYMBOLS ON)
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set_target_properties(tfhe_cuda_common PROPERTIES CUDA_OPTIMIZE_DEPENDENCIES ON)
elseif(CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_options(tfhe_cuda_common PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-g -O0 -G>)
endif()
target_include_directories(tfhe_cuda_common PUBLIC include)
target_link_libraries(tfhe_cuda_common PUBLIC cudart)
install(
TARGETS tfhe_cuda_common
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib)

View File

@@ -6,6 +6,8 @@
#include <cstdlib>
#include <cuda_runtime.h>
#define CEIL_DIV(M, N) (((M) + (N)-1) / (N))
extern "C" {
#define check_cuda_error(ans) \

View File

@@ -1,5 +1,4 @@
#include "device.h"
#include "utils/helper.cuh"
#include <cstdint>
#include <cuda_runtime.h>
#include <mutex>

View File

@@ -0,0 +1,62 @@
use std::ffi::c_void;
#[link(name = "tfhe_cuda_common", kind = "static")]
extern "C" {
pub fn cuda_create_stream(gpu_index: u32) -> *mut c_void;
pub fn cuda_destroy_stream(stream: *mut c_void, gpu_index: u32);
pub fn cuda_synchronize_stream(stream: *mut c_void, gpu_index: u32);
pub fn cuda_is_available() -> u32;
pub fn cuda_malloc(size: u64, gpu_index: u32) -> *mut c_void;
pub fn cuda_malloc_async(size: u64, stream: *mut c_void, gpu_index: u32) -> *mut c_void;
pub fn cuda_check_valid_malloc(size: u64, gpu_index: u32) -> bool;
pub fn cuda_device_total_memory(gpu_index: u32) -> u64;
pub fn cuda_memcpy_async_to_gpu(
dest: *mut c_void,
src: *const c_void,
size: u64,
stream: *mut c_void,
gpu_index: u32,
);
pub fn cuda_memcpy_gpu_to_gpu(dest: *mut c_void, src: *const c_void, size: u64, gpu_index: u32);
pub fn cuda_memcpy_async_gpu_to_gpu(
dest: *mut c_void,
src: *const c_void,
size: u64,
stream: *mut c_void,
gpu_index: u32,
);
pub fn cuda_memcpy_async_to_cpu(
dest: *mut c_void,
src: *const c_void,
size: u64,
stream: *mut c_void,
gpu_index: u32,
);
pub fn cuda_memset_async(
dest: *mut c_void,
val: u64,
size: u64,
stream: *mut c_void,
gpu_index: u32,
);
pub fn cuda_get_number_of_gpus() -> i32;
pub fn cuda_get_number_of_sms() -> i32;
pub fn cuda_synchronize_device(gpu_index: u32);
pub fn cuda_drop(ptr: *mut c_void, gpu_index: u32);
} // extern "C"

View File

@@ -0,0 +1 @@
pub mod cuda_bind;

View File

@@ -1,6 +1,6 @@
[package]
name = "zk-cuda-backend"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
rust-version.workspace = true
authors = ["Zama team"]
@@ -24,7 +24,7 @@ bindgen.workspace = true
[dependencies]
ark-ec.workspace = true
ark-ff.workspace = true
tfhe-cuda-backend = { version = "0.14.0", path = "../tfhe-cuda-backend" }
tfhe-cuda-common = { version = "0.1.0", path = "../tfhe-cuda-common" }
[features]
default = []

View File

@@ -1,27 +1,12 @@
use std::path::PathBuf;
fn get_linux_distribution_name() -> Option<String> {
let content = std::fs::read_to_string("/etc/os-release").ok()?;
for line in content.lines() {
if let Some(value) = line.strip_prefix("NAME=") {
return Some(value.trim_matches('"').to_string());
}
}
None
}
fn main() {
// Handle docs.rs builds (no CUDA available)
if let Ok(val) = std::env::var("DOCS_RS") {
if val.parse::<u32>() == Ok(1) {
return;
}
}
// Workaround for cbindgen running during builds: cbindgen can trigger a second
// compilation pass that may forward incorrect arguments to cmake, crashing builds
// on make < 4.4. Since zk-cuda-backend has no macro expansions for cbindgen to
// inspect, skipping this compilation also speeds up C API builds.
if std::env::var("_CBINDGEN_IS_RUNNING").is_ok() {
return;
}
@@ -32,24 +17,18 @@ fn main() {
println!("cargo::rerun-if-changed=cuda/CMakeLists.txt");
println!("cargo::rerun-if-changed=src");
// Platform/distro check is performed by tfhe-cuda-common's build.rs, which
// Cargo builds first as a dependency.
if std::env::consts::OS == "linux" {
// GNU linker flags for handling duplicate symbols between tfhe-cuda-backend
// and zk-cuda-backend (e.g., shared device utilities)
println!("cargo:rustc-link-arg=-Wl,--allow-multiple-definition");
println!("cargo:rustc-link-arg=-Wl,--no-as-needed");
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
.expect("CARGO_MANIFEST_DIR must be set by cargo during build");
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
println!(
"cargo:warning=This Linux distribution is not officially supported. \
Only Ubuntu is supported by zk-cuda-backend at this time. Build may fail\n"
);
let mut cmake_config = cmake::Config::new("cuda");
if let Ok(common_include) = std::env::var("DEP_TFHE_CUDA_COMMON_INCLUDE") {
cmake_config.define("TFHE_CUDA_COMMON_INCLUDE_DIR", &common_include);
}
// Build CUDA library using cmake crate
let mut cmake_config = cmake::Config::new("cuda");
let dest = cmake_config.build();
// cmake crate installs to dest/lib subdirectory

View File

@@ -25,9 +25,9 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
# Auto-detect CUDA compute capability using the same mechanism as tfhe-cuda-backend
# Auto-detect CUDA compute capability (check_cuda.cu lives in tfhe-cuda-common)
set(OUTPUTFILE ${CMAKE_CURRENT_SOURCE_DIR}/cuda_script)
set(CUDAFILE ${CMAKE_CURRENT_SOURCE_DIR}/check_cuda.cu)
set(CUDAFILE ${CMAKE_CURRENT_SOURCE_DIR}/../../tfhe-cuda-common/cuda/check_cuda.cu)
execute_process(COMMAND nvcc -lcuda ${CUDAFILE} -o ${OUTPUTFILE})
execute_process(
COMMAND ${OUTPUTFILE}
@@ -71,7 +71,7 @@ set(CMAKE_CUDA_FLAGS_DEBUG "-g -O0 -G")
# Additional CUDA flags (aligned with tfhe-cuda-backend)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -Xcompiler -Wextra --use_fast_math --expt-relaxed-constexpr")
# Core source files (without device utilities) Device utilities come from tfhe-cuda-backend.
# Core source files
set(FP_CORE_SOURCES src/primitives/fp.cu src/primitives/fp2.cu src/curve.cu src/msm/pippenger/msm_pippenger.cu
src/msm/msm.cu)
@@ -81,12 +81,10 @@ set(FP_MSM_HEADERS src/msm/common.cuh)
# C wrapper source for FFI bindings (CUDA for CUDA headers)
set(C_WRAPPER_SOURCES ../src/c_wrapper.cu)
# Headers (device.h comes from tfhe-cuda-backend)
set(FP_HEADERS include/fp.h include/fp_kernels.h include/fp2.h include/fp2_kernels.h include/curve.h)
# =============================================================================
# zk_cuda_backend: For Rust/Cargo builds (WITHOUT device.cu) Device utilities are provided by tfhe-cuda-backend when
# linking with tfhe.
# zk_cuda_backend: Static library for Rust/Cargo builds
# =============================================================================
add_library(zk_cuda_backend STATIC ${FP_CORE_SOURCES} ${FP_HEADERS} ${C_WRAPPER_SOURCES})
@@ -106,8 +104,11 @@ endif()
target_link_libraries(zk_cuda_backend PUBLIC cudart)
# Include both local headers and tfhe-cuda-backend headers (for device.h)
target_include_directories(zk_cuda_backend PUBLIC include ../src/include)
if(NOT TFHE_CUDA_COMMON_INCLUDE_DIR)
set(TFHE_CUDA_COMMON_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../tfhe-cuda-common/cuda/include)
endif()
target_include_directories(zk_cuda_backend PUBLIC ${TFHE_CUDA_COMMON_INCLUDE_DIR})
# =============================================================================
# Tests and Benchmarks (optional, controlled by ZK_CUDA_BACKEND_BUILD_TESTS/BENCHMARKS)

View File

@@ -1,22 +0,0 @@
#include <stdio.h>
int main(int argc, char **argv) {
cudaDeviceProp dP;
float min_cc = 3.0;
int rc = cudaGetDeviceProperties(&dP, 0);
if (rc != cudaSuccess) {
cudaError_t error = cudaGetLastError();
printf("CUDA error: %s", cudaGetErrorString(error));
return rc; /* Failure */
}
if ((dP.major + (dP.minor / 10)) < min_cc) {
printf("Min Compute Capability of %2.1f required: %d.%d found\n Not "
"Building CUDA Code",
min_cc, dP.major, dP.minor);
return 1; /* Failure */
} else {
printf("-arch=sm_%d%d", dP.major, dP.minor);
return 0; /* Success */
}
}

View File

@@ -1,35 +0,0 @@
#pragma once
#include <cstddef>
#include <cstdio>
#include "device.h"
// Variadic checked multiplication of size_t values.
// Folds left-to-right using __builtin_mul_overflow, returning true on overflow.
// On overflow the value written to *out is unspecified.
template <typename... Args>
inline bool checked_mul(size_t *out, size_t first, Args... rest) {
size_t result = first;
for (size_t value : {static_cast<size_t>(rest)...}) {
if (__builtin_mul_overflow(result, value, &result))
return true;
}
*out = result;
return false;
}
// Variadic safe multiplication: computes the product and panics on overflow.
template <typename... Args> inline size_t safe_mul(size_t first, Args... rest) {
size_t result;
bool overflow = checked_mul(&result, first, rest...);
PANIC_IF_FALSE(!overflow, "multiplication overflow wraps size_t");
return result;
}
// Variadic safe multiplication with an appended sizeof(T) factor.
// Computes (args... * sizeof(T)) with overflow checking.
template <typename T, typename... Args>
inline size_t safe_mul_sizeof(Args... args) {
return safe_mul(args..., sizeof(T));
}

View File

@@ -1,145 +0,0 @@
#ifndef DEVICE_H
#define DEVICE_H
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>
extern "C" {
#define check_cuda_error(ans) \
{ cuda_error((ans), __FILE__, __LINE__); }
inline void cuda_error(cudaError_t code, const char *file, int line) {
if (code != cudaSuccess) {
std::fprintf(stderr, "Cuda error: %s %s %d\n", cudaGetErrorString(code),
file, line);
std::abort();
}
}
// The PANIC macro should be used to validate user-inputs to GPU functions
// it will execute in all targets, including production settings
// e.g., cudaMemCopy to the device should check that the destination pointer is
// a device pointer
#define PANIC(format, ...) \
{ \
std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__, \
__LINE__, __func__, ##__VA_ARGS__); \
std::abort(); \
}
// This is a generic assertion checking macro with user defined printf-style
// message
#define PANIC_IF_FALSE(cond, format, ...) \
do { \
if (!(cond)) { \
PANIC(format "\n\n %s\n", ##__VA_ARGS__, #cond); \
} \
} while (0)
#ifndef GPU_ASSERTS_DISABLE
// The GPU assert should be used to validate assumptions in algorithms,
// for example, checking that two user-provided quantities have a certain
// relationship or that the size of the buffer provided to a function is
// sufficient when it is filled with some algorithm that depends on
// user-provided inputs e.g., OPRF corrections buffer should not have a size
// higher than the number of blocks in the datatype that is generated
#define GPU_ASSERT(cond, format, ...) \
PANIC_IF_FALSE(cond, format, ##__VA_ARGS__)
#else
#define GPU_ASSERT(cond) \
do { \
} while (0)
#endif
uint32_t cuda_get_device();
void cuda_set_device(uint32_t gpu_index);
cudaEvent_t cuda_create_event(uint32_t gpu_index);
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
uint32_t gpu_index);
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
uint32_t gpu_index);
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index);
cudaStream_t cuda_create_stream(uint32_t gpu_index);
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index);
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index);
uint32_t cuda_is_available();
void *cuda_malloc(uint64_t size, uint32_t gpu_index);
void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
uint32_t gpu_index,
uint64_t &size_tracker,
bool allocate_gpu_memory);
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
bool cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
uint64_t cuda_device_total_memory(uint32_t gpu_index);
void cuda_memcpy_with_size_tracking_async_to_gpu(void *dest, const void *src,
uint64_t size,
cudaStream_t stream,
uint32_t gpu_index,
bool gpu_memory_allocated);
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
void cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
void *dest, void const *src, uint64_t size, cudaStream_t stream,
uint32_t gpu_index, bool gpu_memory_allocated);
void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
uint32_t gpu_index);
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
uint64_t size, cudaStream_t stream,
uint32_t gpu_index,
bool gpu_memory_allocated);
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
int cuda_get_number_of_gpus();
int cuda_get_number_of_sms();
void cuda_synchronize_device(uint32_t gpu_index);
void cuda_drop(void *ptr, uint32_t gpu_index);
void cuda_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
uint32_t gpu_index,
bool gpu_memory_allocated);
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
}
uint32_t cuda_get_max_shared_memory(uint32_t gpu_index);
uint32_t cuda_get_max_shared_memory_per_block(uint32_t gpu_index);
bool cuda_check_support_cooperative_groups();
bool cuda_check_support_thread_block_clusters();
template <typename Torus>
void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
Torus *d_array, Torus value, Torus n);
#endif

View File

@@ -3,8 +3,7 @@
#include <cstdint>
#include <cuda_runtime.h>
// Ceiling division: computes (M + N - 1) / N
#define CEIL_DIV(M, N) (((M) + (N)-1) / (N))
#include "device.h"
// CUDA architecture constant
#define CUDA_WARP_SIZE 32 // NVIDIA warp size (threads per warp)

View File

@@ -1,16 +0,0 @@
#ifndef HELPER_PROFILE
#define HELPER_PROFILE
#ifdef USE_NVTOOLS
#include <nvtx3/nvToolsExt.h>
#endif
void cuda_nvtx_label_with_color(const char *name);
void cuda_nvtx_pop();
#define PUSH_RANGE(name) \
{ cuda_nvtx_label_with_color(name); }
#define POP_RANGE() \
{ cuda_nvtx_pop(); }
#endif

View File

@@ -1,43 +0,0 @@
#include "helper_profile.cuh"
#include <stdint.h>
uint32_t adler32(const unsigned char *data) {
const uint32_t MOD_ADLER = 65521;
uint32_t a = 1, b = 0;
size_t index;
for (index = 0; data[index] != 0; ++index) {
a = (a + data[index] * 2) % MOD_ADLER;
b = (b + a) % MOD_ADLER;
}
return (b << 16) | a;
}
void cuda_nvtx_label_with_color(const char *name) {
#ifdef USE_NVTOOLS
int color_id = adler32((const unsigned char *)name);
int r, g, b;
r = color_id & 0x000000ff;
g = (color_id & 0x000ff000) >> 12;
b = (color_id & 0x0ff00000) >> 20;
if (r < 64 & g < 64 & b < 64) {
r = r * 3;
g = g * 3 + 64;
b = b * 4;
}
color_id = 0xff000000 | (r << 16) | (g << 8) | (b);
nvtxEventAttributes_t eventAttrib = {0};
eventAttrib.version = NVTX_VERSION;
eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
eventAttrib.colorType = NVTX_COLOR_ARGB;
eventAttrib.color = color_id;
eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
eventAttrib.message.ascii = name;
nvtxRangePushEx(&eventAttrib);
#endif
}
void cuda_nvtx_pop() {
#ifdef USE_NVTOOLS
nvtxRangePop();
#endif
}

View File

@@ -15,29 +15,27 @@ set(BENCHMARK_ENABLE_GTEST_TESTS
CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googlebenchmark)
# Path to tfhe-cuda-backend for device utilities
set(TFHE_CUDA_BACKEND_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../tfhe-cuda-backend/cuda)
set(TFHE_CUDA_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../tfhe-cuda-common/cuda)
# Path to main source (needed for CUDA device linking)
set(ZK_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
set(ZK_PRIMITIVES_DIR ${ZK_SRC_DIR}/primitives)
# Build device library from tfhe-cuda-backend
add_library(tfhe_device_bench STATIC ${TFHE_CUDA_BACKEND_DIR}/src/device.cu
${TFHE_CUDA_BACKEND_DIR}/src/utils/helper_profile.cu)
add_library(common_device_bench STATIC ${TFHE_CUDA_COMMON_DIR}/src/device.cu
${TFHE_CUDA_COMMON_DIR}/src/helper_profile.cu)
set_target_properties(
tfhe_device_bench
common_device_bench
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON
CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_include_directories(tfhe_device_bench PUBLIC ${TFHE_CUDA_BACKEND_DIR}/include)
target_link_libraries(tfhe_device_bench PUBLIC cudart)
target_include_directories(common_device_bench PUBLIC ${TFHE_CUDA_COMMON_DIR}/include)
target_link_libraries(common_device_bench PUBLIC cudart)
# Benchmark executable for Fp Include fp.cu, fp2.cu and curve.cu directly to enable proper CUDA device linking with
# benchmark kernels (curve.cu depends on both fp and fp2)
add_executable(benchmark_fp benchmark_fp.cu ../tests/primitives/fp_helpers.cu ${ZK_PRIMITIVES_DIR}/fp.cu
${ZK_PRIMITIVES_DIR}/fp2.cu ${ZK_SRC_DIR}/curve.cu)
target_link_libraries(benchmark_fp tfhe_device_bench benchmark::benchmark benchmark::benchmark_main)
target_link_libraries(benchmark_fp common_device_bench benchmark::benchmark benchmark::benchmark_main)
target_include_directories(benchmark_fp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
set_target_properties(
benchmark_fp
@@ -49,7 +47,7 @@ set_target_properties(
# benchmark kernels
add_executable(benchmark_fp2 benchmark_fp2.cu ../tests/primitives/fp2_helpers.cu ${ZK_PRIMITIVES_DIR}/fp.cu
${ZK_PRIMITIVES_DIR}/fp2.cu ${ZK_SRC_DIR}/curve.cu)
target_link_libraries(benchmark_fp2 tfhe_device_bench benchmark::benchmark benchmark::benchmark_main)
target_link_libraries(benchmark_fp2 common_device_bench benchmark::benchmark benchmark::benchmark_main)
target_include_directories(benchmark_fp2 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
set_target_properties(
benchmark_fp2
@@ -59,7 +57,7 @@ set_target_properties(
# Benchmark executable for MSM
add_executable(benchmark_msm benchmark_msm.cu)
target_link_libraries(benchmark_msm zk_cuda_backend tfhe_device_bench benchmark::benchmark benchmark::benchmark_main)
target_link_libraries(benchmark_msm zk_cuda_backend common_device_bench benchmark::benchmark benchmark::benchmark_main)
set_target_properties(
benchmark_msm
PROPERTIES CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES}

View File

@@ -12,29 +12,26 @@ set(gtest_force_shared_crt
CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
# Path to tfhe-cuda-backend for device utilities
set(TFHE_CUDA_BACKEND_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../tfhe-cuda-backend/cuda)
set(TFHE_CUDA_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../tfhe-cuda-common/cuda)
# Path to main source (needed for CUDA device linking)
set(ZK_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
set(ZK_PRIMITIVES_DIR ${ZK_SRC_DIR}/primitives)
# Build device library from tfhe-cuda-backend
add_library(tfhe_device STATIC ${TFHE_CUDA_BACKEND_DIR}/src/device.cu
${TFHE_CUDA_BACKEND_DIR}/src/utils/helper_profile.cu)
add_library(common_device STATIC ${TFHE_CUDA_COMMON_DIR}/src/device.cu ${TFHE_CUDA_COMMON_DIR}/src/helper_profile.cu)
set_target_properties(
tfhe_device
common_device
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON
CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_include_directories(tfhe_device PUBLIC ${TFHE_CUDA_BACKEND_DIR}/include)
target_link_libraries(tfhe_device PUBLIC cudart)
target_include_directories(common_device PUBLIC ${TFHE_CUDA_COMMON_DIR}/include)
target_link_libraries(common_device PUBLIC cudart)
# Test executable for Fp Include fp.cu, fp2.cu and curve.cu directly to enable proper CUDA device linking with test
# kernels (curve.cu depends on both fp and fp2)
add_executable(test_fp primitives/test_fp.cu primitives/test_fp_gpu_helpers.cu primitives/fp_helpers.cu
${ZK_PRIMITIVES_DIR}/fp.cu ${ZK_PRIMITIVES_DIR}/fp2.cu ${ZK_SRC_DIR}/curve.cu)
target_link_libraries(test_fp tfhe_device GTest::gtest_main)
target_link_libraries(test_fp common_device GTest::gtest_main)
target_include_directories(test_fp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
set_target_properties(
test_fp
@@ -46,7 +43,7 @@ set_target_properties(
# kernels
add_executable(test_fp2 primitives/test_fp2.cu primitives/test_fp2_gpu_helpers.cu primitives/fp2_helpers.cu
${ZK_PRIMITIVES_DIR}/fp.cu ${ZK_PRIMITIVES_DIR}/fp2.cu ${ZK_SRC_DIR}/curve.cu)
target_link_libraries(test_fp2 tfhe_device GTest::gtest_main)
target_link_libraries(test_fp2 common_device GTest::gtest_main)
target_include_directories(test_fp2 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
set_target_properties(
test_fp2
@@ -56,7 +53,7 @@ set_target_properties(
# Test executable for MSM
add_executable(test_msm test_msm.cu)
target_link_libraries(test_msm zk_cuda_backend tfhe_device GTest::gtest_main)
target_link_libraries(test_msm zk_cuda_backend common_device GTest::gtest_main)
set_target_properties(
test_msm
PROPERTIES CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES}
@@ -65,7 +62,7 @@ set_target_properties(
# Test executable for point operations
add_executable(test_point_ops test_point_ops.cu)
target_link_libraries(test_point_ops zk_cuda_backend tfhe_device GTest::gtest_main)
target_link_libraries(test_point_ops zk_cuda_backend common_device GTest::gtest_main)
set_target_properties(
test_point_ops
PROPERTIES CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES}

View File

@@ -5,7 +5,7 @@ set(ZK_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../include)
# basic_fp_ops: host-side Fp field arithmetic
add_executable(basic_fp_ops basic_fp_ops.cu)
target_link_libraries(basic_fp_ops zk_cuda_backend tfhe_device)
target_link_libraries(basic_fp_ops zk_cuda_backend common_device)
target_include_directories(basic_fp_ops PRIVATE ${ZK_INCLUDE_DIR})
set_target_properties(
basic_fp_ops
@@ -15,7 +15,7 @@ set_target_properties(
# basic_curve_ops: host-side G1 elliptic curve operations
add_executable(basic_curve_ops basic_curve_ops.cu)
target_link_libraries(basic_curve_ops zk_cuda_backend tfhe_device)
target_link_libraries(basic_curve_ops zk_cuda_backend common_device)
target_include_directories(basic_curve_ops PRIVATE ${ZK_INCLUDE_DIR})
set_target_properties(
basic_curve_ops
@@ -25,7 +25,7 @@ set_target_properties(
# basic_msm: GPU-accelerated multi-scalar multiplication
add_executable(basic_msm basic_msm.cu)
target_link_libraries(basic_msm zk_cuda_backend tfhe_device)
target_link_libraries(basic_msm zk_cuda_backend common_device)
target_include_directories(basic_msm PRIVATE ${ZK_INCLUDE_DIR})
set_target_properties(
basic_msm

View File

@@ -44,9 +44,7 @@
//! let g1_affine_again = g1_proj.to_affine();
//! ```
// Force linking of tfhe-cuda-backend which provides device utilities (cuda_malloc, cuda_set_device,
// etc.)
extern crate tfhe_cuda_backend;
extern crate tfhe_cuda_common;
// Auto-generated bindgen bindings (matching tfhe-cuda-backend pattern)
#[allow(warnings)]

View File

@@ -193,9 +193,9 @@ mod tests {
let gen = G1Affine::new(Fp::new(G1_GENERATOR_X), Fp::new(G1_GENERATOR_Y), false);
let one = Scalar::from_u64(1);
let stream = unsafe { tfhe_cuda_backend::cuda_bind::cuda_create_stream(0) };
let stream = unsafe { tfhe_cuda_common::cuda_bind::cuda_create_stream(0) };
let result = G1Projective::msm(&[gen], &[one], stream, 0, false).unwrap();
unsafe { tfhe_cuda_backend::cuda_bind::cuda_destroy_stream(stream, 0) };
unsafe { tfhe_cuda_common::cuda_bind::cuda_destroy_stream(stream, 0) };
// from_montgomery_normalized() normalizes (divides by Z in Montgomery form)
// then converts X, Y to normal form and sets Z = 1 (normal). The resulting
@@ -218,9 +218,9 @@ mod tests {
let gen = G2Affine::new(x, y, false);
let one = Scalar::from_u64(1);
let stream = unsafe { tfhe_cuda_backend::cuda_bind::cuda_create_stream(0) };
let stream = unsafe { tfhe_cuda_common::cuda_bind::cuda_create_stream(0) };
let result = G2Projective::msm(&[gen], &[one], stream, 0, false).unwrap();
unsafe { tfhe_cuda_backend::cuda_bind::cuda_destroy_stream(stream, 0) };
unsafe { tfhe_cuda_common::cuda_bind::cuda_destroy_stream(stream, 0) };
// Same approach: extract affine coordinates directly from normalized projective
let normalized = result.from_montgomery_normalized();

View File

@@ -25,8 +25,8 @@ zeroize = "1.7.0"
num-bigint = "0.4.5"
tfhe-versionable = { version = "0.7.0", path = "../utils/tfhe-versionable" }
tfhe-safe-serialize = { version = "0.1.0", path = "../utils/tfhe-safe-serialize" }
zk-cuda-backend = { version = "0.1.0", path = "../backends/zk-cuda-backend", optional = true }
tfhe-cuda-backend = { version = "=0.14.0", path = "../backends/tfhe-cuda-backend", optional = true }
zk-cuda-backend = { version = "0.1.1", path = "../backends/zk-cuda-backend", optional = true }
tfhe-cuda-common = { version = "0.1.0", path = "../backends/tfhe-cuda-common", optional = true }
itertools.workspace = true
[target.'cfg(target_family = "wasm")'.dependencies]
getrandom = { workspace = true, features = ["js"] }
@@ -37,7 +37,7 @@ wasm-par-mq = { version = "0.1.0", path = "../utils/wasm-par-mq", features = [
[features]
experimental = []
cross-origin-wasm = ["dep:wasm-par-mq"]
gpu-experimental = ["dep:zk-cuda-backend", "dep:tfhe-cuda-backend"]
gpu-experimental = ["dep:zk-cuda-backend", "dep:tfhe-cuda-common"]
[dev-dependencies]
serde_json = "~1.0"

View File

@@ -14,7 +14,7 @@ use crate::curve_api::bls12_446::{G1Affine, G2Affine, Zp, G1, G2};
use crate::curve_api::CurveGroupOps;
use ark_ec::CurveGroup;
use ark_ff::{BigInt, MontFp, PrimeField};
use tfhe_cuda_backend::cuda_bind::{
use tfhe_cuda_common::cuda_bind::{
cuda_create_stream, cuda_destroy_stream, cuda_get_number_of_gpus,
};
use zk_cuda_backend::{G1Affine as CudaG1Affine, G2Affine as CudaG2Affine, Scalar as CudaScalar};

View File

@@ -3,7 +3,7 @@
use crate::curve_api::bls12_446::{Zp, G1, G2};
use crate::curve_api::CurveGroupOps;
use crate::gpu::{g1_affine_from_cuda, g1_affine_to_cuda, g2_affine_from_cuda, g2_affine_to_cuda};
use tfhe_cuda_backend::cuda_bind::{cuda_create_stream, cuda_destroy_stream};
use tfhe_cuda_common::cuda_bind::{cuda_create_stream, cuda_destroy_stream};
use zk_cuda_backend::conversions::{g1_affine_from_montgomery, g2_affine_from_montgomery};
use zk_cuda_backend::{
G1Affine as CudaG1Affine, G1Projective as CudaG1Projective, G2Affine as CudaG2Affine,

View File

@@ -64,9 +64,9 @@ tfhe-fft = { version = "0.10.1", path = "../tfhe-fft", features = [
"serde",
"fft128",
] }
tfhe-ntt = { version = "0.7.1", path = "../tfhe-ntt" }
tfhe-ntt = { version = "0.7.0", path = "../tfhe-ntt" }
pulp = { workspace = true, features = ["default"] }
tfhe-cuda-backend = { version = "0.14.0", path = "../backends/tfhe-cuda-backend", optional = true }
tfhe-cuda-backend = { version = "0.14.1", path = "../backends/tfhe-cuda-backend", optional = true }
aligned-vec = { workspace = true, features = ["default", "serde"] }
dyn-stack = { workspace = true, features = ["default"] }
paste = { workspace = true }

View File

@@ -1477,4 +1477,67 @@ pub(crate) mod test {
}
}
}
#[test]
fn oprf_test_uniformity_bits_ci_run_filter() {
let sample_count: usize = 100_000;
let p_value_limit: f64 = 0.000_01;
use crate::shortint::gen_keys;
use crate::shortint::parameters::test_params::{
TEST_PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128,
TEST_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
};
use crate::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
for params in [
ShortintParameterSet::from(
TEST_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
),
ShortintParameterSet::from(PARAM_MESSAGE_2_CARRY_2_KS_PBS),
ShortintParameterSet::from(TEST_PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128),
] {
let (ck, sk) = gen_keys(params);
let oprf_ck = OprfPrivateKey::new(&ck);
let oprf_sk = OprfServerKey::new(&oprf_ck, &ck).unwrap();
let random_bits_per_block = sk.message_modulus.0.ilog2() as u64;
for random_bits_count in [3u64, 4] {
let expected_num_blocks =
random_bits_count.div_ceil(random_bits_per_block) as usize;
test_uniformity(
sample_count,
p_value_limit,
1 << random_bits_count,
|seed| {
let seed = (seed as u128).to_le_bytes();
let blocks = oprf_sk.generate_oblivious_pseudo_random_bits(
seed.as_slice(),
random_bits_count,
&sk,
);
let mut combined: u64 = 0;
let mut shift = 0u64;
for (i, block) in blocks.iter().enumerate() {
let decrypted = ck.decrypt_message_and_carry(block);
let block_bits = bits_in_block(
i,
expected_num_blocks,
random_bits_count,
random_bits_per_block,
);
combined |= decrypted << shift;
shift += block_bits;
}
combined
},
);
}
}
}
}