Compare commits

..

17 Commits

Author SHA1 Message Date
Ethan-000
621676bd41 feat: add num_bits() function (#570)
## Describe the changes

Adding a `num_bits()` function, similar to
dcf73a5f96/ff/src/biginteger/mod.rs (L482)

This could be useful for small-field optimizations.
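For intuition, here is a host-side C++ sketch (illustrative only, not the ICICLE API; the function name and the little-endian 32-bit limb layout are assumptions) of what such a helper computes: the bit length of a multi-limb integer, ignoring leading zero limbs and bits.

```c++
#include <cstdint>
#include <cstdio>

// Bit length of a little-endian multi-limb integer, skipping leading zero limbs.
// Hypothetical helper for illustration; the device code uses __clz per limb instead.
unsigned num_bits_host(const uint32_t* limbs, unsigned limb_count)
{
  for (unsigned i = limb_count; i-- > 0;) {
    if (limbs[i] != 0) {
      return i * 32 + (32 - __builtin_clz(limbs[i])); // bits below + bits in the top non-zero limb
    }
  }
  return 0; // all limbs are zero
}

int main()
{
  uint32_t x[4] = {0xdeadbeef, 0x5, 0, 0}; // value fits in 35 bits
  printf("%u\n", num_bits_host(x, 4));     // prints 35
}
```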

## Linked Issues

Resolves #
2024-08-07 09:37:16 +03:00
Otsar
badb8c5d68 Removed ZK containers from docs sidebar (#571)
## Describe the changes

This PR...

## Linked Issues

Resolves #
2024-08-04 18:38:37 +03:00
Otsar
1300434bbe Removed ZK containers from docs sidebar 2024-08-04 11:14:06 +03:00
yshekel
6a67893773 remove the recommendation to use zk-containers in examples (#569) 2024-08-01 14:58:02 +03:00
ChickenLover
0cb0b49be9 Add Sha3 (#560)
## Describe the changes

This PR...

## Linked Issues

Resolves #
2024-07-28 15:31:28 +07:00
Vlad
8411ed1451 Feat/vlad/refactor from affine (#554)
## Describe the changes

This PR refactors the various affine-to-projective conversion functions to use the underlying C function.

It also includes a small bug fix for the ProjectiveToAffine() function in Go.

## Linked Issues

Resolves #
2024-07-22 10:37:24 +02:00
Vlad
877018c84c more go fmt 2024-07-15 16:55:40 +02:00
Vlad
91ac666e06 Merge branch 'feat/vlad/refactor-from-affine' of github.com:ingonyama-zk/icicle into feat/vlad/refactor-from-affine 2024-07-15 16:48:25 +02:00
Vlad
46e6c20440 go fmt 2024-07-15 16:47:40 +02:00
Vlad
e4eda8938d go fmt 2024-07-05 21:29:44 +02:00
Vlad
fb707d5350 Merge branch 'main' into feat/vlad/refactor-from-affine 2024-07-05 15:40:34 +02:00
Vlad
6336e74d5a refactor from_affine with C link 2024-07-04 11:03:58 +02:00
Vlad
279cdc66e0 generated go files 2024-07-03 10:41:32 +02:00
Vlad
81644fc28c use zero method of projective in toProjective
Co-authored-by: Jeremy Felder <jeremy.felder1@gmail.com>
2024-07-03 10:37:02 +02:00
Vlad
17732ea013 use zero method of projective in fromAffine
Co-authored-by: Jeremy Felder <jeremy.felder1@gmail.com>
2024-07-03 10:36:14 +02:00
Vlad
9e057c835d fixed to_projective in rust 2024-07-03 09:18:41 +02:00
Vlad
f08b5bb49d fixed fromAffine and toProj in golang 2024-07-03 09:07:43 +02:00
52 changed files with 371 additions and 1219 deletions

View File

@@ -12,6 +12,10 @@ At its core, Keccak consists of a permutation function operating on a state arra
- **Chi:** This step applies a nonlinear mixing operation to each lane of the state array.
- **Iota:** This step introduces a round constant to the state array.
## Keccak vs Sha3
There is some [confusion](https://www.cybertest.com/blog/keccak-vs-sha3) between what is called `Keccak` and what is called `Sha3`; ICICLE supports both. `Keccak256` refers to the original (pre-standardization) hash function used in Ethereum, and `Sha3-256` refers to the NIST-standardized variant; the two differ only in their padding rule.
## Using Keccak
ICICLE Keccak supports batch hashing, which can be utilized for constructing a merkle tree or running multiple hashes in parallel.
@@ -35,7 +39,7 @@ let input_block_len = 136;
let number_of_hashes = 1024;
let preimages = vec![1u8; number_of_hashes * input_block_len];
let mut digests = vec![0u8; number_of_hashes * 64];
let mut digests = vec![0u8; number_of_hashes * 32];
let preimages_slice = HostSlice::from_slice(&preimages);
let digests_slice = HostSlice::from_mut_slice(&mut digests);

View File

@@ -191,11 +191,6 @@ module.exports = {
},
]
},
{
type: "doc",
label: "ZK Containers",
id: "ZKContainers",
},
{
type: "doc",
label: "Ingonyama Grant program",

View File

@@ -1,24 +0,0 @@
cmake_minimum_required(VERSION 3.18)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
else()
set(CMAKE_CUDA_ARCHITECTURES native) # 'native' requires CMake 3.24+; on earlier versions it is ignored and no architecture is passed
endif ()
project(example LANGUAGES CUDA CXX)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS_RELEASE "")
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
add_executable(
example
example.cu
)
target_include_directories(example PRIVATE "../../../icicle/include")
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a)
find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ )
target_link_libraries(example ${NVML_LIBRARY})
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

View File

@@ -1,38 +0,0 @@
# Icicle example: Montgomery vector operations (mul, add, sub) for all possible options:
- is_a_on_device
- is_b_on_device
- is_result_on_device
- is_in_montgomery_form
- (is_async isn't checked)
## Key-Takeaway
`Icicle` accelerates the multiplication operation `*` using the [Karatsuba algorithm](https://en.wikipedia.org/wiki/Karatsuba_algorithm)
## Concise Usage Explanation
Define the field to be used, e.g.:
```c++
#include "api/bn254.h"
```
```c++
using namespace bn254;
typedef scalar_t T;
```
## Running the example
- `cd` to your example directory
- compile with `./compile.sh`
- run with `./run.sh`
## What's in the example
1. Define the parameters of the example, such as the vector size
2. Generate random vectors on the host
3. Copy them to the device
4. Execute element-wise vector multiplication on the device
5. Copy the results back to the host

View File

@@ -1,15 +0,0 @@
#!/bin/bash
# Exit immediately on error
set -e
mkdir -p build/example
mkdir -p build/icicle
# Configure and build Icicle
cmake -S ../../../icicle/ -B build/icicle -DMSM=OFF -DCMAKE_BUILD_TYPE=Debug -DCURVE=bn254
cmake --build build/icicle -j
# Configure and build the example application
cmake -DCMAKE_BUILD_TYPE=Debug -S. -B build/example
cmake --build build/example

View File

@@ -1,15 +0,0 @@
#!/bin/bash
# Exit immediately on error
set -e
mkdir -p build/example
mkdir -p build/icicle
# Configure and build Icicle
cmake -S ../../../icicle/ -B build/icicle -DMSM=OFF -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
cmake --build build/icicle
# Configure and build the example application
cmake -S . -B build/example
cmake --build build/example

View File

@@ -1,734 +0,0 @@
#include <iostream>
#include <iomanip>
#include <chrono>
#include <nvml.h>
#include "api/bn254.h"
#include "vec_ops/vec_ops.cuh"
#include <vec_ops/../../include/utils/mont.cuh>
using namespace vec_ops;
using namespace bn254;
typedef scalar_t T;
enum Op { MUL, ADD, SUB, LAST };
// bn254 p = 21888242871839275222246405745257275088548364400416034343698204186575808495617
int vector_op(
T* vec_a,
T* vec_b,
T* vec_result,
size_t n_elements,
device_context::DeviceContext ctx,
vec_ops::VecOpsConfig config,
Op op)
{
cudaError_t err;
switch (op) {
case MUL:
err = bn254_mul_cuda(vec_a, vec_b, n_elements, config, vec_result);
break;
case ADD:
err = bn254_add_cuda(vec_a, vec_b, n_elements, config, vec_result);
break;
case SUB:
err = bn254_sub_cuda(vec_a, vec_b, n_elements, config, vec_result);
break;
}
// cudaError_t err = bn254_mul_cuda(vec_a, vec_b, n_elements, config, vec_result);
if (err != cudaSuccess) {
std::cerr << "Failed to multiply vectors - " << cudaGetErrorString(err) << std::endl;
return 0;
}
return 0;
}
int vector_mul(
T* vec_a, T* vec_b, T* vec_result, size_t n_elements, device_context::DeviceContext ctx, vec_ops::VecOpsConfig config)
{
cudaError_t err = bn254_mul_cuda(vec_a, vec_b, n_elements, config, vec_result);
if (err != cudaSuccess) {
std::cerr << "Failed to multiply vectors - " << cudaGetErrorString(err) << std::endl;
return 0;
}
return 0;
}
int vector_add(
T* vec_a, T* vec_b, T* vec_result, size_t n_elements, device_context::DeviceContext ctx, vec_ops::VecOpsConfig config)
{
cudaError_t err = bn254_add_cuda(vec_a, vec_b, n_elements, config, vec_result);
if (err != cudaSuccess) {
std::cerr << "Failed to multiply vectors - " << cudaGetErrorString(err) << std::endl;
return 0;
}
return 0;
}
int vector_sub(
T* vec_a, T* vec_b, T* vec_result, size_t n_elements, device_context::DeviceContext ctx, vec_ops::VecOpsConfig config)
{
cudaError_t err = bn254_sub_cuda(vec_a, vec_b, n_elements, config, vec_result);
if (err != cudaSuccess) {
std::cerr << "Failed to multiply vectors - " << cudaGetErrorString(err) << std::endl;
return 0;
}
return 0;
}
int main(int argc, char** argv)
{
const unsigned vector_size = 1 << 0;
const unsigned repetitions = 1 << 0;
cudaError_t err;
nvmlInit();
nvmlDevice_t device;
nvmlDeviceGetHandleByIndex(0, &device); // for GPU 0
std::cout << "Icicle-Examples: vector mul / add / sub operations." << std::endl;
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
if (nvmlDeviceGetName(device, name, NVML_DEVICE_NAME_BUFFER_SIZE) == NVML_SUCCESS) {
std::cout << "GPU Model: " << name << std::endl;
} else {
std::cerr << "Failed to get GPU model name." << std::endl;
}
unsigned power_limit;
nvmlDeviceGetPowerManagementLimit(device, &power_limit);
std::cout << "Vector size: " << vector_size << std::endl;
std::cout << "Repetitions: " << repetitions << std::endl;
std::cout << "Power limit: " << std::fixed << std::setprecision(3) << 1.0e-3 * power_limit << " W" << std::endl;
unsigned int baseline_power;
nvmlDeviceGetPowerUsage(device, &baseline_power);
std::cout << "Baseline power: " << std::fixed << std::setprecision(3) << 1.0e-3 * baseline_power << " W" << std::endl;
unsigned baseline_temperature;
if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &baseline_temperature) == NVML_SUCCESS) {
std::cout << "Baseline GPU Temperature: " << baseline_temperature << " C" << std::endl;
} else {
std::cerr << "Failed to get GPU temperature." << std::endl;
}
// host data
std::cout << "Allocate memory for the input vectors (both normal and Montgomery presentation)" << std::endl;
T* host_in1_init = (T*)malloc(vector_size * sizeof(T));
T* host_in2_init = (T*)malloc(vector_size * sizeof(T));
std::cout << "Initializing vectors with normal presentation random data" << std::endl;
T::rand_host_many(host_in1_init, vector_size);
T::rand_host_many(host_in2_init, vector_size);
std::cout << "Allocate memory for the output vectors" << std::endl;
T* host_out = (T*)malloc(vector_size * sizeof(T)); // This memory will be used for the test output.
T* host_out_ref_mul = (T*)malloc(
vector_size *
sizeof(T)); // This memory will be used as a reference result for mul (will be compared to host_out content).
T* host_out_ref_add = (T*)malloc(
vector_size *
sizeof(T)); // This memory will be used as a reference result for add (will be compared to host_out content).
T* host_out_ref_sub = (T*)malloc(
vector_size *
sizeof(T)); // This memory will be used as a reference result for sub (will be compared to host_out content).
std::cout << "Initializing output vectors with random data" << std::endl;
T::rand_host_many(host_out, vector_size);
T::rand_host_many(host_out_ref_mul, vector_size);
T::rand_host_many(host_out_ref_add, vector_size);
T::rand_host_many(host_out_ref_sub, vector_size);
// device data
device_context::DeviceContext ctx = device_context::get_default_device_context();
T* device_in1;
T* device_in2;
T* device_out;
err = cudaMalloc((void**)&device_in1, vector_size * sizeof(T));
if (err != cudaSuccess) {
std::cerr << "Failed to allocate device memory - " << cudaGetErrorString(err) << std::endl;
return 0;
}
err = cudaMalloc((void**)&device_in2, vector_size * sizeof(T));
if (err != cudaSuccess) {
std::cerr << "Failed to allocate device memory - " << cudaGetErrorString(err) << std::endl;
return 0;
}
err = cudaMalloc((void**)&device_out, vector_size * sizeof(T));
if (err != cudaSuccess) {
std::cerr << "Failed to allocate device memory - " << cudaGetErrorString(err) << std::endl;
return 0;
}
vec_ops::VecOpsConfig config = vec_ops::DefaultVecOpsConfig();
//****************************************
// Test warm-up and reference output config. The reference output is used to check whether the test passed.
//****************************************
// copy from host to device
err = cudaMemcpy(device_in1, host_in1_init, vector_size * sizeof(T), cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host to device - " << cudaGetErrorString(err) << std::endl;
return 0;
}
err = cudaMemcpy(device_in2, host_in2_init, vector_size * sizeof(T), cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host to device - " << cudaGetErrorString(err) << std::endl;
return 0;
}
std::cout << "Starting warm-up run" << std::endl;
// Warm-up loop
for (int op = MUL; op != LAST; op++) {
for (int i = 0; i < repetitions; i++) {
// vector_mul(device_in1, device_in2, device_out, vector_size, ctx, config);
vector_op(device_in1, device_in2, device_out, vector_size, ctx, config, (Op)op);
}
switch (op) {
case MUL:
err = cudaMemcpy(host_out_ref_mul, device_out, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
break;
case ADD:
err = cudaMemcpy(host_out_ref_add, device_out, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
break;
case SUB:
err = cudaMemcpy(host_out_ref_sub, device_out, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
break;
}
}
// copy the result from device to host_out_ref_mul to keep it for later comparisons.
// err = cudaMemcpy(host_out_ref_mul, device_out, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_out to host - " << cudaGetErrorString(err) << std::endl;
return 0;
}
//****************************************
// End of test warm-up and reference output config.
//****************************************
std::cout << "Starting benchmarking" << std::endl;
unsigned power_before;
nvmlDeviceGetPowerUsage(device, &power_before);
std::cout << "Power before: " << std::fixed << std::setprecision(3) << 1.0e-3 * power_before << " W" << std::endl;
std::cout << "Power utilization: " << std::fixed << std::setprecision(1) << (float)100.0 * power_before / power_limit
<< " %" << std::endl;
unsigned temperature_before;
if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature_before) == NVML_SUCCESS) {
std::cout << "GPU Temperature before: " << temperature_before << " C" << std::endl;
} else {
std::cerr << "Failed to get GPU temperature." << std::endl;
}
//*******************************************************
// Benchmark test:
// Loop for (mul, add, sub):
// Loop (is_a_on_device, is_b_on_device, is_result_on_device, is_in_montgomery_form):
//*******************************************************
T* host_in1 =
(T*)malloc(vector_size * sizeof(T)); // This buffer is used to load the data from host_in1_init for the benchmark.
T* host_in2 =
(T*)malloc(vector_size * sizeof(T)); // This buffer is used to load the data from host_in2_init for the benchmark.
// Test when the result is not in-place
for (int op = MUL; op != LAST; op++) {
// for (int config_idx = 0; config_idx < 0; config_idx++) {
for (int config_idx = 0; config_idx < 16; config_idx++) {
std::cout << "Start benchmark loop for config_idx " << config_idx << std::endl;
for (int i = 0; i < vector_size; i++) {
host_in1[i] = host_in1_init[i];
host_in2[i] = host_in2_init[i];
}
config.is_a_on_device = (config_idx >> 3) & 0x1;
config.is_b_on_device = (config_idx >> 2) & 0x1;
config.is_result_on_device = (config_idx >> 1) & 0x1;
config.is_in_montgomery_form = (config_idx >> 0) & 0x1;
// Copy from host to device again: device_inX was already overwritten by the warm-up and the data is reused later in the loop.
if (config.is_a_on_device) {
if (config.is_in_montgomery_form) {
err =
cudaMemcpy(device_in1, host_in1, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in1 to device_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(
mont::to_montgomery(device_in1, vector_size, config.ctx.stream, device_in1)); // Convert in-place.
} else { // Normal presentation.
err =
cudaMemcpy(device_in1, host_in1, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in1 to device_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
}
} else {
if (config.is_in_montgomery_form) { // Copy to device, convert to montgomery and copy back to host.
err =
cudaMemcpy(device_in1, host_in1, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in1 to device_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(mont::to_montgomery(device_in1, vector_size, config.ctx.stream, device_in1));
err = cudaMemcpy(host_in1, device_in1, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_in1 to host_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
}
}
if (config.is_b_on_device) {
if (config.is_in_montgomery_form) {
err =
cudaMemcpy(device_in2, host_in2, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in2 to device_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(
mont::to_montgomery(device_in2, vector_size, config.ctx.stream, device_in2)); // Convert in-place.
} else {
// Normal presentation.
err =
cudaMemcpy(device_in2, host_in2, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in2 to device_in2 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
}
} else {
if (config.is_in_montgomery_form) { // Copy to device, convert to montgomery and copy back to host.
err =
cudaMemcpy(device_in2, host_in2, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in2 to device_in2 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(mont::to_montgomery(device_in2, vector_size, config.ctx.stream, device_in2));
err = cudaMemcpy(host_in2, device_in2, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_in2 to host_in2 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
}
}
CHK_IF_RETURN(cudaPeekAtLastError());
auto start_time = std::chrono::high_resolution_clock::now();
// Benchmark loop
for (int i = 0; i < repetitions; i++) {
switch (config_idx >> 1) { // {is_a_on_device, is_b_on_device, is_result_on_device}
case 0b000:
vector_op(host_in1, host_in2, host_out, vector_size, ctx, config, (Op)op);
break;
case 0b001:
vector_op(host_in1, host_in2, device_out, vector_size, ctx, config, (Op)op);
break;
case 0b010:
vector_op(host_in1, device_in2, host_out, vector_size, ctx, config, (Op)op);
break;
case 0b011:
vector_op(host_in1, device_in2, device_out, vector_size, ctx, config, (Op)op);
break;
case 0b100:
vector_op(device_in1, host_in2, host_out, vector_size, ctx, config, (Op)op);
break;
case 0b101:
vector_op(device_in1, host_in2, device_out, vector_size, ctx, config, (Op)op);
break;
case 0b110:
vector_op(device_in1, device_in2, host_out, vector_size, ctx, config, (Op)op);
break;
case 0b111:
vector_op(device_in1, device_in2, device_out, vector_size, ctx, config, (Op)op);
break;
}
CHK_IF_RETURN(cudaPeekAtLastError());
}
auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
switch (op) {
case MUL:
std::cout << "Elapsed time: " << duration.count() << " microseconds, operation MUL for config_idx "
<< config_idx << " and result not in-place" << std::endl;
break;
case ADD:
std::cout << "Elapsed time: " << duration.count() << " microseconds, operation ADD for config_idx "
<< config_idx << " and result not in-place" << std::endl;
break;
case SUB:
std::cout << "Elapsed time: " << duration.count() << " microseconds, operation SUB for config_idx "
<< config_idx << " and result not in-place" << std::endl;
break;
}
if (config.is_result_on_device) { // Copy the data to host_out in order to compare it vs. host_out_ref_mul value.
if (config.is_in_montgomery_form) { // Convert to normal from montgomery if needed.
CHK_IF_RETURN(mont::from_montgomery(
device_out, vector_size, config.ctx.stream,
device_out)); // Convert to normal in order to check vs. host_out_ref_mul.
}
err = cudaMemcpy(
host_out, device_out, vector_size * sizeof(T),
cudaMemcpyDeviceToHost); // Copy to host_out in order to check vs. host_out_ref_mul.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_out to host - " << cudaGetErrorString(err) << std::endl;
return 0;
}
} else { // Data is not on device but it is in host_out.
if (config.is_in_montgomery_form) { // host_out should be written to device, converted from montgomery and
// written back to host. Then compared vs. host_out_ref_mul.
err = cudaMemcpy(
device_out, host_out, vector_size * sizeof(T),
cudaMemcpyHostToDevice); // Copy to host_out in order to check vs. host_out_ref_mul.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_out to device_out - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(mont::from_montgomery(
device_out, vector_size, config.ctx.stream,
device_out)); // Convert to normal in order to check vs. host_out_ref_mul.
err = cudaMemcpy(
host_out, device_out, vector_size * sizeof(T),
cudaMemcpyDeviceToHost); // Copy to host_out in order to check vs. host_out_ref_mul.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_out to host_out - " << cudaGetErrorString(err) << std::endl;
return 0;
}
} else { // host_out could be compared vs. host_out_ref_mul as is.
}
}
//****************************************
// End of benchmark test.
//****************************************
//***********************************************
// Test result check
// Check is performed by executing the operation in a normal presentation
// (located in in host_out_ref_mul) and comparing it with the
// benchmark test result.
//***********************************************
int test_failed = 0;
// std::cout << "===>>> host_out_ref_mul[" << i << "]: " << host_out_ref_mul[i] << std::endl;
// std::cout << "===>>> host_out[" << i << "] after test run: " << host_out[i] << std::endl;
switch (op) {
case MUL:
for (int i = 0; i < vector_size; i++) {
if (host_out_ref_mul[i] != host_out[i]) {
std::cout << "===>>> ERROR!!! MUL: Test failed for vector index " << i
<< ", config is printed below:" << std::endl;
test_failed = 1;
}
}
break;
case ADD:
for (int i = 0; i < vector_size; i++) {
if (host_out_ref_add[i] != host_out[i]) {
std::cout << "===>>> ERROR!!! ADD: Test failed for vector index " << i
<< ", config is printed below:" << std::endl;
test_failed = 1;
}
}
break;
case SUB:
for (int i = 0; i < vector_size; i++) {
if (host_out_ref_sub[i] != host_out[i]) {
std::cout << "===>>> ERROR!!! SUB: Test failed for vector index " << i
<< ", config is printed below:" << std::endl;
test_failed = 1;
}
}
break;
}
if (test_failed) {
// std::cout << "===>>> ERROR!!! Test failed for vector index " << i << ", config is printed below:" <<
// std::endl;
std::cout << "===>>> result is not in-place: " << std::endl;
std::cout << "===>>> is_a_on_device: " << config.is_a_on_device << std::endl;
std::cout << "===>>> is_b_on_device: " << config.is_b_on_device << std::endl;
std::cout << "===>>> is_result_on_device: " << config.is_result_on_device << std::endl;
std::cout << "===>>> is_in_montgomery_form: " << config.is_in_montgomery_form << std::endl;
exit(2);
}
unsigned power_after;
nvmlDeviceGetPowerUsage(device, &power_after);
std::cout << "Power after: " << std::fixed << std::setprecision(3) << 1.0e-3 * power_after << " W" << std::endl;
std::cout << "Power utilization: " << std::fixed << std::setprecision(1)
<< (float)100.0 * power_after / power_limit << " %" << std::endl;
unsigned temperature_after;
if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature_after) == NVML_SUCCESS) {
std::cout << "GPU Temperature after: " << temperature_after << " C" << std::endl;
} else {
std::cerr << "Failed to get GPU temperature." << std::endl;
}
// Report performance in GMPS: Giga Multiplications Per Second
double GMPS = 1.0e-9 * repetitions * vector_size / (1.0e-6 * duration.count());
std::cout << "Performance: " << GMPS << " Giga Multiplications Per Second" << std::endl;
}
}
// Test when the result is in-place
for (int op = MUL; op != LAST; op++) {
for (int config_idx = 0; config_idx < 16; config_idx++) {
for (int i = 0; i < vector_size; i++) {
host_in1[i] = host_in1_init[i];
host_in2[i] = host_in2_init[i];
}
config.is_a_on_device = (config_idx >> 4) & 0x1;
config.is_b_on_device = (config_idx >> 3) & 0x1;
config.is_result_on_device = (config_idx >> 2) & 0x1;
config.is_in_montgomery_form = (config_idx >> 1) & 0x1;
if (config.is_a_on_device ^ config.is_result_on_device == 1) { continue; }
// Copy from host to device again: device_inX was already overwritten by the warm-up and the data is reused later in the loop.
if (config.is_a_on_device) {
if (config.is_in_montgomery_form) {
err =
cudaMemcpy(device_in1, host_in1, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in1 to device_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(
mont::to_montgomery(device_in1, vector_size, config.ctx.stream, device_in1)); // Convert in-place.
} else { // Normal presentation.
err =
cudaMemcpy(device_in1, host_in1, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in1 to device_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
}
} else {
if (config.is_in_montgomery_form) { // Copy to device, convert to montgomery and copy back to host.
err =
cudaMemcpy(device_in1, host_in1, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in1 to device_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(mont::to_montgomery(device_in1, vector_size, config.ctx.stream, device_in1));
err = cudaMemcpy(host_in1, device_in1, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_in1 to host_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
}
}
if (config.is_b_on_device) {
if (config.is_in_montgomery_form) {
err =
cudaMemcpy(device_in2, host_in2, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in2 to device_in1 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(
mont::to_montgomery(device_in2, vector_size, config.ctx.stream, device_in2)); // Convert in-place.
} else {
// Normal presentation.
err =
cudaMemcpy(device_in2, host_in2, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in2 to device_in2 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
}
} else {
if (config.is_in_montgomery_form) { // Copy to device, convert to montgomery and copy back to host.
err =
cudaMemcpy(device_in2, host_in2, vector_size * sizeof(T), cudaMemcpyHostToDevice); // Copy data to device.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in2 to device_in2 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(mont::to_montgomery(device_in2, vector_size, config.ctx.stream, device_in2));
err = cudaMemcpy(host_in2, device_in2, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_in2 to host_in2 - " << cudaGetErrorString(err) << std::endl;
return 0;
}
}
}
CHK_IF_RETURN(cudaPeekAtLastError());
auto start_time = std::chrono::high_resolution_clock::now();
// Benchmark loop
for (int i = 0; i < repetitions; i++) {
switch (config_idx >> 2) { // {is_a_on_device, is_b_on_device, is_result_on_device}
case 0b000:
vector_op(host_in1, host_in2, host_in1, vector_size, ctx, config, (Op)op);
break;
case 0b001:
break;
case 0b010:
vector_op(host_in1, device_in2, host_in1, vector_size, ctx, config, (Op)op);
break;
case 0b011:
break;
case 0b100:
break;
case 0b101:
vector_op(device_in1, host_in2, device_in1, vector_size, ctx, config, (Op)op);
break;
case 0b110:
break;
case 0b111:
vector_op(device_in1, device_in2, device_in1, vector_size, ctx, config, (Op)op);
break;
}
CHK_IF_RETURN(cudaPeekAtLastError());
}
auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
switch (op) {
case MUL:
std::cout << "Elapsed time: " << duration.count() << " microseconds, operation MUL for config_idx "
<< config_idx << " and result in-place" << std::endl;
break;
case ADD:
std::cout << "Elapsed time: " << duration.count() << " microseconds, operation ADD for config_idx "
<< config_idx << " and result in-place" << std::endl;
break;
case SUB:
std::cout << "Elapsed time: " << duration.count() << " microseconds, operation SUB for config_idx "
<< config_idx << " and result in-place" << std::endl;
break;
}
if (config.is_result_on_device) { // Copy the data to host_out in order to compare it vs. host_out_ref_mul value.
if (config.is_in_montgomery_form) { // Convert to normal from montgomery if needed.
CHK_IF_RETURN(mont::from_montgomery(
device_in1, vector_size, config.ctx.stream,
device_in1)); // Convert to normal in order to check vs. host_out_ref_mul.
}
err = cudaMemcpy(
host_out, device_in1, vector_size * sizeof(T),
cudaMemcpyDeviceToHost); // Copy to host_out in order to check vs. host_out_ref_mul.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_in1 to host_out - " << cudaGetErrorString(err) << std::endl;
return 0;
}
} else { // Data is not on device but it is in host_in1. It should be moved to host_out for test pass/fail check.
if (config.is_in_montgomery_form) { // host_out should be written to device, converted from montgomery and
// written back to host. Then compared vs. host_out_ref_mul.
err = cudaMemcpy(
device_out, host_in1, vector_size * sizeof(T),
cudaMemcpyHostToDevice); // Copy to host_out in order to check vs. host_out_ref_mul.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in1 to device_out - " << cudaGetErrorString(err) << std::endl;
return 0;
}
CHK_IF_RETURN(mont::from_montgomery(
device_out, vector_size, config.ctx.stream,
device_out)); // Convert to normal in order to check vs. host_out_ref_mul.
err = cudaMemcpy(
host_out, device_out, vector_size * sizeof(T),
cudaMemcpyDeviceToHost); // Copy to host_out in order to check vs. host_out_ref_mul.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_out to host_out - " << cudaGetErrorString(err) << std::endl;
return 0;
}
} else { // host_out could be compared vs. host_out_ref_mul as is.
err = cudaMemcpy(
device_out, host_in1, vector_size * sizeof(T),
cudaMemcpyHostToDevice); // Copy to host_out in order to check vs. host_out_ref_mul.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from host_in1 to device_out - " << cudaGetErrorString(err) << std::endl;
return 0;
}
err = cudaMemcpy(
host_out, device_out, vector_size * sizeof(T),
cudaMemcpyDeviceToHost); // Copy to host_out in order to check vs. host_out_ref_mul.
if (err != cudaSuccess) {
std::cerr << "Failed to copy data from device_out to host_out - " << cudaGetErrorString(err) << std::endl;
return 0;
}
}
}
//****************************************
// End of benchmark test.
//****************************************
//***********************************************
// Test result check
// Check is performed by executing the operation in a normal presentation
// (located in in host_out_ref_mul) and comparing it with the
// benchmark test result.
//***********************************************
int test_failed = 0;
// std::cout << "===>>> host_out_ref_mul[" << i << "]: " << host_out_ref_mul[i] << std::endl;
// std::cout << "===>>> host_out[" << i << "] after test run: " << host_out[i] << std::endl;
switch (op) {
case MUL:
for (int i = 0; i < vector_size; i++) {
if (host_out_ref_mul[i] != host_out[i]) {
std::cout << "===>>> ERROR!!! MUL: Test failed for vector index " << i
<< ", config is printed below:" << std::endl;
std::cout << "host_out_ref_mul[0] = " << host_out_ref_mul[0] << std::endl;
test_failed = 1;
}
}
break;
case ADD:
for (int i = 0; i < vector_size; i++) {
if (host_out_ref_add[i] != host_out[i]) {
std::cout << "===>>> ERROR!!! ADD: Test failed for vector index " << i
<< ", config is printed below:" << std::endl;
std::cout << "host_out_ref_add[0] = " << host_out_ref_add[0] << std::endl;
test_failed = 1;
}
}
break;
case SUB:
for (int i = 0; i < vector_size; i++) {
if (host_out_ref_sub[i] != host_out[i]) {
std::cout << "===>>> ERROR!!! SUB: Test failed for vector index " << i
<< ", config is printed below:" << std::endl;
std::cout << "host_out_ref_sub[0] = " << host_out_ref_sub[0] << std::endl;
test_failed = 1;
}
}
break;
}
if (test_failed) {
// std::cout << "===>>> ERROR!!! Test failed for vector index " << i << ", config is printed below:" <<
// std::endl;
std::cout << "===>>> result is in-place: " << std::endl;
std::cout << "===>>> is_a_on_device: " << config.is_a_on_device << std::endl;
std::cout << "===>>> is_b_on_device: " << config.is_b_on_device << std::endl;
std::cout << "===>>> is_result_on_device: " << config.is_result_on_device << std::endl;
std::cout << "===>>> is_in_montgomery_form: " << config.is_in_montgomery_form << std::endl;
std::cout << "host_out[0] = " << host_out[0] << std::endl;
exit(2);
}
unsigned power_after;
nvmlDeviceGetPowerUsage(device, &power_after);
std::cout << "Power after: " << std::fixed << std::setprecision(3) << 1.0e-3 * power_after << " W" << std::endl;
std::cout << "Power utilization: " << std::fixed << std::setprecision(1)
<< (float)100.0 * power_after / power_limit << " %" << std::endl;
unsigned temperature_after;
if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature_after) == NVML_SUCCESS) {
std::cout << "GPU Temperature after: " << temperature_after << " C" << std::endl;
} else {
std::cerr << "Failed to get GPU temperature." << std::endl;
}
// Report performance in GMPS: Giga Multiplications Per Second
double GMPS = 1.0e-9 * repetitions * vector_size / (1.0e-6 * duration.count());
std::cout << "Performance: " << GMPS << " Giga Multiplications Per Second" << std::endl;
}
}
// clean up and exit
free(host_in1_init);
free(host_in2_init);
free(host_in1);
free(host_in2);
free(host_out);
free(host_out_ref_mul);
cudaFree(device_in1);
cudaFree(device_in2);
cudaFree(device_out);
nvmlShutdown();
return 0;
}

View File

@@ -1,2 +0,0 @@
#! /bin/bash
./build/example/example

View File

@@ -1,9 +1,5 @@
# Icicle example: Multi-Scalar Multiplication (MSM)
## Best-Practices
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
## Key-Takeaway
`Icicle` provides CUDA C++ template function `MSM` to accelerate [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).

View File

@@ -1,9 +1,5 @@
# Icicle example: Multiplication
## Best-Practices
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
## Key-Takeaway
`Icicle` accelerates the multiplication operation `*` using the [Karatsuba algorithm](https://en.wikipedia.org/wiki/Karatsuba_algorithm)

View File

@@ -1,9 +1,5 @@
# Icicle example: Number-Theoretical Transform (NTT)
## Best-Practices
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
## Key-Takeaway
`Icicle` provides CUDA C++ template function NTT for [Number Theoretical Transform](https://github.com/ingonyama-zk/ingopedia/blob/master/src/fft.md), also known as Discrete Fourier Transform.

View File

@@ -1,9 +1,5 @@
# ICICLE example: Pedersen Commitment
## Best-Practices
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
## Key-Takeaway
A Pedersen Commitment is a cryptographic primitive to commit to a value or a vector of values while keeping it hidden, yet enabling the committer to reveal the value later. It provides both hiding (the commitment does not reveal any information about the value) and binding properties (once a value is committed, it cannot be changed without detection).
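As a quick refresher (a standard construction, not taken from this diff), for independent group generators $G$ and $H$ with no known discrete-log relation and a random blinding factor $r$, the commitment to a value $v$ is

$$C = v \cdot G + r \cdot H$$

The random $r$ provides hiding; binding follows from the committer not knowing the discrete log of $H$ with respect to $G$.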

View File

@@ -1,9 +1,5 @@
# ICICLE examples: computations with polynomials
## Best-Practices
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
## Key-Takeaway
Polynomials are crucial for Zero-Knowledge Proofs (ZKPs): they enable efficient representation and verification of computational statements, facilitate privacy-preserving protocols, and support complex mathematical operations essential for constructing and verifying proofs without revealing underlying data. Polynomial API is documented [here](https://dev.ingonyama.com/icicle/polynomials/overview)

View File

@@ -1,9 +1,5 @@
# Icicle example: build a Merkle tree using Poseidon hash
## Best-Practices
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
## Key-Takeaway
`Icicle` provides CUDA C++ template `poseidon_hash` to accelerate the popular [Poseidon hash function](https://www.poseidon-hash.info/).

View File

@@ -2,10 +2,6 @@
`ICICLE` provides Rust bindings to CUDA-accelerated C++ implementation of [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).
## Best Practices
In order to save time and setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
## Usage
```rust

View File

@@ -4,10 +4,6 @@
`ICICLE` provides Rust bindings to CUDA-accelerated C++ implementation of [Number Theoretic Transform](https://github.com/ingonyama-zk/ingopedia/blob/master/src/fft.md).
## Best Practices
In order to save time and setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
## Usage
```rust

View File

@@ -124,6 +124,19 @@ public:
*/
static constexpr HOST_DEVICE_INLINE unsigned num_of_reductions() { return CONFIG::num_of_reductions; }
// Count the number of bits of the field element, excluding leading zeros.
static constexpr HOST_DEVICE_INLINE unsigned num_bits(const Field& x)
{
size_t size = sizeof(x.limbs_storage.limbs[0]) * 8;
unsigned ret = size * TLC;
for (unsigned i = TLC; i-- > 0;) {
int leading = __clz(x.limbs_storage.limbs[i]);
ret -= leading;
if (leading != size) { break; }
}
return ret;
}
static constexpr unsigned slack_bits = 32 * TLC - NBITS;
struct Wide {

View File

@@ -22,9 +22,14 @@ namespace keccak {
// Number of state elements in u64
const int KECCAK_STATE_SIZE = 25;
const int KECCAK_PADDING_CONST = 1;
const int SHA3_PADDING_CONST = 6;
class Keccak : public Hasher<uint8_t, uint64_t>
{
public:
const int PADDING_CONST;
cudaError_t run_hash_many_kernel(
const uint8_t* input,
uint64_t* output,
@@ -33,7 +38,34 @@ namespace keccak {
unsigned int output_len,
const device_context::DeviceContext& ctx) const override;
Keccak(unsigned int rate) : Hasher<uint8_t, uint64_t>(KECCAK_STATE_SIZE, KECCAK_STATE_SIZE, rate, 0) {}
Keccak(unsigned int rate, unsigned int padding_const)
: Hasher<uint8_t, uint64_t>(KECCAK_STATE_SIZE, KECCAK_STATE_SIZE, rate, 0), PADDING_CONST(padding_const)
{
}
};
class Keccak256 : public Keccak
{
public:
Keccak256() : Keccak(KECCAK_256_RATE, KECCAK_PADDING_CONST) {}
};
class Keccak512 : public Keccak
{
public:
Keccak512() : Keccak(KECCAK_512_RATE, KECCAK_PADDING_CONST) {}
};
class Sha3_256 : public Keccak
{
public:
Sha3_256() : Keccak(KECCAK_256_RATE, SHA3_PADDING_CONST) {}
};
class Sha3_512 : public Keccak
{
public:
Sha3_512() : Keccak(KECCAK_512_RATE, SHA3_PADDING_CONST) {}
};
} // namespace keccak
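A minimal C++ usage sketch of the new classes (the include path is an assumption, and obtaining a `HashConfig` is left to the caller; `hash_many` is the `Hasher` entry point also used by the extern "C" wrappers below). The only behavioral difference between `Keccak256` and `Sha3_256` is the padding constant passed to the `Keccak` base constructor.

```c++
#include "hash/keccak/keccak.cuh" // assumed include path

using namespace keccak;

// Hash `number_of_blocks` preimages of `input_block_size` bytes each into 256-bit
// digests, once with legacy Keccak padding and once with SHA-3 (FIPS 202) padding.
cudaError_t hash_both(
  uint8_t* input, int input_block_size, int number_of_blocks,
  uint8_t* keccak_out, uint8_t* sha3_out, HashConfig& config)
{
  cudaError_t err = Keccak256().hash_many(
    input, (uint64_t*)keccak_out, number_of_blocks, input_block_size, KECCAK_256_DIGEST, config);
  if (err != cudaSuccess) return err;
  return Sha3_256().hash_many(
    input, (uint64_t*)sha3_out, number_of_blocks, input_block_size, KECCAK_256_DIGEST, config);
}
```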

View File

@@ -27,8 +27,6 @@ namespace vec_ops {
* non-blocking and you'd need to synchronize it explicitly by running
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
* function will block the current CPU thread. */
bool is_in_montgomery_form; /**< If true then vec_a, vec_b and result are in montgomery form.
* Default value: false. */
};
/**
@@ -44,7 +42,6 @@ namespace vec_ops {
false, // is_b_on_device
false, // is_result_on_device
false, // is_async
false, // is_in_montgomery_form
};
return config;
}

View File

@@ -20,6 +20,11 @@ extern "C" void CONCAT_EXPAND(CURVE, to_affine)(projective_t* point, affine_t* p
*point_out = projective_t::to_affine(*point);
}
extern "C" void CONCAT_EXPAND(CURVE, from_affine)(affine_t* point, projective_t* point_out)
{
*point_out = projective_t::from_affine(*point);
}
extern "C" void CONCAT_EXPAND(CURVE, generate_projective_points)(projective_t* points, int size)
{
projective_t::rand_host_many(points, size);

View File

@@ -20,6 +20,11 @@ extern "C" void CONCAT_EXPAND(CURVE, g2_to_affine)(g2_projective_t* point, g2_af
*point_out = g2_projective_t::to_affine(*point);
}
extern "C" void CONCAT_EXPAND(CURVE, g2_from_affine)(g2_affine_t* point, g2_projective_t* point_out)
{
*point_out = g2_projective_t::from_affine(*point);
}
extern "C" void CONCAT_EXPAND(CURVE, g2_generate_projective_points)(g2_projective_t* points, int size)
{
g2_projective_t::rand_host_many(points, size);

View File

@@ -11,15 +11,29 @@ namespace keccak {
extern "C" cudaError_t
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
{
return Keccak(KECCAK_256_RATE)
.hash_many(input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_256_DIGEST, config);
return Keccak256().hash_many(
input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_256_DIGEST, config);
}
extern "C" cudaError_t
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
{
return Keccak(KECCAK_512_RATE)
.hash_many(input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_512_DIGEST, config);
return Keccak512().hash_many(
input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_512_DIGEST, config);
}
extern "C" cudaError_t
sha3_256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
{
return Sha3_256().hash_many(
input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_256_DIGEST, config);
}
extern "C" cudaError_t
sha3_512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
{
return Sha3_512().hash_many(
input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_512_DIGEST, config);
}
extern "C" cudaError_t build_keccak256_merkle_tree_cuda(
@@ -29,7 +43,7 @@ namespace keccak {
unsigned int input_block_len,
const merkle_tree::TreeBuilderConfig& tree_config)
{
Keccak keccak(KECCAK_256_RATE);
Keccak256 keccak;
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
}
@@ -41,7 +55,31 @@ namespace keccak {
unsigned int input_block_len,
const merkle_tree::TreeBuilderConfig& tree_config)
{
Keccak keccak(KECCAK_512_RATE);
Keccak512 keccak;
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
}
extern "C" cudaError_t build_sha3_256_merkle_tree_cuda(
const uint8_t* leaves,
uint64_t* digests,
unsigned int height,
unsigned int input_block_len,
const merkle_tree::TreeBuilderConfig& tree_config)
{
Sha3_256 keccak;
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
}
extern "C" cudaError_t build_sha3_512_merkle_tree_cuda(
const uint8_t* leaves,
uint64_t* digests,
unsigned int height,
unsigned int input_block_len,
const merkle_tree::TreeBuilderConfig& tree_config)
{
Sha3_512 keccak;
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
}
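A hedged sketch of calling the new C entry point from another translation unit: the forward declaration mirrors the signature added above, but the visibility of `HashConfig` and how to construct one are assumptions not shown in this diff. Note that the output buffer sizing matches the docs fix above: 32 bytes per 256-bit digest.

```c++
#include <cstdint>
#include <cuda_runtime.h>

struct HashConfig; // defined in the ICICLE hash headers; treated as opaque here (assumption)

// Forward declaration matching the extern "C" symbol added in this PR.
extern "C" cudaError_t
sha3_256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config);

// `digests` must hold number_of_blocks * 32 bytes: one 256-bit digest per preimage.
cudaError_t sha3_batch(
  uint8_t* preimages, int input_block_size, int number_of_blocks, uint8_t* digests, HashConfig& config)
{
  return sha3_256_cuda(preimages, input_block_size, number_of_blocks, digests, config);
}
```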

View File

@@ -180,8 +180,13 @@ namespace keccak {
}
template <const int R>
__global__ void
keccak_hash_blocks(const uint8_t* input, int input_block_size, int output_len, int number_of_blocks, uint64_t* output)
__global__ void keccak_hash_blocks(
const uint8_t* input,
int input_block_size,
int output_len,
int number_of_blocks,
uint64_t* output,
int padding_const)
{
int sid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (sid >= number_of_blocks) { return; }
@@ -209,7 +214,7 @@ namespace keccak {
}
// pad 10*1
last_block[input_len] = 1;
last_block[input_len] = padding_const;
for (int i = 0; i < R - input_len - 1; i++) {
last_block[input_len + i + 1] = 0;
}
@@ -240,11 +245,11 @@ namespace keccak {
switch (rate) {
case KECCAK_256_RATE:
keccak_hash_blocks<KECCAK_256_RATE><<<number_of_gpu_blocks, number_of_threads, 0, ctx.stream>>>(
input, input_len, output_len, number_of_states, output);
input, input_len, output_len, number_of_states, output, PADDING_CONST);
break;
case KECCAK_512_RATE:
keccak_hash_blocks<KECCAK_512_RATE><<<number_of_gpu_blocks, number_of_threads, 0, ctx.stream>>>(
input, input_len, output_len, number_of_states, output);
input, input_len, output_len, number_of_states, output, PADDING_CONST);
break;
default:
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "KeccakHash: #rate must be one of [136, 72]");

View File

@@ -129,8 +129,9 @@ namespace merkle_tree {
while (number_of_states > 0) {
CHK_IF_RETURN(compression.run_hash_many_kernel(
(L*)prev_layer, next_layer, number_of_states, tree_config.digest_elements * tree_config.arity,
tree_config.digest_elements, hash_config.ctx));
(L*)prev_layer, next_layer, number_of_states,
tree_config.digest_elements * tree_config.arity * (sizeof(D) / sizeof(L)), tree_config.digest_elements,
hash_config.ctx));
if (!keep_rows || subtree_height < keep_rows) {
D* digests_with_offset =
@@ -298,8 +299,9 @@ namespace merkle_tree {
size_t segment_offset = start_segment_offset;
while (number_of_states > 0) {
CHK_IF_RETURN(compression.run_hash_many_kernel(
(L*)prev_layer, next_layer, number_of_states, tree_config.digest_elements * tree_config.arity,
tree_config.digest_elements, tree_config.ctx));
(L*)prev_layer, next_layer, number_of_states,
tree_config.digest_elements * tree_config.arity * (sizeof(D) / sizeof(L)), tree_config.digest_elements,
tree_config.ctx));
if (!tree_config.keep_rows || cap_height < tree_config.keep_rows + (int)caps_mode) {
D* digests_with_offset = digests + segment_offset;
CHK_IF_RETURN(cudaMemcpyAsync(
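To make the corrected length concrete, a small worked example using the types of the Keccak/SHA-3 tree builders (L = uint8_t leaves, D = uint64_t digests); the digest_elements and arity values below are illustrative assumptions.

```c++
#include <cstdint>
#include <cstdio>

int main()
{
  // Keccak/SHA-3 tree builders hash bytes (L = uint8_t) into 64-bit words (D = uint64_t).
  // Assume a 32-byte digest, i.e. digest_elements = 4 (counted in D units), and arity = 2.
  constexpr unsigned digest_elements = 4, arity = 2;
  constexpr unsigned scale = sizeof(uint64_t) / sizeof(uint8_t); // sizeof(D) / sizeof(L) == 8
  // The compression kernel's input length is measured in L units, so the old value under-counted by 8x.
  printf("old input length: %u\n", digest_elements * arity);         // 8  (uint64_t words, mis-scaled)
  printf("new input length: %u\n", digest_elements * arity * scale); // 64 (bytes: two 32-byte child digests)
}
```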

View File

@@ -95,64 +95,25 @@ namespace vec_ops {
E *d_result, *d_alloc_vec_a, *d_alloc_vec_b;
E* d_vec_a;
const E* d_vec_b;
int is_d_alloc_vec_a_allocated = 0;
if (!config.is_a_on_device) {
if (config.is_in_montgomery_form) {
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_vec_a, n * sizeof(E), config.ctx.stream));
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_vec_a, vec_a, n * sizeof(E), cudaMemcpyHostToDevice, config.ctx.stream));
CHK_IF_RETURN(mont::from_montgomery(d_alloc_vec_a, n * sizeof(E), config.ctx.stream, d_alloc_vec_a));
is_d_alloc_vec_a_allocated = 1;
d_vec_a = d_alloc_vec_a;
} else {
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_vec_a, n * sizeof(E), config.ctx.stream));
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_vec_a, vec_a, n * sizeof(E), cudaMemcpyHostToDevice, config.ctx.stream));
is_d_alloc_vec_a_allocated = 1;
d_vec_a = d_alloc_vec_a;
}
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_vec_a, n * sizeof(E), config.ctx.stream));
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_vec_a, vec_a, n * sizeof(E), cudaMemcpyHostToDevice, config.ctx.stream));
d_vec_a = d_alloc_vec_a;
} else {
if (config.is_in_montgomery_form) {
CHK_IF_RETURN(cudaMallocAsync(
&d_alloc_vec_a, n * sizeof(E), config.ctx.stream)); // Allocate in order not to change the input.
CHK_IF_RETURN(mont::from_montgomery(vec_a, n * sizeof(E), config.ctx.stream, d_alloc_vec_a));
is_d_alloc_vec_a_allocated = 1;
d_vec_a = d_alloc_vec_a;
} else {
d_vec_a = vec_a;
}
d_vec_a = vec_a;
}
int is_d_alloc_vec_b_allocated = 0;
if (!config.is_b_on_device) {
if (config.is_in_montgomery_form) {
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_vec_b, n * sizeof(E), config.ctx.stream));
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_vec_b, vec_b, n * sizeof(E), cudaMemcpyHostToDevice, config.ctx.stream));
CHK_IF_RETURN(mont::from_montgomery(d_alloc_vec_b, n * sizeof(E), config.ctx.stream, d_alloc_vec_b));
is_d_alloc_vec_b_allocated = 1;
d_vec_b = d_alloc_vec_b;
} else {
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_vec_b, n * sizeof(E), config.ctx.stream));
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_vec_b, vec_b, n * sizeof(E), cudaMemcpyHostToDevice, config.ctx.stream));
is_d_alloc_vec_b_allocated = 1;
d_vec_b = d_alloc_vec_b;
}
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_vec_b, n * sizeof(E), config.ctx.stream));
CHK_IF_RETURN(cudaMemcpyAsync(d_alloc_vec_b, vec_b, n * sizeof(E), cudaMemcpyHostToDevice, config.ctx.stream));
d_vec_b = d_alloc_vec_b;
} else {
if (config.is_in_montgomery_form) {
CHK_IF_RETURN(cudaMallocAsync(
&d_alloc_vec_b, n * sizeof(E), config.ctx.stream)); // Allocate in order not to change the input.
CHK_IF_RETURN(mont::from_montgomery(vec_b, n * sizeof(E), config.ctx.stream, d_alloc_vec_b));
is_d_alloc_vec_b_allocated = 1;
d_vec_b = d_alloc_vec_b;
} else {
d_vec_b = vec_b;
}
d_vec_b = vec_b;
}
int is_d_result_allocated = 0;
if (!config.is_result_on_device) {
if (!is_in_place) {
CHK_IF_RETURN(cudaMallocAsync(&d_result, n * sizeof(E), config.ctx.stream));
is_d_result_allocated = 1;
} else {
d_result = d_vec_a;
}
@@ -168,21 +129,12 @@ namespace vec_ops {
Kernel<<<num_blocks, num_threads, 0, config.ctx.stream>>>(d_vec_a, d_vec_b, n, d_result);
if (!config.is_result_on_device) {
if (config.is_in_montgomery_form) {
CHK_IF_RETURN(mont::to_montgomery(d_result, n * sizeof(E), config.ctx.stream, d_result)); // Convert in-place.
CHK_IF_RETURN(cudaMemcpyAsync(result, d_result, n * sizeof(E), cudaMemcpyDeviceToHost, config.ctx.stream));
} else {
CHK_IF_RETURN(cudaMemcpyAsync(result, d_result, n * sizeof(E), cudaMemcpyDeviceToHost, config.ctx.stream));
}
} else {
if (config.is_in_montgomery_form) {
CHK_IF_RETURN(mont::to_montgomery(d_result, n * sizeof(E), config.ctx.stream, d_result)); // Convert in-place.
}
CHK_IF_RETURN(cudaMemcpyAsync(result, d_result, n * sizeof(E), cudaMemcpyDeviceToHost, config.ctx.stream));
CHK_IF_RETURN(cudaFreeAsync(d_result, config.ctx.stream));
}
if (is_d_alloc_vec_a_allocated) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_vec_a, config.ctx.stream)); }
if (is_d_alloc_vec_b_allocated) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_vec_b, config.ctx.stream)); }
if (is_d_result_allocated) { CHK_IF_RETURN(cudaFreeAsync(d_result, config.ctx.stream)); }
if (!config.is_a_on_device && !is_in_place) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_vec_a, config.ctx.stream)); }
if (!config.is_b_on_device) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_vec_b, config.ctx.stream)); }
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(config.ctx.stream));

View File

@@ -28,21 +28,6 @@ func (p *MockProjective) FromLimbs(x, y, z []uint32) MockProjective {
return *p
}
func (p *MockProjective) FromAffine(a MockAffine) MockProjective {
z := MockBaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
return *p
}
type MockAffine struct {
X, Y MockBaseField
}
@@ -68,18 +53,3 @@ func (a *MockAffine) FromLimbs(x, y []uint32) MockAffine {
return *a
}
func (a MockAffine) ToProjective() MockProjective {
var z MockBaseField
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p MockProjective
return p.Zero()
}
return MockProjective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
}

View File

@@ -28,8 +28,6 @@ type VecOpsConfig struct {
* non-blocking and you'll need to synchronize it explicitly by calling
* `SynchronizeStream`. If set to false, the function will block the current CPU thread. */
IsAsync bool
/* If true then vec_a, vec_b and result are in montgomery form. Default value: false. */
IsInMontgomeryForm bool
}
/**
@@ -44,7 +42,6 @@ func DefaultVecOpsConfig() VecOpsConfig {
false, // isBOnDevice
false, // isResultOnDevice
false, // IsAsync
false, // IsInMontgomeryForm
}
return config

View File

@@ -15,7 +15,6 @@ func TestVecOpsDefaultConfig(t *testing.T) {
false, // isBOnDevice
false, // isResultOnDevice
false, // IsAsync
false, // IsInMontgomeryForm
}
actual := DefaultVecOpsConfig()

View File

@@ -40,17 +40,10 @@ func (p *Projective) FromLimbs(x, y, z []uint32) Projective {
}
func (p *Projective) FromAffine(a Affine) Projective {
z := BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.bls12_377_from_affine(cA, cP)
return *p
}
@@ -65,7 +58,7 @@ func (p *Projective) ProjectiveToAffine() Affine {
var a Affine
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.bls12_377_to_affine(cP, cA)
return a
}
@@ -111,18 +104,12 @@ func (a *Affine) FromLimbs(x, y []uint32) Affine {
}
func (a Affine) ToProjective() Projective {
var z BaseField
var p Projective
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p Projective
return p.Zero()
}
return Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
C.bls12_377_from_affine(cA, cP)
return p
}
func AffineFromProjective(p *Projective) Affine {

View File

@@ -40,17 +40,10 @@ func (p *G2Projective) FromLimbs(x, y, z []uint32) G2Projective {
}
func (p *G2Projective) FromAffine(a G2Affine) G2Projective {
z := G2BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(p))
C.bls12_377_g2_from_affine(cA, cP)
return *p
}
@@ -65,7 +58,7 @@ func (p *G2Projective) ProjectiveToAffine() G2Affine {
var a G2Affine
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(&p))
cP := (*C.g2_projective_t)(unsafe.Pointer(p))
C.bls12_377_g2_to_affine(cP, cA)
return a
}
@@ -111,18 +104,12 @@ func (a *G2Affine) FromLimbs(x, y []uint32) G2Affine {
}
func (a G2Affine) ToProjective() G2Projective {
var z G2BaseField
var p G2Projective
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p G2Projective
return p.Zero()
}
return G2Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(&p))
C.bls12_377_g2_from_affine(cA, cP)
return p
}
func G2AffineFromProjective(p *G2Projective) G2Affine {

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool bls12_377_g2_eq(g2_projective_t* point1, g2_projective_t* point2);
void bls12_377_g2_to_affine(g2_projective_t* point, g2_affine_t* point_out);
void bls12_377_g2_from_affine(g2_affine_t* point, g2_projective_t* point_out);
void bls12_377_g2_generate_projective_points(g2_projective_t* points, int size);
void bls12_377_g2_generate_affine_points(g2_affine_t* points, int size);
cudaError_t bls12_377_g2_affine_convert_montgomery(g2_affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool bls12_377_eq(projective_t* point1, projective_t* point2);
void bls12_377_to_affine(projective_t* point, affine_t* point_out);
void bls12_377_from_affine(affine_t* point, projective_t* point_out);
void bls12_377_generate_projective_points(projective_t* points, int size);
void bls12_377_generate_affine_points(affine_t* points, int size);
cudaError_t bls12_377_affine_convert_montgomery(affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -40,17 +40,10 @@ func (p *Projective) FromLimbs(x, y, z []uint32) Projective {
}
func (p *Projective) FromAffine(a Affine) Projective {
z := BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.bls12_381_from_affine(cA, cP)
return *p
}
@@ -65,7 +58,7 @@ func (p *Projective) ProjectiveToAffine() Affine {
var a Affine
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.bls12_381_to_affine(cP, cA)
return a
}
@@ -111,18 +104,12 @@ func (a *Affine) FromLimbs(x, y []uint32) Affine {
}
func (a Affine) ToProjective() Projective {
var z BaseField
var p Projective
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p Projective
return p.Zero()
}
return Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
C.bls12_381_from_affine(cA, cP)
return p
}
func AffineFromProjective(p *Projective) Affine {

View File

@@ -40,17 +40,10 @@ func (p *G2Projective) FromLimbs(x, y, z []uint32) G2Projective {
}
func (p *G2Projective) FromAffine(a G2Affine) G2Projective {
z := G2BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(p))
C.bls12_381_g2_from_affine(cA, cP)
return *p
}
@@ -65,7 +58,7 @@ func (p *G2Projective) ProjectiveToAffine() G2Affine {
var a G2Affine
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(&p))
cP := (*C.g2_projective_t)(unsafe.Pointer(p))
C.bls12_381_g2_to_affine(cP, cA)
return a
}
@@ -111,18 +104,13 @@ func (a *G2Affine) FromLimbs(x, y []uint32) G2Affine {
}
func (a G2Affine) ToProjective() G2Projective {
var z G2BaseField
var p G2Projective
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p G2Projective
return p.Zero()
}
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(&p))
C.bls12_381_g2_from_affine(cA, cP)
return p
return G2Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
}
func G2AffineFromProjective(p *G2Projective) G2Affine {

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool bls12_381_g2_eq(g2_projective_t* point1, g2_projective_t* point2);
void bls12_381_g2_to_affine(g2_projective_t* point, g2_affine_t* point_out);
void bls12_381_g2_from_affine(g2_affine_t* point, g2_projective_t* point_out);
void bls12_381_g2_generate_projective_points(g2_projective_t* points, int size);
void bls12_381_g2_generate_affine_points(g2_affine_t* points, int size);
cudaError_t bls12_381_g2_affine_convert_montgomery(g2_affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool bls12_381_eq(projective_t* point1, projective_t* point2);
void bls12_381_to_affine(projective_t* point, affine_t* point_out);
void bls12_381_from_affine(affine_t* point, projective_t* point_out);
void bls12_381_generate_projective_points(projective_t* points, int size);
void bls12_381_generate_affine_points(affine_t* points, int size);
cudaError_t bls12_381_affine_convert_montgomery(affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -40,17 +40,10 @@ func (p *Projective) FromLimbs(x, y, z []uint32) Projective {
}
func (p *Projective) FromAffine(a Affine) Projective {
z := BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.bn254_from_affine(cA, cP)
return *p
}
@@ -65,7 +58,7 @@ func (p *Projective) ProjectiveToAffine() Affine {
var a Affine
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.bn254_to_affine(cP, cA)
return a
}
@@ -111,18 +104,13 @@ func (a *Affine) FromLimbs(x, y []uint32) Affine {
}
func (a Affine) ToProjective() Projective {
var z BaseField
var p Projective
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p Projective
return p.Zero()
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
C.bn254_from_affine(cA, cP)
return p
return Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
}
func AffineFromProjective(p *Projective) Affine {

View File

@@ -40,17 +40,10 @@ func (p *G2Projective) FromLimbs(x, y, z []uint32) G2Projective {
}
func (p *G2Projective) FromAffine(a G2Affine) G2Projective {
z := G2BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(p))
C.bn254_g2_from_affine(cA, cP)
return *p
}
@@ -65,7 +58,7 @@ func (p *G2Projective) ProjectiveToAffine() G2Affine {
var a G2Affine
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(&p))
cP := (*C.g2_projective_t)(unsafe.Pointer(p))
C.bn254_g2_to_affine(cP, cA)
return a
}
@@ -111,18 +104,12 @@ func (a *G2Affine) FromLimbs(x, y []uint32) G2Affine {
}
func (a G2Affine) ToProjective() G2Projective {
var z G2BaseField
var p G2Projective
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p G2Projective
return p.Zero()
}
return G2Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(&p))
C.bn254_g2_from_affine(cA, cP)
return p
}
func G2AffineFromProjective(p *G2Projective) G2Affine {

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool bn254_g2_eq(g2_projective_t* point1, g2_projective_t* point2);
void bn254_g2_to_affine(g2_projective_t* point, g2_affine_t* point_out);
void bn254_g2_from_affine(g2_affine_t* point, g2_projective_t* point_out);
void bn254_g2_generate_projective_points(g2_projective_t* points, int size);
void bn254_g2_generate_affine_points(g2_affine_t* points, int size);
cudaError_t bn254_g2_affine_convert_montgomery(g2_affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool bn254_eq(projective_t* point1, projective_t* point2);
void bn254_to_affine(projective_t* point, affine_t* point_out);
void bn254_from_affine(affine_t* point, projective_t* point_out);
void bn254_generate_projective_points(projective_t* points, int size);
void bn254_generate_affine_points(affine_t* points, int size);
cudaError_t bn254_affine_convert_montgomery(affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -40,17 +40,10 @@ func (p *Projective) FromLimbs(x, y, z []uint32) Projective {
}
func (p *Projective) FromAffine(a Affine) Projective {
z := BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.bw6_761_from_affine(cA, cP)
return *p
}
@@ -65,7 +58,7 @@ func (p *Projective) ProjectiveToAffine() Affine {
var a Affine
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.bw6_761_to_affine(cP, cA)
return a
}
@@ -111,18 +104,13 @@ func (a *Affine) FromLimbs(x, y []uint32) Affine {
}
func (a Affine) ToProjective() Projective {
var z BaseField
var p Projective
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p Projective
return p.Zero()
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
C.bw6_761_from_affine(cA, cP)
return p
return Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
}
func AffineFromProjective(p *Projective) Affine {

View File

@@ -40,17 +40,10 @@ func (p *G2Projective) FromLimbs(x, y, z []uint32) G2Projective {
}
func (p *G2Projective) FromAffine(a G2Affine) G2Projective {
z := G2BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(p))
C.bw6_761_g2_from_affine(cA, cP)
return *p
}
@@ -65,7 +58,7 @@ func (p *G2Projective) ProjectiveToAffine() G2Affine {
var a G2Affine
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(&p))
cP := (*C.g2_projective_t)(unsafe.Pointer(p))
C.bw6_761_g2_to_affine(cP, cA)
return a
}
@@ -111,18 +104,12 @@ func (a *G2Affine) FromLimbs(x, y []uint32) G2Affine {
}
func (a G2Affine) ToProjective() G2Projective {
var z G2BaseField
var p G2Projective
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p G2Projective
return p.Zero()
}
return G2Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
cA := (*C.g2_affine_t)(unsafe.Pointer(&a))
cP := (*C.g2_projective_t)(unsafe.Pointer(&p))
C.bw6_761_g2_from_affine(cA, cP)
return p
}
func G2AffineFromProjective(p *G2Projective) G2Affine {

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool bw6_761_g2_eq(g2_projective_t* point1, g2_projective_t* point2);
void bw6_761_g2_to_affine(g2_projective_t* point, g2_affine_t* point_out);
void bw6_761_g2_from_affine(g2_affine_t* point, g2_projective_t* point_out);
void bw6_761_g2_generate_projective_points(g2_projective_t* points, int size);
void bw6_761_g2_generate_affine_points(g2_affine_t* points, int size);
cudaError_t bw6_761_g2_affine_convert_montgomery(g2_affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool bw6_761_eq(projective_t* point1, projective_t* point2);
void bw6_761_to_affine(projective_t* point, affine_t* point_out);
void bw6_761_from_affine(affine_t* point, projective_t* point_out);
void bw6_761_generate_projective_points(projective_t* points, int size);
void bw6_761_generate_affine_points(affine_t* points, int size);
cudaError_t bw6_761_affine_convert_montgomery(affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -40,17 +40,10 @@ func (p *Projective) FromLimbs(x, y, z []uint32) Projective {
}
func (p *Projective) FromAffine(a Affine) Projective {
z := BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
} else {
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.grumpkin_from_affine(cA, cP)
return *p
}
@@ -65,7 +58,7 @@ func (p *Projective) ProjectiveToAffine() Affine {
var a Affine
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
cP := (*C.projective_t)(unsafe.Pointer(p))
C.grumpkin_to_affine(cP, cA)
return a
}
@@ -111,18 +104,13 @@ func (a *Affine) FromLimbs(x, y []uint32) Affine {
}
func (a Affine) ToProjective() Projective {
var z BaseField
var p Projective
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p Projective
return p.Zero()
}
cA := (*C.affine_t)(unsafe.Pointer(&a))
cP := (*C.projective_t)(unsafe.Pointer(&p))
C.grumpkin_from_affine(cA, cP)
return p
return Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
}
func AffineFromProjective(p *Projective) Affine {

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool grumpkin_eq(projective_t* point1, projective_t* point2);
void grumpkin_to_affine(projective_t* point, affine_t* point_out);
void grumpkin_from_affine(affine_t* point, projective_t* point_out);
void grumpkin_generate_projective_points(projective_t* points, int size);
void grumpkin_generate_affine_points(affine_t* points, int size);
cudaError_t grumpkin_affine_convert_montgomery(affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -39,21 +39,17 @@ func (p *{{.CurvePrefix}}Projective) FromLimbs(x, y, z []uint32) {{.CurvePrefix}
return *p
}
{{if ne .CurvePrefix "Mock"}}
func (p *{{.CurvePrefix}}Projective) FromAffine(a {{.CurvePrefix}}Affine) {{.CurvePrefix}}Projective {
z := {{.CurvePrefix}}BaseField{}
z.One()
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
p.Zero()
}else{
p.X = a.X
p.Y = a.Y
p.Z = z.One()
}
cA := (*C.{{toCName .CurvePrefix}}affine_t)(unsafe.Pointer(&a))
cP := (*C.{{toCName .CurvePrefix}}projective_t)(unsafe.Pointer(p))
C.{{.Curve}}{{toCNameBackwards .CurvePrefix}}_from_affine(cA, cP)
return *p
}
{{if ne .CurvePrefix "Mock"}}
func (p {{.CurvePrefix}}Projective) ProjectiveEq(p2 *{{.CurvePrefix}}Projective) bool {
cP := (*C.{{toCName .CurvePrefix}}projective_t)(unsafe.Pointer(&p))
cP2 := (*C.{{toCName .CurvePrefix}}projective_t)(unsafe.Pointer(&p2))
@@ -65,7 +61,7 @@ func (p *{{.CurvePrefix}}Projective) ProjectiveToAffine() {{.CurvePrefix}}Affine
var a {{.CurvePrefix}}Affine
cA := (*C.{{toCName .CurvePrefix}}affine_t)(unsafe.Pointer(&a))
cP := (*C.{{toCName .CurvePrefix}}projective_t)(unsafe.Pointer(&p))
cP := (*C.{{toCName .CurvePrefix}}projective_t)(unsafe.Pointer(p))
C.{{.Curve}}{{toCNameBackwards .CurvePrefix}}_to_affine(cP, cA)
return a
}
@@ -110,21 +106,17 @@ func (a *{{.CurvePrefix}}Affine) FromLimbs(x, y []uint32) {{.CurvePrefix}}Affine
return *a
}
func (a {{.CurvePrefix}}Affine) ToProjective() {{.CurvePrefix}}Projective {
var z {{.CurvePrefix}}BaseField
if (a.X == z.Zero()) && (a.Y == z.Zero()) {
var p {{.CurvePrefix}}Projective
return p.Zero()
}
return {{.CurvePrefix}}Projective{
X: a.X,
Y: a.Y,
Z: z.One(),
}
}
{{if ne .CurvePrefix "Mock"}}
func (a {{.CurvePrefix}}Affine) ToProjective() {{.CurvePrefix}}Projective {
var p {{.CurvePrefix}}Projective
cA := (*C.{{toCName .CurvePrefix}}affine_t)(unsafe.Pointer(&a))
cP := (*C.{{toCName .CurvePrefix}}projective_t)(unsafe.Pointer(&p))
C.{{.Curve}}{{toCNameBackwards .CurvePrefix}}_from_affine(cA, cP)
return p
}
func {{.CurvePrefix}}AffineFromProjective(p *{{.CurvePrefix}}Projective) {{.CurvePrefix}}Affine {
return p.ProjectiveToAffine()
}

View File

@@ -14,6 +14,7 @@ typedef struct DeviceContext DeviceContext;
bool {{.Curve}}{{toCNameBackwards .CurvePrefix}}_eq({{toCName .CurvePrefix}}projective_t* point1, {{toCName .CurvePrefix}}projective_t* point2);
void {{.Curve}}{{toCNameBackwards .CurvePrefix}}_to_affine({{toCName .CurvePrefix}}projective_t* point, {{toCName .CurvePrefix}}affine_t* point_out);
void {{.Curve}}{{toCNameBackwards .CurvePrefix}}_from_affine({{toCName .CurvePrefix}}affine_t* point, {{toCName .CurvePrefix}}projective_t* point_out);
void {{.Curve}}{{toCNameBackwards .CurvePrefix}}_generate_projective_points({{toCName .CurvePrefix}}projective_t* points, int size);
void {{.Curve}}{{toCNameBackwards .CurvePrefix}}_generate_affine_points({{toCName .CurvePrefix}}affine_t* points, int size);
cudaError_t {{.Curve}}{{toCNameBackwards .CurvePrefix}}_affine_convert_montgomery({{toCName .CurvePrefix}}affine_t* points, size_t n, bool is_into, DeviceContext* ctx);

View File

@@ -22,6 +22,8 @@ pub trait Curve: Debug + PartialEq + Copy + Clone {
#[doc(hidden)]
fn to_affine(point: *const Projective<Self>, point_aff: *mut Affine<Self>);
#[doc(hidden)]
fn from_affine(point: *const Affine<Self>, point_proj: *mut Projective<Self>);
#[doc(hidden)]
fn generate_random_projective_points(size: usize) -> Vec<Projective<Self>>;
#[doc(hidden)]
fn generate_random_affine_points(size: usize) -> Vec<Affine<Self>>;
@@ -79,27 +81,17 @@ impl<C: Curve> Affine<C> {
}
pub fn to_projective(&self) -> Projective<C> {
if *self == Self::zero() {
return Projective::<C>::zero();
}
Projective {
x: self.x,
y: self.y,
z: C::BaseField::one(),
}
let mut proj = Projective::<C>::zero();
C::from_affine(self as *const Self, &mut proj as *mut Projective<C>);
proj
}
}
impl<C: Curve> From<Affine<C>> for Projective<C> {
fn from(item: Affine<C>) -> Self {
if item == (Affine::<C>::zero()) {
return Self::zero();
}
Self {
x: item.x,
y: item.y,
z: C::BaseField::one(),
}
let mut proj = Self::zero();
C::from_affine(&item as *const Affine<C>, &mut proj as *mut Self);
proj
}
}
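Both `Affine::to_projective` and the `From<Affine<C>>` conversion now defer to the curve's `from_affine` binding instead of assembling the projective point on the Rust side. A minimal round-trip sketch of the new path, assuming the usual `icicle_bn254` type aliases (`G1Affine`, `G1Projective`), which are not part of this diff:

```rust
// Sketch only: the crate and alias names below are assumptions; the generic
// `to_projective` / `From<Affine>` paths are the ones defined in this diff.
use icicle_bn254::curve::{G1Affine, G1Projective};

fn main() {
    let a = G1Affine::zero();

    // Both conversions now go through the C `*_from_affine` symbol.
    let p1: G1Projective = a.to_projective();
    let p2: G1Projective = G1Projective::from(a);

    // Projective equality is assumed to be backed by the curve's C `eq` binding.
    assert!(p1 == p2);
}
```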
@@ -282,6 +274,8 @@ macro_rules! impl_curve {
pub(crate) fn eq(point1: *const $projective_type, point2: *const $projective_type) -> bool;
#[link_name = concat!($curve_prefix, "_to_affine")]
pub(crate) fn proj_to_affine(point: *const $projective_type, point_out: *mut $affine_type);
#[link_name = concat!($curve_prefix, "_from_affine")]
pub(crate) fn proj_from_affine(point: *const $affine_type, point_out: *mut $projective_type);
#[link_name = concat!($curve_prefix, "_generate_projective_points")]
pub(crate) fn generate_projective_points(points: *mut $projective_type, size: usize);
#[link_name = concat!($curve_prefix, "_generate_affine_points")]
@@ -315,6 +309,10 @@ macro_rules! impl_curve {
unsafe { $curve_prefix_ident::proj_to_affine(point, point_out) };
}
fn from_affine(point: *const $affine_type, point_out: *mut $projective_type) {
unsafe { $curve_prefix_ident::proj_from_affine(point, point_out) };
}
fn generate_random_projective_points(size: usize) -> Vec<$projective_type> {
let mut res = vec![$projective_type::zero(); size];
unsafe {

View File

@@ -20,8 +20,6 @@ pub struct VecOpsConfig<'a> {
/// Whether to run the vector operations asynchronously. If set to `true`, the functions will be non-blocking and you'd need to synchronize
/// it explicitly by running `stream.synchronize()`. If set to false, the functions will block the current CPU thread.
pub is_async: bool,
/// If true then vec_a, vec_b and result are in montgomery form. Default value: false.
pub is_in_montgomery_form: bool,
}
impl<'a> Default for VecOpsConfig<'a> {
@@ -38,7 +36,6 @@ impl<'a> VecOpsConfig<'a> {
is_b_on_device: false,
is_result_on_device: false,
is_async: false,
is_in_montgomery_form: false,
}
}
}
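With `is_in_montgomery_form` removed, the config carries only the device-placement and async flags; any Montgomery conversion presumably has to happen explicitly before the vector ops are called. A minimal sketch of constructing the trimmed-down config, assuming the `icicle_core::vec_ops` module path:

```rust
// Sketch: the module path is an assumption; only fields visible in this diff are used.
use icicle_core::vec_ops::VecOpsConfig;

fn main() {
    // Default config after the change: no Montgomery flag anymore.
    let cfg = VecOpsConfig::default();
    assert!(!cfg.is_async);
}
```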

View File

@@ -25,6 +25,22 @@ extern "C" {
config: &HashConfig,
) -> CudaError;
pub(crate) fn sha3_256_cuda(
input: *const u8,
input_block_size: u32,
number_of_blocks: u32,
output: *mut u8,
config: &HashConfig,
) -> CudaError;
pub(crate) fn sha3_512_cuda(
input: *const u8,
input_block_size: u32,
number_of_blocks: u32,
output: *mut u8,
config: &HashConfig,
) -> CudaError;
pub(crate) fn build_keccak256_merkle_tree_cuda(
leaves: *const u8,
digests: *mut u64,
@@ -40,6 +56,22 @@ extern "C" {
input_block_len: u32,
config: &TreeBuilderConfig,
) -> CudaError;
pub(crate) fn build_sha3_256_merkle_tree_cuda(
leaves: *const u8,
digests: *mut u64,
height: u32,
input_block_len: u32,
config: &TreeBuilderConfig,
) -> CudaError;
pub(crate) fn build_sha3_512_merkle_tree_cuda(
leaves: *const u8,
digests: *mut u64,
height: u32,
input_block_len: u32,
config: &TreeBuilderConfig,
) -> CudaError;
}
pub fn keccak256(
@@ -86,6 +118,50 @@ pub fn keccak512(
}
}
pub fn sha3_256(
input: &(impl HostOrDeviceSlice<u8> + ?Sized),
input_block_size: u32,
number_of_blocks: u32,
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
config: &HashConfig,
) -> IcicleResult<()> {
let mut local_cfg = config.clone();
local_cfg.are_inputs_on_device = input.is_on_device();
local_cfg.are_outputs_on_device = output.is_on_device();
unsafe {
sha3_256_cuda(
input.as_ptr(),
input_block_size,
number_of_blocks,
output.as_mut_ptr(),
&local_cfg,
)
.wrap()
}
}
pub fn sha3_512(
input: &(impl HostOrDeviceSlice<u8> + ?Sized),
input_block_size: u32,
number_of_blocks: u32,
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
config: &HashConfig,
) -> IcicleResult<()> {
let mut local_cfg = config.clone();
local_cfg.are_inputs_on_device = input.is_on_device();
local_cfg.are_outputs_on_device = output.is_on_device();
unsafe {
sha3_512_cuda(
input.as_ptr(),
input_block_size,
number_of_blocks,
output.as_mut_ptr(),
&local_cfg,
)
.wrap()
}
}
pub fn build_keccak256_merkle_tree(
leaves: &(impl HostOrDeviceSlice<u8> + ?Sized),
digests: &mut (impl HostOrDeviceSlice<u64> + ?Sized),
@@ -123,3 +199,41 @@ pub fn build_keccak512_merkle_tree(
.wrap()
}
}
pub fn build_sha3_256_merkle_tree(
leaves: &(impl HostOrDeviceSlice<u8> + ?Sized),
digests: &mut (impl HostOrDeviceSlice<u64> + ?Sized),
height: usize,
input_block_len: usize,
config: &TreeBuilderConfig,
) -> IcicleResult<()> {
unsafe {
build_sha3_256_merkle_tree_cuda(
leaves.as_ptr(),
digests.as_mut_ptr(),
height as u32,
input_block_len as u32,
config,
)
.wrap()
}
}
pub fn build_sha3_512_merkle_tree(
leaves: &(impl HostOrDeviceSlice<u8> + ?Sized),
digests: &mut (impl HostOrDeviceSlice<u64> + ?Sized),
height: usize,
input_block_len: usize,
config: &TreeBuilderConfig,
) -> IcicleResult<()> {
unsafe {
build_sha3_512_merkle_tree_cuda(
leaves.as_ptr(),
digests.as_mut_ptr(),
height as u32,
input_block_len as u32,
config,
)
.wrap()
}
}
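The new `sha3_256` and `sha3_512` wrappers follow the existing Keccak entry points argument-for-argument, producing 32- and 64-byte digests respectively. A single-block sketch of calling `sha3_256`; the `use` paths are assumptions about the crate layout rather than something shown in this diff:

```rust
// All `use` paths below are assumptions; the function and its signature are
// the ones added in this diff.
use icicle_core::hash::HashConfig;
use icicle_cuda_runtime::memory::HostSlice;
use icicle_hash::sha3::sha3_256;

fn main() {
    let input = vec![1u8; 136];     // one 136-byte input block
    let mut digest = vec![0u8; 32]; // sha3-256 digest is 32 bytes

    sha3_256(
        HostSlice::from_slice(&input),
        136, // input_block_size
        1,   // number_of_blocks
        HostSlice::from_mut_slice(&mut digest),
        &HashConfig::default(),
    )
    .unwrap();

    println!("{:02x?}", digest);
}
```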

View File

@@ -15,7 +15,7 @@ pub(crate) mod tests {
let number_of_hashes = 1024;
let preimages = vec![1u8; number_of_hashes * input_block_len];
let mut digests = vec![0u8; number_of_hashes * 64];
let mut digests = vec![0u8; number_of_hashes * 32];
let preimages_slice = HostSlice::from_slice(&preimages);
let digests_slice = HostSlice::from_mut_slice(&mut digests);