mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-09 13:07:59 -05:00
issue with init_optimized_poseidon_constants
This commit is contained in:
25
examples/c++/multi-gpu-poseidon/CMakeLists.txt
Normal file
25
examples/c++/multi-gpu-poseidon/CMakeLists.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
cmake_minimum_required(VERSION 3.18)

# Build host and device code as C++17 / CUDA 17, and fail if unavailable.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)

# "native" is only understood by CMake 3.24+; on earlier versions it is ignored
# and the target is not passed, so fall back to the user-supplied CUDA_ARCH.
if(CMAKE_VERSION VERSION_LESS "3.24.0")
  set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
else()
  set(CMAKE_CUDA_ARCHITECTURES native)
endif()

project(icicle LANGUAGES CUDA CXX)

# Per-configuration CUDA compiler flags.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS_RELEASE "")
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")

# change the path to your Icicle location
include_directories("../../../icicle")

# The example is a single CUDA translation unit.
add_executable(example example.cu)

# Link against NVML, looking in the CUDA toolkit's stub directory as well.
find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ )
target_link_libraries(example ${NVML_LIBRARY})

set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
2
examples/c++/multi-gpu-poseidon/README.md
Normal file
2
examples/c++/multi-gpu-poseidon/README.md
Normal file
@@ -0,0 +1,2 @@
|
||||
# Multiple GPUs on a single host
|
||||
|
||||
9
examples/c++/multi-gpu-poseidon/compile.sh
Executable file
9
examples/c++/multi-gpu-poseidon/compile.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash

# Abort as soon as any command fails.
set -e

# Directory that receives the CMake build tree.
BUILD_DIR=build

# Always start from a clean build tree.
rm -rf "${BUILD_DIR}"
mkdir -p "${BUILD_DIR}"

# Configure from the current directory, then build.
cmake -S . -B "${BUILD_DIR}"
cmake --build "${BUILD_DIR}"
|
||||
119
examples/c++/multi-gpu-poseidon/example.cu
Normal file
119
examples/c++/multi-gpu-poseidon/example.cu
Normal file
@@ -0,0 +1,119 @@
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
// select the curve (only 2 available so far)
|
||||
#define CURVE_ID 2
|
||||
#include "appUtils/poseidon/poseidon.cu"
|
||||
|
||||
using namespace poseidon;
|
||||
using namespace curve_config;
|
||||
|
||||
// Placeholder for selecting the active CUDA device.
// For now it only logs the requested device id; the real cudaSetDevice call
// is still disabled below.
void setCudaDevice(const unsigned device_id) {
  std::cout << "Setting CUDA device to ";
  std::cout << device_id;
  std::cout << std::endl;
  // cudaSetDevice(device_id);
}
|
||||
|
||||
// function that a thread will execute
|
||||
void processData(const device_context::DeviceContext ctx, const std::vector<int>& inputData, std::vector<int>& outputData) {
|
||||
// Simulate some processing
|
||||
PoseidonConstants<scalar_t> column_constants;
|
||||
int size_col = 11;
|
||||
// init_optimized_poseidon_constants<scalar_t>(ctx, &column_constants);
|
||||
init_optimized_poseidon_constants<scalar_t>(size_col, ctx, &column_constants);
|
||||
PoseidonConfig column_config = default_poseidon_config<scalar_t>(size_col+1);
|
||||
column_config.are_inputs_on_device = true;
|
||||
column_config.are_outputs_on_device = true;
|
||||
|
||||
for (int num : inputData) {
|
||||
outputData.push_back(num * 2); // Example operation
|
||||
}
|
||||
}
|
||||
|
||||
// Reports a failed CUDA runtime call on stderr.
// Does nothing when error is cudaSuccess; on failure it only logs the
// message and returns, leaving any further handling to the caller.
void checkCudaError(cudaError_t error) {
  if (error == cudaSuccess) {
    return;
  }
  const char* message = cudaGetErrorString(error);
  std::cerr << "CUDA error: " << message << std::endl;
  // Handle the error, e.g., exit the program or throw an exception.
}
|
||||
|
||||
int main() {
|
||||
const uint32_t size_col=11;
|
||||
const unsigned size_partition = 1024; // size_row / nof_partitions;
|
||||
// layers is allocated only for one partition, need to resuse for different partitions
|
||||
const uint32_t size_layers = size_col * size_partition; // size_col * size_row
|
||||
// Input data for each thread
|
||||
std::vector<int> inputData1 = {1, 2, 3, 4};
|
||||
std::vector<int> inputData2 = {5, 6, 7, 8};
|
||||
|
||||
// Output data for each thread
|
||||
std::vector<int> outputData1, outputData2;
|
||||
|
||||
|
||||
// Multiple devices are supported by device context
|
||||
|
||||
// setCudaDevice(device_id);
|
||||
cudaStream_t stream0, stream1;
|
||||
cudaError_t err;
|
||||
err = cudaStreamCreate(&stream0);
|
||||
checkCudaError(err);
|
||||
err = cudaStreamCreate(&stream1);
|
||||
checkCudaError(err);
|
||||
|
||||
device_context::DeviceContext ctx0 = device_context::DeviceContext{
|
||||
(cudaStream_t&)stream0, // SP: simulate different device as stream
|
||||
0, // device_id
|
||||
0, // mempool
|
||||
};
|
||||
device_context::DeviceContext ctx1 = device_context::DeviceContext{
|
||||
(cudaStream_t&)stream1, // SP: simulate different device as stream
|
||||
0, // device_id
|
||||
0, // mempool
|
||||
};
|
||||
|
||||
// Allocate and initialize memory for the layers
|
||||
scalar_t* layers0 = static_cast<scalar_t*>(malloc(size_layers * sizeof(scalar_t)));
|
||||
if (layers0 == nullptr) {
|
||||
std::cerr << "Memory allocation for 'layers' failed." << std::endl;
|
||||
}
|
||||
scalar_t s = scalar_t::zero();
|
||||
for (unsigned i = 0; i < size_col*size_partition ; i++) {
|
||||
layers0[i] = s;
|
||||
s = s + scalar_t::one();
|
||||
}
|
||||
scalar_t* layers1 = static_cast<scalar_t*>(malloc(size_layers * sizeof(scalar_t)));
|
||||
if (layers1 == nullptr) {
|
||||
std::cerr << "Memory allocation for 'layers' failed." << std::endl;
|
||||
}
|
||||
s = scalar_t::zero() + scalar_t::one();
|
||||
for (unsigned i = 0; i < size_col*size_partition ; i++) {
|
||||
layers1[i] = s;
|
||||
s = s + scalar_t::one();
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Start threads
|
||||
std::thread thread1(processData, ctx0, std::ref(inputData1), std::ref(outputData1));
|
||||
std::thread thread2(processData, ctx1, std::ref(inputData2), std::ref(outputData2));
|
||||
|
||||
// Wait for the threads to finish
|
||||
thread1.join();
|
||||
thread2.join();
|
||||
|
||||
// Process the output data (example: print the data)
|
||||
std::cout << "Output Data from Thread 1: ";
|
||||
for (int num : outputData1) {
|
||||
std::cout << num << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "Output Data from Thread 2: ";
|
||||
for (int num : outputData2) {
|
||||
std::cout << num << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
2
examples/c++/multi-gpu-poseidon/run.sh
Executable file
2
examples/c++/multi-gpu-poseidon/run.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/bash
# Launch the example binary from the local build directory.
"./build/example"
|
||||
Reference in New Issue
Block a user