issue with init_optimized_poseidon_constants

ImmanuelSegol
2024-02-14 22:28:17 +00:00
parent 774250926c
commit 4d75fbac93
5 changed files with 157 additions and 0 deletions
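The core change is in example.cu: init_optimized_poseidon_constants is now called with the column size as its first argument, while the earlier call without it is kept as a comment in the source. A minimal before/after sketch of the call site, using the names from the example (the exact parameter list may differ between ICICLE versions):

// before: column size argument missing
// init_optimized_poseidon_constants<scalar_t>(ctx, &column_constants);
// after: column size passed explicitly
init_optimized_poseidon_constants<scalar_t>(size_col, ctx, &column_constants);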

View File

@@ -0,0 +1,25 @@
cmake_minimum_required(VERSION 3.18)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
  # On older CMake, pass the architecture explicitly, e.g. -DCUDA_ARCH=86
  set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
else()
  # 'native' is only understood by CMake 3.24+; on earlier versions it would be ignored and no target architecture passed
  set(CMAKE_CUDA_ARCHITECTURES native)
endif ()
project(icicle LANGUAGES CUDA CXX)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS_RELEASE "")
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
# change the path to your Icicle location
include_directories("../../../icicle")
add_executable(
example
example.cu
)
find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ )
target_link_libraries(example ${NVML_LIBRARY})
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

View File

@@ -0,0 +1,2 @@
# Multiple GPUs on a single host
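
This example simulates two devices on one host by giving each of two host threads its own CUDA stream and DeviceContext, and initializing the Poseidon constants per context. On a machine with more than one physical GPU, each thread would first bind itself to its own device. A minimal sketch of such a helper (hypothetical, not part of the example; it only uses the standard CUDA runtime calls cudaGetDeviceCount and cudaSetDevice):

#include <cuda_runtime.h>
#include <iostream>

// Bind the calling thread to the requested GPU, if it exists.
void bindThreadToDevice(int device_id) {
  int device_count = 0;
  cudaGetDeviceCount(&device_count);
  if (device_id < device_count) {
    cudaSetDevice(device_id); // subsequent CUDA calls in this thread target this GPU
  } else {
    std::cerr << "Requested device " << device_id << " but only " << device_count << " visible" << std::endl;
  }
}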

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Exit immediately on error
set -e
rm -rf build
mkdir -p build
# For CMake older than 3.24 you may need to pass the GPU architecture explicitly, e.g. cmake -S . -B build -DCUDA_ARCH=86
cmake -S . -B build
cmake --build build

View File

@@ -0,0 +1,119 @@
#include <cstdlib> // malloc / free
#include <iostream>
#include <thread>
#include <vector>

// select the curve (only 2 available so far)
#define CURVE_ID 2
#include "appUtils/poseidon/poseidon.cu"

using namespace poseidon;
using namespace curve_config;

void setCudaDevice(const unsigned device_id) {
  // Example function to set the CUDA device
  std::cout << "Setting CUDA device to " << device_id << std::endl;
  // cudaSetDevice(device_id);
}
// function that each thread will execute
void processData(const device_context::DeviceContext ctx, const std::vector<int>& inputData, std::vector<int>& outputData) {
  // Initialize the Poseidon constants for this context
  PoseidonConstants<scalar_t> column_constants;
  int size_col = 11;
  // earlier form without the column size argument:
  // init_optimized_poseidon_constants<scalar_t>(ctx, &column_constants);
  init_optimized_poseidon_constants<scalar_t>(size_col, ctx, &column_constants);
  PoseidonConfig column_config = default_poseidon_config<scalar_t>(size_col + 1);
  column_config.are_inputs_on_device = true;
  column_config.are_outputs_on_device = true;
  // Simulate some processing on the host data
  for (int num : inputData) {
    outputData.push_back(num * 2); // Example operation
  }
}
void checkCudaError(cudaError_t error) {
  if (error != cudaSuccess) {
    std::cerr << "CUDA error: " << cudaGetErrorString(error) << std::endl;
    // Handle the error, e.g., exit the program or throw an exception.
  }
}
int main() {
  const uint32_t size_col = 11;
  const unsigned size_partition = 1024; // size_row / nof_partitions;
  // layers is allocated only for one partition; it needs to be reused for different partitions
  const uint32_t size_layers = size_col * size_partition; // size_col * size_row

  // Input data for each thread
  std::vector<int> inputData1 = {1, 2, 3, 4};
  std::vector<int> inputData2 = {5, 6, 7, 8};
  // Output data for each thread
  std::vector<int> outputData1, outputData2;

  // Multiple devices are supported by device context
  // setCudaDevice(device_id);
  cudaStream_t stream0, stream1;
  cudaError_t err;
  err = cudaStreamCreate(&stream0);
  checkCudaError(err);
  err = cudaStreamCreate(&stream1);
  checkCudaError(err);

  device_context::DeviceContext ctx0 = device_context::DeviceContext{
    (cudaStream_t&)stream0, // SP: simulate different device as stream
    0,                      // device_id
    0,                      // mempool
  };
  device_context::DeviceContext ctx1 = device_context::DeviceContext{
    (cudaStream_t&)stream1, // SP: simulate different device as stream
    0,                      // device_id
    0,                      // mempool
  };

  // Allocate and initialize memory for the layers
  scalar_t* layers0 = static_cast<scalar_t*>(malloc(size_layers * sizeof(scalar_t)));
  if (layers0 == nullptr) {
    std::cerr << "Memory allocation for 'layers0' failed." << std::endl;
    return 1;
  }
  scalar_t s = scalar_t::zero();
  for (unsigned i = 0; i < size_col * size_partition; i++) {
    layers0[i] = s;
    s = s + scalar_t::one();
  }
  scalar_t* layers1 = static_cast<scalar_t*>(malloc(size_layers * sizeof(scalar_t)));
  if (layers1 == nullptr) {
    std::cerr << "Memory allocation for 'layers1' failed." << std::endl;
    free(layers0);
    return 1;
  }
  s = scalar_t::zero() + scalar_t::one();
  for (unsigned i = 0; i < size_col * size_partition; i++) {
    layers1[i] = s;
    s = s + scalar_t::one();
  }

  // Start threads
  std::thread thread1(processData, ctx0, std::ref(inputData1), std::ref(outputData1));
  std::thread thread2(processData, ctx1, std::ref(inputData2), std::ref(outputData2));
  // Wait for the threads to finish
  thread1.join();
  thread2.join();

  // Process the output data (example: print the data)
  std::cout << "Output Data from Thread 1: ";
  for (int num : outputData1) {
    std::cout << num << " ";
  }
  std::cout << std::endl;
  std::cout << "Output Data from Thread 2: ";
  for (int num : outputData2) {
    std::cout << num << " ";
  }
  std::cout << std::endl;

  // Clean up
  free(layers0);
  free(layers1);
  checkCudaError(cudaStreamDestroy(stream0));
  checkCudaError(cudaStreamDestroy(stream1));
  return 0;
}
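
As the "SP" comments note, the two contexts above only simulate separate devices by using two streams on device 0. On a host with a second physical GPU, the same pattern could bind a context to that device instead; a hedged sketch reusing the field order from the example (stream, device_id, mempool), which may differ between ICICLE versions:

// Hypothetical variant of the second context, bound to physical GPU 1:
cudaStream_t stream_dev1;
cudaSetDevice(1); // select the second GPU before creating its stream
checkCudaError(cudaStreamCreate(&stream_dev1));
device_context::DeviceContext ctx_dev1 = device_context::DeviceContext{
  (cudaStream_t&)stream_dev1, // stream created on device 1
  1,                          // device_id
  0,                          // mempool
};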

View File

@@ -0,0 +1,2 @@
#!/bin/bash
./build/example