mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-09 15:37:58 -05:00
blake2s working for single hash using Hasher
This commit is contained in:
@@ -24,6 +24,31 @@
|
||||
using namespace hash;
|
||||
|
||||
namespace blake2s{
|
||||
#define BLAKE2S_ROUNDS 10
|
||||
#define BLAKE2S_BLOCK_LENGTH 64
|
||||
#define BLAKE2S_CHAIN_SIZE 8
|
||||
#define BLAKE2S_CHAIN_LENGTH (BLAKE2S_CHAIN_SIZE * sizeof(uint32_t))
|
||||
#define BLAKE2S_STATE_SIZE 16
|
||||
#define BLAKE2S_STATE_LENGTH (BLAKE2S_STATE_SIZE * sizeof(uint32_t))
|
||||
|
||||
class Blake2s : public Hasher<BYTE, BYTE>
|
||||
{
|
||||
public:
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const BYTE* input,
|
||||
BYTE* output,
|
||||
WORD number_of_states,
|
||||
WORD input_len,
|
||||
WORD output_len,
|
||||
const device_context::DeviceContext& ctx) const override;
|
||||
|
||||
|
||||
Blake2s()
|
||||
: Hasher<BYTE, BYTE>(BLAKE2S_STATE_SIZE, BLAKE2S_STATE_SIZE, BLAKE2S_STATE_SIZE, 0)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
void mcm_cuda_blake2s_hash_batch(BYTE* key, WORD keylen, BYTE * in, WORD inlen, BYTE * out, WORD n_outbit, WORD n_batch);
|
||||
}
|
||||
|
||||
@@ -1,2 +1,5 @@
|
||||
test_blake2s: test_blake2s.cu blake2s.cu
|
||||
nvcc -o test_blake2s -I. -I../../../include test_blake2s.cu blake2s.cu -g
|
||||
nvcc -o test_blake2s -I. -I../../../include test_blake2s.cu blake2s.cu -g
|
||||
|
||||
test_blake2s_hasher: test_blake2s_hasher.cu blake2s.cu
|
||||
nvcc -o test_blake2s_hasher -I. -I../../../include test_blake2s_hasher.cu -g
|
||||
@@ -11,12 +11,7 @@
|
||||
using namespace hash;
|
||||
|
||||
namespace blake2s {
|
||||
#define BLAKE2S_ROUNDS 10
|
||||
#define BLAKE2S_BLOCK_LENGTH 64
|
||||
#define BLAKE2S_CHAIN_SIZE 8
|
||||
#define BLAKE2S_CHAIN_LENGTH (BLAKE2S_CHAIN_SIZE * sizeof(uint32_t))
|
||||
#define BLAKE2S_STATE_SIZE 16
|
||||
#define BLAKE2S_STATE_LENGTH (BLAKE2S_STATE_SIZE * sizeof(uint32_t))
|
||||
|
||||
|
||||
typedef struct {
|
||||
WORD digestlen;
|
||||
@@ -82,7 +77,7 @@ __constant__ uint8_t BLAKE2S_SIGMA[10][16] = {
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }
|
||||
};
|
||||
|
||||
__device__ uint32_t cuda_blake2s_leuint32(BYTE *in) {
|
||||
__device__ uint32_t cuda_blake2s_leuint32(const BYTE *in) {
|
||||
uint32_t a;
|
||||
memcpy(&a, in, 4);
|
||||
return a;
|
||||
@@ -122,7 +117,7 @@ __device__ __forceinline__ void cuda_blake2s_init_state(cuda_blake2s_ctx_t *ctx)
|
||||
// ctx->state[14] = BLAKE2S_IVS[7];
|
||||
}
|
||||
|
||||
__device__ __forceinline__ void cuda_blake2s_compress(cuda_blake2s_ctx_t *ctx, BYTE *in, WORD inoffset) {
|
||||
__device__ __forceinline__ void cuda_blake2s_compress(cuda_blake2s_ctx_t *ctx, const BYTE *in, WORD inoffset) {
|
||||
cuda_blake2s_init_state(ctx);
|
||||
uint32_t m[16] = { 0 };
|
||||
for (int j = 0; j < 16; j++)
|
||||
@@ -167,7 +162,7 @@ __device__ void cuda_blake2s_init(cuda_blake2s_ctx_t *ctx, BYTE *key, WORD keyle
|
||||
ctx->pos = (keylen > 0) ? BLAKE2S_BLOCK_LENGTH : 0;
|
||||
}
|
||||
|
||||
__device__ void cuda_blake2s_update(cuda_blake2s_ctx_t *ctx, BYTE *in, LONG inlen) {
|
||||
__device__ void cuda_blake2s_update(cuda_blake2s_ctx_t *ctx, const BYTE *in, LONG inlen) {
|
||||
if (inlen == 0)
|
||||
return;
|
||||
|
||||
@@ -225,12 +220,12 @@ __device__ void cuda_blake2s_final(cuda_blake2s_ctx_t *ctx, BYTE *out) {
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void kernel_blake2s_hash(BYTE *indata, WORD inlen, BYTE *outdata, WORD n_batch, WORD BLAKE2S_BLOCK_SIZE) {
|
||||
__global__ void kernel_blake2s_hash(const BYTE *indata, WORD inlen, BYTE *outdata, WORD n_batch, WORD BLAKE2S_BLOCK_SIZE) {
|
||||
WORD thread = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (thread >= n_batch) {
|
||||
return;
|
||||
}
|
||||
BYTE *in = indata + thread * inlen;
|
||||
const BYTE *in = indata + thread * inlen;
|
||||
BYTE *out = outdata + thread * BLAKE2S_BLOCK_SIZE;
|
||||
CUDA_BLAKE2S_CTX ctx = c_CTX;
|
||||
cuda_blake2s_update(&ctx, in, inlen);
|
||||
@@ -265,4 +260,36 @@ void mcm_cuda_blake2s_hash_batch(BYTE *key, WORD keylen, BYTE *in, WORD inlen, B
|
||||
cudaFree(cuda_outdata);
|
||||
}
|
||||
}
|
||||
|
||||
cudaError_t Blake2s::run_hash_many_kernel(
|
||||
const BYTE* input,
|
||||
BYTE* output,
|
||||
WORD number_of_states,
|
||||
WORD input_len,
|
||||
WORD output_len,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
BYTE key[32] = ""; // Null key
|
||||
WORD keylen = strlen((char *)key);
|
||||
const WORD BLAKE2S_BLOCK_SIZE = (output_len >> 3);
|
||||
CUDA_BLAKE2S_CTX blake_ctx;
|
||||
cpu_blake2s_init(&blake_ctx, key, keylen, output_len);
|
||||
cudaMemcpyToSymbol(c_CTX, &blake_ctx, sizeof(CUDA_BLAKE2S_CTX), 0, cudaMemcpyHostToDevice);
|
||||
|
||||
WORD thread = 256;
|
||||
WORD block = (number_of_states + thread - 1) / thread;
|
||||
|
||||
kernel_blake2s_hash<<<block, thread, 0, ctx.stream>>>(input, input_len, output, number_of_states, BLAKE2S_BLOCK_SIZE);
|
||||
// cudaDeviceSynchronize();
|
||||
// cudaError_t error = cudaGetLastError();
|
||||
// if (error != cudaSuccess) {
|
||||
// printf("Error cuda blake2s hash: %s \n", cudaGetErrorString(error));
|
||||
// }
|
||||
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
|
||||
}
|
||||
|
||||
|
||||
} // namespace blake2s
|
||||
19
icicle/src/hash/blake2s/extern.cu
Normal file
19
icicle/src/hash/blake2s/extern.cu
Normal file
@@ -0,0 +1,19 @@
|
||||
#include "utils/utils.h"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
#include "hash/blake2s/blake2s.cuh"
|
||||
#include "blake2s.cu"
|
||||
#include "../../merkle-tree/merkle.cu"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
namespace blake2s {
|
||||
extern "C" cudaError_t
|
||||
blake2s_cuda(BYTE * input, BYTE * output, WORD number_of_blocks, WORD input_block_size, WORD output_block_size, HashConfig& config)
|
||||
{
|
||||
return Blake2s().hash_many(
|
||||
input, output, number_of_blocks, input_block_size, output_block_size, config);
|
||||
}
|
||||
|
||||
} // namespace blake2s
|
||||
111
icicle/src/hash/blake2s/test_blake2s_hasher.cu
Normal file
111
icicle/src/hash/blake2s/test_blake2s_hasher.cu
Normal file
@@ -0,0 +1,111 @@
|
||||
#include <chrono>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include "extern.cu"
|
||||
|
||||
|
||||
using namespace blake2s;
|
||||
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f us\n", msg, FpMicroseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
extern "C" {
|
||||
void mcm_cuda_blake2s_hash_batch(BYTE *key, WORD keylen, BYTE *in, WORD inlen, BYTE *out, WORD n_outbit, WORD n_batch);
|
||||
}
|
||||
|
||||
void print_hash(BYTE *hash, WORD len) {
|
||||
printf("Hash Len: %d \n", len);
|
||||
printf("BLAKE2S hash:\n");
|
||||
for (WORD i = 0; i < len; i++) {
|
||||
printf("%02x", hash[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
BYTE *read_file(const char *filename, size_t *filesize) {
|
||||
FILE *file = fopen(filename, "rb");
|
||||
if (!file) {
|
||||
perror("Failed to open file");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
fseek(file, 0, SEEK_END);
|
||||
*filesize = ftell(file);
|
||||
fseek(file, 0, SEEK_SET);
|
||||
|
||||
BYTE *buffer = (BYTE *)malloc(*filesize);
|
||||
if (!buffer) {
|
||||
perror("Failed to allocate memory");
|
||||
fclose(file);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
size_t bytesRead = fread(buffer, 1, *filesize, file);
|
||||
if (bytesRead != *filesize) {
|
||||
perror("Failed to read file");
|
||||
free(buffer);
|
||||
fclose(file);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
using FpMicroseconds = std::chrono::duration<float, std::chrono::microseconds::period>;
|
||||
|
||||
BYTE *input;
|
||||
size_t inlen;
|
||||
const char *input_filename;
|
||||
const char *default_input = "aaaaaaaaaaa";
|
||||
|
||||
if (argc < 2) {
|
||||
// Use default input if no file is provided
|
||||
input = (BYTE *)default_input;
|
||||
inlen = strlen(default_input);
|
||||
} else {
|
||||
input_filename = argv[1];
|
||||
input = read_file(input_filename, &inlen);
|
||||
}
|
||||
|
||||
// Test parameters
|
||||
BYTE key[32] = ""; // Example key
|
||||
WORD keylen = strlen((char *)key);
|
||||
WORD n_outbit = 256; // Output length in bits
|
||||
WORD n_batch = 1; // Number of hashes to compute in parallel
|
||||
|
||||
// Allocate memory for the output
|
||||
WORD outlen = n_outbit / 8;
|
||||
BYTE *output = (BYTE *)malloc(outlen * n_batch);
|
||||
if (!output) {
|
||||
perror("Failed to allocate memory for output");
|
||||
if (argc >= 2) free(input); // Free file buffer if it was allocated
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
printf("Key len: %d \n", keylen);
|
||||
|
||||
// Perform the hashing
|
||||
START_TIMER(blake_timer)
|
||||
HashConfig config = default_hash_config();
|
||||
|
||||
blake2s_cuda(input, output, n_batch, inlen, n_outbit, config);
|
||||
END_TIMER(blake_timer, "Blake Timer")
|
||||
|
||||
// Print the result
|
||||
print_hash(output, outlen);
|
||||
|
||||
// Clean up
|
||||
free(output);
|
||||
if (argc >= 2) free(input); // Free file buffer if it was allocated
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user