mirror of https://github.com/pseXperiments/icicle.git (synced 2026-01-09 21:17:56 -05:00)

[FEAT]: Add codespell to CI and pre-commit hooks (#344)

Add codespell to pre-commit hook/CI and fix typos
.codespellignore (new file, 3 lines)
@@ -0,0 +1,3 @@
+inout
+crate
+lmit
.github/workflows/codespell.yml (new file, vendored, 20 lines)
@@ -0,0 +1,20 @@
+name: Check Spelling
+
+on:
+  pull_request:
+    branches:
+      - main
+      - dev
+
+jobs:
+  spelling-checker:
+    name: Check Spelling
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: codespell-project/actions-codespell@v2
+        with:
+          # https://github.com/codespell-project/actions-codespell?tab=readme-ov-file#parameter-skip
+          skip: ./**/target,./**/build
+          # https://github.com/codespell-project/actions-codespell?tab=readme-ov-file#parameter-ignore_words_file
+          ignore_words_file: .codespellignore
@@ -105,6 +105,8 @@ In case `clang-format` is missing on your system, you can install it using the
 sudo apt install clang-format
 ```
 
+You will also need to install [codespell](https://github.com/codespell-project/codespell?tab=readme-ov-file#installation) to check for typos.
+
 This will ensure our custom hooks are run and will make it easier to follow our coding guidelines.
 
 ### Hall of Fame
@@ -43,7 +43,7 @@ poseidon.hash_blocks(inBlocks, nBlocks, outHashes, hashType, stream);
 
 ## What's in the example
 
-1. Define the size of the example: the hight of the full binary Merkle tree.
+1. Define the size of the example: the height of the full binary Merkle tree.
 2. Hash blocks in parallel. The tree width determines the number of blocks to hash.
 3. Build a Merkle tree from the hashes.
 4. Use the tree to generate a membership proof for one of computed hashes.
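The four steps above compress to a short host-side sketch: hash the leaves, fold them pairwise up to the root, and collect one sibling hash per level as the membership proof. The toy hash and all names below are illustrative stand-ins, not Icicle's Poseidon API.

```cpp
#include <cstdint>
#include <functional>
#include <vector>

// Toy stand-in for a real hash such as Poseidon (illustrative only).
static uint64_t toy_hash(uint64_t a, uint64_t b) {
  return std::hash<uint64_t>{}(a * 0x9e3779b97f4a7c15ULL ^ b);
}

// Build all tree levels bottom-up from pre-hashed leaves (power-of-two count).
static std::vector<std::vector<uint64_t>> build_tree(std::vector<uint64_t> leaves) {
  std::vector<std::vector<uint64_t>> levels{std::move(leaves)};
  while (levels.back().size() > 1) {
    const std::vector<uint64_t>& prev = levels.back();
    std::vector<uint64_t> next(prev.size() / 2);
    for (size_t i = 0; i < next.size(); ++i)
      next[i] = toy_hash(prev[2 * i], prev[2 * i + 1]);
    levels.push_back(std::move(next));
  }
  return levels; // levels.back()[0] is the Merkle root
}

// Membership proof: the sibling hash at every level on the leaf-to-root path.
static std::vector<uint64_t> prove(const std::vector<std::vector<uint64_t>>& levels, size_t index) {
  std::vector<uint64_t> path;
  for (size_t l = 0; l + 1 < levels.size(); ++l, index /= 2)
    path.push_back(levels[l][index ^ 1]); // index ^ 1 flips to the sibling node
  return path;
}
```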
@@ -6,7 +6,7 @@ We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to s
 
 ## Key-Takeaway
 
-`Icicle` accelerates multiplication operation `*` using [Karatsuba algorythm](https://en.wikipedia.org/wiki/Karatsuba_algorithm)
+`Icicle` accelerates multiplication operation `*` using [Karatsuba algorithm](https://en.wikipedia.org/wiki/Karatsuba_algorithm)
 
 ## Concise Usage Explanation
 
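As a refresher on the algorithm named above: Karatsuba replaces the four limb products of schoolbook multiplication with three, at the cost of a few additions. A minimal one-step sketch on 32-bit limbs (illustrative, not Icicle's big-integer code; `__uint128_t` is a GCC/Clang extension used only to hold the full result):

```cpp
#include <cstdint>

// One Karatsuba step for x = x1*2^32 + x0 and y = y1*2^32 + y0:
// x*y = hi*2^64 + (x0*y1 + x1*y0)*2^32 + lo, using only three limb products.
__uint128_t karatsuba_2limb(uint32_t x0, uint32_t x1, uint32_t y0, uint32_t y1) {
  uint64_t lo = (uint64_t)x0 * y0;                                          // product 1
  uint64_t hi = (uint64_t)x1 * y1;                                          // product 2
  __uint128_t sum = (__uint128_t)((uint64_t)x0 + x1) * ((uint64_t)y0 + y1); // product 3
  __uint128_t mid = sum - lo - hi;                                          // x0*y1 + x1*y0
  return ((__uint128_t)hi << 64) + (mid << 32) + lo;
}
```

Recursing on each of the three products is what gives the sub-quadratic running time the example benchmarks.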
@@ -41,7 +41,7 @@ type G1BaseField struct {
 }
 
 /*
- * BaseField Constrctors
+ * BaseField Constructors
 */
 
 func (f *G1BaseField) SetZero() *G1BaseField {
@@ -41,7 +41,7 @@ type G1BaseField struct {
 }
 
 /*
- * BaseField Constrctors
+ * BaseField Constructors
 */
 
 func (f *G1BaseField) SetZero() *G1BaseField {
@@ -41,7 +41,7 @@ type G1BaseField struct {
 }
 
 /*
- * BaseField Constrctors
+ * BaseField Constructors
 */
 
 func (f *G1BaseField) SetZero() *G1BaseField {
@@ -41,7 +41,7 @@ type G1BaseField struct {
 }
 
 /*
- * BaseField Constrctors
+ * BaseField Constructors
 */
 
 func (f *G1BaseField) SetZero() *G1BaseField {
@@ -23,7 +23,7 @@ type G1BaseField struct {
 }
 
 /*
- * BaseField Constrctors
+ * BaseField Constructors
 */
 
 func (f *G1BaseField) SetZero() *G1BaseField {
@@ -328,7 +328,7 @@ namespace msm {
     if (tid >= nof_msms) return;
     P final_result = P::zero();
     // Note: in some cases accumulation of bm is implemented such that some bms are known to be empty. Therefore
-    // skiping them.
+    // skipping them.
     for (unsigned i = nof_bms - nof_empty_bms; i > 1; i--) {
       final_result = final_result + final_sums[i - 1 + tid * nof_bms]; // add
       for (unsigned j = 0; j < c; j++)                                 // double
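The add/double pair in the loop above is a Horner-style fold of per-window sums, evaluating result = sum_i window_i * 2^(c*i) from the most significant window down. A host-side illustration with integers standing in for curve points (doubling the integer plays the role of point doubling; illustrative only):

```cpp
#include <cstdint>
#include <vector>

// Horner-style window reduction: returns sum_i window_sums[i] * 2^(c*i).
// uint64_t stands in for a curve point; += on it stands in for point add/double.
uint64_t reduce_windows(const std::vector<uint64_t>& window_sums, unsigned c) {
  uint64_t result = 0;
  for (size_t i = window_sums.size(); i-- > 0;) {
    if (i + 1 < window_sums.size())
      for (unsigned j = 0; j < c; ++j) result += result; // c doublings per step down
    result += window_sums[i];                            // add window i's bucket sum
  }
  return result;
}
// e.g. c = 2, windows {3, 1, 2} -> 3 + 1*4 + 2*16 = 39.
```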
@@ -348,7 +348,7 @@ namespace msm {
     A* points,
     unsigned batch_size,      // number of MSMs to compute
     unsigned single_msm_size, // number of elements per MSM (a.k.a N)
-    unsigned nof_points,      // numer of EC points in 'points' array. Must be either (1) single_msm_size if MSMs are
+    unsigned nof_points,      // number of EC points in 'points' array. Must be either (1) single_msm_size if MSMs are
                               // sharing points or (2) single_msm_size*batch_size otherwise
     P* final_result,
     bool are_scalars_on_device,
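The `nof_points` comment above admits exactly two layouts for `points`. A hypothetical indexing helper (not part of the API) makes the contract concrete:

```cpp
// For MSM b in the batch, element i: the shared layout reuses the same points
// for every MSM; the per-batch layout gives each MSM its own contiguous slice.
inline unsigned point_index(bool points_shared, unsigned b, unsigned i, unsigned single_msm_size) {
  return points_shared ? i                        // nof_points == single_msm_size
                       : b * single_msm_size + i; // nof_points == single_msm_size * batch_size
}
```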
@@ -558,7 +558,7 @@ namespace msm {
     CHK_IF_RETURN(cudaMallocAsync(&nof_large_buckets, sizeof(unsigned), stream));
     CHK_IF_RETURN(cudaMemset(nof_large_buckets, 0, sizeof(unsigned)));
 
-    unsigned TOTAL_THREADS = 129000; // todo - device dependant
+    unsigned TOTAL_THREADS = 129000; // todo - device dependent
     unsigned cutoff_run_length = max(2, h_nof_buckets_to_compute / TOTAL_THREADS);
     unsigned cutoff_nof_runs = (h_nof_buckets_to_compute + cutoff_run_length - 1) / cutoff_run_length;
     NUM_THREADS = min(1 << 5, cutoff_nof_runs);
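The `(x + len - 1) / len` expression in this hunk is ordinary integer ceiling division. A quick check with made-up numbers, say `h_nof_buckets_to_compute = 1000000`, where `cutoff_run_length = max(2, 1000000 / 129000) = 7`:

```cpp
// Integer ceiling division, the idiom behind cutoff_nof_runs above.
unsigned ceil_div(unsigned a, unsigned b) { return (a + b - 1) / b; }
// ceil_div(1000000, 7) == 142858, so NUM_THREADS = min(1 << 5, 142858) = 32.
```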
@@ -717,10 +717,10 @@ namespace msm {
       }
     }
     if (target_bits_count == 1) {
-      // Note: the reduction ends up with 'target_windows_count' windows per batch element. Some are guranteed to
+      // Note: the reduction ends up with 'target_windows_count' windows per batch element. Some are guaranteed to
       // be empty when target_windows_count>bitsize.
       // for example consider bitsize=253 and c=2. The reduction ends with 254 bms but the most significant one is
-      // guranteed to be zero since the scalars are 253b.
+      // guaranteed to be zero since the scalars are 253b.
       nof_bms_per_msm = target_windows_count;
       nof_empty_bms_per_batch = target_windows_count - bitsize;
       nof_bms_in_batch = nof_bms_per_msm * batch_size;
@@ -73,7 +73,7 @@ namespace msm {
   bool is_big_triangle; /**< Whether to do "bucket accumulation" serially. Decreases computational complexity
                          * but also greatly decreases parallelism, so only suitable for large batches of MSMs.
                          * Default value: false. */
-  bool is_async;        /**< Whether to run the MSM asyncronously. If set to true, the MSM function will be
+  bool is_async;        /**< Whether to run the MSM asynchronously. If set to true, the MSM function will be
                          * non-blocking and you'd need to synchronize it explicitly by running
                          * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the MSM
                          * function will block the current CPU thread. */
@@ -103,7 +103,7 @@ namespace msm {
  *
  * **Note:** this function is still WIP and the following [MSMConfig](@ref MSMConfig) members do not yet have any
  * effect: `precompute_factor` (always equals 1) and `ctx.device_id` (0 device is always used).
- * Also, it's currently better to use `batch_size=1` in most cases (expept with dealing with very many MSMs).
+ * Also, it's currently better to use `batch_size=1` in most cases (except with dealing with very many MSMs).
  */
 template <typename S, typename A, typename P>
 cudaError_t MSM(S* scalars, A* points, int msm_size, MSMConfig& config, P* results);
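Tying this declaration to the `is_async` semantics documented in the MSMConfig hunk above, an asynchronous call could look like the sketch below. The wrapper name, the assumed header path, and the stream plumbing are illustrative; only `msm::MSM`, `MSMConfig::is_async`, and `cudaStreamSynchronize` come from this diff and the CUDA runtime.

```cpp
#include <cuda_runtime.h>
#include "msm.cuh" // assumed header exposing msm::MSM and msm::MSMConfig (path is a guess)

// Mirrors the templated declaration above: S = scalar, A = affine point, P = projective point.
template <typename S, typename A, typename P>
cudaError_t run_msm_async(S* scalars, A* points, int msm_size,
                          msm::MSMConfig& config, P* results, cudaStream_t stream) {
  config.is_async = true;                               // make the call non-blocking
  msm::MSM(scalars, points, msm_size, config, results); // work is only enqueued here
  // ... CPU work placed here overlaps with the GPU ...
  return cudaStreamSynchronize(stream);                 // results are valid only after this
}
```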
@@ -260,7 +260,7 @@ namespace ntt {
  * @param n_twiddles Size of `d_twiddles`
  * @param batch_size The size of the batch; the length of `d_inout` is `n` * `batch_size`.
  * @param inverse true for iNTT
- * @param coset should be array of lenght n or a nullptr if NTT is not computed on a coset
+ * @param coset should be array of length n or a nullptr if NTT is not computed on a coset
  * @param stream CUDA stream
  * @param is_async if false, perform sync of the supplied CUDA stream at the end of processing
  * @param d_output Output array
@@ -76,7 +76,7 @@ namespace ntt {
                               * `Ordering::kNN`. */
   bool are_inputs_on_device;  /**< True if inputs are on device and false if they're on host. Default value: false. */
   bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
-  bool is_async;              /**< Whether to run the NTT asyncronously. If set to `true`, the NTT function will be
+  bool is_async;              /**< Whether to run the NTT asynchronously. If set to `true`, the NTT function will be
                                * non-blocking and you'd need to synchronize it explicitly by running
                                * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the NTT
                                * function will block the current CPU thread. */
@@ -147,7 +147,7 @@ Poseidon<S>::hash_blocks(const S* inp, size_t blocks, S* out, HashType hash_type
   CHK_IF_RETURN(cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream));
 
   // This is where the input matrix of size Arity x NumberOfBlocks is
-  // padded and coppied to device in a T x NumberOfBlocks matrix
+  // padded and copied to device in a T x NumberOfBlocks matrix
   CHK_IF_RETURN(cudaMemcpy2DAsync(
     states, this->t * sizeof(S),    // Device pointer and device pitch
     inp, (this->t - 1) * sizeof(S), // Host pointer and pitch
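For readers unfamiliar with pitched copies: `cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream)` copies `height` rows of `width` bytes, stepping `spitch` bytes between source rows and `dpitch` between destination rows. With the parameters above, each row of t-1 input elements lands in a device row of t elements, which is exactly how the padding happens. Spelled out as a sketch (the helper name is illustrative):

```cpp
#include <cuda_runtime.h>

// Copy `blocks` rows of (t-1) elements into device rows of t elements,
// leaving one untouched padding element per row, as in the hunk above.
template <typename S>
cudaError_t copy_padded(S* d_states, const S* h_inp, size_t t, size_t blocks, cudaStream_t stream) {
  return cudaMemcpy2DAsync(
    d_states, t * sizeof(S),    // destination pointer and pitch (row stride in bytes)
    h_inp, (t - 1) * sizeof(S), // source pointer and pitch
    (t - 1) * sizeof(S),        // width: bytes actually copied per row
    blocks,                     // height: number of rows
    cudaMemcpyHostToDevice, stream);
}
```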
@@ -119,10 +119,10 @@ public:
     return ExtensionField{real_prod + i_sq_times_im, re_im + im_re};
   }
 
-  template <uint32_t mutliplier, unsigned REDUCTION_SIZE = 1>
+  template <uint32_t multiplier, unsigned REDUCTION_SIZE = 1>
   static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField& xs)
   {
-    return {FF::template mul_unsigned<mutliplier>(xs.real), FF::template mul_unsigned<mutliplier>(xs.imaginary)};
+    return {FF::template mul_unsigned<multiplier>(xs.real), FF::template mul_unsigned<multiplier>(xs.imaginary)};
   }
 
   template <unsigned MODULUS_MULTIPLE = 1>
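The pair returned at the top of the hunk above is the schoolbook product in a quadratic extension: writing elements as \f$ a + bu \f$ with \f$ u^2 = \beta \f$ a fixed non-residue (\f$ \beta = i^2 \f$ in the code's naming), \f$ (a + bu)(c + du) = (ac + \beta bd) + (ad + bc)u \f$, so `real_prod` \f$ = ac \f$, `i_sq_times_im` \f$ = \beta bd \f$, and `re_im + im_re` \f$ = ad + bc \f$.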
@@ -8,7 +8,7 @@
  * refactoring it is low in the priority list.
  *
  * Documentation of methods is intended to explain inner workings to developers working on icicle. In its current state
- * it mostly explains modular mutliplication and related methods. One important quirk of modern CUDA that's affecting
+ * it mostly explains modular multiplication and related methods. One important quirk of modern CUDA that's affecting
  * most methods is explained by [Niall Emmart](https://youtu.be/KAWlySN7Hm8?si=h7nzDujnvubWXeDX&t=4039). In short, when
  * 64-bit MAD (`r = a * b + c`) instructions get compiled down to SASS (CUDA assembly) they require two-register values
  * `r` and `c` to start from even register (e.g. `r` can live in registers 20 and 21, or 14 and 15, but not 15 and 16).
@@ -490,7 +490,7 @@ public:
     __align__(16) uint32_t odd[TLC - 1];
     size_t i;
     // `b[0]` is \f$ 2^{32} \f$ minus the last limb of prime modulus. Because most scalar (and some base) primes
-    // are neccessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to
+    // are necessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to
     // less efficient SASS generated by nvcc, so this case needed separate handling.
     if (b[0] == UINT32_MAX) {
       add_sub_u32_device<true, false>(cs.limbs, a, even, TLC);
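A concrete instance of the remark in the hunk above: NTT-friendly primes satisfy \f$ p \equiv 1 \pmod{2^k} \f$ for large \f$ k \f$, so with 32-bit limbs the least-significant limb of \f$ p \f$ is often exactly 1. BLS12-381's scalar field (2-adicity 32) is one such case, giving \f$ b[0] = 2^{32} - 1 = \f$ `UINT32_MAX` and landing in precisely the branch shown.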
@@ -747,7 +747,7 @@ public:
  * This method reduces a Wide number `xs` modulo `p` and returns the result as a Field element.
  *
  * It is assumed that the high `2 * slack_bits` bits of `xs` are unset which is always the case for the product of 2
- * numbers with thier high `slack_bits` unset. Larger Wide numbers should be reduced by subtracting an appropriate
+ * numbers with their high `slack_bits` unset. Larger Wide numbers should be reduced by subtracting an appropriate
  * factor of `modulus_squared` first.
  *
  * This function implements ["multi-precision Barrett"](https://github.com/ingonyama-zk/modular_multiplication). As
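As a reminder of the core Barrett idea behind the linked multi-precision version: precompute \f$ m \approx \lfloor 2^k / p \rfloor \f$, estimate the quotient as \f$ q = \lfloor xm / 2^k \rfloor \f$, subtract \f$ qp \f$, and fix up with a few conditional subtractions. A single-word sketch (illustrative, not the library's code; `__int128` is a GCC/Clang extension):

```cpp
#include <cstdint>

// Single-word Barrett reduction: reduce x < p^2 modulo p.
uint32_t barrett_reduce(uint64_t x, uint32_t p) {
  const uint64_t m = ~0ULL / p; // precomputed once per modulus: ~ floor(2^64 / p)
  uint64_t q = (uint64_t)(((unsigned __int128)x * m) >> 64); // quotient estimate, never too high
  uint64_t r = x - q * p;       // congruent to x mod p, at most a few p too large
  while (r >= p) r -= p;        // bounded fix-up (a handful of iterations at most)
  return (uint32_t)r;
}
```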
@@ -830,7 +830,7 @@ public:
     return mul * xs;
   }
 
-  template <uint32_t mutliplier, class T, unsigned REDUCTION_SIZE = 1>
+  template <uint32_t multiplier, class T, unsigned REDUCTION_SIZE = 1>
   static constexpr HOST_DEVICE_INLINE T mul_unsigned(const T& xs)
   {
     T rs = {};
@@ -840,11 +840,11 @@ public:
 #pragma unroll
 #endif
     for (unsigned i = 0; i < 32; i++) {
-      if (mutliplier & (1 << i)) {
+      if (multiplier & (1 << i)) {
         rs = is_zero ? temp : (rs + temp);
         is_zero = false;
       }
-      if (mutliplier & ((1 << (31 - i) - 1) << (i + 1))) break;
+      if (multiplier & ((1 << (31 - i) - 1) << (i + 1))) break;
       temp = temp + temp;
     }
     return rs;
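The loop above is double-and-add over the bits of a compile-time constant: accumulate `temp` when bit i of the multiplier is set, exit early once no higher bits remain, and otherwise double `temp` using only additions. The same shape with plain integers standing in for field elements (the early-exit test is written here in its simplest form; names are illustrative):

```cpp
#include <cstdint>

// Multiply x by a compile-time constant using only additions (double-and-add).
template <uint32_t multiplier>
uint64_t mul_unsigned_sketch(uint64_t x) {
  uint64_t rs = 0;
  uint64_t temp = x;                                  // temp == x * 2^i at iteration i
  for (unsigned i = 0; i < 32; i++) {
    if (multiplier & (1u << i)) rs += temp;           // bit i set: accumulate x * 2^i
    if (i == 31 || (multiplier >> (i + 1)) == 0) break; // no higher bits set: done
    temp += temp;                                     // double for the next bit
  }
  return rs;
}
// mul_unsigned_sketch<10>(7) == 70: bits 1 and 3 of 10 give 14 + 56.
```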
@@ -16,7 +16,7 @@ namespace vec_ops {
  * @param vec_b Second input vector.
  * @param n Size of vectors `vec_a` and `vec_b`.
  * @param is_on_device If true, inputs and outputs are on device, if false - on the host.
- * @param is_montgomery If true, inputs are expected to be in Montgomery form and results are retured in Montgomery
+ * @param is_montgomery If true, inputs are expected to be in Montgomery form and results are returned in Montgomery
  * form. If false - inputs and outputs are non-Montgomery.
  * @param ctx [DeviceContext](@ref device_context::DeviceContext) used in this method.
  * @param result Resulting vector - element-wise product of `vec_a` and `vec_b`, can be the same pointer as `vec_b`.
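For context on the `is_montgomery` flag: Montgomery form represents \f$ x \f$ as \f$ \tilde{x} = xR \bmod p \f$ for a fixed constant \f$ R \f$, and Montgomery multiplication returns \f$ \tilde{x}\tilde{y}R^{-1} \bmod p = \widetilde{xy} \f$, so element-wise products of Montgomery-form inputs naturally come back in Montgomery form with no extra conversions.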
@@ -1,6 +1,15 @@
 #!/bin/bash
 
 status=0
 
+if [[ $(codespell --skip ./**/target,./**/build -I .codespellignore 2>&1) ]];
+then
+    echo "There are typos in some of the files you've changed. Please run the following to check what they are:"
+    echo "codespell --skip ./**/target,./**/build -I .codespellignore"
+    echo ""
+    status=1
+fi
+
 # Run clang-format on CUDA, C, and CPP files
 # clang-format writes to stderr in dry-run mode. In order to capture the output to detect if there are changes needed we redirect stderr to stdin
 if [[ $(find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]];
@@ -53,7 +53,7 @@ pub struct MSMConfig<'a> {
     /// decreases parallelism, so only suitable for large batches of MSMs. Default value: false.
     pub is_big_triangle: bool,
 
-    /// Whether to run the MSM asyncronously. If set to `true`, the MSM function will be non-blocking
+    /// Whether to run the MSM asynchronously. If set to `true`, the MSM function will be non-blocking
     /// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
     /// If set to `false`, the MSM function will block the current CPU thread.
    pub is_async: bool,
@@ -54,7 +54,7 @@ pub struct NTTConfig<'a, S> {
    pub ordering: Ordering,
    are_inputs_on_device: bool,
    are_outputs_on_device: bool,
-   /// Whether to run the NTT asyncronously. If set to `true`, the NTT function will be non-blocking and you'd need to synchronize
+   /// Whether to run the NTT asynchronously. If set to `true`, the NTT function will be non-blocking and you'd need to synchronize
    /// it explicitly by running `stream.synchronize()`. If set to false, the NTT function will block the current CPU thread.
    pub is_async: bool,
 }