diff --git a/.codespellignore b/.codespellignore new file mode 100644 index 00000000..b41990df --- /dev/null +++ b/.codespellignore @@ -0,0 +1,3 @@ +inout +crate +lmit diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 00000000..90fc5932 --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,20 @@ +name: Check Spelling + +on: + pull_request: + branches: + - main + - dev + +jobs: + spelling-checker: + name: Check Spelling + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: codespell-project/actions-codespell@v2 + with: + # https://github.com/codespell-project/actions-codespell?tab=readme-ov-file#parameter-skip + skip: ./**/target,./**/build + # https://github.com/codespell-project/actions-codespell?tab=readme-ov-file#parameter-ignore_words_file + ignore_words_file: .codespellignore diff --git a/README.md b/README.md index c8f7e5c9..6a8f794c 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,8 @@ In case `clang-format` is missing on your system, you can install it using the sudo apt install clang-format ``` +You will also need to install [codespell](https://github.com/codespell-project/codespell?tab=readme-ov-file#installation) to check for typos. + This will ensure our custom hooks are run and will make it easier to follow our coding guidelines. ### Hall of Fame diff --git a/examples/c++/Poseidon-hash/README.md b/examples/c++/Poseidon-hash/README.md index b7f649b4..533a8db1 100644 --- a/examples/c++/Poseidon-hash/README.md +++ b/examples/c++/Poseidon-hash/README.md @@ -43,7 +43,7 @@ poseidon.hash_blocks(inBlocks, nBlocks, outHashes, hashType, stream); ## What's in the example -1. Define the size of the example: the hight of the full binary Merkle tree. +1. Define the size of the example: the height of the full binary Merkle tree. 2. Hash blocks in parallel. The tree width determines the number of blocks to hash. 3. Build a Merkle tree from the hashes. 4. Use the tree to generate a membership proof for one of computed hashes. 
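The Poseidon example README above ends with a four-step outline: size the tree, hash the leaf blocks in parallel, fold the hashes into a full binary Merkle tree, and produce a membership proof. As a reference for steps 3 and 4 only, here is a small self-contained C++ sketch; `hash_pair` is a deliberately fake stand-in for a real Poseidon 2-to-1 hash, and none of the names below are icicle API.

```cpp
#include <cstdint>
#include <cstdio>
#include <functional>
#include <vector>

// Placeholder 2-to-1 hash standing in for Poseidon; illustration only.
uint64_t hash_pair(uint64_t l, uint64_t r) { return std::hash<uint64_t>{}(l * 31 + r); }

// Build a full binary Merkle tree, level by level, from already-hashed leaves.
// Assumes a power-of-two number of leaves; levels.back()[0] is the root.
std::vector<std::vector<uint64_t>> build_tree(std::vector<uint64_t> leaves) {
  std::vector<std::vector<uint64_t>> levels{leaves};
  while (levels.back().size() > 1) {
    const auto& prev = levels.back();
    std::vector<uint64_t> next(prev.size() / 2);
    for (size_t i = 0; i < next.size(); i++) next[i] = hash_pair(prev[2 * i], prev[2 * i + 1]);
    levels.push_back(next);
  }
  return levels;
}

// Membership proof for leaf `index`: the sibling hash at every level.
std::vector<uint64_t> prove(const std::vector<std::vector<uint64_t>>& levels, size_t index) {
  std::vector<uint64_t> path;
  for (size_t lvl = 0; lvl + 1 < levels.size(); lvl++) {
    path.push_back(levels[lvl][index ^ 1]); // sibling of the current node
    index /= 2;
  }
  return path;
}

// Recompute the root from a leaf hash and its sibling path, then compare.
bool verify(uint64_t leaf, size_t index, const std::vector<uint64_t>& path, uint64_t root) {
  for (uint64_t sibling : path) {
    leaf = (index % 2 == 0) ? hash_pair(leaf, sibling) : hash_pair(sibling, leaf);
    index /= 2;
  }
  return leaf == root;
}

int main() {
  std::vector<uint64_t> leaves = {11, 22, 33, 44, 55, 66, 77, 88}; // pretend Poseidon block hashes
  auto levels = build_tree(leaves);
  auto path = prove(levels, 5);
  printf("proof verifies: %d\n", verify(leaves[5], 5, path, levels.back()[0]));
  return 0;
}
```

The proof is simply the sibling hash at every level, so verifying a leaf re-hashes log2(width) pairs and compares the result against the stored root.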
diff --git a/examples/c++/multiply/README.md b/examples/c++/multiply/README.md index 56ae4552..da65b7e5 100644 --- a/examples/c++/multiply/README.md +++ b/examples/c++/multiply/README.md @@ -6,7 +6,7 @@ We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to s ## Key-Takeaway -`Icicle` accelerates multiplication operation `*` using [Karatsuba algorythm](https://en.wikipedia.org/wiki/Karatsuba_algorithm) +`Icicle` accelerates multiplication operation `*` using [Karatsuba algorithm](https://en.wikipedia.org/wiki/Karatsuba_algorithm) ## Concise Usage Explanation diff --git a/goicicle/curves/bls12377/g1.go b/goicicle/curves/bls12377/g1.go index b7c77eaf..a8dd5814 100644 --- a/goicicle/curves/bls12377/g1.go +++ b/goicicle/curves/bls12377/g1.go @@ -41,7 +41,7 @@ type G1BaseField struct { } /* - * BaseField Constrctors + * BaseField Constructors */ func (f *G1BaseField) SetZero() *G1BaseField { diff --git a/goicicle/curves/bls12381/g1.go b/goicicle/curves/bls12381/g1.go index dedd18a0..60a5a3ec 100644 --- a/goicicle/curves/bls12381/g1.go +++ b/goicicle/curves/bls12381/g1.go @@ -41,7 +41,7 @@ type G1BaseField struct { } /* - * BaseField Constrctors + * BaseField Constructors */ func (f *G1BaseField) SetZero() *G1BaseField { diff --git a/goicicle/curves/bn254/g1.go b/goicicle/curves/bn254/g1.go index b934d6ed..4e6cb14e 100644 --- a/goicicle/curves/bn254/g1.go +++ b/goicicle/curves/bn254/g1.go @@ -41,7 +41,7 @@ type G1BaseField struct { } /* - * BaseField Constrctors + * BaseField Constructors */ func (f *G1BaseField) SetZero() *G1BaseField { diff --git a/goicicle/curves/bw6761/g1.go b/goicicle/curves/bw6761/g1.go index 4b69ba05..acd764a0 100644 --- a/goicicle/curves/bw6761/g1.go +++ b/goicicle/curves/bw6761/g1.go @@ -41,7 +41,7 @@ type G1BaseField struct { } /* - * BaseField Constrctors + * BaseField Constructors */ func (f *G1BaseField) SetZero() *G1BaseField { diff --git a/goicicle/templates/curves/g1.go.tmpl b/goicicle/templates/curves/g1.go.tmpl index 595bd790..2edac159 100644 --- a/goicicle/templates/curves/g1.go.tmpl +++ b/goicicle/templates/curves/g1.go.tmpl @@ -23,7 +23,7 @@ type G1BaseField struct { } /* - * BaseField Constrctors + * BaseField Constructors */ func (f *G1BaseField) SetZero() *G1BaseField { diff --git a/icicle/appUtils/msm/msm.cu b/icicle/appUtils/msm/msm.cu index 03c3963f..af61b7be 100644 --- a/icicle/appUtils/msm/msm.cu +++ b/icicle/appUtils/msm/msm.cu @@ -328,7 +328,7 @@ namespace msm { if (tid >= nof_msms) return; P final_result = P::zero(); // Note: in some cases accumulation of bm is implemented such that some bms are known to be empty. Therefore - // skiping them. + // skipping them. for (unsigned i = nof_bms - nof_empty_bms; i > 1; i--) { final_result = final_result + final_sums[i - 1 + tid * nof_bms]; // add for (unsigned j = 0; j < c; j++) // double @@ -348,7 +348,7 @@ namespace msm { A* points, unsigned batch_size, // number of MSMs to compute unsigned single_msm_size, // number of elements per MSM (a.k.a N) - unsigned nof_points, // numer of EC points in 'points' array. Must be either (1) single_msm_size if MSMs are + unsigned nof_points, // number of EC points in 'points' array. 
Must be either (1) single_msm_size if MSMs are // sharing points or (2) single_msm_size*batch_size otherwise P* final_result, bool are_scalars_on_device, @@ -558,7 +558,7 @@ namespace msm { CHK_IF_RETURN(cudaMallocAsync(&nof_large_buckets, sizeof(unsigned), stream)); CHK_IF_RETURN(cudaMemset(nof_large_buckets, 0, sizeof(unsigned))); - unsigned TOTAL_THREADS = 129000; // todo - device dependant + unsigned TOTAL_THREADS = 129000; // todo - device dependent unsigned cutoff_run_length = max(2, h_nof_buckets_to_compute / TOTAL_THREADS); unsigned cutoff_nof_runs = (h_nof_buckets_to_compute + cutoff_run_length - 1) / cutoff_run_length; NUM_THREADS = min(1 << 5, cutoff_nof_runs); @@ -717,10 +717,10 @@ namespace msm { } } if (target_bits_count == 1) { - // Note: the reduction ends up with 'target_windows_count' windows per batch element. Some are guranteed to + // Note: the reduction ends up with 'target_windows_count' windows per batch element. Some are guaranteed to // be empty when target_windows_count>bitsize. // for example consider bitsize=253 and c=2. The reduction ends with 254 bms but the most significant one is - // guranteed to be zero since the scalars are 253b. + // guaranteed to be zero since the scalars are 253b. nof_bms_per_msm = target_windows_count; nof_empty_bms_per_batch = target_windows_count - bitsize; nof_bms_in_batch = nof_bms_per_msm * batch_size; diff --git a/icicle/appUtils/msm/msm.cuh b/icicle/appUtils/msm/msm.cuh index 26f17f5e..eecb374e 100644 --- a/icicle/appUtils/msm/msm.cuh +++ b/icicle/appUtils/msm/msm.cuh @@ -73,7 +73,7 @@ namespace msm { bool is_big_triangle; /**< Whether to do "bucket accumulation" serially. Decreases computational complexity * but also greatly decreases parallelism, so only suitable for large batches of MSMs. * Default value: false. */ - bool is_async; /**< Whether to run the MSM asyncronously. If set to true, the MSM function will be + bool is_async; /**< Whether to run the MSM asynchronously. If set to true, the MSM function will be * non-blocking and you'd need to synchronize it explicitly by running * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the MSM * function will block the current CPU thread. */ @@ -103,7 +103,7 @@ namespace msm { * * **Note:** this function is still WIP and the following [MSMConfig](@ref MSMConfig) members do not yet have any * effect: `precompute_factor` (always equals 1) and `ctx.device_id` (0 device is always used). - * Also, it's currently better to use `batch_size=1` in most cases (expept with dealing with very many MSMs). + * Also, it's currently better to use `batch_size=1` in most cases (except with dealing with very many MSMs). */ template <typename S, typename A, typename P> cudaError_t MSM(S* scalars, A* points, int msm_size, MSMConfig& config, P* results); diff --git a/icicle/appUtils/ntt/ntt.cu b/icicle/appUtils/ntt/ntt.cu index 935a03f8..afe4d4cb 100644 --- a/icicle/appUtils/ntt/ntt.cu +++ b/icicle/appUtils/ntt/ntt.cu @@ -260,7 +260,7 @@ namespace ntt { * @param n_twiddles Size of `d_twiddles` * @param batch_size The size of the batch; the length of `d_inout` is `n` * `batch_size`.
* @param inverse true for iNTT - * @param coset should be array of lenght n or a nullptr if NTT is not computed on a coset + * @param coset should be array of length n or a nullptr if NTT is not computed on a coset * @param stream CUDA stream * @param is_async if false, perform sync of the supplied CUDA stream at the end of processing * @param d_output Output array diff --git a/icicle/appUtils/ntt/ntt.cuh b/icicle/appUtils/ntt/ntt.cuh index 13bd1024..10c70988 100644 --- a/icicle/appUtils/ntt/ntt.cuh +++ b/icicle/appUtils/ntt/ntt.cuh @@ -76,7 +76,7 @@ namespace ntt { * `Ordering::kNN`. */ bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */ bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */ - bool is_async; /**< Whether to run the NTT asyncronously. If set to `true`, the NTT function will be + bool is_async; /**< Whether to run the NTT asynchronously. If set to `true`, the NTT function will be * non-blocking and you'd need to synchronize it explicitly by running * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the NTT * function will block the current CPU thread. */ diff --git a/icicle/appUtils/poseidon/poseidon.cu b/icicle/appUtils/poseidon/poseidon.cu index bc852602..17a29038 100644 --- a/icicle/appUtils/poseidon/poseidon.cu +++ b/icicle/appUtils/poseidon/poseidon.cu @@ -147,7 +147,7 @@ Poseidon<S>::hash_blocks(const S* inp, size_t blocks, S* out, HashType hash_type CHK_IF_RETURN(cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream)); // This is where the input matrix of size Arity x NumberOfBlocks is - // padded and coppied to device in a T x NumberOfBlocks matrix + // padded and copied to device in a T x NumberOfBlocks matrix CHK_IF_RETURN(cudaMemcpy2DAsync( states, this->t * sizeof(S), // Device pointer and device pitch inp, (this->t - 1) * sizeof(S), // Host pointer and pitch diff --git a/icicle/primitives/extension_field.cuh b/icicle/primitives/extension_field.cuh index 3189469a..8c57ea53 100644 --- a/icicle/primitives/extension_field.cuh +++ b/icicle/primitives/extension_field.cuh @@ -119,10 +119,10 @@ public: return ExtensionField{real_prod + i_sq_times_im, re_im + im_re}; } - template <unsigned mutliplier> + template <unsigned multiplier> static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField& xs) { - return {FF::template mul_unsigned<mutliplier>(xs.real), FF::template mul_unsigned<mutliplier>(xs.imaginary)}; + return {FF::template mul_unsigned<multiplier>(xs.real), FF::template mul_unsigned<multiplier>(xs.imaginary)}; } template diff --git a/icicle/primitives/field.cuh b/icicle/primitives/field.cuh index 2a0c3f19..4e89cd12 100644 --- a/icicle/primitives/field.cuh +++ b/icicle/primitives/field.cuh @@ -8,7 +8,7 @@ * refactoring it is low in the priority list. * * Documentation of methods is intended to explain inner workings to developers working on icicle. In its current state - * it mostly explains modular mutliplication and related methods. One important quirk of modern CUDA that's affecting + * it mostly explains modular multiplication and related methods. One important quirk of modern CUDA that's affecting * most methods is explained by [Niall Emmart](https://youtu.be/KAWlySN7Hm8?si=h7nzDujnvubWXeDX&t=4039). In short, when * 64-bit MAD (`r = a * b + c`) instructions get compiled down to SASS (CUDA assembly) they require two-register values * `r` and `c` to start from even register (e.g. `r` can live in registers 20 and 21, or 14 and 15, but not 15 and 16).
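The `is_async` comments fixed above (in `msm.cuh` and `ntt.cuh`, and again in the Rust wrappers below) all describe the same contract: with `is_async = true` the call only enqueues work on the given stream and returns immediately, so the caller must synchronize explicitly. A minimal host-side sketch of that pattern, using plain CUDA runtime calls (CUDA 11.2+ for the async allocator) as stand-ins for an asynchronous icicle MSM/NTT call; nothing here is icicle's API:

```cpp
#include <cuda_runtime.h>
#include <cstdio>

int main() {
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  int* d_buf = nullptr;
  int h_buf[4] = {0};
  cudaMallocAsync(&d_buf, sizeof(h_buf), stream);

  // These enqueue work on `stream` and return immediately, just like an
  // icicle MSM/NTT call with is_async = true would.
  cudaMemsetAsync(d_buf, 0xFF, sizeof(h_buf), stream);
  cudaMemcpyAsync(h_buf, d_buf, sizeof(h_buf), cudaMemcpyDeviceToHost, stream);

  // The host thread must not read h_buf yet; explicit synchronization
  // (cudaStreamSynchronize or cudaDeviceSynchronize) is required first.
  cudaStreamSynchronize(stream);
  printf("h_buf[0] = %d\n", h_buf[0]); // -1 after the 0xFF memset

  cudaFreeAsync(d_buf, stream);
  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
  return 0;
}
```

With `is_async = false` the library performs the equivalent of this `cudaStreamSynchronize` internally before returning, blocking the calling CPU thread.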
@@ -490,7 +490,7 @@ public: __align__(16) uint32_t odd[TLC - 1]; size_t i; // `b[0]` is \f$ 2^{32} \f$ minus the last limb of prime modulus. Because most scalar (and some base) primes - // are neccessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to + // are necessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to // less efficient SASS generated by nvcc, so this case needed separate handling. if (b[0] == UINT32_MAX) { add_sub_u32_device(cs.limbs, a, even, TLC); @@ -747,7 +747,7 @@ public: * This method reduces a Wide number `xs` modulo `p` and returns the result as a Field element. * * It is assumed that the high `2 * slack_bits` bits of `xs` are unset which is always the case for the product of 2 - * numbers with thier high `slack_bits` unset. Larger Wide numbers should be reduced by subtracting an appropriate + * numbers with their high `slack_bits` unset. Larger Wide numbers should be reduced by subtracting an appropriate * factor of `modulus_squared` first. * * This function implements ["multi-precision Barrett"](https://github.com/ingonyama-zk/modular_multiplication). As @@ -830,7 +830,7 @@ public: return mul * xs; } - template <unsigned mutliplier> + template <unsigned multiplier> static constexpr HOST_DEVICE_INLINE T mul_unsigned(const T& xs) { T rs = {}; @@ -840,11 +840,11 @@ public: #pragma unroll #endif for (unsigned i = 0; i < 32; i++) { - if (mutliplier & (1 << i)) { + if (multiplier & (1 << i)) { rs = is_zero ? temp : (rs + temp); is_zero = false; } - if (mutliplier & ((1 << (31 - i) - 1) << (i + 1))) break; + if (multiplier & ((1 << (31 - i) - 1) << (i + 1))) break; temp = temp + temp; } return rs; } diff --git a/icicle/utils/vec_ops.cuh b/icicle/utils/vec_ops.cuh index 7ed1e9a1..3b7ce6ef 100644 --- a/icicle/utils/vec_ops.cuh +++ b/icicle/utils/vec_ops.cuh @@ -16,7 +16,7 @@ namespace vec_ops { * @param vec_b Second input vector. * @param n Size of vectors `vec_a` and `vec_b`. * @param is_on_device If true, inputs and outputs are on device, if false - on the host. - * @param is_montgomery If true, inputs are expected to be in Montgomery form and results are retured in Montgomery + * @param is_montgomery If true, inputs are expected to be in Montgomery form and results are returned in Montgomery * form. If false - inputs and outputs are non-Montgomery. * @param ctx [DeviceContext](@ref device_context::DeviceContext) used in this method. * @param result Resulting vector - element-wise product of `vec_a` and `vec_b`, can be the same pointer as `vec_b`. diff --git a/scripts/hooks/pre-push b/scripts/hooks/pre-push index d95160f2..6d9d265e 100755 --- a/scripts/hooks/pre-push +++ b/scripts/hooks/pre-push @@ -1,6 +1,15 @@ #!/bin/bash status=0 + +if [[ $(codespell --skip ./**/target,./**/build -I .codespellignore 2>&1) ]]; +then + echo "There are typos in some of the files you've changed. Please run the following to check what they are:" + echo "codespell --skip ./**/target,./**/build -I .codespellignore" + echo "" + status=1 +fi + # Run clang-format on CUDA, C, and CPP files # clang-format writes to stderr in dry-run mode.
In order to capture the output to detect if there are changes needed we redirect stderr to stdin if [[ $(find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]]; diff --git a/wrappers/rust/icicle-core/src/msm/mod.rs b/wrappers/rust/icicle-core/src/msm/mod.rs index b2d56fda..d23bd4a2 100644 --- a/wrappers/rust/icicle-core/src/msm/mod.rs +++ b/wrappers/rust/icicle-core/src/msm/mod.rs @@ -53,7 +53,7 @@ pub struct MSMConfig<'a> { /// decreases parallelism, so only suitable for large batches of MSMs. Default value: false. pub is_big_triangle: bool, - /// Whether to run the MSM asyncronously. If set to `true`, the MSM function will be non-blocking + /// Whether to run the MSM asynchronously. If set to `true`, the MSM function will be non-blocking /// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`. /// If set to `false`, the MSM function will block the current CPU thread. pub is_async: bool, diff --git a/wrappers/rust/icicle-core/src/ntt/mod.rs b/wrappers/rust/icicle-core/src/ntt/mod.rs index e0fc30b0..577e2d6a 100644 --- a/wrappers/rust/icicle-core/src/ntt/mod.rs +++ b/wrappers/rust/icicle-core/src/ntt/mod.rs @@ -54,7 +54,7 @@ pub struct NTTConfig<'a, S> { pub ordering: Ordering, are_inputs_on_device: bool, are_outputs_on_device: bool, - /// Whether to run the NTT asyncronously. If set to `true`, the NTT function will be non-blocking and you'd need to synchronize + /// Whether to run the NTT asynchronously. If set to `true`, the NTT function will be non-blocking and you'd need to synchronize /// it explicitly by running `stream.synchronize()`. If set to false, the NTT function will block the current CPU thread. pub is_async: bool, }
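As background for the `mutliplier` renames in `icicle/primitives/field.cuh` and `extension_field.cuh` above: `mul_unsigned` multiplies a field element by a small compile-time constant using only additions, walking the bits of the constant while doubling a running term. The following standalone C++ sketch shows that shift-and-add idea; it is illustrative only, with a simplified early-exit test (and without icicle's `is_zero` shortcut) rather than the exact bit mask used in the library.

```cpp
#include <cstdint>
#include <cstdio>

// Toy stand-in for a field element that only supports addition, mirroring the
// constraint under which a constant multiplication must be built from adds.
// Names here are illustrative, not icicle's.
struct Elem {
  uint64_t v;
  Elem operator+(const Elem& o) const { return {v + o.v}; }
};

// Shift-and-add multiplication by a compile-time constant: walk the bits of
// `multiplier`, adding the running term wherever a bit is set and doubling it
// between iterations.
template <unsigned multiplier>
Elem mul_unsigned(Elem xs) {
  Elem rs{0};
  Elem temp = xs;
  for (unsigned i = 0; i < 32; i++) {
    if (multiplier & (1u << i)) rs = rs + temp;
    if ((multiplier >> i) <= 1) break; // no set bits above bit i, stop doubling
    temp = temp + temp;
  }
  return rs;
}

int main() {
  Elem x{7};
  printf("%llu\n", (unsigned long long)mul_unsigned<13>(x).v); // 7 * 13 = 91
  return 0;
}
```

For `multiplier = 13` (binary 1101) the loop doubles `temp` three times and adds it in at bits 0, 2 and 3, which is why multiplying by small constants stays cheap without invoking a full modular multiplication.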