[FEAT]: Add codespell to CI and pre-commit hooks (#344)

Add codespell to pre-commit hook/CI and fix typos
This commit is contained in:
Jeremy Felder
2024-01-22 14:27:52 +02:00
committed by GitHub
parent 45f6db666b
commit 69af0bef91
21 changed files with 62 additions and 28 deletions

3
.codespellignore Normal file
View File

@@ -0,0 +1,3 @@
inout
crate
lmit

20
.github/workflows/codespell.yml vendored Normal file
View File

@@ -0,0 +1,20 @@
name: Check Spelling
on:
pull_request:
branches:
- main
- dev
jobs:
spelling-checker:
name: Check Spelling
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: codespell-project/actions-codespell@v2
with:
# https://github.com/codespell-project/actions-codespell?tab=readme-ov-file#parameter-skip
skip: ./**/target,./**/build
# https://github.com/codespell-project/actions-codespell?tab=readme-ov-file#parameter-ignore_words_file
ignore_words_file: .codespellignore

View File

@@ -105,6 +105,8 @@ In case `clang-format` is missing on your system, you can install it using the
sudo apt install clang-format sudo apt install clang-format
``` ```
You will also need to install [codespell](https://github.com/codespell-project/codespell?tab=readme-ov-file#installation) to check for typos.
This will ensure our custom hooks are run and will make it easier to follow our coding guidelines. This will ensure our custom hooks are run and will make it easier to follow our coding guidelines.
### Hall of Fame ### Hall of Fame

View File

@@ -43,7 +43,7 @@ poseidon.hash_blocks(inBlocks, nBlocks, outHashes, hashType, stream);
## What's in the example ## What's in the example
1. Define the size of the example: the hight of the full binary Merkle tree. 1. Define the size of the example: the height of the full binary Merkle tree.
2. Hash blocks in parallel. The tree width determines the number of blocks to hash. 2. Hash blocks in parallel. The tree width determines the number of blocks to hash.
3. Build a Merkle tree from the hashes. 3. Build a Merkle tree from the hashes.
4. Use the tree to generate a membership proof for one of the computed hashes. 4. Use the tree to generate a membership proof for one of the computed hashes.

View File

@@ -6,7 +6,7 @@ We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to s
## Key-Takeaway ## Key-Takeaway
`Icicle` accelerates multiplication operation `*` using [Karatsuba algorythm](https://en.wikipedia.org/wiki/Karatsuba_algorithm) `Icicle` accelerates multiplication operation `*` using [Karatsuba algorithm](https://en.wikipedia.org/wiki/Karatsuba_algorithm)
## Concise Usage Explanation ## Concise Usage Explanation

View File

@@ -41,7 +41,7 @@ type G1BaseField struct {
} }
/* /*
* BaseField Constrctors * BaseField Constructors
*/ */
func (f *G1BaseField) SetZero() *G1BaseField { func (f *G1BaseField) SetZero() *G1BaseField {

View File

@@ -41,7 +41,7 @@ type G1BaseField struct {
} }
/* /*
* BaseField Constrctors * BaseField Constructors
*/ */
func (f *G1BaseField) SetZero() *G1BaseField { func (f *G1BaseField) SetZero() *G1BaseField {

View File

@@ -41,7 +41,7 @@ type G1BaseField struct {
} }
/* /*
* BaseField Constrctors * BaseField Constructors
*/ */
func (f *G1BaseField) SetZero() *G1BaseField { func (f *G1BaseField) SetZero() *G1BaseField {

View File

@@ -41,7 +41,7 @@ type G1BaseField struct {
} }
/* /*
* BaseField Constrctors * BaseField Constructors
*/ */
func (f *G1BaseField) SetZero() *G1BaseField { func (f *G1BaseField) SetZero() *G1BaseField {

View File

@@ -23,7 +23,7 @@ type G1BaseField struct {
} }
/* /*
* BaseField Constrctors * BaseField Constructors
*/ */
func (f *G1BaseField) SetZero() *G1BaseField { func (f *G1BaseField) SetZero() *G1BaseField {

View File

@@ -328,7 +328,7 @@ namespace msm {
if (tid >= nof_msms) return; if (tid >= nof_msms) return;
P final_result = P::zero(); P final_result = P::zero();
// Note: in some cases accumulation of bm is implemented such that some bms are known to be empty. Therefore // Note: in some cases accumulation of bm is implemented such that some bms are known to be empty. Therefore
// skiping them. // skipping them.
for (unsigned i = nof_bms - nof_empty_bms; i > 1; i--) { for (unsigned i = nof_bms - nof_empty_bms; i > 1; i--) {
final_result = final_result + final_sums[i - 1 + tid * nof_bms]; // add final_result = final_result + final_sums[i - 1 + tid * nof_bms]; // add
for (unsigned j = 0; j < c; j++) // double for (unsigned j = 0; j < c; j++) // double
@@ -348,7 +348,7 @@ namespace msm {
A* points, A* points,
unsigned batch_size, // number of MSMs to compute unsigned batch_size, // number of MSMs to compute
unsigned single_msm_size, // number of elements per MSM (a.k.a N) unsigned single_msm_size, // number of elements per MSM (a.k.a N)
unsigned nof_points, // numer of EC points in 'points' array. Must be either (1) single_msm_size if MSMs are unsigned nof_points, // number of EC points in 'points' array. Must be either (1) single_msm_size if MSMs are
// sharing points or (2) single_msm_size*batch_size otherwise // sharing points or (2) single_msm_size*batch_size otherwise
P* final_result, P* final_result,
bool are_scalars_on_device, bool are_scalars_on_device,
@@ -558,7 +558,7 @@ namespace msm {
CHK_IF_RETURN(cudaMallocAsync(&nof_large_buckets, sizeof(unsigned), stream)); CHK_IF_RETURN(cudaMallocAsync(&nof_large_buckets, sizeof(unsigned), stream));
CHK_IF_RETURN(cudaMemset(nof_large_buckets, 0, sizeof(unsigned))); CHK_IF_RETURN(cudaMemset(nof_large_buckets, 0, sizeof(unsigned)));
unsigned TOTAL_THREADS = 129000; // todo - device dependant unsigned TOTAL_THREADS = 129000; // todo - device dependent
unsigned cutoff_run_length = max(2, h_nof_buckets_to_compute / TOTAL_THREADS); unsigned cutoff_run_length = max(2, h_nof_buckets_to_compute / TOTAL_THREADS);
unsigned cutoff_nof_runs = (h_nof_buckets_to_compute + cutoff_run_length - 1) / cutoff_run_length; unsigned cutoff_nof_runs = (h_nof_buckets_to_compute + cutoff_run_length - 1) / cutoff_run_length;
NUM_THREADS = min(1 << 5, cutoff_nof_runs); NUM_THREADS = min(1 << 5, cutoff_nof_runs);
@@ -717,10 +717,10 @@ namespace msm {
} }
} }
if (target_bits_count == 1) { if (target_bits_count == 1) {
// Note: the reduction ends up with 'target_windows_count' windows per batch element. Some are guranteed to // Note: the reduction ends up with 'target_windows_count' windows per batch element. Some are guaranteed to
// be empty when target_windows_count>bitsize. // be empty when target_windows_count>bitsize.
// for example consider bitsize=253 and c=2. The reduction ends with 254 bms but the most significant one is // for example consider bitsize=253 and c=2. The reduction ends with 254 bms but the most significant one is
// guranteed to be zero since the scalars are 253b. // guaranteed to be zero since the scalars are 253b.
nof_bms_per_msm = target_windows_count; nof_bms_per_msm = target_windows_count;
nof_empty_bms_per_batch = target_windows_count - bitsize; nof_empty_bms_per_batch = target_windows_count - bitsize;
nof_bms_in_batch = nof_bms_per_msm * batch_size; nof_bms_in_batch = nof_bms_per_msm * batch_size;

View File

@@ -73,7 +73,7 @@ namespace msm {
bool is_big_triangle; /**< Whether to do "bucket accumulation" serially. Decreases computational complexity bool is_big_triangle; /**< Whether to do "bucket accumulation" serially. Decreases computational complexity
* but also greatly decreases parallelism, so only suitable for large batches of MSMs. * but also greatly decreases parallelism, so only suitable for large batches of MSMs.
* Default value: false. */ * Default value: false. */
bool is_async; /**< Whether to run the MSM asyncronously. If set to true, the MSM function will be bool is_async; /**< Whether to run the MSM asynchronously. If set to true, the MSM function will be
* non-blocking and you'd need to synchronize it explicitly by running * non-blocking and you'd need to synchronize it explicitly by running
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the MSM * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the MSM
* function will block the current CPU thread. */ * function will block the current CPU thread. */
@@ -103,7 +103,7 @@ namespace msm {
* *
* **Note:** this function is still WIP and the following [MSMConfig](@ref MSMConfig) members do not yet have any * **Note:** this function is still WIP and the following [MSMConfig](@ref MSMConfig) members do not yet have any
* effect: `precompute_factor` (always equals 1) and `ctx.device_id` (0 device is always used). * effect: `precompute_factor` (always equals 1) and `ctx.device_id` (0 device is always used).
* Also, it's currently better to use `batch_size=1` in most cases (expept with dealing with very many MSMs). * Also, it's currently better to use `batch_size=1` in most cases (except when dealing with very many MSMs).
*/ */
template <typename S, typename A, typename P> template <typename S, typename A, typename P>
cudaError_t MSM(S* scalars, A* points, int msm_size, MSMConfig& config, P* results); cudaError_t MSM(S* scalars, A* points, int msm_size, MSMConfig& config, P* results);

View File

@@ -260,7 +260,7 @@ namespace ntt {
* @param n_twiddles Size of `d_twiddles` * @param n_twiddles Size of `d_twiddles`
* @param batch_size The size of the batch; the length of `d_inout` is `n` * `batch_size`. * @param batch_size The size of the batch; the length of `d_inout` is `n` * `batch_size`.
* @param inverse true for iNTT * @param inverse true for iNTT
* @param coset should be array of lenght n or a nullptr if NTT is not computed on a coset * @param coset should be array of length n or a nullptr if NTT is not computed on a coset
* @param stream CUDA stream * @param stream CUDA stream
* @param is_async if false, perform sync of the supplied CUDA stream at the end of processing * @param is_async if false, perform sync of the supplied CUDA stream at the end of processing
* @param d_output Output array * @param d_output Output array

View File

@@ -76,7 +76,7 @@ namespace ntt {
* `Ordering::kNN`. */ * `Ordering::kNN`. */
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */ bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */ bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
bool is_async; /**< Whether to run the NTT asyncronously. If set to `true`, the NTT function will be bool is_async; /**< Whether to run the NTT asynchronously. If set to `true`, the NTT function will be
* non-blocking and you'd need to synchronize it explicitly by running * non-blocking and you'd need to synchronize it explicitly by running
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the NTT * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the NTT
* function will block the current CPU thread. */ * function will block the current CPU thread. */

View File

@@ -147,7 +147,7 @@ Poseidon<S>::hash_blocks(const S* inp, size_t blocks, S* out, HashType hash_type
CHK_IF_RETURN(cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream)); CHK_IF_RETURN(cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream));
// This is where the input matrix of size Arity x NumberOfBlocks is // This is where the input matrix of size Arity x NumberOfBlocks is
// padded and coppied to device in a T x NumberOfBlocks matrix // padded and copied to device in a T x NumberOfBlocks matrix
CHK_IF_RETURN(cudaMemcpy2DAsync( CHK_IF_RETURN(cudaMemcpy2DAsync(
states, this->t * sizeof(S), // Device pointer and device pitch states, this->t * sizeof(S), // Device pointer and device pitch
inp, (this->t - 1) * sizeof(S), // Host pointer and pitch inp, (this->t - 1) * sizeof(S), // Host pointer and pitch

View File

@@ -119,10 +119,10 @@ public:
return ExtensionField{real_prod + i_sq_times_im, re_im + im_re}; return ExtensionField{real_prod + i_sq_times_im, re_im + im_re};
} }
template <uint32_t mutliplier, unsigned REDUCTION_SIZE = 1> template <uint32_t multiplier, unsigned REDUCTION_SIZE = 1>
static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField& xs) static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField& xs)
{ {
return {FF::template mul_unsigned<mutliplier>(xs.real), FF::template mul_unsigned<mutliplier>(xs.imaginary)}; return {FF::template mul_unsigned<multiplier>(xs.real), FF::template mul_unsigned<multiplier>(xs.imaginary)};
} }
template <unsigned MODULUS_MULTIPLE = 1> template <unsigned MODULUS_MULTIPLE = 1>

View File

@@ -8,7 +8,7 @@
* refactoring it is low in the priority list. * refactoring it is low in the priority list.
* *
* Documentation of methods is intended to explain inner workings to developers working on icicle. In its current state * Documentation of methods is intended to explain inner workings to developers working on icicle. In its current state
* it mostly explains modular mutliplication and related methods. One important quirk of modern CUDA that's affecting * it mostly explains modular multiplication and related methods. One important quirk of modern CUDA that's affecting
* most methods is explained by [Niall Emmart](https://youtu.be/KAWlySN7Hm8?si=h7nzDujnvubWXeDX&t=4039). In short, when * most methods is explained by [Niall Emmart](https://youtu.be/KAWlySN7Hm8?si=h7nzDujnvubWXeDX&t=4039). In short, when
* 64-bit MAD (`r = a * b + c`) instructions get compiled down to SASS (CUDA assembly) they require two-register values * 64-bit MAD (`r = a * b + c`) instructions get compiled down to SASS (CUDA assembly) they require two-register values
* `r` and `c` to start from even register (e.g. `r` can live in registers 20 and 21, or 14 and 15, but not 15 and 16). * `r` and `c` to start from even register (e.g. `r` can live in registers 20 and 21, or 14 and 15, but not 15 and 16).
@@ -490,7 +490,7 @@ public:
__align__(16) uint32_t odd[TLC - 1]; __align__(16) uint32_t odd[TLC - 1];
size_t i; size_t i;
// `b[0]` is \f$ 2^{32} \f$ minus the last limb of prime modulus. Because most scalar (and some base) primes // `b[0]` is \f$ 2^{32} \f$ minus the last limb of prime modulus. Because most scalar (and some base) primes
// are neccessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to // are necessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to
// less efficient SASS generated by nvcc, so this case needed separate handling. // less efficient SASS generated by nvcc, so this case needed separate handling.
if (b[0] == UINT32_MAX) { if (b[0] == UINT32_MAX) {
add_sub_u32_device<true, false>(cs.limbs, a, even, TLC); add_sub_u32_device<true, false>(cs.limbs, a, even, TLC);
@@ -747,7 +747,7 @@ public:
* This method reduces a Wide number `xs` modulo `p` and returns the result as a Field element. * This method reduces a Wide number `xs` modulo `p` and returns the result as a Field element.
* *
* It is assumed that the high `2 * slack_bits` bits of `xs` are unset which is always the case for the product of 2 * It is assumed that the high `2 * slack_bits` bits of `xs` are unset which is always the case for the product of 2
* numbers with thier high `slack_bits` unset. Larger Wide numbers should be reduced by subtracting an appropriate * numbers with their high `slack_bits` unset. Larger Wide numbers should be reduced by subtracting an appropriate
* factor of `modulus_squared` first. * factor of `modulus_squared` first.
* *
* This function implements ["multi-precision Barrett"](https://github.com/ingonyama-zk/modular_multiplication). As * This function implements ["multi-precision Barrett"](https://github.com/ingonyama-zk/modular_multiplication). As
@@ -830,7 +830,7 @@ public:
return mul * xs; return mul * xs;
} }
template <uint32_t mutliplier, class T, unsigned REDUCTION_SIZE = 1> template <uint32_t multiplier, class T, unsigned REDUCTION_SIZE = 1>
static constexpr HOST_DEVICE_INLINE T mul_unsigned(const T& xs) static constexpr HOST_DEVICE_INLINE T mul_unsigned(const T& xs)
{ {
T rs = {}; T rs = {};
@@ -840,11 +840,11 @@ public:
#pragma unroll #pragma unroll
#endif #endif
for (unsigned i = 0; i < 32; i++) { for (unsigned i = 0; i < 32; i++) {
if (mutliplier & (1 << i)) { if (multiplier & (1 << i)) {
rs = is_zero ? temp : (rs + temp); rs = is_zero ? temp : (rs + temp);
is_zero = false; is_zero = false;
} }
if (mutliplier & ((1 << (31 - i) - 1) << (i + 1))) break; if (multiplier & ((1 << (31 - i) - 1) << (i + 1))) break;
temp = temp + temp; temp = temp + temp;
} }
return rs; return rs;

View File

@@ -16,7 +16,7 @@ namespace vec_ops {
* @param vec_b Second input vector. * @param vec_b Second input vector.
* @param n Size of vectors `vec_a` and `vec_b`. * @param n Size of vectors `vec_a` and `vec_b`.
* @param is_on_device If true, inputs and outputs are on device, if false - on the host. * @param is_on_device If true, inputs and outputs are on device, if false - on the host.
* @param is_montgomery If true, inputs are expected to be in Montgomery form and results are retured in Montgomery * @param is_montgomery If true, inputs are expected to be in Montgomery form and results are returned in Montgomery
* form. If false - inputs and outputs are non-Montgomery. * form. If false - inputs and outputs are non-Montgomery.
* @param ctx [DeviceContext](@ref device_context::DeviceContext) used in this method. * @param ctx [DeviceContext](@ref device_context::DeviceContext) used in this method.
* @param result Resulting vector - element-wise product of `vec_a` and `vec_b`, can be the same pointer as `vec_b`. * @param result Resulting vector - element-wise product of `vec_a` and `vec_b`, can be the same pointer as `vec_b`.

View File

@@ -1,6 +1,15 @@
#!/bin/bash #!/bin/bash
status=0 status=0
if [[ $(codespell --skip ./**/target,./**/build -I .codespellignore 2>&1) ]];
then
echo "There are typos in some of the files you've changed. Please run the following to check what they are:"
echo "codespell --skip ./**/target,./**/build -I .codespellignore"
echo ""
status=1
fi
# Run clang-format on CUDA, C, and CPP files # Run clang-format on CUDA, C, and CPP files
# clang-format writes to stderr in dry-run mode. In order to capture the output to detect if there are changes needed we redirect stderr to stdin # clang-format writes to stderr in dry-run mode. In order to capture the output to detect if there are changes needed we redirect stderr to stdin
if [[ $(find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]]; if [[ $(find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]];

View File

@@ -53,7 +53,7 @@ pub struct MSMConfig<'a> {
/// decreases parallelism, so only suitable for large batches of MSMs. Default value: false. /// decreases parallelism, so only suitable for large batches of MSMs. Default value: false.
pub is_big_triangle: bool, pub is_big_triangle: bool,
/// Whether to run the MSM asyncronously. If set to `true`, the MSM function will be non-blocking /// Whether to run the MSM asynchronously. If set to `true`, the MSM function will be non-blocking
/// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`. /// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
/// If set to `false`, the MSM function will block the current CPU thread. /// If set to `false`, the MSM function will block the current CPU thread.
pub is_async: bool, pub is_async: bool,

View File

@@ -54,7 +54,7 @@ pub struct NTTConfig<'a, S> {
pub ordering: Ordering, pub ordering: Ordering,
are_inputs_on_device: bool, are_inputs_on_device: bool,
are_outputs_on_device: bool, are_outputs_on_device: bool,
/// Whether to run the NTT asyncronously. If set to `true`, the NTT function will be non-blocking and you'd need to synchronize /// Whether to run the NTT asynchronously. If set to `true`, the NTT function will be non-blocking and you'd need to synchronize
/// it explicitly by running `stream.synchronize()`. If set to false, the NTT function will block the current CPU thread. /// it explicitly by running `stream.synchronize()`. If set to false, the NTT function will block the current CPU thread.
pub is_async: bool, pub is_async: bool,
} }