chore(tools): add tasks tools to escape latex equations in docs

- add all checks to pcc and run that in CI
2026-01-08 22:28:01 -05:00 · 2022-12-08 14:54:39 +01:00
parent 1a72c4a814
commit 4ef7a73efe
8 changed files with 623 additions and 32 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -0,0 +1,2 @@
+[alias]
+xtask = "run --manifest-path ./tasks/Cargo.toml --"
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -28,53 +28,25 @@ jobs:
        run: |
          echo "rs-toolchain=$(make rs_toolchain)" >> "${GITHUB_OUTPUT}"

-      - name: Check format
+      - name: Run pcc checks
        run: |
-          make check_fmt
-
-      - name: Build doc
-        run: |
-          make doc
-
-      - name: Build tests without running them
-        run: |
-          make check_compile_tests
-
-      - name: Clippy boolean
-        run: |
-          make clippy_boolean
+          make pcc

      - name: Build Release boolean
        run: |
          make build_boolean

-      - name: Clippy shortint
-        run: |
-          make clippy_shortint
-
      - name: Build Release shortint
        run: |
          make build_shortint

-      - name: Clippy shortint and boolean
-        run: |
-          make clippy
-
      - name: Build Release shortint and boolean
        run: |
          make build_boolean_and_shortint

-      - name: C API Clippy
-        run: |
-          make clippy_c_api
-
      - name: Build Release c_api
        run: |
          make build_c_api

-      - name: wasm API Clippy
-        run: |
-          make clippy_js_wasm_api
-
      # The wasm build check is a bit annoying to set-up here and is done during the tests in
      # aws_tfhe_tests.yml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [workspace]
 resolver = "2"
-members = ["tfhe"]
+members = ["tfhe", "tasks"]

 [profile.bench]
 lto = "fat"
--- a/Makefile
+++ b/Makefile
@@ -79,8 +79,13 @@ clippy_js_wasm_api: install_rs_check_toolchain
 		--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api \
 		-p tfhe -- --no-deps -D warnings

+.PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
+clippy_tasks: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		-p tasks -- --no-deps -D warnings
+
 .PHONY: clippy_all # Run all clippy targets
-clippy_all: clippy clippy_c_api clippy_js_wasm_api
+clippy_all: clippy clippy_boolean clippy_shortint clippy_c_api clippy_js_wasm_api clippy_tasks

 .PHONY: gen_key_cache # Run the script to generate keys and cache them for shortint tests
 gen_key_cache: install_rs_build_toolchain
@@ -143,6 +148,15 @@ doc: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint --no-deps

+.PHONY: format_doc_latex # Format the documentation latex equations to avoid broken rendering.
+format_doc_latex:
+	cargo xtask format_latex_doc
+	@"$(MAKE)" --no-print-directory fmt
+	@printf "\n===============================\n\n"
+	@printf "Please manually inspect changes made by format_latex_doc, rustfmt can break equations \
+	if the line length is exceeded\n"
+	@printf "\n===============================\n"
+
 .PHONY: check_compile_tests # Build tests in debug without running them
 check_compile_tests:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
--- a/tasks/Cargo.toml
+++ b/tasks/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "tasks"
+version = "0.0.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+clap = "3.1"
+lazy_static = "1.4"
+log = "0.4"
+simplelog = "0.12"
--- a/tasks/src/format_latex_doc.rs
+++ b/tasks/src/format_latex_doc.rs
@@ -0,0 +1,453 @@
+use crate::utils::project_root;
+use std::io::{Error, ErrorKind};
+use std::{fmt, fs};
+
+/// Recursively collects the canonicalized path of every `.rs` file found under `root_dir`
+/// into `rs_files`.
+///
+/// When `at_root` is `true`, the direct sub-directories named `.git` and `target` are skipped.
+/// Nested calls always pass `false`, so those names are only ignored at the top level.
+///
+/// # Panics
+///
+/// Panics if a directory cannot be read, if an entry cannot be canonicalized, or (at the root
+/// level only) if a directory has no file name or a name that is not valid UTF-8.
+fn recurse_find_rs_files(
+    root_dir: std::path::PathBuf,
+    rs_files: &mut Vec<std::path::PathBuf>,
+    at_root: bool,
+) {
+    for entry in root_dir.read_dir().unwrap() {
+        let path = entry.unwrap().path().canonicalize().unwrap();
+
+        if path.is_file() {
+            if path.extension().map_or(false, |ext| ext == "rs") {
+                rs_files.push(path);
+            }
+            continue;
+        }
+
+        if !path.is_dir() {
+            continue;
+        }
+
+        // Hardcoded ignores for root .git and target, the name is only inspected at the root
+        let is_ignored =
+            at_root && matches!(path.file_name().unwrap().to_str().unwrap(), ".git" | "target");
+        if !is_ignored {
+            recurse_find_rs_files(path, rs_files, false);
+        }
+    }
+}
+
+/// Error type of the latex escaping tool, it only carries a human readable message.
+#[derive(Debug)]
+struct LatexEscapeToolError {
+    details: String,
+}
+
+impl LatexEscapeToolError {
+    /// Builds an error whose message is a copy of `msg`.
+    fn new(msg: &str) -> Self {
+        Self {
+            details: msg.to_owned(),
+        }
+    }
+}
+
+impl fmt::Display for LatexEscapeToolError {
+    /// Writes the stored message as is.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(&self.details)
+    }
+}
+
+impl std::error::Error for LatexEscapeToolError {}
+
+/// Marker opening an outer doc comment line (`///`); lines starting with it are classified as
+/// `LineType::DocTest`.
+const DOC_TEST_START: &str = "///";
+/// Marker opening an inner doc comment line (`//!`); lines starting with it are classified as
+/// `LineType::DocComment`.
+const DOC_COMMENT_START: &str = "//!";
+/// Number of bytes needed to encode a backslash in UTF-8, used to size the output buffer.
+const BACKSLASH_UTF8_LEN: usize = '\\'.len_utf8();
+
+/// Classification of a source line, as computed by `get_line_type_and_trimmed_line`.
+enum LineType {
+    /// Line starting (after indentation) with `///`; `code_block_limit` is `true` when the
+    /// comment text itself starts with a triple backtick fence.
+    DocTest { code_block_limit: bool },
+    /// Line starting (after indentation) with `//!`; `code_block_limit` has the same meaning as
+    /// for `DocTest`.
+    DocComment { code_block_limit: bool },
+    /// Line that is empty once its leading whitespace is removed.
+    EmptyLine,
+    /// Any other line.
+    Other,
+}
+
+/// Classifies `line` and returns the text that remains once its prefix has been removed.
+///
+/// For doc comment lines the returned text is what follows the `//!` or `///` marker, without
+/// leading whitespace. For every other line it is the line without its leading whitespace.
+fn get_line_type_and_trimmed_line(line: &str) -> (LineType, &str) {
+    let without_indent = line.trim_start();
+
+    // The `//!` marker is tested before the `///` one
+    if let Some(after_marker) = without_indent.strip_prefix(DOC_COMMENT_START) {
+        let comment_text = after_marker.trim_start();
+        let line_type = LineType::DocComment {
+            code_block_limit: comment_text.starts_with("```"),
+        };
+        return (line_type, comment_text);
+    }
+
+    if let Some(after_marker) = without_indent.strip_prefix(DOC_TEST_START) {
+        let comment_text = after_marker.trim_start();
+        let line_type = LineType::DocTest {
+            code_block_limit: comment_text.starts_with("```"),
+        };
+        return (line_type, comment_text);
+    }
+
+    let line_type = if without_indent.is_empty() {
+        LineType::EmptyLine
+    } else {
+        LineType::Other
+    };
+    (line_type, without_indent)
+}
+
+/// One line of a contiguous doc comment, split into its prefix and its text.
+struct CommentContent<'a> {
+    /// Whether the line belongs to a fenced code block of the doc comment.
+    is_in_code_block: bool,
+    /// Leading part of the line that precedes `line_content`.
+    line_start: &'a str,
+    /// Remainder of the line, as returned by `get_line_type_and_trimmed_line`.
+    line_content: &'a str,
+}
+
+/// Collects the run of `//!` doc comment lines starting at `start_line_idx`.
+///
+/// Empty lines are considered part of the run. Each collected line records whether it sits in a
+/// fenced code block: the flag is flipped on every line whose text starts with a triple backtick
+/// fence, before the line is recorded.
+///
+/// Returns the collected lines together with the index of the line that ended the run, or one
+/// past the last collected line when `lines` is exhausted first.
+fn find_contiguous_doc_comment<'a>(
+    lines: &[&'a str],
+    start_line_idx: usize,
+) -> (Vec<CommentContent<'a>>, usize) {
+    let mut in_code_block = false;
+    let mut collected = Vec::new();
+    // Bound returned when the loop below never meets a line outside of the comment, required
+    // if a file is a giant comment block to have the proper bound
+    let mut end_line_idx = start_line_idx + 1;
+
+    for (idx, line) in lines.iter().enumerate().skip(start_line_idx) {
+        let (line_type, line_content) = get_line_type_and_trimmed_line(line);
+
+        let toggles_code_block = match line_type {
+            LineType::DocComment { code_block_limit } => code_block_limit,
+            // An empty line does not end the comment and is never a code block limit
+            LineType::EmptyLine => false,
+            LineType::DocTest { .. } | LineType::Other => {
+                // We are sure that the current line is the end of the comment block
+                end_line_idx = idx;
+                break;
+            }
+        };
+        // A code block limit either starts or ends a code block
+        in_code_block ^= toggles_code_block;
+
+        collected.push(CommentContent {
+            is_in_code_block: in_code_block,
+            line_start: &line[..line.len() - line_content.len()],
+            line_content,
+        });
+        end_line_idx = idx + 1;
+    }
+
+    (collected, end_line_idx)
+}
+
+/// Collects the run of `///` doc comment lines starting at `start_line_idx`.
+///
+/// Empty lines are considered part of the run. Each collected line records whether it sits in a
+/// fenced code block: the flag is flipped on every line whose text starts with a triple backtick
+/// fence, before the line is recorded.
+///
+/// Returns the collected lines together with the index of the line that ended the run, or one
+/// past the last collected line when `lines` is exhausted first.
+fn find_contiguous_doc_test<'a>(
+    lines: &[&'a str],
+    start_line_idx: usize,
+) -> (Vec<CommentContent<'a>>, usize) {
+    let mut in_code_block = false;
+    let mut collected = Vec::new();
+    // Bound returned when the loop below never meets a line outside of the comment, required
+    // if a file is a giant comment block to have the proper bound
+    let mut end_line_idx = start_line_idx + 1;
+
+    for (idx, line) in lines.iter().enumerate().skip(start_line_idx) {
+        let (line_type, line_content) = get_line_type_and_trimmed_line(line);
+
+        let toggles_code_block = match line_type {
+            LineType::DocTest { code_block_limit } => code_block_limit,
+            // An empty line does not end the comment and is never a code block limit
+            LineType::EmptyLine => false,
+            LineType::DocComment { .. } | LineType::Other => {
+                // We are sure that the current line is the end of the comment block
+                end_line_idx = idx;
+                break;
+            }
+        };
+        // A code block limit either starts or ends a code block
+        in_code_block ^= toggles_code_block;
+
+        collected.push(CommentContent {
+            is_in_code_block: in_code_block,
+            line_start: &line[..line.len() - line_content.len()],
+            line_content,
+        });
+        end_line_idx = idx + 1;
+    }
+
+    (collected, end_line_idx)
+}
+
+/// Finds the end of the part (code block or plain comment) that starts at `part_start_idx`.
+///
+/// The part extends over the following lines for as long as their `is_in_code_block` flag is
+/// equal to `part_is_code_block`. The line at `part_start_idx` itself is not inspected.
+///
+/// Returns `(part_start_idx, part_stop_idx)` where `part_stop_idx` is exclusive.
+///
+/// # Panics
+///
+/// Panics if `part_start_idx + 1` is greater than the number of lines.
+fn find_contiguous_part_in_doc_test_or_comment(
+    part_is_code_block: bool,
+    full_doc_comment_content: &[CommentContent],
+    part_start_idx: usize,
+) -> (usize, usize) {
+    let first_candidate_idx = part_start_idx + 1;
+    // First following line that belongs to a different part, or the end of the doc comment when
+    // the content is exhausted first
+    let part_stop_idx = full_doc_comment_content[first_candidate_idx..]
+        .iter()
+        .position(|line| line.is_in_code_block != part_is_code_block)
+        .map_or(full_doc_comment_content.len(), |offset| {
+            first_candidate_idx + offset
+        });
+    (part_start_idx, part_stop_idx)
+}
+
+/// State of the equation scanner in `escape_underscores_rewrite_equations`.
+enum LatexEquationKind {
+    /// Inside an equation opened by a single `$`.
+    Inline,
+    /// Inside an equation opened by `$$`.
+    Multiline,
+    /// Outside of any equation.
+    NotAnEquation,
+}
+
+/// Appends the lines of `comment_to_rewrite` to `rewritten_content`, inserting a backslash in
+/// front of every `_` that is inside a latex equation and not already preceded by a backslash.
+///
+/// Equations are delimited by `$` (inline) or `$$` (multiline) and may span several lines of the
+/// comment part. The previous character used to detect `$$` and `\_` is reset at the start of
+/// every line.
+///
+/// # Errors
+///
+/// Returns an error when a `$$` is met inside an inline equation, when a lone `$` is met inside
+/// a multiline equation, or when an equation is still open once the whole part has been
+/// scanned. `rewritten_content` may already contain partial output when an error is returned.
+fn escape_underscores_rewrite_equations(
+    comment_to_rewrite: &[CommentContent],
+    rewritten_content: &mut String,
+) -> Result<(), LatexEscapeToolError> {
+    let mut latex_equation_kind = LatexEquationKind::NotAnEquation;
+    for CommentContent {
+        is_in_code_block: _,
+        line_start,
+        line_content,
+    } in comment_to_rewrite.iter()
+    {
+        rewritten_content.push_str(line_start);
+        let mut previous_char = '\0';
+        let mut chars = line_content.chars().peekable();
+        while let Some(current_char) = chars.next() {
+            match (previous_char, current_char) {
+                ('$', '$') => {
+                    match latex_equation_kind {
+                        LatexEquationKind::Inline => {
+                            // Problem we find an opening $$ after an opening $, return an error
+                            return Err(LatexEscapeToolError::new(
+                                "Found an opening '$' without a corresponding closing '$'",
+                            ));
+                        }
+                        LatexEquationKind::Multiline => {
+                            // Closing $$, no more in a latex equation
+                            latex_equation_kind = LatexEquationKind::NotAnEquation
+                        }
+                        LatexEquationKind::NotAnEquation => {
+                            // Opening $$, in a multiline latex equation
+                            latex_equation_kind = LatexEquationKind::Multiline
+                        }
+                    };
+                }
+                (_, '$') => {
+                    let is_inline_marker = chars.peek() != Some(&'$');
+                    if is_inline_marker {
+                        match latex_equation_kind {
+                            LatexEquationKind::Multiline => {
+                                // Problem we find an opening $ after an opening $$, return an error
+                                return Err(LatexEscapeToolError::new(
+                                    "Found an opening '$$' without a corresponding closing '$$'",
+                                ));
+                            }
+                            LatexEquationKind::Inline => {
+                                // Closing $, no more in a latex equation
+                                latex_equation_kind = LatexEquationKind::NotAnEquation
+                            }
+                            LatexEquationKind::NotAnEquation => {
+                                // Opening $, in an inline latex equation
+                                latex_equation_kind = LatexEquationKind::Inline
+                            }
+                        };
+                    }
+                    // If the marker is not an inline marker but a multiline marker let the other
+                    // case manage it at the next iteration
+                }
+                // If the _ is not escaped and we are in an equation we need to escape it
+                (prev, '_') if prev != '\\' => match latex_equation_kind {
+                    LatexEquationKind::NotAnEquation => (),
+                    _ => rewritten_content.push('\\'),
+                },
+                _ => (),
+            }
+            rewritten_content.push(current_char);
+            previous_char = current_char;
+        }
+    }
+
+    // An equation still open once the whole part has been scanned means the delimiters are
+    // unbalanced: every `_` after the stray delimiter has been escaped, so report the problem
+    // instead of silently accepting the rewritten content
+    match latex_equation_kind {
+        LatexEquationKind::Inline => Err(LatexEscapeToolError::new(
+            "Found an opening '$' without a corresponding closing '$'",
+        )),
+        LatexEquationKind::Multiline => Err(LatexEscapeToolError::new(
+            "Found an opening '$$' without a corresponding closing '$$'",
+        )),
+        LatexEquationKind::NotAnEquation => Ok(()),
+    }
+}
+
+/// Rewrites the contiguous doc comment starting at `start_line_idx` into `rewritten_content`.
+///
+/// The comment lines are gathered with `comment_search_fn`, then split into parts that are
+/// either code blocks, copied unmodified, or plain comment text, in which equations get their
+/// underscores escaped.
+///
+/// Returns the end line index reported by `comment_search_fn`.
+///
+/// # Errors
+///
+/// Forwards the error returned by `escape_underscores_rewrite_equations`.
+fn process_doc_lines_until_impossible<'a>(
+    lines: &[&'a str],
+    rewritten_content: &'a mut String,
+    comment_search_fn: fn(&[&'a str], usize) -> (Vec<CommentContent<'a>>, usize),
+    start_line_idx: usize,
+) -> Result<usize, LatexEscapeToolError> {
+    let (doc_lines, doc_end_line_idx) = comment_search_fn(lines, start_line_idx);
+
+    // Walk the doc comment part by part, a part being either a code block or pure comment text
+    let mut part_start_idx = 0;
+    while part_start_idx < doc_lines.len() {
+        let part_is_code_block = doc_lines[part_start_idx].is_in_code_block;
+
+        let (part_begin_idx, part_stop_idx) = find_contiguous_part_in_doc_test_or_comment(
+            part_is_code_block,
+            &doc_lines,
+            part_start_idx,
+        );
+        let part = &doc_lines[part_begin_idx..part_stop_idx];
+
+        if part_is_code_block {
+            // Code blocks are pushed unmodified
+            for code_line in part {
+                rewritten_content.push_str(code_line.line_start);
+                rewritten_content.push_str(code_line.line_content);
+            }
+        } else {
+            // Pure comment text, equations need to be rewritten
+            escape_underscores_rewrite_equations(part, rewritten_content)?;
+        }
+
+        part_start_idx += part.len();
+    }
+
+    Ok(doc_end_line_idx)
+}
+
+/// Copies lines unmodified into `rewritten_content`, starting at `line_idx`, for as long as
+/// they are classified as `LineType::Other`.
+///
+/// Returns the index of the first line that was not copied, which is `lines.len()` when the end
+/// of `lines` is reached.
+fn process_non_doc_lines_until_impossible(
+    lines: &[&str],
+    rewritten_content: &mut String,
+    mut line_idx: usize,
+) -> usize {
+    while let Some(line) = lines.get(line_idx) {
+        // Doc comment and empty lines are handled by the caller
+        if !matches!(get_line_type_and_trimmed_line(line).0, LineType::Other) {
+            break;
+        }
+        rewritten_content.push_str(line);
+        line_idx += 1;
+    }
+    line_idx
+}
+
+/// Escapes the underscores found in the latex equations of the doc comments of `file_path`,
+/// rewriting the file in place.
+///
+/// The file is only written when the processed content differs from the original one, so that
+/// files without anything to escape are left untouched.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be read (this includes content that is not valid
+/// UTF-8), if a doc comment contains unbalanced equation delimiters, or if the file cannot be
+/// written. The file is left unmodified when the processing fails.
+fn escape_underscore_in_latex_doc_in_file(
+    file_path: &std::path::Path,
+) -> Result<(), LatexEscapeToolError> {
+    // I/O failures are reported through the returned error so that the caller can list every
+    // problematic file instead of panicking on the first one
+    let content = fs::read_to_string(file_path)
+        .map_err(|err| LatexEscapeToolError::new(&format!("Could not read file: {}", err)))?;
+
+    let number_of_underscores = content.matches('_').count();
+    let potential_additional_capacity_required = number_of_underscores * BACKSLASH_UTF8_LEN;
+
+    // Enough for the length of the original string + the length if we had to escape *all* `_`
+    // which won't happen but avoids reallocations
+    let mut rewritten_content =
+        String::with_capacity(content.len() + potential_additional_capacity_required);
+
+    // Lines keep their line ending so that pushing them back reproduces the file exactly
+    let content_by_lines: Vec<&str> = content.split_inclusive('\n').collect();
+    let mut line_idx = 0_usize;
+
+    while line_idx < content_by_lines.len() {
+        let line = content_by_lines[line_idx];
+        let (line_type, _) = get_line_type_and_trimmed_line(line);
+        line_idx = match line_type {
+            LineType::DocComment {
+                code_block_limit: _,
+            } => process_doc_lines_until_impossible(
+                &content_by_lines,
+                &mut rewritten_content,
+                find_contiguous_doc_comment,
+                line_idx,
+            )?,
+            LineType::DocTest {
+                code_block_limit: _,
+            } => process_doc_lines_until_impossible(
+                &content_by_lines,
+                &mut rewritten_content,
+                find_contiguous_doc_test,
+                line_idx,
+            )?,
+            LineType::Other => process_non_doc_lines_until_impossible(
+                &content_by_lines,
+                &mut rewritten_content,
+                line_idx,
+            ),
+            LineType::EmptyLine => {
+                rewritten_content.push_str(line);
+                line_idx + 1
+            }
+        };
+    }
+
+    // Skip the write when nothing changed to avoid touching every source file of the project
+    if rewritten_content != content {
+        fs::write(file_path, rewritten_content)
+            .map_err(|err| LatexEscapeToolError::new(&format!("Could not write file: {}", err)))?;
+    }
+    Ok(())
+}
+
+/// Escapes the underscores of the latex equations found in the doc comments of every `.rs`
+/// file located under the project root.
+///
+/// # Errors
+///
+/// Every file is processed even if a previous one failed. The failures are printed once the
+/// processing is done and an `ErrorKind::InvalidInput` error is returned if there was any.
+pub fn escape_underscore_in_latex_doc() -> Result<(), Error> {
+    let mut src_files: Vec<std::path::PathBuf> = Vec::new();
+    recurse_find_rs_files(project_root(), &mut src_files, true);
+
+    println!("Found {} files to process.", src_files.len());
+
+    println!("Processing...");
+    let files_with_problems: Vec<(std::path::PathBuf, LatexEscapeToolError)> = src_files
+        .into_iter()
+        .filter_map(|file| {
+            escape_underscore_in_latex_doc_in_file(&file)
+                .err()
+                .map(|err| (file, err))
+        })
+        .collect();
+    println!("Done!");
+
+    if files_with_problems.is_empty() {
+        return Ok(());
+    }
+
+    for (file_with_problem, error) in &files_with_problems {
+        println!(
+            "File: {}, has error: {}",
+            file_with_problem.display(),
+            error
+        );
+    }
+    Err(Error::new(
+        ErrorKind::InvalidInput,
+        "Issues while processing files, check log.",
+    ))
+}
--- a/tasks/src/main.rs
+++ b/tasks/src/main.rs
@@ -0,0 +1,88 @@
+#[macro_use]
+extern crate lazy_static;
+use clap::{Arg, Command};
+use log::LevelFilter;
+use simplelog::{ColorChoice, CombinedLogger, Config, TermLogger, TerminalMode};
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::Ordering::Relaxed;
+
+mod format_latex_doc;
+mod utils;
+
+// -------------------------------------------------------------------------------------------------
+// CONSTANTS
+// -------------------------------------------------------------------------------------------------
+lazy_static! {
+    // Set to `true` by `main` when `--dry-run` is passed, read by `utils::execute`.
+    static ref DRY_RUN: AtomicBool = AtomicBool::new(false);
+    // Project root as computed by `utils::project_root`, used as working directory by `cmd!`.
+    static ref ROOT_DIR: PathBuf = utils::project_root();
+    // Environment containing only `RUSTFLAGS=-Ctarget-cpu=native`.
+    static ref ENV_TARGET_NATIVE: utils::Environment = {
+        let mut env = HashMap::new();
+        env.insert("RUSTFLAGS", "-Ctarget-cpu=native");
+        env
+    };
+}
+
+// -------------------------------------------------------------------------------------------------
+// MACROS
+// -------------------------------------------------------------------------------------------------
+
+/// Runs a shell command through `utils::execute` with `ROOT_DIR` as working directory.
+///
+/// `cmd!(<ENV> "command")` passes the environment named `ENV` (dereferenced with `&*`), while
+/// `cmd!("command")` passes no environment. Both forms evaluate to the value returned by
+/// `utils::execute`.
+#[macro_export]
+macro_rules! cmd {
+    (<$env: ident> $cmd: expr) => {
+        $crate::utils::execute($cmd, Some(&*$env), Some(&*$crate::ROOT_DIR))
+    };
+    ($cmd: expr) => {
+        $crate::utils::execute($cmd, None, Some(&*$crate::ROOT_DIR))
+    };
+}
+
+// -------------------------------------------------------------------------------------------------
+// MAIN
+// -------------------------------------------------------------------------------------------------
+
+/// Entry point of the tasks runner: parses the command line, initializes the logger and the
+/// dry-run flag, then runs the requested subcommand.
+fn main() -> Result<(), std::io::Error> {
+    // We describe the accepted arguments and subcommands
+    let verbose_arg = Arg::new("verbose")
+        .short('v')
+        .long("verbose")
+        .help("Prints debug messages");
+    let dry_run_arg = Arg::new("dry-run")
+        .long("dry-run")
+        .help("Do not execute the commands");
+    let format_latex_doc_cmd =
+        Command::new("format_latex_doc").about("Escape underscores in latex equations");
+
+    // We parse the input args
+    let matches = Command::new("tasks")
+        .about("Rust scripts runner")
+        .arg(verbose_arg)
+        .arg(dry_run_arg)
+        .subcommand(format_latex_doc_cmd)
+        .arg_required_else_help(true)
+        .get_matches();
+
+    // We initialize the logger with proper verbosity
+    let verb = if matches.contains_id("verbose") {
+        LevelFilter::Debug
+    } else {
+        LevelFilter::Info
+    };
+    let term_logger = TermLogger::new(
+        verb,
+        Config::default(),
+        TerminalMode::Mixed,
+        ColorChoice::Auto,
+    );
+    CombinedLogger::init(vec![term_logger]).unwrap();
+
+    // We set the dry-run mode if present
+    if matches.contains_id("dry-run") {
+        DRY_RUN.store(true, Relaxed);
+    }
+
+    // We run the requested subcommand, if any
+    match matches.subcommand() {
+        Some(("format_latex_doc", _)) => format_latex_doc::escape_underscore_in_latex_doc(),
+        _ => Ok(()),
+    }
+}
--- a/tasks/src/utils.rs
+++ b/tasks/src/utils.rs
@@ -0,0 +1,50 @@
+use log::{debug, info};
+use std::collections::HashMap;
+use std::io::{Error, ErrorKind};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+use std::sync::atomic::Ordering::Relaxed;
+
+/// Environment variables (name to value) handed to `execute`.
+pub type Environment = HashMap<&'static str, &'static str>;
+
+/// Runs `cmd` through `sh -c`, with stdout and stderr inherited from the current process.
+///
+/// The variables of `env`, if any, are added to the environment of the command and `cwd`, if
+/// any, is used as its working directory. Nothing is executed when `DRY_RUN` is set.
+///
+/// # Errors
+///
+/// Returns the error met while running the command, or an `ErrorKind::Other` error when the
+/// command exits with a non-zero status.
+#[allow(dead_code)]
+pub fn execute(cmd: &str, env: Option<&Environment>, cwd: Option<&PathBuf>) -> Result<(), Error> {
+    info!("Executing {}", cmd);
+    debug!("Env {:?}", env);
+    debug!("Cwd {:?}", cwd);
+
+    if crate::DRY_RUN.load(Relaxed) {
+        info!("Skipping execution because of --dry-run mode");
+        return Ok(());
+    }
+
+    let mut command = Command::new("sh");
+    command
+        .args(["-c", cmd])
+        .stderr(Stdio::inherit())
+        .stdout(Stdio::inherit());
+    if let Some(variables) = env {
+        command.envs(variables);
+    }
+    if let Some(working_dir) = cwd {
+        command.current_dir(working_dir);
+    }
+
+    if command.output()?.status.success() {
+        Ok(())
+    } else {
+        Err(Error::new(
+            ErrorKind::Other,
+            "Command exited with nonzero status.",
+        ))
+    }
+}
+
+/// Returns the parent directory of the directory containing the manifest of this crate
+/// (`CARGO_MANIFEST_DIR`, read at compile time).
+///
+/// # Panics
+///
+/// Panics if the manifest directory has no parent.
+pub fn project_root() -> PathBuf {
+    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
+    manifest_dir.parent().unwrap().to_path_buf()
+}