chore(tools): add tasks tools to escape latex equations in docs

- add all checks to pcc and run that in CI
2026-01-08 22:28:01 -05:00 · 2022-12-08 14:54:39 +01:00
parent 1a72c4a814
commit 4ef7a73efe
8 changed files with 623 additions and 32 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -0,0 +1,2 @@
 # `cargo xtask <args>` expands to `cargo run --manifest-path ./tasks/Cargo.toml -- <args>`,
 # i.e. it builds and runs the helper binary of the `tasks` crate with the given arguments.
 [alias]
 xtask = "run --manifest-path ./tasks/Cargo.toml --"
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -28,53 +28,25 @@ jobs:
        run: |
          echo "rs-toolchain=$(make rs_toolchain)" >> "${GITHUB_OUTPUT}"
-      - name: Check format
+      - name: Run pcc checks
        run: |
-          make check_fmt
+          make pcc
      - name: Build doc
        run: |
          make doc
      - name: Build tests without running them
        run: |
          make check_compile_tests
      - name: Clippy boolean
        run: |
          make clippy_boolean
      - name: Build Release boolean
        run: |
          make build_boolean
      - name: Clippy shortint
        run: |
          make clippy_shortint
      - name: Build Release shortint
        run: |
          make build_shortint
      - name: Clippy shortint and boolean
        run: |
          make clippy
      - name: Build Release shortint and boolean
        run: |
          make build_boolean_and_shortint
      - name: C API Clippy
        run: |
          make clippy_c_api
      - name: Build Release c_api
        run: |
          make build_c_api
      - name: wasm API Clippy
        run: |
          make clippy_js_wasm_api
      # The wasm build check is a bit annoying to set-up here and is done during the tests in
      # aws_tfhe_tests.yml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [workspace]
 resolver = "2"
-members = ["tfhe"]
+members = ["tfhe", "tasks"]
 [profile.bench]
 lto = "fat"
--- a/Makefile
+++ b/Makefile
@@ -79,8 +79,13 @@ clippy_js_wasm_api: install_rs_check_toolchain
 		--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api \
 		-p tfhe -- --no-deps -D warnings
 .PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
 clippy_tasks: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		-p tasks -- --no-deps -D warnings
 .PHONY: clippy_all # Run all clippy targets
-clippy_all: clippy clippy_c_api clippy_js_wasm_api
+clippy_all: clippy clippy_boolean clippy_shortint clippy_c_api clippy_js_wasm_api clippy_tasks
 .PHONY: gen_key_cache # Run the script to generate keys and cache them for shortint tests
 gen_key_cache: install_rs_build_toolchain
@@ -143,6 +148,15 @@ doc: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint --no-deps
 # Runs the `format_latex_doc` xtask, then the `fmt` target on the result, and finally prints a
 # reminder to review the changes by hand.
 .PHONY: format_doc_latex # Format the documentation latex equations to avoid broken rendering.
 format_doc_latex:
 	cargo xtask format_latex_doc
 	@"$(MAKE)" --no-print-directory fmt
 	@printf "\n===============================\n\n"
 	@printf "Please manually inspect changes made by format_latex_doc, rustfmt can break equations \
 	if the line length is exceeded\n"
 	@printf "\n===============================\n"
 .PHONY: check_compile_tests # Build tests in debug without running them
 check_compile_tests:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
--- a/tasks/Cargo.toml
+++ b/tasks/Cargo.toml
@@ -0,0 +1,12 @@
 [package]
 name = "tasks"
 version = "0.0.0"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
 # 3.2 is the first clap release providing `ArgMatches::contains_id`, which `src/main.rs` uses.
 clap = "3.2"
 lazy_static = "1.4"
 log = "0.4"
 simplelog = "0.12"
--- a/tasks/src/format_latex_doc.rs
+++ b/tasks/src/format_latex_doc.rs
@@ -0,0 +1,453 @@
 use crate::utils::project_root;
 use std::io::{Error, ErrorKind};
 use std::{fmt, fs};
 /// Recursively collects every file with the `rs` extension found under `root_dir` into
 /// `rs_files`.
 ///
 /// Every directory entry is canonicalized before being inspected. When `at_root` is `true`, the
 /// `.git` and `target` directories located directly in `root_dir` are not explored; all the
 /// directories below the root level are explored.
 ///
 /// # Panics
 ///
 /// Panics if a directory cannot be read, if an entry cannot be canonicalized or, at the root
 /// level, if a directory has no name or a name which is not valid UTF-8.
 fn recurse_find_rs_files(
    root_dir: std::path::PathBuf,
    rs_files: &mut Vec<std::path::PathBuf>,
    at_root: bool,
 ) {
    for dir_entry in root_dir.read_dir().unwrap() {
        let entry_path = dir_entry.unwrap().path().canonicalize().unwrap();
        if entry_path.is_file() {
            let is_rust_source = entry_path.extension().map_or(false, |ext| ext == "rs");
            if is_rust_source {
                rs_files.push(entry_path);
            }
            continue;
        }
        if !entry_path.is_dir() {
            continue;
        }
        // Hardcoded ignores for root .git and target, the name is only looked at for the root
        let is_ignored = at_root
            && matches!(
                entry_path.file_name().unwrap().to_str().unwrap(),
                ".git" | "target"
            );
        if !is_ignored {
            recurse_find_rs_files(entry_path, rs_files, false);
        }
    }
 }
 /// Error reported by the latex escaping tool when a file cannot be processed.
 #[derive(Debug)]
 struct LatexEscapeToolError {
    /// Human readable description of the problem.
    details: String,
 }
 impl LatexEscapeToolError {
    /// Builds an error whose description is a copy of `msg`.
    fn new(msg: &str) -> Self {
        Self {
            details: String::from(msg),
        }
    }
 }
 impl fmt::Display for LatexEscapeToolError {
    /// Writes the stored description as is.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.details)
    }
 }
 impl std::error::Error for LatexEscapeToolError {}
 // Prefix of the `///` documentation lines, which this tool calls "doc test" lines.
 const DOC_TEST_START: &str = "///";
 // Prefix of the `//!` documentation lines, which this tool calls "doc comment" lines.
 const DOC_COMMENT_START: &str = "//!";
 // Number of bytes taken by one escaping backslash, used to size the rewritten file buffer.
 const BACKSLASH_UTF8_LEN: usize = '\\'.len_utf8();
 /// Classification of a source line as computed by `get_line_type_and_trimmed_line`.
 enum LineType {
    /// Line starting, after its leading whitespace, with `///`. `code_block_limit` is `true` when
    /// the text following the prefix starts with a triple backtick fence.
    DocTest { code_block_limit: bool },
    /// Line starting, after its leading whitespace, with `//!`. `code_block_limit` is `true` when
    /// the text following the prefix starts with a triple backtick fence.
    DocComment { code_block_limit: bool },
    /// Line made of whitespace only.
    EmptyLine,
    /// Any line which is none of the above.
    Other,
 }
 /// Classifies `line` and returns its type together with its meaningful text.
 ///
 /// The leading whitespace is always removed. For documentation lines the `//!` or `///` prefix
 /// and the whitespace following it are removed as well, `//!` being checked first. The returned
 /// text keeps whatever ends the line (including a line break if `line` has one).
 fn get_line_type_and_trimmed_line(line: &str) -> (LineType, &str) {
    let without_indent = line.trim_start();
    if let Some(after_prefix) = without_indent.strip_prefix(DOC_COMMENT_START) {
        let doc_text = after_prefix.trim_start();
        let line_type = LineType::DocComment {
            code_block_limit: doc_text.starts_with("```"),
        };
        return (line_type, doc_text);
    }
    if let Some(after_prefix) = without_indent.strip_prefix(DOC_TEST_START) {
        let doc_text = after_prefix.trim_start();
        let line_type = LineType::DocTest {
            code_block_limit: doc_text.starts_with("```"),
        };
        return (line_type, doc_text);
    }
    // Not a documentation line: either blank or regular content
    let line_type = if without_indent.is_empty() {
        LineType::EmptyLine
    } else {
        LineType::Other
    };
    (line_type, without_indent)
 }
 /// One line of a contiguous documentation block, split in two parts which concatenate back to
 /// the original line.
 struct CommentContent<'a> {
    /// State of the code block flag once the line has been taken into account: the line opening a
    /// fenced code block is flagged `true`, the line closing it is flagged `false`.
    is_in_code_block: bool,
    /// Beginning of the original line, up to the start of `line_content`.
    line_start: &'a str,
    /// Text of the line as returned by `get_line_type_and_trimmed_line`.
    line_content: &'a str,
 }
 fn find_contiguous_doc_comment<'a>(
    lines: &[&'a str],
    start_line_idx: usize,
 ) -> (Vec<CommentContent<'a>>, usize) {
    let mut doc_comment_end_line_idx = start_line_idx + 1;
    let mut is_in_code_block = false;
    let mut contiguous_doc_comment = Vec::<CommentContent>::new();
    for (line_idx, line) in lines.iter().enumerate().skip(start_line_idx) {
        let (line_type, line_content) = get_line_type_and_trimmed_line(line);
        let line_start = &line[..line.len() - line_content.len()];
        // If there is an empty line we are still in the DocComment
        let line_type = if let LineType::EmptyLine = line_type {
            LineType::DocComment {
                code_block_limit: false,
            }
        } else {
            line_type
        };
        match line_type {
            LineType::DocComment { code_block_limit } => {
                if code_block_limit {
                    // We have found a code block limit, either starting or ending, toggle the
                    // flag
                    is_in_code_block = !is_in_code_block;
                };
                contiguous_doc_comment.push(CommentContent {
                    is_in_code_block,
                    line_start,
                    line_content,
                });
                // For now the only thing we know is that the next line is potentially the end of
                // the comment block, required if a file is a giant comment block to have the proper
                // bound
                doc_comment_end_line_idx = line_idx + 1;
            }
            _ => {
                // We are sure that the current line is the end of the comment block
                doc_comment_end_line_idx = line_idx;
                break;
            }
        };
    }
    (contiguous_doc_comment, doc_comment_end_line_idx)
 }
 fn find_contiguous_doc_test<'a>(
    lines: &[&'a str],
    start_line_idx: usize,
 ) -> (Vec<CommentContent<'a>>, usize) {
    let mut doc_test_end_line_idx = start_line_idx + 1;
    let mut is_in_code_block = false;
    let mut contiguous_doc_test = Vec::<CommentContent>::new();
    for (line_idx, line) in lines.iter().enumerate().skip(start_line_idx) {
        let (line_type, line_content) = get_line_type_and_trimmed_line(line);
        let line_start = &line[..line.len() - line_content.len()];
        // If there is an empty line we are still in the DocTest
        let line_type = if let LineType::EmptyLine = line_type {
            LineType::DocTest {
                code_block_limit: false,
            }
        } else {
            line_type
        };
        match line_type {
            LineType::DocTest { code_block_limit } => {
                if code_block_limit {
                    // We have found a code block limit, either starting or ending, toggle the
                    // flag
                    is_in_code_block = !is_in_code_block;
                };
                contiguous_doc_test.push(CommentContent {
                    is_in_code_block,
                    line_start,
                    line_content,
                });
                // For now the only thing we know is that the next line is potentially the end of
                // the comment block, required if a file is a giant comment block to have the proper
                // bound
                doc_test_end_line_idx = line_idx + 1;
            }
            _ => {
                // We are sure that the current line is the end of the comment block
                doc_test_end_line_idx = line_idx;
                break;
            }
        };
    }
    (contiguous_doc_test, doc_test_end_line_idx)
 }
 fn find_contiguous_part_in_doc_test_or_comment(
    part_is_code_block: bool,
    full_doc_comment_content: &Vec<CommentContent>,
    part_start_idx: usize,
 ) -> (usize, usize) {
    let mut next_line_idx = part_start_idx + 1;
    loop {
        // We have exhausted the doc comment content, break
        if next_line_idx == full_doc_comment_content.len() {
            break;
        }
        let CommentContent {
            is_in_code_block: next_line_is_in_code_block,
            line_start: _,
            line_content: _,
        } = full_doc_comment_content[next_line_idx];
        // We check if the next line is in a different part, if so we break
        if next_line_is_in_code_block != part_is_code_block {
            break;
        }
        next_line_idx += 1;
    }
    // next_line_idx points to the end of the part and is therefore returned as the part_stop_idx
    (part_start_idx, next_line_idx)
 }
 /// Kind of latex equation the parser is currently in while scanning documentation text.
 enum LatexEquationKind {
    /// Inside an equation opened by a single `$`.
    Inline,
    /// Inside an equation opened by `$$`.
    Multiline,
    /// Outside of any equation.
    NotAnEquation,
 }
 /// Appends the lines of `comment_to_rewrite` to `rewritten_content`, adding a backslash in front
 /// of every `_` which is inside a latex equation and not already preceded by a backslash.
 ///
 /// Equations are delimited by `$` (inline) or `$$` (multiline) markers and may span several
 /// lines. A `$$` marker is only recognized when both `$` are on the same line. Each `$` belongs
 /// to at most one marker, so `$$a$$$b$` is read as a multiline equation followed by an inline
 /// one.
 ///
 /// # Errors
 ///
 /// Returns an error when a `$$` marker is found inside an inline equation or when a `$` marker
 /// is found inside a multiline equation. The content pushed before the error is left in
 /// `rewritten_content`. An equation still open at the end of the input is not reported.
 fn escape_underscores_rewrite_equations(
    comment_to_rewrite: &[CommentContent],
    rewritten_content: &mut String,
 ) -> Result<(), LatexEscapeToolError> {
    let mut latex_equation_kind = LatexEquationKind::NotAnEquation;
    for CommentContent {
        line_start,
        line_content,
        ..
    } in comment_to_rewrite.iter()
    {
        rewritten_content.push_str(line_start);
        let mut previous_char = '\0';
        let mut chars = line_content.chars().peekable();
        while let Some(current_char) = chars.next() {
            // Set when the current char is the second `$` of a `$$` marker
            let mut completes_multiline_marker = false;
            match (previous_char, current_char) {
                ('$', '$') => {
                    completes_multiline_marker = true;
                    latex_equation_kind = match latex_equation_kind {
                        LatexEquationKind::Inline => {
                            // Problem we find an opening $$ after an opening $, return an error
                            return Err(LatexEscapeToolError::new(
                                "Found an opening '$' without a corresponding closing '$'",
                            ));
                        }
                        // Closing $$, no more in a latex equation
                        LatexEquationKind::Multiline => LatexEquationKind::NotAnEquation,
                        // Opening $$, in a multiline latex equation
                        LatexEquationKind::NotAnEquation => LatexEquationKind::Multiline,
                    };
                }
                (_, '$') => {
                    let is_inline_marker = chars.peek() != Some(&'$');
                    if is_inline_marker {
                        latex_equation_kind = match latex_equation_kind {
                            LatexEquationKind::Multiline => {
                                // Problem we find an opening $ after an opening $$, return an error
                                return Err(LatexEscapeToolError::new(
                                    "Found an opening '$$' without a corresponding closing '$$'",
                                ));
                            }
                            // Closing $, no more in a latex equation
                            LatexEquationKind::Inline => LatexEquationKind::NotAnEquation,
                            // Opening $, in an inline latex equation
                            LatexEquationKind::NotAnEquation => LatexEquationKind::Inline,
                        };
                    }
                    // If the marker is not an inline marker but a multiline marker let the other
                    // case manage it at the next iteration
                }
                // If the _ is not escaped and we are in an equation we need to escape it
                (prev, '_') if prev != '\\' => {
                    if !matches!(latex_equation_kind, LatexEquationKind::NotAnEquation) {
                        rewritten_content.push('\\');
                    }
                }
                _ => (),
            }
            rewritten_content.push(current_char);
            // A `$` consumed by a `$$` marker must not be paired again with the `$` that follows,
            // otherwise three `$` in a row would be read as two `$$` markers
            previous_char = if completes_multiline_marker {
                '\0'
            } else {
                current_char
            };
        }
    }
    Ok(())
 }
 /// Rewrites the documentation block starting at `start_line_idx` into `rewritten_content`.
 ///
 /// The block is located with `comment_search_fn`, then cut into parts which are either code
 /// blocks, copied untouched, or plain documentation, in which latex equations get their
 /// underscores escaped.
 ///
 /// Returns the line index reported by `comment_search_fn` as the end of the block, or the error
 /// raised while rewriting the equations.
 fn process_doc_lines_until_impossible<'a>(
    lines: &[&'a str],
    rewritten_content: &'a mut String,
    comment_search_fn: fn(&[&'a str], usize) -> (Vec<CommentContent<'a>>, usize),
    start_line_idx: usize,
 ) -> Result<usize, LatexEscapeToolError> {
    let (doc_lines, doc_end_line_idx) = comment_search_fn(lines, start_line_idx);
    // Walk the block part by part, a part being either a code block or pure comments
    let mut part_cursor = 0;
    while part_cursor < doc_lines.len() {
        let part_is_code_block = doc_lines[part_cursor].is_in_code_block;
        let (part_begin_idx, part_end_idx) = find_contiguous_part_in_doc_test_or_comment(
            part_is_code_block,
            &doc_lines,
            part_cursor,
        );
        let part = &doc_lines[part_begin_idx..part_end_idx];
        if part_is_code_block {
            // Code blocks are copied as they are
            for code_line in part {
                rewritten_content.push_str(code_line.line_start);
                rewritten_content.push_str(code_line.line_content);
            }
        } else {
            // Pure comments may contain equations which need to be rewritten
            escape_underscores_rewrite_equations(part, rewritten_content)?;
        }
        part_cursor += part.len();
    }
    Ok(doc_end_line_idx)
 }
 /// Copies lines to `rewritten_content`, starting at `line_idx`, for as long as they are
 /// classified as `LineType::Other`.
 ///
 /// Returns the index of the first line which was not copied, `line_idx` itself if nothing was
 /// copied.
 fn process_non_doc_lines_until_impossible(
    lines: &[&str],
    rewritten_content: &mut String,
    line_idx: usize,
 ) -> usize {
    let mut next_line_idx = line_idx;
    for line in lines.iter().skip(line_idx) {
        // Documentation and empty lines are handled by the caller
        if !matches!(get_line_type_and_trimmed_line(line).0, LineType::Other) {
            break;
        }
        rewritten_content.push_str(line);
        next_line_idx += 1;
    }
    next_line_idx
 }
 fn escape_underscore_in_latex_doc_in_file(
    file_path: &std::path::Path,
 ) -> Result<(), LatexEscapeToolError> {
    let file_name = file_path.to_str().unwrap();
    let content = std::fs::read_to_string(file_name).unwrap();
    let number_of_underscores = content.matches('_').count();
    let potential_additional_capacity_required = number_of_underscores * BACKSLASH_UTF8_LEN;
    // Enough for the length of the original string + the length if we had to escape *all* `_`
    // which won't happen but avoids reallocations
    let mut rewritten_content =
        String::with_capacity(content.len() + potential_additional_capacity_required);
    let content_by_lines: Vec<&str> = content.split_inclusive('\n').collect();
    let mut line_idx = 0_usize;
    while line_idx < content_by_lines.len() {
        let line = content_by_lines[line_idx];
        let (line_type, _) = get_line_type_and_trimmed_line(line);
        line_idx = match line_type {
            LineType::DocComment {
                code_block_limit: _,
            } => process_doc_lines_until_impossible(
                &content_by_lines,
                &mut rewritten_content,
                find_contiguous_doc_comment,
                line_idx,
            )?,
            LineType::DocTest {
                code_block_limit: _,
            } => process_doc_lines_until_impossible(
                &content_by_lines,
                &mut rewritten_content,
                find_contiguous_doc_test,
                line_idx,
            )?,
            LineType::Other => process_non_doc_lines_until_impossible(
                &content_by_lines,
                &mut rewritten_content,
                line_idx,
            ),
            LineType::EmptyLine => {
                rewritten_content.push_str(line);
                line_idx + 1
            }
        };
    }
    fs::write(file_name, rewritten_content).unwrap();
    Ok(())
 }
 /// Runs the underscore escaping on every `.rs` file found from the project root.
 ///
 /// All the files are processed even if some of them fail. The failures are printed once the
 /// processing is done and, if there is at least one, an `InvalidInput` error is returned.
 pub fn escape_underscore_in_latex_doc() -> Result<(), Error> {
    let mut src_files = Vec::new();
    recurse_find_rs_files(project_root(), &mut src_files, true);
    println!("Found {} files to process.", src_files.len());
    println!("Processing...");
    // Keep the files which could not be processed along with the reason
    let files_with_problems: Vec<(std::path::PathBuf, LatexEscapeToolError)> = src_files
        .into_iter()
        .filter_map(|file| match escape_underscore_in_latex_doc_in_file(&file) {
            Ok(()) => None,
            Err(err) => Some((file, err)),
        })
        .collect();
    println!("Done!");
    if files_with_problems.is_empty() {
        return Ok(());
    }
    for (file_with_problem, error) in &files_with_problems {
        println!(
            "File: {}, has error: {}",
            file_with_problem.display(),
            error
        );
    }
    Err(Error::new(
        ErrorKind::InvalidInput,
        "Issues while processing files, check log.",
    ))
 }
--- a/tasks/src/main.rs
+++ b/tasks/src/main.rs
@@ -0,0 +1,88 @@
 #[macro_use]
 extern crate lazy_static;
 use clap::{Arg, Command};
 use log::LevelFilter;
 use simplelog::{ColorChoice, CombinedLogger, Config, TermLogger, TerminalMode};
 use std::collections::HashMap;
 use std::path::PathBuf;
 use std::sync::atomic::AtomicBool;
 use std::sync::atomic::Ordering::Relaxed;
 mod format_latex_doc;
 mod utils;
 // -------------------------------------------------------------------------------------------------
 // CONSTANTS
 // -------------------------------------------------------------------------------------------------
 lazy_static! {
    // Set to true by `main` when `--dry-run` is passed, read by `utils::execute` to skip the
    // execution of commands.
    static ref DRY_RUN: AtomicBool = AtomicBool::new(false);
    // Root of the project, used as the working directory by the `cmd!` macro.
    static ref ROOT_DIR: PathBuf = utils::project_root();
    // Environment setting RUSTFLAGS to target the native CPU, usable with the
    // `cmd!(<ENV_TARGET_NATIVE> ...)` form of the macro.
    static ref ENV_TARGET_NATIVE: utils::Environment = {
        let mut env = HashMap::new();
        env.insert("RUSTFLAGS", "-Ctarget-cpu=native");
        env
    };
 }
 // -------------------------------------------------------------------------------------------------
 // MACROS
 // -------------------------------------------------------------------------------------------------
 // Runs a command through `utils::execute` with `ROOT_DIR` as working directory.
 //
 // `cmd!(<ENV> "command")` passes the environment named `ENV` (dereferenced, so a `lazy_static`
 // such as `ENV_TARGET_NATIVE` can be given), `cmd!("command")` passes no additional environment.
 #[macro_export]
 macro_rules! cmd {
    (<$env: ident> $cmd: expr) => {
        $crate::utils::execute($cmd, Some(&*$env), Some(&*$crate::ROOT_DIR))
    };
    ($cmd: expr) => {
        $crate::utils::execute($cmd, None, Some(&*$crate::ROOT_DIR))
    };
 }
 // -------------------------------------------------------------------------------------------------
 // MAIN
 // -------------------------------------------------------------------------------------------------
 /// Entry point of the tasks runner: parses the command line, sets up logging and the dry-run
 /// mode, then runs the requested subcommand.
 fn main() -> Result<(), std::io::Error> {
    // Description of the command line: two flags and the available subcommands
    let verbose_flag = Arg::new("verbose")
        .short('v')
        .long("verbose")
        .help("Prints debug messages");
    let dry_run_flag = Arg::new("dry-run")
        .long("dry-run")
        .help("Do not execute the commands");
    let format_latex_doc_command =
        Command::new("format_latex_doc").about("Escape underscores in latex equations");
    // We parse the input args
    let matches = Command::new("tasks")
        .about("Rust scripts runner")
        .arg(verbose_flag)
        .arg(dry_run_flag)
        .subcommand(format_latex_doc_command)
        .arg_required_else_help(true)
        .get_matches();
    // The logger verbosity depends on the verbose flag
    let verb = match matches.contains_id("verbose") {
        true => LevelFilter::Debug,
        false => LevelFilter::Info,
    };
    let term_logger = TermLogger::new(
        verb,
        Config::default(),
        TerminalMode::Mixed,
        ColorChoice::Auto,
    );
    CombinedLogger::init(vec![term_logger]).unwrap();
    // The dry-run mode is only switched on, it is off by default
    if matches.contains_id("dry-run") {
        DRY_RUN.store(true, Relaxed);
    }
    if matches.subcommand_matches("format_latex_doc").is_some() {
        format_latex_doc::escape_underscore_in_latex_doc()?;
    }
    Ok(())
 }
--- a/tasks/src/utils.rs
+++ b/tasks/src/utils.rs
@@ -0,0 +1,50 @@
 use log::{debug, info};
 use std::collections::HashMap;
 use std::io::{Error, ErrorKind};
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
 use std::sync::atomic::Ordering::Relaxed;
 /// Environment variables (name to value) that `execute` sets on the command it runs.
 pub type Environment = HashMap<&'static str, &'static str>;
 /// Runs `cmd` through `sh -c`, with the variables of `env` set and from the `cwd` directory when
 /// they are provided. The output of the command goes to the output of the current process.
 ///
 /// Nothing is executed, and `Ok(())` is returned, when the dry-run mode is on.
 ///
 /// # Errors
 ///
 /// Returns an error if the command cannot be run or if it exits with a nonzero status.
 #[allow(dead_code)]
 pub fn execute(cmd: &str, env: Option<&Environment>, cwd: Option<&PathBuf>) -> Result<(), Error> {
    info!("Executing {}", cmd);
    debug!("Env {:?}", env);
    debug!("Cwd {:?}", cwd);
    if crate::DRY_RUN.load(Relaxed) {
        info!("Skipping execution because of --dry-run mode");
        return Ok(());
    }
    let mut shell = Command::new("sh");
    shell
        .args(["-c", cmd])
        .stderr(Stdio::inherit())
        .stdout(Stdio::inherit());
    if let Some(variables) = env {
        shell.envs(variables);
    }
    if let Some(working_dir) = cwd {
        shell.current_dir(working_dir);
    }
    if shell.output()?.status.success() {
        return Ok(());
    }
    Err(Error::new(
        ErrorKind::Other,
        "Command exited with nonzero status.",
    ))
 }
 /// Returns the parent directory of the directory holding the manifest of this crate, as known
 /// at compile time through `CARGO_MANIFEST_DIR`.
 ///
 /// # Panics
 ///
 /// Panics if the manifest directory has no parent.
 pub fn project_root() -> PathBuf {
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    manifest_dir.parent().unwrap().to_path_buf()
 }
		`@@ -0,0 +1,2 @@`
							`[alias]`
							`xtask = "run --manifest-path ./tasks/Cargo.toml --"`