From 4ef7a73efeaca528fa60225a2928a806725012d4 Mon Sep 17 00:00:00 2001
From: Arthur Meyre
Date: Thu, 8 Dec 2022 14:54:39 +0100
Subject: [PATCH] chore(tools): add tasks tools to escape latex equations in docs

- add all checks to pcc and run that in CI
---
 .cargo/config.toml                |   2 +
 .github/workflows/cargo_build.yml |  32 +--
 Cargo.toml                        |   2 +-
 Makefile                          |  16 +-
 tasks/Cargo.toml                  |  12 +
 tasks/src/format_latex_doc.rs     | 453 ++++++++++++++++++++++++++++++
 tasks/src/main.rs                 |  88 ++++++
 tasks/src/utils.rs                |  50 ++++
 8 files changed, 623 insertions(+), 32 deletions(-)
 create mode 100644 .cargo/config.toml
 create mode 100644 tasks/Cargo.toml
 create mode 100644 tasks/src/format_latex_doc.rs
 create mode 100644 tasks/src/main.rs
 create mode 100644 tasks/src/utils.rs

diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 000000000..46cd4526b
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,2 @@
+[alias]
+xtask = "run --manifest-path ./tasks/Cargo.toml --"
diff --git a/.github/workflows/cargo_build.yml b/.github/workflows/cargo_build.yml
index 830cb34f2..7649f9f59 100644
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -28,53 +28,25 @@ jobs:
         run: |
           echo "rs-toolchain=$(make rs_toolchain)" >> "${GITHUB_OUTPUT}"
 
-      - name: Check format
+      - name: Run pcc checks
         run: |
-          make check_fmt
-
-      - name: Build doc
-        run: |
-          make doc
-
-      - name: Build tests without running them
-        run: |
-          make check_compile_tests
-
-      - name: Clippy boolean
-        run: |
-          make clippy_boolean
+          make pcc
 
       - name: Build Release boolean
         run: |
           make build_boolean
 
-      - name: Clippy shortint
-        run: |
-          make clippy_shortint
-
       - name: Build Release shortint
         run: |
           make build_shortint
 
-      - name: Clippy shortint and boolean
-        run: |
-          make clippy
-
       - name: Build Release shortint and boolean
         run: |
           make build_boolean_and_shortint
 
-      - name: C API Clippy
-        run: |
-          make clippy_c_api
-
       - name: Build Release c_api
         run: |
           make build_c_api
 
-      - name: wasm API Clippy
-        run: |
-          make clippy_js_wasm_api
-
       # The wasm build check is a bit annoying to set-up here and is done during the tests in
       # aws_tfhe_tests.yml
diff --git a/Cargo.toml b/Cargo.toml
index 403b36a51..ad8a3efbd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [workspace]
 resolver = "2"
-members = ["tfhe"]
+members = ["tfhe", "tasks"]
 
 [profile.bench]
 lto = "fat"
diff --git a/Makefile b/Makefile
index 09481f0c6..048ec0e48 100644
--- a/Makefile
+++ b/Makefile
@@ -79,8 +79,13 @@ clippy_js_wasm_api: install_rs_check_toolchain
 		--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api \
 		-p tfhe -- --no-deps -D warnings
 
+.PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
+clippy_tasks:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		-p tasks -- --no-deps -D warnings
+
 .PHONY: clippy_all # Run all clippy targets
-clippy_all: clippy clippy_c_api clippy_js_wasm_api
+clippy_all: clippy clippy_boolean clippy_shortint clippy_c_api clippy_js_wasm_api clippy_tasks
 
 .PHONY: gen_key_cache # Run the script to generate keys and cache them for shortint tests
 gen_key_cache: install_rs_build_toolchain
@@ -143,6 +148,15 @@ doc: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint --no-deps
 
+.PHONY: format_doc_latex # Format the documentation latex equations to avoid broken rendering.
+format_doc_latex:
+	cargo xtask format_latex_doc
+	@"$(MAKE)" --no-print-directory fmt
+	@printf "\n===============================\n\n"
+	@printf "Please manually inspect changes made by format_latex_doc, rustfmt can break equations \
+	if the line length is exceeded\n"
+	@printf "\n===============================\n"
+
 .PHONY: check_compile_tests # Build tests in debug without running them
 check_compile_tests:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
diff --git a/tasks/Cargo.toml b/tasks/Cargo.toml
new file mode 100644
index 000000000..cda35921d
--- /dev/null
+++ b/tasks/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "tasks"
+version = "0.0.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+clap = "3.1"
+lazy_static = "1.4"
+log = "0.4"
+simplelog = "0.12"
diff --git a/tasks/src/format_latex_doc.rs b/tasks/src/format_latex_doc.rs
new file mode 100644
index 000000000..6504c79ef
--- /dev/null
+++ b/tasks/src/format_latex_doc.rs
@@ -0,0 +1,453 @@
+use crate::utils::project_root;
+use std::io::{Error, ErrorKind};
+use std::{fmt, fs};
+
+fn recurse_find_rs_files(
+    root_dir: std::path::PathBuf,
+    rs_files: &mut Vec<std::path::PathBuf>,
+    at_root: bool,
+) {
+    for curr_entry in root_dir.read_dir().unwrap() {
+        let curr_path = curr_entry.unwrap().path().canonicalize().unwrap();
+        if curr_path.is_file() {
+            if let Some(extension) = curr_path.extension() {
+                if extension == "rs" {
+                    rs_files.push(curr_path);
+                }
+            }
+        } else if curr_path.is_dir() {
+            if at_root {
+                // Hardcoded ignores for root .git and target
+                match curr_path.file_name().unwrap().to_str().unwrap() {
+                    ".git" => continue,
+                    "target" => continue,
+                    _ => recurse_find_rs_files(curr_path.to_path_buf(), rs_files, false),
+                };
+            } else {
+                recurse_find_rs_files(curr_path.to_path_buf(), rs_files, false);
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+struct LatexEscapeToolError {
+    details: String,
+}
+
+impl LatexEscapeToolError {
+    fn new(msg: &str) -> LatexEscapeToolError {
+        LatexEscapeToolError {
+            details: msg.to_string(),
+        }
+    }
+}
+
+impl fmt::Display for LatexEscapeToolError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.details)
+    }
+}
+
+impl std::error::Error for LatexEscapeToolError {}
+
+const DOC_TEST_START: &str = "///";
+const DOC_COMMENT_START: &str = "//!";
+const BACKSLASH_UTF8_LEN: usize = '\\'.len_utf8();
+
+enum LineType {
+    DocTest { code_block_limit: bool },
+    DocComment { code_block_limit: bool },
+    EmptyLine,
+    Other,
+}
+
+fn get_line_type_and_trimmed_line(line: &str) -> (LineType, &str) {
+    let mut trimmed_line = line.trim_start();
+    let line_type = if trimmed_line.starts_with(DOC_COMMENT_START) {
+        trimmed_line = trimmed_line
+            .strip_prefix(DOC_COMMENT_START)
+            .unwrap()
+            .trim_start();
+        let has_code_block_limit = trimmed_line.starts_with("```");
+        LineType::DocComment {
+            code_block_limit: has_code_block_limit,
+        }
+    } else if trimmed_line.starts_with(DOC_TEST_START) {
+        trimmed_line = trimmed_line
+            .strip_prefix(DOC_TEST_START)
+            .unwrap()
+            .trim_start();
+        let has_code_block_limit = trimmed_line.starts_with("```");
+        LineType::DocTest {
+            code_block_limit: has_code_block_limit,
+        }
+    } else if trimmed_line.is_empty() {
+        LineType::EmptyLine
+    } else {
+        LineType::Other
+    };
+    (line_type, trimmed_line)
+}
+
+struct CommentContent<'a> {
+    is_in_code_block: bool,
+    line_start: &'a str,
+    line_content: &'a str,
+}
+
+fn find_contiguous_doc_comment<'a>(
+    lines: &[&'a str],
+    start_line_idx: usize,
+) -> (Vec<CommentContent<'a>>, usize) {
+    let mut doc_comment_end_line_idx = start_line_idx + 1;
+
+    let mut is_in_code_block = false;
+    let mut contiguous_doc_comment = Vec::<CommentContent>::new();
+
+    for (line_idx, line) in lines.iter().enumerate().skip(start_line_idx) {
+        let (line_type, line_content) = get_line_type_and_trimmed_line(line);
+
+        let line_start = &line[..line.len() - line_content.len()];
+        // If there is an empty line we are still in the DocComment
+        let line_type = if let LineType::EmptyLine = line_type {
+            LineType::DocComment {
+                code_block_limit: false,
+            }
+        } else {
+            line_type
+        };
+
+        match line_type {
+            LineType::DocComment { code_block_limit } => {
+                if code_block_limit {
+                    // We have found a code block limit, either starting or ending, toggle the
+                    // flag
+                    is_in_code_block = !is_in_code_block;
+                };
+                contiguous_doc_comment.push(CommentContent {
+                    is_in_code_block,
+                    line_start,
+                    line_content,
+                });
+                // For now the only thing we know is that the next line is potentially the end of
+                // the comment block, required if a file is a giant comment block to have the proper
+                // bound
+                doc_comment_end_line_idx = line_idx + 1;
+            }
+            _ => {
+                // We are sure that the current line is the end of the comment block
+                doc_comment_end_line_idx = line_idx;
+                break;
+            }
+        };
+    }
+    (contiguous_doc_comment, doc_comment_end_line_idx)
+}
+
+fn find_contiguous_doc_test<'a>(
+    lines: &[&'a str],
+    start_line_idx: usize,
+) -> (Vec<CommentContent<'a>>, usize) {
+    let mut doc_test_end_line_idx = start_line_idx + 1;
+
+    let mut is_in_code_block = false;
+    let mut contiguous_doc_test = Vec::<CommentContent>::new();
+
+    for (line_idx, line) in lines.iter().enumerate().skip(start_line_idx) {
+        let (line_type, line_content) = get_line_type_and_trimmed_line(line);
+
+        let line_start = &line[..line.len() - line_content.len()];
+        // If there is an empty line we are still in the DocTest
+        let line_type = if let LineType::EmptyLine = line_type {
+            LineType::DocTest {
+                code_block_limit: false,
+            }
+        } else {
+            line_type
+        };
+
+        match line_type {
+            LineType::DocTest { code_block_limit } => {
+                if code_block_limit {
+                    // We have found a code block limit, either starting or ending, toggle the
+                    // flag
+                    is_in_code_block = !is_in_code_block;
+                };
+                contiguous_doc_test.push(CommentContent {
+                    is_in_code_block,
+                    line_start,
+                    line_content,
+                });
+                // For now the only thing we know is that the next line is potentially the end of
+                // the comment block, required if a file is a giant comment block to have the proper
+                // bound
+                doc_test_end_line_idx = line_idx + 1;
+            }
+            _ => {
+                // We are sure that the current line is the end of the comment block
+                doc_test_end_line_idx = line_idx;
+                break;
+            }
+        };
+    }
+    (contiguous_doc_test, doc_test_end_line_idx)
+}
+
+fn find_contiguous_part_in_doc_test_or_comment(
+    part_is_code_block: bool,
+    full_doc_comment_content: &Vec<CommentContent>,
+    part_start_idx: usize,
+) -> (usize, usize) {
+    let mut next_line_idx = part_start_idx + 1;
+    loop {
+        // We have exhausted the doc comment content, break
+        if next_line_idx == full_doc_comment_content.len() {
+            break;
+        }
+
+        let CommentContent {
+            is_in_code_block: next_line_is_in_code_block,
+            line_start: _,
+            line_content: _,
+        } = full_doc_comment_content[next_line_idx];
+
+        // We check if the next line is in a different part, if so we break
+        if next_line_is_in_code_block != part_is_code_block {
+            break;
+        }
+        next_line_idx += 1;
+    }
+    // next_line_idx points to the end of the part and is therefore returned as the part_stop_idx
+    (part_start_idx, next_line_idx)
+}
+
+enum LatexEquationKind {
+    Inline,
+    Multiline,
+    NotAnEquation,
+}
+
+fn escape_underscores_rewrite_equations(
+    comment_to_rewrite: &[CommentContent],
+    rewritten_content: &mut String,
+) -> Result<(), LatexEscapeToolError> {
+    let mut latex_equation_kind = LatexEquationKind::NotAnEquation;
+    for CommentContent {
+        is_in_code_block: _,
+        line_start,
+        line_content,
+    } in comment_to_rewrite.iter()
+    {
+        rewritten_content.push_str(line_start);
+        let mut previous_char = '\0';
+        let mut chars = line_content.chars().peekable();
+        while let Some(current_char) = chars.next() {
+            match (previous_char, current_char) {
+                ('$', '$') => {
+                    match latex_equation_kind {
+                        LatexEquationKind::Inline => {
+                            // Problem we find an opening $$ after an opening $, return an error
+                            return Err(LatexEscapeToolError::new(
+                                "Found an opening '$' without a corresponding closing '$'",
+                            ));
+                        }
+                        LatexEquationKind::Multiline => {
+                            // Closing $$, no more in a latex equation
+                            latex_equation_kind = LatexEquationKind::NotAnEquation
+                        }
+                        LatexEquationKind::NotAnEquation => {
+                            // Opening $$, in a multiline latex equation
+                            latex_equation_kind = LatexEquationKind::Multiline
+                        }
+                    };
+                }
+                (_, '$') => {
+                    let is_inline_marker = chars.peek() != Some(&'$');
+                    if is_inline_marker {
+                        match latex_equation_kind {
+                            LatexEquationKind::Multiline => {
+                                // Problem we find an opening $ after an opening $$, return an error
+                                return Err(LatexEscapeToolError::new(
+                                    "Found an opening '$$' without a corresponding closing '$$'",
+                                ));
+                            }
+                            LatexEquationKind::Inline => {
+                                // Closing $, no more in a latex equation
+                                latex_equation_kind = LatexEquationKind::NotAnEquation
+                            }
+                            LatexEquationKind::NotAnEquation => {
+                                // Opening $, in an inline latex equation
+                                latex_equation_kind = LatexEquationKind::Inline
+                            }
+                        };
+                    }
+                    // If the marker is not an inline marker but a multiline marker let the other
+                    // case manage it at the next iteration
+                }
+                // If the _ is not escaped and we are in an equation we need to escape it
+                (prev, '_') if prev != '\\' => match latex_equation_kind {
+                    LatexEquationKind::NotAnEquation => (),
+                    _ => rewritten_content.push('\\'),
+                },
+                _ => (),
+            }
+            rewritten_content.push(current_char);
+            previous_char = current_char;
+        }
+    }
+    Ok(())
+}
+
+fn process_doc_lines_until_impossible<'a>(
+    lines: &[&'a str],
+    rewritten_content: &'a mut String,
+    comment_search_fn: fn(&[&'a str], usize) -> (Vec<CommentContent<'a>>, usize),
+    start_line_idx: usize,
+) -> Result<usize, LatexEscapeToolError> {
+    let (full_doc_content, doc_end_line_idx) = comment_search_fn(lines, start_line_idx);
+
+    // Now we find code blocks parts OR pure comments parts
+    let mut current_line_in_doc_idx = 0;
+    while current_line_in_doc_idx < full_doc_content.len() {
+        let CommentContent {
+            is_in_code_block,
+            line_start: _,
+            line_content: _,
+        } = full_doc_content[current_line_in_doc_idx];
+
+        let (current_part_start_idx, current_part_stop_idx) =
+            find_contiguous_part_in_doc_test_or_comment(
+                is_in_code_block,
+                &full_doc_content,
+                current_line_in_doc_idx,
+            );
+
+        let current_part_content = &full_doc_content[current_part_start_idx..current_part_stop_idx];
+
+        // The current part is a code block
+        if is_in_code_block {
+            for CommentContent {
+                is_in_code_block: _,
+                line_start,
+                line_content,
+            } in current_part_content.iter()
+            {
+                // We can just push the content unmodified
+                rewritten_content.push_str(line_start);
+                rewritten_content.push_str(line_content);
+            }
+        } else {
+            // The part is a pure comment, we need to rewrite equations
+            escape_underscores_rewrite_equations(current_part_content, rewritten_content)?;
+        }
+        current_line_in_doc_idx += current_part_content.len();
+    }
+
+    Ok(doc_end_line_idx)
+}
+
+fn process_non_doc_lines_until_impossible(
+    lines: &Vec<&str>,
+    rewritten_content: &mut String,
+    mut line_idx: usize,
+) -> usize {
+    while line_idx < lines.len() {
+        let line = lines[line_idx];
+        match get_line_type_and_trimmed_line(line) {
+            (LineType::Other, _) => {
+                rewritten_content.push_str(line);
+                line_idx += 1;
+            }
+            _ => break,
+        };
+    }
+    line_idx
+}
+
+fn escape_underscore_in_latex_doc_in_file(
+    file_path: &std::path::Path,
+) -> Result<(), LatexEscapeToolError> {
+    let file_name = file_path.to_str().unwrap();
+    let content = std::fs::read_to_string(file_name).unwrap();
+
+    let number_of_underscores = content.matches('_').count();
+    let potential_additional_capacity_required = number_of_underscores * BACKSLASH_UTF8_LEN;
+
+    // Enough for the length of the original string + the length if we had to escape *all* `_`
+    // which won't happen but avoids reallocations
+    let mut rewritten_content =
+        String::with_capacity(content.len() + potential_additional_capacity_required);
+
+    let content_by_lines: Vec<&str> = content.split_inclusive('\n').collect();
+    let mut line_idx = 0_usize;
+
+    while line_idx < content_by_lines.len() {
+        let line = content_by_lines[line_idx];
+        let (line_type, _) = get_line_type_and_trimmed_line(line);
+        line_idx = match line_type {
+            LineType::DocComment {
+                code_block_limit: _,
+            } => process_doc_lines_until_impossible(
+                &content_by_lines,
+                &mut rewritten_content,
+                find_contiguous_doc_comment,
+                line_idx,
+            )?,
+            LineType::DocTest {
+                code_block_limit: _,
+            } => process_doc_lines_until_impossible(
+                &content_by_lines,
+                &mut rewritten_content,
+                find_contiguous_doc_test,
+                line_idx,
+            )?,
+            LineType::Other => process_non_doc_lines_until_impossible(
+                &content_by_lines,
+                &mut rewritten_content,
+                line_idx,
+            ),
+            LineType::EmptyLine => {
+                rewritten_content.push_str(line);
+                line_idx + 1
+            }
+        };
+    }
+
+    fs::write(file_name, rewritten_content).unwrap();
+    Ok(())
+}
+
+pub fn escape_underscore_in_latex_doc() -> Result<(), Error> {
+    let project_root = project_root();
+    let mut src_files: Vec<std::path::PathBuf> = Vec::new();
+    recurse_find_rs_files(project_root, &mut src_files, true);
+
+    println!("Found {} files to process.", src_files.len());
+
+    let mut files_with_problems: Vec<(std::path::PathBuf, LatexEscapeToolError)> = Vec::new();
+
+    println!("Processing...");
+    for file in src_files.into_iter() {
+        if let Err(err) = escape_underscore_in_latex_doc_in_file(&file) {
+            files_with_problems.push((file, err));
+        }
+    }
+    println!("Done!");
+
+    if !files_with_problems.is_empty() {
+        for (file_with_problem, error) in files_with_problems.iter() {
+            println!(
+                "File: {}, has error: {}",
+                file_with_problem.display(),
+                error
+            );
+        }
+        return Err(Error::new(
+            ErrorKind::InvalidInput,
+            "Issues while processing files, check log.",
+        ));
+    }
+
+    Ok(())
+}
diff --git a/tasks/src/main.rs b/tasks/src/main.rs
new file mode 100644
index 000000000..7e1058224
--- /dev/null
+++ b/tasks/src/main.rs
@@ -0,0 +1,88 @@
+#[macro_use]
+extern crate lazy_static;
+use clap::{Arg, Command};
+use log::LevelFilter;
+use simplelog::{ColorChoice, CombinedLogger, Config, TermLogger, TerminalMode};
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::Ordering::Relaxed;
+
+mod format_latex_doc;
+mod utils;
+
+// -------------------------------------------------------------------------------------------------
+// CONSTANTS
+// -------------------------------------------------------------------------------------------------
+lazy_static! {
+    static ref DRY_RUN: AtomicBool = AtomicBool::new(false);
+    static ref ROOT_DIR: PathBuf = utils::project_root();
+    static ref ENV_TARGET_NATIVE: utils::Environment = {
+        let mut env = HashMap::new();
+        env.insert("RUSTFLAGS", "-Ctarget-cpu=native");
+        env
+    };
+}
+
+// -------------------------------------------------------------------------------------------------
+// MACROS
+// -------------------------------------------------------------------------------------------------
+
+#[macro_export]
+macro_rules! cmd {
+    (<$env: ident> $cmd: expr) => {
+        $crate::utils::execute($cmd, Some(&*$env), Some(&*$crate::ROOT_DIR))
+    };
+    ($cmd: expr) => {
+        $crate::utils::execute($cmd, None, Some(&*$crate::ROOT_DIR))
+    };
+}
+
+// -------------------------------------------------------------------------------------------------
+// MAIN
+// -------------------------------------------------------------------------------------------------
+
+fn main() -> Result<(), std::io::Error> {
+    // We parse the input args
+    let matches = Command::new("tasks")
+        .about("Rust scripts runner")
+        .arg(
+            Arg::new("verbose")
+                .short('v')
+                .long("verbose")
+                .help("Prints debug messages"),
+        )
+        .arg(
+            Arg::new("dry-run")
+                .long("dry-run")
+                .help("Do not execute the commands"),
+        )
+        .subcommand(Command::new("format_latex_doc").about("Escape underscores in latex equations"))
+        .arg_required_else_help(true)
+        .get_matches();
+
+    // We initialize the logger with proper verbosity
+    let verb = if matches.contains_id("verbose") {
+        LevelFilter::Debug
+    } else {
+        LevelFilter::Info
+    };
+    CombinedLogger::init(vec![TermLogger::new(
+        verb,
+        Config::default(),
+        TerminalMode::Mixed,
+        ColorChoice::Auto,
+    )])
+    .unwrap();
+
+    // We set the dry-run mode if present
+    if matches.contains_id("dry-run") {
+        DRY_RUN.store(true, Relaxed);
+    }
+
+    if matches.subcommand_matches("format_latex_doc").is_some() {
+        format_latex_doc::escape_underscore_in_latex_doc()?;
+    }
+
+    Ok(())
+}
diff --git a/tasks/src/utils.rs b/tasks/src/utils.rs
new file mode 100644
index 000000000..f4cd05e0e
--- /dev/null
+++ b/tasks/src/utils.rs
@@ -0,0 +1,50 @@
+use log::{debug, info};
+use std::collections::HashMap;
+use std::io::{Error, ErrorKind};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+use std::sync::atomic::Ordering::Relaxed;
+
+pub type Environment = HashMap<&'static str, &'static str>;
+
+#[allow(dead_code)]
+pub fn execute(cmd: &str, env: Option<&Environment>, cwd: Option<&PathBuf>) -> Result<(), Error> {
+    info!("Executing {}", cmd);
+    debug!("Env {:?}", env);
+    debug!("Cwd {:?}", cwd);
+    if crate::DRY_RUN.load(Relaxed) {
+        info!("Skipping execution because of --dry-run mode");
+        return Ok(());
+    }
+    let mut command = Command::new("sh");
+    command
+        .arg("-c")
+        .arg(cmd)
+        .stderr(Stdio::inherit())
+        .stdout(Stdio::inherit());
+    if let Some(env) = env {
+        for (key, val) in env.iter() {
+            command.env(key, val);
+        }
+    }
+    if let Some(cwd) = cwd {
+        command.current_dir(cwd);
+    }
+    let output = command.output()?;
+    if !output.status.success() {
+        Err(Error::new(
+            ErrorKind::Other,
+            "Command exited with nonzero status.",
+        ))
+    } else {
+        Ok(())
+    }
+}
+
+pub fn project_root() -> PathBuf {
+    Path::new(&env!("CARGO_MANIFEST_DIR"))
+        .ancestors()
+        .nth(1)
+        .unwrap()
+        .to_path_buf()
+}