diff --git a/src/analyzer/mod.rs b/src/analyzer/mod.rs index 69ff3e09a..ebbf04b1e 100644 --- a/src/analyzer/mod.rs +++ b/src/analyzer/mod.rs @@ -609,6 +609,7 @@ impl Context { ast::Expression::FunctionCall(name, arguments) => { Expression::FunctionCall(self.namespaced(name), self.process_expressions(arguments)) } + ast::Expression::FreeInput(_) => panic!(), } } @@ -671,7 +672,8 @@ impl Context { self.evaluate_binary_operation(left, op, right) } ast::Expression::UnaryOperation(op, value) => self.evaluate_unary_operation(op, value), - ast::Expression::FunctionCall(_, _) => None, // TODO we should also try to evaluate through macro calls. + ast::Expression::FunctionCall(_, _) => None, + ast::Expression::FreeInput(_) => panic!(), } } diff --git a/src/bin/compiler.rs b/src/bin/compiler.rs index d0b6bf2ca..f7d6311ad 100644 --- a/src/bin/compiler.rs +++ b/src/bin/compiler.rs @@ -1,5 +1,14 @@ -use std::{env, path::Path}; +use std::{env, fs, path::Path}; fn main() { - powdr::compiler::compile(Path::new(&env::args().nth(1).unwrap())); + if env::args().nth(1).unwrap() == "--asm" { + let file_name = env::args().nth(2).unwrap(); + let contents = fs::read_to_string(Path::new(&file_name)).unwrap(); + match powdr::parser::parse_asm(Some(&file_name), &contents) { + Ok(ast) => println!("{ast:?}"), + Err(err) => err.output_to_stderr(), + } + } else { + powdr::compiler::compile(Path::new(&env::args().nth(1).unwrap())); + } } diff --git a/src/parser/asm_ast.rs b/src/parser/asm_ast.rs new file mode 100644 index 000000000..55500d805 --- /dev/null +++ b/src/parser/asm_ast.rs @@ -0,0 +1,30 @@ +use super::ast::{Expression, Statement}; + +#[derive(Debug, PartialEq, Eq)] +pub struct ASMFile(pub Vec); + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum ASMStatement { + RegisterDeclaration(usize, String, Option), + InstructionDeclaration(usize, String, Vec, Vec), + InlinePil(usize, Vec), + Assignment(usize, Vec, Option, Box), + Instruction(usize, String, Vec), + Label(usize, String), +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum RegisterFlag { + IsPC, + IsDefaultAssignment, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct InstructionParam { + pub name: String, + pub param_type: Option, + /// Which register this parameter is passed in (first) and out (second). + /// It is a double option, because the arrow can be optional and the + /// assign register inside the arrow is optional as well. + pub assignment_reg: (Option>, Option>), +} diff --git a/src/parser/ast.rs b/src/parser/ast.rs index d6c4178cd..da4b16fce 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -46,6 +46,7 @@ pub enum Expression { BinaryOperation(Box, BinaryOperator, Box), UnaryOperation(UnaryOperator, Box), FunctionCall(String, Vec), + FreeInput(Box), } #[derive(Debug, PartialEq, Eq, Default, Clone)] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 43d94331b..ea6bdd6c4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4,12 +4,13 @@ use codespan_reporting::term; use codespan_reporting::term::termcolor::{ColorChoice, StandardStream}; use lalrpop_util::*; +pub mod asm_ast; pub mod ast; lalrpop_mod!( #[allow(clippy::all)] - pil, - "/parser/pil.rs" + powdr, + "/parser/powdr.rs" ); #[derive(Debug)] @@ -35,7 +36,38 @@ impl<'a> ParseError<'a> { } pub fn parse<'a>(file_name: Option<&str>, input: &'a str) -> Result> { - pil::PILFileParser::new().parse(input).map_err(|err| { + powdr::PILFileParser::new().parse(input).map_err(|err| { + let (&start, &end) = match &err { + lalrpop_util::ParseError::InvalidToken { location } => (location, location), + lalrpop_util::ParseError::UnrecognizedEOF { + location, + expected: _, + } => (location, location), + lalrpop_util::ParseError::UnrecognizedToken { + token: (start, _, end), + expected: _, + } => (start, end), + lalrpop_util::ParseError::ExtraToken { + token: (start, _, end), + } => (start, end), + lalrpop_util::ParseError::User { error: _ } => (&0, &0), + }; + ParseError { + start, + end, + file_name: file_name.unwrap_or("input").to_string(), + contents: input, + message: format!("{err}"), + } + }) +} + +pub fn parse_asm<'a>( + file_name: Option<&str>, + input: &'a str, +) -> Result> { + powdr::ASMFileParser::new().parse(input).map_err(|err| { + // TODO code duplication let (&start, &end) = match &err { lalrpop_util::ParseError::InvalidToken { location } => (location, location), lalrpop_util::ParseError::UnrecognizedEOF { @@ -65,17 +97,17 @@ pub fn parse<'a>(file_name: Option<&str>, input: &'a str) -> Result ASMFile { + let input = fs::read_to_string(name).unwrap(); + parse_asm(Some(name), &input).unwrap_or_else(|err| { + eprintln!("Parse error during test:"); + err.output_to_stderr(); + panic!(); + }) + } + #[test] fn parse_example_files() { parse_file("tests/polygon-hermez/arith.pil"); @@ -158,7 +199,7 @@ mod test { #[test] fn simple_macro() { - let parsed = pil::PILFileParser::new() + let parsed = powdr::PILFileParser::new() .parse("macro f(x) { x in g; x + 1 };") .unwrap(); assert_eq!( @@ -197,4 +238,9 @@ mod test { )]) ); } + + #[test] + fn parse_example_asm_files() { + parse_asm_file("tests/simple_sum.asm"); + } } diff --git a/src/parser/pil.lalrpop b/src/parser/powdr.lalrpop similarity index 72% rename from src/parser/pil.lalrpop rename to src/parser/powdr.lalrpop index 6727d405e..0e11f8bde 100644 --- a/src/parser/pil.lalrpop +++ b/src/parser/powdr.lalrpop @@ -1,5 +1,6 @@ use std::str::FromStr; use crate::parser::ast::*; +use crate::parser::asm_ast::*; grammar; @@ -15,6 +16,12 @@ pub PILFile: PILFile = { }; +pub ASMFile: ASMFile = { + ()* => ASMFile(<>) +}; + +// ---------------------------- PIL part ----------------------------- + Statement = { Include, Namespace, @@ -119,8 +126,80 @@ ConstantFixed = { "constant", "fixed" } +// ---------------------------- ASM part ----------------------------- + + +ASMStatement: ASMStatement = { + RegisterDeclaration, + InstructionDeclaration, + InlinePil, + Assignment, + Instruction, + Label, +} + +RegisterDeclaration: ASMStatement = { + // TODO default update + <@L> "reg" <( "[" "]" )?> ";" => ASMStatement::RegisterDeclaration(<>) + +} + +RegisterFlag: RegisterFlag = { + "@pc" => RegisterFlag::IsPC, + "<=" => RegisterFlag::IsDefaultAssignment +} + +InstructionDeclaration: ASMStatement = { + <@L> "instr" "{" "}" => ASMStatement::InstructionDeclaration(<>) +} + +EqualityExpressionList: Vec = { + "," )*> => { list.push(end); list } +} + +EqualityExpression: Expression = { + "=" => Expression::BinaryOperation(l, BinaryOperator::Sub, r) +} + +InstructionParamList: Vec = { + => vec![], + "," )*> => { list.push(end); list } +} + +InstructionParam: InstructionParam = { + )?> => + InstructionParam{name, param_type, assignment_reg: (assign_read, assign_write)} +} + +InlinePil: ASMStatement = { + <@L> "pil{" <( ";")*> "}" => ASMStatement::InlinePil(<>) +} + +Assignment: ASMStatement = { + <@L> ";" => ASMStatement::Assignment(<>) + +} + +IdentifierList: Vec = { + "," )*> => { list.push(end); list } +} + +AssignOperator: Option = { + "<=" "=" +} + +Instruction: ASMStatement = { + <@L> ";" => ASMStatement::Instruction(<>) +} + +Label: ASMStatement = { + <@L> "::" => ASMStatement::Label(<>) +} + +// ---------------------------- Expressions ----------------------------- ExpressionList: Vec = { + => vec![], "," )*> => { list.push(end); list } } @@ -207,6 +286,7 @@ Term: Box = { PublicReference => Box::new(Expression::PublicReference(<>)), Number => Box::new(Expression::Number(<>)), "(" ")", + "${" "}" => Box::new(Expression::FreeInput(<>)) } FunctionCall: Expression = { @@ -224,6 +304,9 @@ PublicReference: String = { ":" } +// ---------------------------- Terminals ----------------------------- + + StringLiteral: String = { r#""[^"]*""# => <>[1..<>.len() - 1].to_string() } diff --git a/tests/simple_sum.asm b/tests/simple_sum.asm index 3ebeec9e9..5fe4a2244 100644 --- a/tests/simple_sum.asm +++ b/tests/simple_sum.asm @@ -6,10 +6,10 @@ // Code in `${`...`}` is rust-like code that is run by the prover // to generate free inputs. -reg A -reg CNT -reg pc(@line): pc' = pc + 1 -reg X(<=) +reg A; +reg CNT; +reg pc[@pc]; // "@pc" means "pc' = pc + 1" is the default propagation (instead of pc' = pc) and it tracks the line in the program. +reg X[<=]; // "<=" means it is the default assignment register. // Code in `pil{`..`}` is pil code that is inserted into the pil file. pil{ @@ -23,107 +23,107 @@ instr jmp l: label { pc' = l } instr dec_CNT { CNT' = CNT - 1 } instr assert_zero <=X= a { XIsZero = 1 } -CNT <=X= ${ input[1] } +CNT <=X= ${ input[1] }; -start: - jmpz CNT end - A <=X= A + ${ input[CNT + 1] } +start:: + jmpz CNT, end; + A <=X= A + ${ input[CNT + 1] }; // Could use "CNT <=X= CNT - 1", but that would need X. - dec_CNT - jmp start + dec_CNT; + jmp start; -end: - A <=X= A - { input[0] } - assert_zero A +end:: + A <=X= A - ${ input[0] }; + assert_zero A; /// -------------------------- compiled into the following pil file ------------------------------- -// ===== Register definitions -col witness A; -col witness CNT; -col witness pc; -col witness X; - -// ===== Inline PIL -col witness XInv; -col XIsZero = 1 - X * XInv; -XIsZero * X = 0; - -// ===== Encoding of the instructions - -// New powdr feature, creates a bit field type. -// This is a bit field and not an enum so that multiple instructions -// can be combined in a single line. -BitField Instr { - jmpz - jmp - dec_CNT - assert_zero -} - -col witness instr: Instr; -// The above automatically generates the following commit polys: -// col witness instr_jmp_set: bool; -// col witness instr_jmpz_set: bool; -// col witness instr_jmp_set: bool; -// col witness instr_dec_CNT_set: bool; -// col witness instr_assert_zero_set: bool; -// and the following constraint: -// instr = 1 * -// 1 * instr_jmp_set + -// 2 * instr_jmpz_set + -// 4 * instr_jmp_set + -// 8 * instr_dec_CNT_set + -// 16 * instr_assert_zero_set; -// The expression `instr == Instr::jmpz` is replaced by the flag. - -col witness instr_jmp_arg0; - -// ===== Register propagation - -// There is only a single write poly per register (not one per register/assignment register combination) -// because we do not want to write from multiple assignment registers into the same register. -col witness write_A: bool; -col witness read_X_A; -col witness write_CNT: bool; -col witness const_X; - -A' = write_A * X; -// The compiler ensures that write_CNT and dec_cnt cannot both be set at the same time. -CNT' = write_CNT * X + instr_dec_cnt_set * (CNT - 1) + (1 - write_CNT - instr_dec_cnt_set) * CNT; -X = read_X_A * A + const_X; - -// The match expression is replaced by the usual if-then-else construction. -// Note that multiple arms can match. The result is the sum. -// Maybe then match is not the right construct? -pc' = match instr { - Instr::jmpz => (XIsZero * l + (1 - XIsZero) * (pc + 1)), - Instr::jmp => instr_jmp_arg0, - _ => pc + 1 -}; - - -// ===== Constraints from instructions - -instr_assert_zero_set * (XIsZero - 1) = 0; - -// ===== Fixed columns representing the program -// Their contents are generated from the program, but not explicitly expressed in the PIL language. - -col fixed line; -col fixed p_instr; -// The compiler can actually combine multiple boolean columns into one. -col fixed p_write_A; -col fixed p_read_X_A; -col fixed p_write_CNT; -col fixed p_const_X; -col fixed p_instr_jmp_arg0; - -// ===== Lookups connecting the execution to the program - -{ pc, instr, write_A, read_X_A, write_CNT, const_x, instr_jmp_arg0 } - in -{ line, p_instr, p_write_A, p_read_X_A, p_write_CNT, p_const_x, p_instr_jmp_arg0}; - -// TODO What is missing here is the termination of the program and making all polynomials cyclic. \ No newline at end of file +// // ===== Register definitions +// col witness A; +// col witness CNT; +// col witness pc; +// col witness X; +// +// // ===== Inline PIL +// col witness XInv; +// col XIsZero = 1 - X * XInv; +// XIsZero * X = 0; +// +// // ===== Encoding of the instructions +// +// // New powdr feature, creates a bit field type. +// // This is a bit field and not an enum so that multiple instructions +// // can be combined in a single line. +// BitField Instr { +// jmpz +// jmp +// dec_CNT +// assert_zero +// } +// +// col witness instr: Instr; +// // The above automatically generates the following commit polys: +// // col witness instr_jmp_set: bool; +// // col witness instr_jmpz_set: bool; +// // col witness instr_jmp_set: bool; +// // col witness instr_dec_CNT_set: bool; +// // col witness instr_assert_zero_set: bool; +// // and the following constraint: +// // instr = 1 * +// // 1 * instr_jmp_set + +// // 2 * instr_jmpz_set + +// // 4 * instr_jmp_set + +// // 8 * instr_dec_CNT_set + +// // 16 * instr_assert_zero_set; +// // The expression `instr == Instr::jmpz` is replaced by the flag. +// +// col witness instr_jmp_arg0; +// +// // ===== Register propagation +// +// // There is only a single write poly per register (not one per register/assignment register combination) +// // because we do not want to write from multiple assignment registers into the same register. +// col witness write_A: bool; +// col witness read_X_A; +// col witness write_CNT: bool; +// col witness const_X; +// +// A' = write_A * X + (1 - write_A) * A; +// // The compiler ensures that write_CNT and dec_cnt cannot both be set at the same time. +// CNT' = write_CNT * X + instr_dec_cnt_set * (CNT - 1) + (1 - write_CNT - instr_dec_cnt_set) * CNT; +// X = read_X_A * A + const_X; +// +// // The match expression is replaced by the usual if-then-else construction. +// // Note that multiple arms can match. The result is the sum. +// // Maybe then match is not the right construct? +// pc' = match instr { +// Instr::jmpz => (XIsZero * l + (1 - XIsZero) * (pc + 1)), +// Instr::jmp => instr_jmp_arg0, +// _ => pc + 1 +// }; +// +// +// // ===== Constraints from instructions +// +// instr_assert_zero_set * (XIsZero - 1) = 0; +// +// // ===== Fixed columns representing the program +// // Their contents are generated from the program, but not explicitly expressed in the PIL language. +// +// col fixed line; +// col fixed p_instr; +// // The compiler can actually combine multiple boolean columns into one. +// col fixed p_write_A; +// col fixed p_read_X_A; +// col fixed p_write_CNT; +// col fixed p_const_X; +// col fixed p_instr_jmp_arg0; +// +// // ===== Lookups connecting the execution to the program +// +// { pc, instr, write_A, read_X_A, write_CNT, const_x, instr_jmp_arg0 } +// in +// { line, p_instr, p_write_A, p_read_X_A, p_write_CNT, p_const_x, p_instr_jmp_arg0}; +// +// // TODO What is missing here is the termination of the program and making all polynomials cyclic. \ No newline at end of file