ASM parser.

This commit is contained in:
chriseth
2023-02-16 16:54:50 +01:00
parent f9a511f8b3
commit c7bdda36c1
7 changed files with 284 additions and 113 deletions

View File

@@ -609,6 +609,7 @@ impl Context {
ast::Expression::FunctionCall(name, arguments) => {
Expression::FunctionCall(self.namespaced(name), self.process_expressions(arguments))
}
ast::Expression::FreeInput(_) => panic!(),
}
}
@@ -671,7 +672,8 @@ impl Context {
self.evaluate_binary_operation(left, op, right)
}
ast::Expression::UnaryOperation(op, value) => self.evaluate_unary_operation(op, value),
ast::Expression::FunctionCall(_, _) => None, // TODO we should also try to evaluate through macro calls.
ast::Expression::FunctionCall(_, _) => None,
ast::Expression::FreeInput(_) => panic!(),
}
}

View File

@@ -1,5 +1,14 @@
use std::{env, path::Path};
use std::{env, fs, path::Path};
fn main() {
powdr::compiler::compile(Path::new(&env::args().nth(1).unwrap()));
if env::args().nth(1).unwrap() == "--asm" {
let file_name = env::args().nth(2).unwrap();
let contents = fs::read_to_string(Path::new(&file_name)).unwrap();
match powdr::parser::parse_asm(Some(&file_name), &contents) {
Ok(ast) => println!("{ast:?}"),
Err(err) => err.output_to_stderr(),
}
} else {
powdr::compiler::compile(Path::new(&env::args().nth(1).unwrap()));
}
}

30
src/parser/asm_ast.rs Normal file
View File

@@ -0,0 +1,30 @@
use super::ast::{Expression, Statement};
#[derive(Debug, PartialEq, Eq)]
pub struct ASMFile(pub Vec<ASMStatement>);
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ASMStatement {
RegisterDeclaration(usize, String, Option<RegisterFlag>),
InstructionDeclaration(usize, String, Vec<InstructionParam>, Vec<Expression>),
InlinePil(usize, Vec<Statement>),
Assignment(usize, Vec<String>, Option<String>, Box<Expression>),
Instruction(usize, String, Vec<Expression>),
Label(usize, String),
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum RegisterFlag {
IsPC,
IsDefaultAssignment,
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct InstructionParam {
pub name: String,
pub param_type: Option<String>,
/// Which register this parameter is passed in (first) and out (second).
/// It is a double option, because the arrow can be optional and the
/// assign register inside the arrow is optional as well.
pub assignment_reg: (Option<Option<String>>, Option<Option<String>>),
}

View File

@@ -46,6 +46,7 @@ pub enum Expression {
BinaryOperation(Box<Expression>, BinaryOperator, Box<Expression>),
UnaryOperation(UnaryOperator, Box<Expression>),
FunctionCall(String, Vec<Expression>),
FreeInput(Box<Expression>),
}
#[derive(Debug, PartialEq, Eq, Default, Clone)]

View File

@@ -4,12 +4,13 @@ use codespan_reporting::term;
use codespan_reporting::term::termcolor::{ColorChoice, StandardStream};
use lalrpop_util::*;
pub mod asm_ast;
pub mod ast;
lalrpop_mod!(
#[allow(clippy::all)]
pil,
"/parser/pil.rs"
powdr,
"/parser/powdr.rs"
);
#[derive(Debug)]
@@ -35,7 +36,38 @@ impl<'a> ParseError<'a> {
}
pub fn parse<'a>(file_name: Option<&str>, input: &'a str) -> Result<ast::PILFile, ParseError<'a>> {
pil::PILFileParser::new().parse(input).map_err(|err| {
powdr::PILFileParser::new().parse(input).map_err(|err| {
let (&start, &end) = match &err {
lalrpop_util::ParseError::InvalidToken { location } => (location, location),
lalrpop_util::ParseError::UnrecognizedEOF {
location,
expected: _,
} => (location, location),
lalrpop_util::ParseError::UnrecognizedToken {
token: (start, _, end),
expected: _,
} => (start, end),
lalrpop_util::ParseError::ExtraToken {
token: (start, _, end),
} => (start, end),
lalrpop_util::ParseError::User { error: _ } => (&0, &0),
};
ParseError {
start,
end,
file_name: file_name.unwrap_or("input").to_string(),
contents: input,
message: format!("{err}"),
}
})
}
pub fn parse_asm<'a>(
file_name: Option<&str>,
input: &'a str,
) -> Result<asm_ast::ASMFile, ParseError<'a>> {
powdr::ASMFileParser::new().parse(input).map_err(|err| {
// TODO code duplication
let (&start, &end) = match &err {
lalrpop_util::ParseError::InvalidToken { location } => (location, location),
lalrpop_util::ParseError::UnrecognizedEOF {
@@ -65,17 +97,17 @@ pub fn parse<'a>(file_name: Option<&str>, input: &'a str) -> Result<ast::PILFile
mod test {
use std::fs;
use super::*;
use super::{asm_ast::ASMFile, *};
use ast::*;
#[test]
fn empty() {
assert!(pil::PILFileParser::new().parse("").is_ok());
assert!(powdr::PILFileParser::new().parse("").is_ok());
}
#[test]
fn simple_include() {
let parsed = pil::PILFileParser::new().parse("include \"x\";").unwrap();
let parsed = powdr::PILFileParser::new().parse("include \"x\";").unwrap();
assert_eq!(
parsed,
PILFile(vec![Statement::Include(0, "x".to_string())])
@@ -84,7 +116,7 @@ mod test {
#[test]
fn start_offsets() {
let parsed = pil::PILFileParser::new()
let parsed = powdr::PILFileParser::new()
.parse("include \"x\"; pol commit t;")
.unwrap();
assert_eq!(
@@ -104,7 +136,7 @@ mod test {
#[test]
fn simple_plookup() {
let parsed = pil::PILFileParser::new().parse("f in g;").unwrap();
let parsed = powdr::PILFileParser::new().parse("f in g;").unwrap();
assert_eq!(
parsed,
PILFile(vec![Statement::PlookupIdentity(
@@ -136,6 +168,15 @@ mod test {
})
}
fn parse_asm_file(name: &str) -> ASMFile {
let input = fs::read_to_string(name).unwrap();
parse_asm(Some(name), &input).unwrap_or_else(|err| {
eprintln!("Parse error during test:");
err.output_to_stderr();
panic!();
})
}
#[test]
fn parse_example_files() {
parse_file("tests/polygon-hermez/arith.pil");
@@ -158,7 +199,7 @@ mod test {
#[test]
fn simple_macro() {
let parsed = pil::PILFileParser::new()
let parsed = powdr::PILFileParser::new()
.parse("macro f(x) { x in g; x + 1 };")
.unwrap();
assert_eq!(
@@ -197,4 +238,9 @@ mod test {
)])
);
}
#[test]
fn parse_example_asm_files() {
parse_asm_file("tests/simple_sum.asm");
}
}

View File

@@ -1,5 +1,6 @@
use std::str::FromStr;
use crate::parser::ast::*;
use crate::parser::asm_ast::*;
grammar;
@@ -15,6 +16,12 @@ pub PILFile: PILFile = {
};
pub ASMFile: ASMFile = {
(<ASMStatement>)* => ASMFile(<>)
};
// ---------------------------- PIL part -----------------------------
Statement = {
Include,
Namespace,
@@ -119,8 +126,80 @@ ConstantFixed = {
"constant", "fixed"
}
// ---------------------------- ASM part -----------------------------
ASMStatement: ASMStatement = {
RegisterDeclaration,
InstructionDeclaration,
InlinePil,
Assignment,
Instruction,
Label,
}
RegisterDeclaration: ASMStatement = {
// TODO default update
<@L> "reg" <Identifier> <( "[" <RegisterFlag> "]" )?> ";" => ASMStatement::RegisterDeclaration(<>)
}
RegisterFlag: RegisterFlag = {
"@pc" => RegisterFlag::IsPC,
"<=" => RegisterFlag::IsDefaultAssignment
}
InstructionDeclaration: ASMStatement = {
<@L> "instr" <Identifier> <InstructionParamList> "{" <EqualityExpressionList> "}" => ASMStatement::InstructionDeclaration(<>)
}
EqualityExpressionList: Vec<Expression> = {
<mut list:( <EqualityExpression> "," )*> <end:EqualityExpression> => { list.push(end); list }
}
EqualityExpression: Expression = {
<l:BoxedExpression> "=" <r:BoxedExpression> => Expression::BinaryOperation(l, BinaryOperator::Sub, r)
}
InstructionParamList: Vec<InstructionParam> = {
=> vec![],
<mut list:( <InstructionParam> "," )*> <end:InstructionParam> => { list.push(end); list }
}
InstructionParam: InstructionParam = {
<assign_read:AssignOperator?> <name: Identifier> <param_type:(":" <Identifier>)?> <assign_write:AssignOperator?> =>
InstructionParam{name, param_type, assignment_reg: (assign_read, assign_write)}
}
InlinePil: ASMStatement = {
<@L> "pil{" <(<Statement> ";")*> "}" => ASMStatement::InlinePil(<>)
}
Assignment: ASMStatement = {
<@L> <IdentifierList> <AssignOperator> <BoxedExpression> ";" => ASMStatement::Assignment(<>)
}
IdentifierList: Vec<String> = {
<mut list:( <Identifier> "," )*> <end:Identifier> => { list.push(end); list }
}
AssignOperator: Option<String> = {
"<=" <Identifier?> "="
}
Instruction: ASMStatement = {
<@L> <Identifier> <ExpressionList> ";" => ASMStatement::Instruction(<>)
}
Label: ASMStatement = {
<@L> <Identifier> "::" => ASMStatement::Label(<>)
}
// ---------------------------- Expressions -----------------------------
ExpressionList: Vec<Expression> = {
=> vec![],
<mut list:( <Expression> "," )*> <end:Expression> => { list.push(end); list }
}
@@ -207,6 +286,7 @@ Term: Box<Expression> = {
PublicReference => Box::new(Expression::PublicReference(<>)),
Number => Box::new(Expression::Number(<>)),
"(" <BoxedExpression> ")",
"${" <BoxedExpression> "}" => Box::new(Expression::FreeInput(<>))
}
FunctionCall: Expression = {
@@ -224,6 +304,9 @@ PublicReference: String = {
":" <Identifier>
}
// ---------------------------- Terminals -----------------------------
StringLiteral: String = {
r#""[^"]*""# => <>[1..<>.len() - 1].to_string()
}

View File

@@ -6,10 +6,10 @@
// Code in `${`...`}` is rust-like code that is run by the prover
// to generate free inputs.
reg A
reg CNT
reg pc(@line): pc' = pc + 1
reg X(<=)
reg A;
reg CNT;
reg pc[@pc]; // "@pc" means "pc' = pc + 1" is the default propagation (instead of pc' = pc) and it tracks the line in the program.
reg X[<=]; // "<=" means it is the default assignment register.
// Code in `pil{`..`}` is pil code that is inserted into the pil file.
pil{
@@ -23,107 +23,107 @@ instr jmp l: label { pc' = l }
instr dec_CNT { CNT' = CNT - 1 }
instr assert_zero <=X= a { XIsZero = 1 }
CNT <=X= ${ input[1] }
CNT <=X= ${ input[1] };
start:
jmpz CNT end
A <=X= A + ${ input[CNT + 1] }
start::
jmpz CNT, end;
A <=X= A + ${ input[CNT + 1] };
// Could use "CNT <=X= CNT - 1", but that would need X.
dec_CNT
jmp start
dec_CNT;
jmp start;
end:
A <=X= A - { input[0] }
assert_zero A
end::
A <=X= A - ${ input[0] };
assert_zero A;
/// -------------------------- compiled into the following pil file -------------------------------
// ===== Register definitions
col witness A;
col witness CNT;
col witness pc;
col witness X;
// ===== Inline PIL
col witness XInv;
col XIsZero = 1 - X * XInv;
XIsZero * X = 0;
// ===== Encoding of the instructions
// New powdr feature, creates a bit field type.
// This is a bit field and not an enum so that multiple instructions
// can be combined in a single line.
BitField Instr {
jmpz
jmp
dec_CNT
assert_zero
}
col witness instr: Instr;
// The above automatically generates the following commit polys:
// col witness instr_jmp_set: bool;
// col witness instr_jmpz_set: bool;
// col witness instr_jmp_set: bool;
// col witness instr_dec_CNT_set: bool;
// col witness instr_assert_zero_set: bool;
// and the following constraint:
// instr = 1 *
// 1 * instr_jmp_set +
// 2 * instr_jmpz_set +
// 4 * instr_jmp_set +
// 8 * instr_dec_CNT_set +
// 16 * instr_assert_zero_set;
// The expression `instr == Instr::jmpz` is replaced by the flag.
col witness instr_jmp_arg0;
// ===== Register propagation
// There is only a single write poly per register (not one per register/assignment register combination)
// because we do not want to write from multiple assignment registers into the same register.
col witness write_A: bool;
col witness read_X_A;
col witness write_CNT: bool;
col witness const_X;
A' = write_A * X;
// The compiler ensures that write_CNT and dec_cnt cannot both be set at the same time.
CNT' = write_CNT * X + instr_dec_cnt_set * (CNT - 1) + (1 - write_CNT - instr_dec_cnt_set) * CNT;
X = read_X_A * A + const_X;
// The match expression is replaced by the usual if-then-else construction.
// Note that multiple arms can match. The result is the sum.
// Maybe then match is not the right construct?
pc' = match instr {
Instr::jmpz => (XIsZero * l + (1 - XIsZero) * (pc + 1)),
Instr::jmp => instr_jmp_arg0,
_ => pc + 1
};
// ===== Constraints from instructions
instr_assert_zero_set * (XIsZero - 1) = 0;
// ===== Fixed columns representing the program
// Their contents are generated from the program, but not explicitly expressed in the PIL language.
col fixed line;
col fixed p_instr;
// The compiler can actually combine multiple boolean columns into one.
col fixed p_write_A;
col fixed p_read_X_A;
col fixed p_write_CNT;
col fixed p_const_X;
col fixed p_instr_jmp_arg0;
// ===== Lookups connecting the execution to the program
{ pc, instr, write_A, read_X_A, write_CNT, const_x, instr_jmp_arg0 }
in
{ line, p_instr, p_write_A, p_read_X_A, p_write_CNT, p_const_x, p_instr_jmp_arg0};
// TODO What is missing here is the termination of the program and making all polynomials cyclic.
// // ===== Register definitions
// col witness A;
// col witness CNT;
// col witness pc;
// col witness X;
//
// // ===== Inline PIL
// col witness XInv;
// col XIsZero = 1 - X * XInv;
// XIsZero * X = 0;
//
// // ===== Encoding of the instructions
//
// // New powdr feature, creates a bit field type.
// // This is a bit field and not an enum so that multiple instructions
// // can be combined in a single line.
// BitField Instr {
// jmpz
// jmp
// dec_CNT
// assert_zero
// }
//
// col witness instr: Instr;
// // The above automatically generates the following commit polys:
// // col witness instr_jmp_set: bool;
// // col witness instr_jmpz_set: bool;
// // col witness instr_jmp_set: bool;
// // col witness instr_dec_CNT_set: bool;
// // col witness instr_assert_zero_set: bool;
// // and the following constraint:
// // instr = 1 *
// // 1 * instr_jmp_set +
// // 2 * instr_jmpz_set +
// // 4 * instr_jmp_set +
// // 8 * instr_dec_CNT_set +
// // 16 * instr_assert_zero_set;
// // The expression `instr == Instr::jmpz` is replaced by the flag.
//
// col witness instr_jmp_arg0;
//
// // ===== Register propagation
//
// // There is only a single write poly per register (not one per register/assignment register combination)
// // because we do not want to write from multiple assignment registers into the same register.
// col witness write_A: bool;
// col witness read_X_A;
// col witness write_CNT: bool;
// col witness const_X;
//
// A' = write_A * X + (1 - write_A) * A;
// // The compiler ensures that write_CNT and dec_cnt cannot both be set at the same time.
// CNT' = write_CNT * X + instr_dec_cnt_set * (CNT - 1) + (1 - write_CNT - instr_dec_cnt_set) * CNT;
// X = read_X_A * A + const_X;
//
// // The match expression is replaced by the usual if-then-else construction.
// // Note that multiple arms can match. The result is the sum.
// // Maybe then match is not the right construct?
// pc' = match instr {
// Instr::jmpz => (XIsZero * l + (1 - XIsZero) * (pc + 1)),
// Instr::jmp => instr_jmp_arg0,
// _ => pc + 1
// };
//
//
// // ===== Constraints from instructions
//
// instr_assert_zero_set * (XIsZero - 1) = 0;
//
// // ===== Fixed columns representing the program
// // Their contents are generated from the program, but not explicitly expressed in the PIL language.
//
// col fixed line;
// col fixed p_instr;
// // The compiler can actually combine multiple boolean columns into one.
// col fixed p_write_A;
// col fixed p_read_X_A;
// col fixed p_write_CNT;
// col fixed p_const_X;
// col fixed p_instr_jmp_arg0;
//
// // ===== Lookups connecting the execution to the program
//
// { pc, instr, write_A, read_X_A, write_CNT, const_x, instr_jmp_arg0 }
// in
// { line, p_instr, p_write_A, p_read_X_A, p_write_CNT, p_const_x, p_instr_jmp_arg0};
//
// // TODO What is missing here is the termination of the program and making all polynomials cyclic.