From 6f0f551bac9cf920d7bbc8c91f70e79776eb4e1b Mon Sep 17 00:00:00 2001 From: Pelle Johnsen Date: Tue, 30 Jun 2020 16:42:50 +0200 Subject: [PATCH] New GLSL frontend using pomelo (#87) * Add initial pomelo glsl work * Fix ref to glsl_old * Fix idents with numbers in lexer * Use glsl_new for .vert in convert example Controlled by PREFER_GLSL_NEW=1 env var Also add simple test files * Start making parser look like spec * Port 'old' lex.rs to glsl_new * Apply fixes after rebasing * Fix clippy issue * Fix PR comments - Make into optional feature glsl-new - Minor code style improvements --- Cargo.toml | 2 + examples/convert.rs | 35 +++- src/front/glsl_new/error.rs | 43 +++++ src/front/glsl_new/lex.rs | 311 +++++++++++++++++++++++++++++++++++ src/front/glsl_new/mod.rs | 48 ++++++ src/front/glsl_new/parser.rs | 185 +++++++++++++++++++++ src/front/glsl_new/token.rs | 7 + src/front/mod.rs | 2 + test-data/simple.frag | 5 + test-data/simple.vert | 5 + test-data/simple.wgsl | 8 + 11 files changed, 648 insertions(+), 3 deletions(-) create mode 100644 src/front/glsl_new/error.rs create mode 100644 src/front/glsl_new/lex.rs create mode 100644 src/front/glsl_new/mod.rs create mode 100644 src/front/glsl_new/parser.rs create mode 100644 src/front/glsl_new/token.rs create mode 100644 test-data/simple.frag create mode 100644 test-data/simple.vert create mode 100644 test-data/simple.wgsl diff --git a/Cargo.toml b/Cargo.toml index 10c8f94ede..f4cdd1f9b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,9 +16,11 @@ log = "0.4" num-traits = "0.2" spirv = { package = "spirv_headers", version = "1.4.2", optional = true } glsl = { version = "4", optional = true } +pomelo = { version = "0.1.4", optional = true } [features] glsl_preprocessor = ["glsl"] +glsl-new = ["pomelo"] [dev-dependencies] env_logger = "0.6" diff --git a/examples/convert.rs b/examples/convert.rs index 7a2b7cfa0e..5658f5a313 100644 --- a/examples/convert.rs +++ b/examples/convert.rs @@ -29,6 +29,9 @@ fn main() { println!("Call with "); return; } + #[cfg(any(feature = "glsl", feature = "glsl-new"))] + let prefer_glsl_new = + !cfg!(feature = "glsl") || env::var("PREFER_GLSL_NEW").unwrap_or_default() == "1"; let module = match Path::new(&args[1]) .extension() .expect("Input has no extension?") @@ -44,11 +47,37 @@ fn main() { let input = fs::read_to_string(&args[1]).unwrap(); naga::front::wgsl::parse_str(&input).unwrap() } - #[cfg(feature = "glsl")] + #[cfg(any(feature = "glsl", feature = "glsl-new"))] "vert" => { let input = fs::read_to_string(&args[1]).unwrap(); - naga::front::glsl::parse_str(&input, "main".to_string(), naga::ShaderStage::Vertex) - .unwrap() + let mut module: Option = None; + if prefer_glsl_new { + #[cfg(feature = "glsl-new")] + { + module = Some( + naga::front::glsl_new::parse_str( + &input, + "main".to_string(), + naga::ShaderStage::Vertex, + ) + .unwrap(), + ) + } + } + if module.is_none() { + #[cfg(feature = "glsl")] + { + module = Some( + naga::front::glsl::parse_str( + &input, + "main".to_string(), + naga::ShaderStage::Vertex, + ) + .unwrap(), + ) + } + } + module.unwrap() } #[cfg(feature = "glsl")] "frag" => { diff --git a/src/front/glsl_new/error.rs b/src/front/glsl_new/error.rs new file mode 100644 index 0000000000..991e6050a2 --- /dev/null +++ b/src/front/glsl_new/error.rs @@ -0,0 +1,43 @@ +use std::{fmt, io}; + +#[derive(Debug)] +pub enum ErrorKind { + InvalidInput, + IoError(io::Error), +} + +impl fmt::Display for ErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ErrorKind::IoError(error) => write!(f, "IO Error {}", error), + ErrorKind::InvalidInput => write!(f, "InvalidInput"), + } + } +} + +#[derive(Debug)] +pub struct ParseError { + pub kind: ErrorKind, +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +impl From for ParseError { + fn from(error: io::Error) -> Self { + ParseError { + kind: ErrorKind::IoError(error), + } + } +} + +impl From for ParseError { + fn from(kind: ErrorKind) -> Self { + ParseError { kind } + } +} + +impl std::error::Error for ParseError {} diff --git a/src/front/glsl_new/lex.rs b/src/front/glsl_new/lex.rs new file mode 100644 index 0000000000..8097518a63 --- /dev/null +++ b/src/front/glsl_new/lex.rs @@ -0,0 +1,311 @@ +use super::parser::Token; +use super::token::TokenMetadata; +use std::{iter::Enumerate, str::Lines}; + +fn _consume_str<'a>(input: &'a str, what: &str) -> Option<&'a str> { + if input.starts_with(what) { + Some(&input[what.len()..]) + } else { + None + } +} + +fn consume_any(input: &str, what: impl Fn(char) -> bool) -> (&str, &str, usize) { + let pos = input.find(|c| !what(c)).unwrap_or_else(|| input.len()); + let (o, i) = input.split_at(pos); + (o, i, pos) +} + +pub fn consume_token(mut input: &str) -> (Option, &str) { + let start = input + .find(|c: char| !c.is_whitespace()) + .unwrap_or_else(|| input.chars().count()); + input = &input[start..]; + + let mut chars = input.chars(); + let cur = match chars.next() { + Some(c) => c, + None => return (None, input), + }; + let mut meta = TokenMetadata { + line: 0, + chars: start..start + 1, + }; + match cur { + ':' => (Some(Token::Colon(meta)), chars.as_str()), + ';' => (Some(Token::Semicolon(meta)), chars.as_str()), + ',' => (Some(Token::Comma(meta)), chars.as_str()), + '.' => (Some(Token::Dot(meta)), chars.as_str()), + + '(' => (Some(Token::LeftParen(meta)), chars.as_str()), + ')' => (Some(Token::RightParen(meta)), chars.as_str()), + '{' => (Some(Token::LeftBrace(meta)), chars.as_str()), + '}' => (Some(Token::RightBrace(meta)), chars.as_str()), + '[' => (Some(Token::LeftBracket(meta)), chars.as_str()), + ']' => (Some(Token::RightBracket(meta)), chars.as_str()), + '<' | '>' => { + input = chars.as_str(); + let n1 = chars.next(); + let input1 = chars.as_str(); + let n2 = chars.next(); + match (cur, n1, n2) { + ('<', Some('<'), Some('=')) => { + meta.chars.end = start + 3; + (Some(Token::LeftAssign(meta)), chars.as_str()) + } + ('>', Some('>'), Some('=')) => { + meta.chars.end = start + 3; + (Some(Token::RightAssign(meta)), chars.as_str()) + } + ('<', Some('<'), _) => { + meta.chars.end = start + 2; + (Some(Token::LeftOp(meta)), input1) + } + ('>', Some('>'), _) => { + meta.chars.end = start + 2; + (Some(Token::RightOp(meta)), input1) + } + ('<', Some('='), _) => { + meta.chars.end = start + 2; + (Some(Token::LeOp(meta)), input1) + } + ('>', Some('='), _) => { + meta.chars.end = start + 2; + (Some(Token::GeOp(meta)), input1) + } + ('<', _, _) => (Some(Token::LeftAngle(meta)), input), + ('>', _, _) => (Some(Token::RightAngle(meta)), input), + _ => (None, input), + } + } + '0'..='9' => { + let (number, rest, pos) = consume_any(input, |c| (c >= '0' && c <= '9' || c == '.')); + if number.find('.').is_some() { + if ( + chars.next().map(|c| c.to_lowercase().next().unwrap()), + chars.next().map(|c| c.to_lowercase().next().unwrap()), + ) == (Some('l'), Some('f')) + { + meta.chars.end = start + pos + 2; + ( + Some(Token::DoubleConstant((meta, number.parse().unwrap()))), + chars.as_str(), + ) + } else { + meta.chars.end = start + pos; + ( + Some(Token::FloatConstant((meta, number.parse().unwrap()))), + chars.as_str(), + ) + } + } else { + meta.chars.end = start + pos; + ( + Some(Token::IntConstant((meta, number.parse().unwrap()))), + rest, + ) + } + } + 'a'..='z' | 'A'..='Z' | '_' => { + let (word, rest, pos) = consume_any(input, |c| c.is_alphanumeric() || c == '_'); + meta.chars.end = start + pos; + match word { + "void" => (Some(Token::Void(meta)), rest), + "vec4" => (Some(Token::Vec4(meta)), rest), + //TODO: remaining types + _ => (Some(Token::Identifier((meta, String::from(word)))), rest), + } + } + + '+' | '-' | '&' | '|' => { + input = chars.as_str(); + match chars.next() { + Some('=') => { + meta.chars.end = start + 2; + match cur { + '+' => (Some(Token::AddAssign(meta)), chars.as_str()), + '-' => (Some(Token::SubAssign(meta)), chars.as_str()), + '&' => (Some(Token::AndAssign(meta)), chars.as_str()), + '|' => (Some(Token::OrAssign(meta)), chars.as_str()), + '^' => (Some(Token::XorAssign(meta)), chars.as_str()), + _ => (None, input), + } + } + Some(cur) => { + meta.chars.end = start + 2; + match cur { + '+' => (Some(Token::IncOp(meta)), chars.as_str()), + '-' => (Some(Token::DecOp(meta)), chars.as_str()), + '&' => (Some(Token::AndOp(meta)), chars.as_str()), + '|' => (Some(Token::OrOp(meta)), chars.as_str()), + '^' => (Some(Token::XorOp(meta)), chars.as_str()), + _ => (None, input), + } + } + _ => match cur { + '+' => (Some(Token::Plus(meta)), input), + '-' => (Some(Token::Dash(meta)), input), + '&' => (Some(Token::Ampersand(meta)), input), + '|' => (Some(Token::VerticalBar(meta)), input), + '^' => (Some(Token::Caret(meta)), input), + _ => (None, input), + }, + } + } + + '%' | '!' | '=' => { + input = chars.as_str(); + match chars.next() { + Some('=') => { + meta.chars.end = start + 2; + match cur { + '%' => (Some(Token::ModAssign(meta)), chars.as_str()), + '!' => (Some(Token::NeOp(meta)), chars.as_str()), + '=' => (Some(Token::EqOp(meta)), chars.as_str()), + _ => (None, input), + } + } + _ => match cur { + '%' => (Some(Token::Percent(meta)), input), + '!' => (Some(Token::Bang(meta)), input), + '=' => (Some(Token::Equal(meta)), input), + _ => (None, input), + }, + } + } + + '*' => { + input = chars.as_str(); + match chars.next() { + Some('=') => { + meta.chars.end = start + 2; + (Some(Token::MulAssign(meta)), chars.as_str()) + } + //TODO: multi-line comments + // Some('/') => ( + // Token::MultiLineCommentClose, + // chars.as_str(), + // start, + // start + 2, + // ), + _ => (Some(Token::MulAssign(meta)), input), + } + } + '/' => { + input = chars.as_str(); + match chars.next() { + Some('=') => { + meta.chars.end = start + 2; + (Some(Token::DivAssign(meta)), chars.as_str()) + } + //TODO: line comments + // Some('/') => (Token::LineComment, chars.as_str(), start, start + 2), + //TODO: multi-line comments + // Some('*') => ( + // Token::MultiLineCommentOpen, + // chars.as_str(), + // start, + // start + 2, + // ), + _ => (Some(Token::Slash(meta)), input), + } + } + '#' => { + input = chars.as_str(); + let (word, rest, pos) = consume_any(input, |c| c.is_alphanumeric() || c == '_'); + meta.chars.end = start + pos; + match word { + "version" => (Some(Token::Version(meta)), rest), + _ => (None, input), + } + + //TODO: preprocessor + // if chars.next() == Some(cur) { + // (Token::TokenPasting, chars.as_str(), start, start + 2) + // } else { + // (Token::Preprocessor, input, start, start + 1) + // } + } + '~' => (Some(Token::Tilde(meta)), chars.as_str()), + '?' => (Some(Token::Question(meta)), chars.as_str()), + _ => (None, chars.as_str()), + } +} + +#[derive(Clone, Debug)] +pub struct Lexer<'a> { + lines: Enumerate>, + input: String, + line: usize, + offset: usize, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Self { + let mut lines = input.lines().enumerate(); + let (line, input) = lines.next().unwrap_or((0, "")); + let mut input = String::from(input); + + while input.ends_with('\\') { + if let Some((_, next)) = lines.next() { + input.pop(); + input.push_str(next); + } else { + break; + } + } + + Lexer { + lines, + input, + line, + offset: 0, + } + } + + #[must_use] + pub fn next(&mut self) -> Option { + let (token, rest) = consume_token(&self.input); + + if let Some(mut token) = token { + self.input = String::from(rest); + let meta = token.extra_mut(); + let end = meta.chars.end; + meta.line = self.line; + meta.chars.start += self.offset; + meta.chars.end += self.offset; + self.offset += end; + Some(token) + } else { + let (line, input) = self.lines.next()?; + + let mut input = String::from(input); + + while input.ends_with('\\') { + if let Some((_, next)) = self.lines.next() { + input.pop(); + input.push_str(next); + } else { + break; + } + } + + self.input = input; + self.line = line; + self.offset = 0; + self.next() + } + } + + // #[must_use] + // pub fn peek(&mut self) -> Option { + // self.clone().next() + // } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Token; + fn next(&mut self) -> Option { + self.next() + } +} diff --git a/src/front/glsl_new/mod.rs b/src/front/glsl_new/mod.rs new file mode 100644 index 0000000000..4b3c70dafe --- /dev/null +++ b/src/front/glsl_new/mod.rs @@ -0,0 +1,48 @@ +use crate::{Arena, Constant, EntryPoint, Function, GlobalVariable, Header, Module, ShaderStage, Type}; + +mod lex; +use lex::Lexer; +mod error; +use error::{ErrorKind, ParseError}; +mod parser; +mod token; + +pub fn parse_str(source: &str, entry: String, stage: ShaderStage) -> Result { + log::debug!("------ GLSL-pomelo ------"); + + let module = Module { + header: Header { + version: (1, 0, 0), + generator: 0, + }, + types: Arena::::new(), + constants: Arena::::new(), + global_variables: Arena::::new(), + functions: Arena::::new(), + entry_points: vec![], + }; + + let lex = Lexer::new(source); + let mut parser = parser::Parser::new(module); + + for token in lex { + log::debug!("token: {:#?}", token); + parser.parse(token).map_err(|_| ErrorKind::InvalidInput)?; + } + let (_, mut parsed_module) = parser.end_of_input().map_err(|_| ErrorKind::InvalidInput)?; + + // find entry point + let entry_func = parsed_module + .functions + .iter() + .find(|(_, f)| f.name.as_ref().filter(|n| **n == entry).is_some()); + if let Some((h, _)) = entry_func { + parsed_module.entry_points.push(EntryPoint { + stage, + name: entry, + function: h, + }); + } + + Ok(parsed_module) +} diff --git a/src/front/glsl_new/parser.rs b/src/front/glsl_new/parser.rs new file mode 100644 index 0000000000..67fe65737a --- /dev/null +++ b/src/front/glsl_new/parser.rs @@ -0,0 +1,185 @@ +#![allow(unused_braces)] +use pomelo::pomelo; + +pomelo! { + //%verbose; + %include { + use super::super::token::*; + use crate::{Arena, Expression, Function, LocalVariable, Module}; + } + %token #[derive(Debug)] pub enum Token {}; + %extra_argument Module; + %extra_token TokenMetadata; + %type Identifier String; + %type IntConstant i64; + %type UintConstant u64; + %type FloatConstant f32; + %type BoolConstant bool; + %type DoubleConstant f64; + %type String String; + %type arg_list Vec; + %type function_definition Function; + + %left Else; + %right Assign; + %left Or; + %left And; + %nonassoc Equal NotEqual; + %nonassoc Less LessEq Greater GreaterEq; + %left Plus Minus; + %left Mult Div; + %nonassoc Not; + + root ::= version_pragma translation_unit; + version_pragma ::= Version IntConstant Identifier?; + + // expression + variable_identifier ::= Identifier; + + primary_expression ::= variable_identifier; + primary_expression ::= IntConstant; + primary_expression ::= UintConstant; + primary_expression ::= FloatConstant; + primary_expression ::= BoolConstant; + primary_expression ::= DoubleConstant; + primary_expression ::= LeftParen expression RightParen; + + postfix_expression ::= primary_expression; + postfix_expression ::= postfix_expression LeftBracket integer_expression RightBracket; + postfix_expression ::= function_call; + postfix_expression ::= postfix_expression Dot FieldSelection; + postfix_expression ::= postfix_expression IncOp; + postfix_expression ::= postfix_expression DecOp; + + integer_expression ::= expression; + + function_call ::= function_call_generic; + function_call_generic ::= function_call_header_with_parameters RightParen; + function_call_generic ::= function_call_header_no_parameters RightParen; + function_call_header_no_parameters ::= function_call_header Void; + function_call_header_no_parameters ::= function_call_header; + function_call_header_with_parameters ::= function_call_header assignment_expression; + function_call_header_with_parameters ::= function_call_header_with_parameters Comma assignment_expression; + function_call_header ::= function_identifier LeftParen; + + // Grammar Note: Constructors look like functions, but lexical analysis recognized most of them as + // keywords. They are now recognized through “type_specifier”. + // Methods (.length), subroutine array calls, and identifiers are recognized through postfix_expression. + function_identifier ::= type_specifier; + function_identifier ::= postfix_expression; + + unary_expression ::= postfix_expression; + unary_expression ::= IncOp unary_expression; + unary_expression ::= DecOp unary_expression; + unary_expression ::= unary_operator unary_expression; + unary_operator ::= Plus; + unary_operator ::= Dash; + unary_operator ::= Bang; + unary_operator ::= Tilde; + multiplicative_expression ::= unary_expression; + multiplicative_expression ::= multiplicative_expression Star unary_expression; + multiplicative_expression ::= multiplicative_expression Slash unary_expression; + multiplicative_expression ::= multiplicative_expression Percent unary_expression; + additive_expression ::= multiplicative_expression; + additive_expression ::= additive_expression Plus multiplicative_expression; + additive_expression ::= additive_expression Dash multiplicative_expression; + shift_expression ::= additive_expression; + shift_expression ::= shift_expression LeftOp additive_expression; + shift_expression ::= shift_expression RightOp additive_expression; + relational_expression ::= shift_expression; + relational_expression ::= relational_expression LeftAngle shift_expression; + relational_expression ::= relational_expression RightAngle shift_expression; + relational_expression ::= relational_expression LeOp shift_expression; + relational_expression ::= relational_expression GeOp shift_expression; + equality_expression ::= relational_expression; + equality_expression ::= equality_expression EqOp relational_expression; + equality_expression ::= equality_expression NeOp relational_expression; + and_expression ::= equality_expression; + and_expression ::= and_expression Ampersand equality_expression; + exclusive_or_expression ::= and_expression; + exclusive_or_expression ::= exclusive_or_expression Caret and_expression; + inclusive_or_expression ::= exclusive_or_expression; + inclusive_or_expression ::= inclusive_or_expression VerticalBar exclusive_or_expression; + logical_and_expression ::= inclusive_or_expression; + logical_and_expression ::= logical_and_expression AndOp inclusive_or_expression; + logical_xor_expression ::= logical_and_expression; + logical_xor_expression ::= logical_xor_expression XorOp logical_and_expression; + logical_or_expression ::= logical_xor_expression; + logical_or_expression ::= logical_or_expression OrOp logical_xor_expression; + + conditional_expression ::= logical_or_expression; + conditional_expression ::= logical_or_expression Question expression Colon assignment_expression; + + assignment_expression ::= conditional_expression; + assignment_expression ::= unary_expression assignment_operator assignment_expression; + + assignment_operator ::= Equal; + assignment_operator ::= MulAssign; + assignment_operator ::= DivAssign; + assignment_operator ::= ModAssign; + assignment_operator ::= AddAssign; + assignment_operator ::= SubAssign; + assignment_operator ::= LeftAssign; + assignment_operator ::= RightAssign; + assignment_operator ::= AndAssign; + assignment_operator ::= XorAssign; + assignment_operator ::= OrAssign; + + expression ::= assignment_expression; + expression ::= expression Comma assignment_expression; + + // statement + statement ::= compound_statement; + statement ::= simple_statement; + + // Grammar Note: labeled statements for SWITCH only; 'goto' is not supported. + //simple_statement ::= declaration_statement; + simple_statement ::= expression_statement; + + compound_statement ::= LeftBrace RightBrace; + compound_statement ::= LeftBrace statement_list RightBrace; + + compound_statement_no_new_scope ::= LeftBrace RightBrace; + compound_statement_no_new_scope ::= LeftBrace statement_list RightBrace; + + statement_list ::= statement(s) { /*vec![s]*/ } + statement_list ::= statement_list/*(mut ss)*/ statement(s) { /*ss.push(s); ss*/ } + + expression_statement ::= Semicolon; + expression_statement ::= expression Semicolon; + + + + // function + function_prototype ::= function_declarator RightParen; + function_declarator ::= function_header; + function_header ::= fully_specified_type Identifier LeftParen; + + // type + fully_specified_type ::= type_specifier; + type_specifier ::= type_specifier_nonarray; + + type_specifier_nonarray ::= Void; + type_specifier_nonarray ::= Vec4; + //TODO: remaining types + + // misc + translation_unit ::= external_declaration; + translation_unit ::= translation_unit external_declaration; + + external_declaration ::= function_definition(f) { extra.functions.append(f); } + + function_definition ::= function_prototype compound_statement_no_new_scope { + Function { + name: Some(String::from("main")), + parameter_types: vec![], + return_type: None, + global_usage: vec![], + local_variables: Arena::::new(), + expressions: Arena::::new(), + body: vec![], + } + }; +} + +pub use parser::*; diff --git a/src/front/glsl_new/token.rs b/src/front/glsl_new/token.rs new file mode 100644 index 0000000000..31daf24cb1 --- /dev/null +++ b/src/front/glsl_new/token.rs @@ -0,0 +1,7 @@ +use std::ops::Range; + +#[derive(Debug, Clone)] +pub struct TokenMetadata { + pub line: usize, + pub chars: Range, +} diff --git a/src/front/mod.rs b/src/front/mod.rs index cae9301288..7e169017a0 100644 --- a/src/front/mod.rs +++ b/src/front/mod.rs @@ -2,6 +2,8 @@ #[cfg(feature = "glsl")] pub mod glsl; +#[cfg(feature = "glsl-new")] +pub mod glsl_new; #[cfg(feature = "spirv")] pub mod spv; pub mod wgsl; diff --git a/test-data/simple.frag b/test-data/simple.frag new file mode 100644 index 0000000000..2aea31896a --- /dev/null +++ b/test-data/simple.frag @@ -0,0 +1,5 @@ +#version 450 core + +void main() { + gl_FragDepth = 0; +} diff --git a/test-data/simple.vert b/test-data/simple.vert new file mode 100644 index 0000000000..2a0aaea1cc --- /dev/null +++ b/test-data/simple.vert @@ -0,0 +1,5 @@ +#version 450 core + +void main() { + gl_Position = vec4(1); +} diff --git a/test-data/simple.wgsl b/test-data/simple.wgsl new file mode 100644 index 0000000000..35afac0878 --- /dev/null +++ b/test-data/simple.wgsl @@ -0,0 +1,8 @@ +# vertex +[[builtin position]] var o_position : vec4; + +fn main() -> void { + o_position = vec4(1); + return; +} +entry_point vertex as "main" = main;