# The CoffeeScript parser is generated by [Jison](http://github.com/zaach/jison)
# from this grammar file. Jison is a bottom-up parser generator, similar in
# style to [Bison](http://www.gnu.org/software/bison), implemented in JavaScript.
# It can recognize [LALR(1), LR(0), SLR(1), and LR(1)](http://en.wikipedia.org/wiki/LR_grammar)
# type grammars. To create the Jison parser, we list the pattern to match
# on the left-hand side, and the action to take (usually the creation of syntax
# tree nodes) on the right. As the parser runs, it
# shifts tokens from our token stream, from left to right, and
# [attempts to match](http://en.wikipedia.org/wiki/Bottom-up_parsing)
# the token sequence against the rules below. When a match can be made, it
# reduces into the [nonterminal](http://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols)
# (the enclosing name at the top), and we proceed from there.
#
# If you run the `cake build:parser` command, Jison constructs a parse table
# from our rules and saves it into `lib/parser.js`.

# The only dependency is on the **Jison.Parser**.
Parser: require('jison').Parser

# Jison DSL
# ---------

# Since we're going to be wrapped in a function by Jison in any case, if our
# action immediately returns a value, we can optimize by removing the function
# wrapper and just returning the value directly. The pattern matches the text
# of a zero-argument function whose body is a single `return`, and its one
# capture group holds the returned expression.
unwrap: /function\s*\(\)\s*\{\s*return\s*([\s\S]*);\s*\}/

# Our handy DSL for Jison grammar generation, thanks to
# [Tim Caswell](http://github.com/creationix). For every rule in the grammar,
# we pass the pattern-defining string, the action to run, and extra options,
# optionally. If no action is specified, we simply pass along the value of the
# first symbol in the pattern (`$1`).
# Builds a single grammar alternative as a `[pattern, action source, options]`
# triple. With no action, the alternative's value defaults to `$1`. Otherwise
# the action is converted to its source text: when that text matches **unwrap**
# only the returned expression is kept, and in every other case the function
# text is wrapped in an immediate invocation.
o: (pattern_string, action, options) ->
  return [pattern_string, '$$ = $1;', options] unless action
  action: if match: (action + '').match(unwrap) then match[1] else "($action())"
  [pattern_string, "$$ = $action;", options]

# Grammatical Rules
# -----------------

# In all of the rules that follow, you'll see the name of the nonterminal as
# the key to a list of alternative matches. With each match's action, the
# dollar-sign variables are provided by Jison as references to the value of
# their numeric position, so in this rule:
#
#     "Expression UNLESS Expression"
#
# `$1` would be the value of the first `Expression`, `$2` would be the token
# for the `UNLESS` terminal, and `$3` would be the value of the second
# `Expression`.
grammar: {

  # The **Root** is the top-level node in the syntax tree. Since we parse bottom-up,
  # all parsing must end here.
  Root: [
    o "", -> new Expressions()
    o "TERMINATOR", -> new Expressions()
    o "Expressions"
    o "Block TERMINATOR"
  ]

  # Any list of expressions or method body, separated by line breaks or
  # semicolons.
  Expressions: [
    o "Expression", -> Expressions.wrap [$1]
    o "Expressions TERMINATOR Expression", -> $1.push $3
    o "Expressions TERMINATOR"
  ]

  # All the different types of expressions in our language. The basic unit of
  # CoffeeScript is the **Expression** -- you'll notice that there is no
  # "statement" nonterminal. Expressions serve as the building blocks
  # of many other rules, making them somewhat circular.
  Expression: [
    o "Value"
    o "Call"
    o "Curry"
    o "Code"
    o "Operation"
    o "Assign"
    o "If"
    o "Try"
    o "Throw"
    o "Return"
    o "While"
    o "For"
    o "Switch"
    o "Extends"
    o "Class"
    o "Splat"
    o "Existence"
    o "Comment"
    o "Extension"
  ]

  # An indented block of expressions. Note that the [Rewriter](rewriter.html)
  # will convert some postfix forms into blocks for us, by adjusting the
  # token stream.
  Block: [
    o "INDENT Expressions OUTDENT", -> $2
    o "INDENT OUTDENT", -> new Expressions()
    o "TERMINATOR Comment", -> Expressions.wrap [$2]
  ]

  # A literal identifier, a variable name or property.
  Identifier: [
    o "IDENTIFIER", -> new LiteralNode yytext
  ]

  # Alphanumerics are separated from the other **Literal** matchers because
  # they can also serve as keys in object literals.
  AlphaNumeric: [
    o "NUMBER", -> new LiteralNode yytext
    o "STRING", -> new LiteralNode yytext
  ]

  # All of our immediate values. These can (in general), be passed straight
  # through and printed to JavaScript.
  Literal: [
    o "AlphaNumeric"
    o "JS", -> new LiteralNode yytext
    o "REGEX", -> new LiteralNode yytext
    o "BREAK", -> new LiteralNode yytext
    o "CONTINUE", -> new LiteralNode yytext
    o "TRUE", -> new LiteralNode true
    o "FALSE", -> new LiteralNode false
    o "YES", -> new LiteralNode true
    o "NO", -> new LiteralNode false
    o "ON", -> new LiteralNode true
    o "OFF", -> new LiteralNode false
  ]

  # Assignment of a variable, property, or index to a value.
  Assign: [
    o "Value ASSIGN Expression", -> new AssignNode $1, $3
  ]

  # Assignment when it happens within an object literal. The difference from
  # the ordinary **Assign** is that these allow numbers and strings as keys.
  AssignObj: [
    o "Identifier ASSIGN Expression", -> new AssignNode new ValueNode($1), $3, 'object'
    o "AlphaNumeric ASSIGN Expression", -> new AssignNode new ValueNode($1), $3, 'object'
    o "Comment"
  ]

  # A return statement from a function body.
  Return: [
    o "RETURN Expression", -> new ReturnNode $2
    o "RETURN", -> new ReturnNode new ValueNode new LiteralNode 'null'
  ]

  # A comment. Because CoffeeScript passes comments through to JavaScript, we
  # have to parse comments like any other construct, and identify all of the
  # positions in which they can occur in the grammar.
  Comment: [
    o "COMMENT", -> new CommentNode yytext
  ]

  # [The existential operator](http://jashkenas.github.com/coffee-script/#existence).
  Existence: [
    o "Expression ?", -> new ExistenceNode $1
  ]

  # The **Code** node is the function literal. It's defined by an indented block
  # of **Expressions** preceded by a function arrow, with an optional parameter
  # list.
  Code: [
    o "PARAM_START ParamList PARAM_END FuncGlyph Block", -> new CodeNode $2, $5, $4
    o "FuncGlyph Block", -> new CodeNode [], $2, $1
  ]

  # CoffeeScript has two different symbols for functions. `->` is for ordinary
  # functions, and `=>` is for functions bound to the current value of *this*.
  FuncGlyph: [
    o "->", -> 'func'
    o "=>", -> 'boundfunc'
  ]

  # The list of parameters that a function accepts can be of any length.
  ParamList: [
    o "", -> []
    o "Param", -> [$1]
    o "ParamList , Param", -> $1.concat [$3]
  ]

  # A single parameter in a function definition can be ordinary, or a splat
  # that hoovers up the remaining arguments.
  Param: [
    o "PARAM", -> new LiteralNode yytext
    o "Param . . .", -> new SplatNode $1
  ]

  # A splat that occurs outside of a parameter list.
  Splat: [
    o "Expression . . .", -> new SplatNode $1
  ]

  # The types of things that can be treated as values -- assigned to, invoked
  # as functions, indexed into, named as a class, etc.
  Value: [
    o "Identifier", -> new ValueNode $1
    o "Literal", -> new ValueNode $1
    o "Array", -> new ValueNode $1
    o "Object", -> new ValueNode $1
    o "Parenthetical", -> new ValueNode $1
    o "Range", -> new ValueNode $1
    o "This"
    o "Value Accessor", -> $1.push $2
    o "Invocation Accessor", -> new ValueNode $1, [$2]
  ]

  # The general group of accessors into an object, by property, by prototype
  # or by array index or slice.
  Accessor: [
    o "PROPERTY_ACCESS Identifier", -> new AccessorNode $2
    o "PROTOTYPE_ACCESS Identifier", -> new AccessorNode $2, 'prototype'
    o "::", -> new AccessorNode(new LiteralNode('prototype'))
    o "SOAK_ACCESS Identifier", -> new AccessorNode $2, 'soak'
    o "Index"
    o "Slice", -> new SliceNode $1
  ]

  # Indexing into an object or array using bracket notation.
  Index: [
    o "INDEX_START Expression INDEX_END", -> new IndexNode $2
    o "SOAKED_INDEX_START Expression SOAKED_INDEX_END", -> new IndexNode $2, 'soak'
  ]

  # In CoffeeScript, an object literal is simply a list of assignments.
  Object: [
    o "{ AssignList }", -> new ObjectNode $2
    o "{ IndentedAssignList }", -> new ObjectNode $2
  ]

  # Class definitions have optional bodies of prototype property assignments,
  # and optional references to the superclass.
  Class: [
    o "CLASS Value", -> new ClassNode $2
    o "CLASS Value EXTENDS Value", -> new ClassNode $2, $4
    o "CLASS Value IndentedAssignList", -> new ClassNode $2, null, $3
    o "CLASS Value EXTENDS Value IndentedAssignList", -> new ClassNode $2, $4, $5
  ]

  # Assignment of properties within an object literal can be separated by
  # comma, as in JavaScript, or simply by newline.
  AssignList: [
    o "", -> []
    o "AssignObj", -> [$1]
    o "AssignList , AssignObj", -> $1.concat [$3]
    o "AssignList TERMINATOR AssignObj", -> $1.concat [$3]
    o "AssignList , TERMINATOR AssignObj", -> $1.concat [$4]
  ]

  # An **AssignList** within a block indentation.
  IndentedAssignList: [
    o "INDENT AssignList OUTDENT", -> $2
  ]

  # The three flavors of function call: normal, object instantiation with `new`,
  # and calling `super()`
  Call: [
    o "Invocation"
    o "NEW Invocation", -> $2.new_instance()
    o "Super"
  ]

  # Currying: a **Value**, the `CURRY` token, and a parenthesized argument
  # list are combined into a CurryNode.
  Curry: [
    o "Value CURRY Arguments", -> new CurryNode $1, $3
  ]

  # Extending an object by setting its prototype chain to reference a parent
  # object.
  Extends: [
    o "Value EXTENDS Value", -> new ExtendsNode $1, $3
  ]

  # Ordinary function invocation, or a chained series of calls.
  Invocation: [
    o "Value Arguments", -> new CallNode $1, $2
    o "Invocation Arguments", -> new CallNode $1, $2
  ]

  # The list of arguments to a function call.
  Arguments: [
    o "CALL_START ArgList CALL_END", -> $2
  ]

  # Calling super.
  Super: [
    o "SUPER CALL_START ArgList CALL_END", -> new CallNode 'super', $3
  ]

  # A reference to the *this* current object, either naked or to a property.
  This: [
    o "@", -> new ValueNode new LiteralNode 'this'
    o "@ Identifier", -> new ValueNode new LiteralNode('this'), [new AccessorNode($2)]
  ]

  # The CoffeeScript range literal. Two dots build a plain RangeNode; three
  # dots pass `true` as the extra third argument.
  Range: [
    o "[ Expression . . Expression ]", -> new RangeNode $2, $5
    o "[ Expression . . . Expression ]", -> new RangeNode $2, $6, true
  ]

  # The slice literal.
  Slice: [
    o "INDEX_START Expression . . Expression INDEX_END", -> new RangeNode $2, $5
    o "INDEX_START Expression . . . Expression INDEX_END", -> new RangeNode $2, $6, true
  ]

  # The array literal.
  Array: [
    o "[ ArgList ]", -> new ArrayNode $2
  ]

  # The **ArgList** is both the list of objects passed into a function call,
  # as well as the contents of an array literal
  # (i.e. comma-separated expressions). Newlines work as well.
  ArgList: [
    o "", -> []
    o "Expression", -> [$1]
    o "INDENT Expression", -> [$2]
    o "ArgList , Expression", -> $1.concat [$3]
    o "ArgList TERMINATOR Expression", -> $1.concat [$3]
    o "ArgList , TERMINATOR Expression", -> $1.concat [$4]
    o "ArgList , INDENT Expression", -> $1.concat [$4]
    o "ArgList OUTDENT"
  ]

  # Just simple, comma-separated, required arguments (no fancy syntax). We need
  # this to be separate from the **ArgList** for use in **Switch** blocks, where
  # having the newlines wouldn't make sense.
  SimpleArgs: [
    o "Expression"
    o "SimpleArgs , Expression", ->
      if $1 instanceof Array then $1.concat([$3]) else [$1].concat([$3])
  ]

  # The variants of *try/catch/finally* exception handling blocks.
  Try: [
    o "TRY Block Catch", -> new TryNode $2, $3[0], $3[1]
    o "TRY Block FINALLY Block", -> new TryNode $2, null, null, $4
    o "TRY Block Catch FINALLY Block", -> new TryNode $2, $3[0], $3[1], $5
  ]

  # A catch clause names its error and runs a block of code.
  Catch: [
    o "CATCH Identifier Block", -> [$2, $3]
  ]

  # Throw an exception object.
  Throw: [
    o "THROW Expression", -> new ThrowNode $2
  ]

  # Parenthetical expressions. Note that the **Parenthetical** is a **Value**,
  # not an **Expression**, so if you need to use an expression in a place
  # where only values are accepted, wrapping it in parentheses will always do
  # the trick.
  Parenthetical: [
    o "( Expression )", -> new ParentheticalNode $2
  ]

  # A language extension to CoffeeScript from the outside. We simply pass
  # it through unaltered.
  Extension: [
    o "EXTENSION", -> yytext
  ]

  # The condition portion of a while loop.
  WhileSource: [
    o "WHILE Expression", -> new WhileNode $2
    o "WHILE Expression WHEN Expression", -> new WhileNode $2, {filter : $4}
  ]

  # The while loop can either be normal, with a block of expressions to execute,
  # or postfix, with a single expression. There is no do..while.
  While: [
    o "WhileSource Block", -> $1.add_body $2
    o "Expression WhileSource", -> $2.add_body Expressions.wrap [$1]
  ]

  # Array, object, and range comprehensions, at the most generic level.
  # Comprehensions can either be normal, with a block of expressions to execute,
  # or postfix, with a single expression.
  For: [
    o "Expression FOR ForVariables ForSource", -> new ForNode $1, $4, $3[0], $3[1]
    o "FOR ForVariables ForSource Block", -> new ForNode $4, $3, $2[0], $2[1]
  ]

  # An array or range comprehension has variables for the current element and
  # (optional) reference to the current index. Or, *key, value*, in the case
  # of object comprehensions.
  ForVariables: [
    o "Identifier", -> [$1]
    o "Identifier , Identifier", -> [$1, $3]
  ]

  # The source of a comprehension is an array or object with an optional filter
  # clause. If it's an array comprehension, you can also choose to step through
  # in fixed-size increments.
  ForSource: [
    o "IN Expression", -> {source: $2}
    o "OF Expression", -> {source: $2, object: true}
    o "ForSource WHEN Expression", -> $1.filter: $3; $1
    o "ForSource BY Expression", -> $1.step: $3; $1
  ]

  # The CoffeeScript switch/when/else block replaces the JavaScript
  # switch/case/default by compiling into an if-else chain.
  Switch: [
    o "SWITCH Expression INDENT Whens OUTDENT", -> $4.rewrite_condition $2
    o "SWITCH Expression INDENT Whens ELSE Block OUTDENT", -> $4.rewrite_condition($2).add_else $6, true
  ]

  # The inner list of whens is left recursive. At code-generation time, the
  # IfNode will rewrite them into a proper chain.
  Whens: [
    o "When"
    o "Whens When", -> $1.push $2
  ]

  # An individual **When** clause, with action.
  When: [
    o "LEADING_WHEN SimpleArgs Block", -> new IfNode $2, $3, null, {statement: true}
    o "LEADING_WHEN SimpleArgs Block TERMINATOR", -> new IfNode $2, $3, null, {statement: true}
    o "Comment TERMINATOR When", -> $3.comment: $1; $3
  ]

  # The most basic form of *if* is a condition and an action. The following
  # if-related rules are broken up along these lines in order to avoid
  # ambiguity.
  IfStart: [
    o "IF Expression Block", -> new IfNode $2, $3
    o "IfStart ElsIf", -> $1.add_else $2
  ]

  # An **IfStart** can optionally be followed by an else block.
  IfBlock: [
    o "IfStart"
    o "IfStart ELSE Block", -> $1.add_else $3
  ]

  # An *else if* continuation of the *if* expression.
  ElsIf: [
    o "ELSE IF Expression Block", -> (new IfNode($3, $4)).force_statement()
  ]

  # The full complement of *if* expressions, including postfix one-liner
  # *if* and *unless*.
  If: [
    o "IfBlock"
    o "Expression IF Expression", -> new IfNode $3, Expressions.wrap([$1]), null, {statement: true}
    o "Expression UNLESS Expression", -> new IfNode $3, Expressions.wrap([$1]), null, {statement: true, invert: true}
  ]

  # Arithmetic and logical operators, working on one or more operands.
  # Here they are grouped by order of precedence. The actual precedence rules
  # are defined at the bottom of the page. It would be shorter if we could
  # combine most of these rules into a single generic *Operand OpSymbol Operand*
  # -type rule, but in order to make the precedence binding possible, separate
  # rules are necessary.
  Operation: [
    o "! Expression", -> new OpNode '!', $2
    o "!! Expression", -> new OpNode '!!', $2
    o("- Expression", (-> new OpNode('-', $2)), {prec: 'UMINUS'})
    o("+ Expression", (-> new OpNode('+', $2)), {prec: 'UPLUS'})
    o "NOT Expression", -> new OpNode 'not', $2
    o "~ Expression", -> new OpNode '~', $2
    o "-- Expression", -> new OpNode '--', $2
    o "++ Expression", -> new OpNode '++', $2
    o "DELETE Expression", -> new OpNode 'delete', $2
    o "TYPEOF Expression", -> new OpNode 'typeof', $2
    o "Expression --", -> new OpNode '--', $1, null, true
    o "Expression ++", -> new OpNode '++', $1, null, true

    o "Expression * Expression", -> new OpNode '*', $1, $3
    o "Expression / Expression", -> new OpNode '/', $1, $3
    o "Expression % Expression", -> new OpNode '%', $1, $3

    o "Expression + Expression", -> new OpNode '+', $1, $3
    o "Expression - Expression", -> new OpNode '-', $1, $3

    o "Expression << Expression", -> new OpNode '<<', $1, $3
    o "Expression >> Expression", -> new OpNode '>>', $1, $3
    o "Expression >>> Expression", -> new OpNode '>>>', $1, $3

    o "Expression & Expression", -> new OpNode '&', $1, $3
    o "Expression | Expression", -> new OpNode '|', $1, $3
    o "Expression ^ Expression", -> new OpNode '^', $1, $3

    o "Expression <= Expression", -> new OpNode '<=', $1, $3
    o "Expression < Expression", -> new OpNode '<', $1, $3
    o "Expression > Expression", -> new OpNode '>', $1, $3
    o "Expression >= Expression", -> new OpNode '>=', $1, $3

    o "Expression == Expression", -> new OpNode '==', $1, $3
    o "Expression != Expression", -> new OpNode '!=', $1, $3
    o "Expression IS Expression", -> new OpNode 'is', $1, $3
    o "Expression ISNT Expression", -> new OpNode 'isnt', $1, $3

    o "Expression && Expression", -> new OpNode '&&', $1, $3
    o "Expression || Expression", -> new OpNode '||', $1, $3
    o "Expression AND Expression", -> new OpNode 'and', $1, $3
    o "Expression OR Expression", -> new OpNode 'or', $1, $3
    o "Expression ? Expression", -> new OpNode '?', $1, $3

    o "Expression -= Expression", -> new OpNode '-=', $1, $3
    o "Expression += Expression", -> new OpNode '+=', $1, $3
    o "Expression /= Expression", -> new OpNode '/=', $1, $3
    o "Expression *= Expression", -> new OpNode '*=', $1, $3
    o "Expression %= Expression", -> new OpNode '%=', $1, $3
    o "Expression ||= Expression", -> new OpNode '||=', $1, $3
    o "Expression &&= Expression", -> new OpNode '&&=', $1, $3
    o "Expression ?= Expression", -> new OpNode '?=', $1, $3

    o "Expression INSTANCEOF Expression", -> new OpNode 'instanceof', $1, $3
    o "Expression IN Expression", -> new OpNode 'in', $1, $3
  ]

}

# Precedence
# ----------

# Operators at the top of this list have higher precedence than the ones lower
# down. Following these rules is what makes `2 + 3 * 4` parse as:
#
#     2 + (3 * 4)
#
# And not:
#
#     (2 + 3) * 4
operators: [
  ["left", '?']
  ["nonassoc", 'UMINUS', 'UPLUS', 'NOT', '!', '!!', '~', '++', '--']
  ["left", '*', '/', '%']
  ["left", '+', '-']
  ["left", '<<', '>>', '>>>']
  ["left", '&', '|', '^']
  ["left", '<=', '<', '>', '>=']
  ["right", 'DELETE', 'INSTANCEOF', 'TYPEOF']
  ["left", '==', '!=', 'IS', 'ISNT']
  ["left", '&&', '||', 'AND', 'OR']
  ["right", '-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=']
  ["left", '.']
  ["right", 'INDENT']
  ["left", 'OUTDENT']
  ["right", 'WHEN', 'LEADING_WHEN', 'IN', 'OF', 'BY', 'THROW']
  ["right", 'FOR', 'NEW', 'SUPER', 'CLASS']
  ["left", 'EXTENDS']
  ["right", 'ASSIGN', 'RETURN']
  ["right", '->', '=>', 'UNLESS', 'IF', 'ELSE', 'WHILE']
]

# Wrapping Up
# -----------

# Finally, now that we have our **grammar** and our **operators**, we can create
# our **Jison.Parser**. We do this by processing all of our rules, recording all
# terminals (every symbol which does not appear as the name of a rule above)
# as "tokens".
# Walk every alternative of every rule. Each space-separated symbol of a
# pattern that is not itself the name of a rule is recorded as a terminal,
# and the actions of the `Root` rule are prefixed with `return`.
tokens: []
for rule_name, rule_alternatives of grammar
  grammar[rule_name]: for alternative in rule_alternatives
    for symbol in alternative[0].split(' ')
      if not grammar[symbol]
        tokens.push(symbol)
    if rule_name is 'Root'
      alternative[1]: "return ${alternative[1]}"
    alternative

# Build the **Parser** from the space-joined terminal **tokens**, the
# **grammar** rules, and the name of the start symbol. The operators are
# reversed because Jison orders precedence from low to high, while the table
# above is written high to low
# (as in [Yacc](http://dinosaur.compilertools.net/yacc/index.html)).
exports.parser: new Parser({
  tokens: tokens.join(' ')
  bnf: grammar
  operators: operators.reverse()
  startSymbol: 'Root'
})