mirror of
https://github.com/jashkenas/coffeescript.git
synced 2026-02-16 10:34:55 -05:00
adding complete documentation for the grammar
This commit is contained in:
@@ -1,20 +1,521 @@
|
||||
# The CoffeeScript parser is generated by [Jison](http://github.com/zaach/jison)
|
||||
# from this grammar file. Jison is a bottom-up parser generator, similar in
|
||||
# style to [Bison](http://www.gnu.org/software/bison), implemented in JavaScript.
|
||||
# It can recognize [LALR(1), LR(0), SLR(1), and LR(1)](http://en.wikipedia.org/wiki/LR_grammar)
|
||||
# type grammars. To create the Jison parser, we list the pattern to match
|
||||
# on the left-hand side, and the action to take (usually the creation of syntax
|
||||
# tree nodes) on the right. As the parser runs, it
|
||||
# shifts tokens from our token stream, from left to right, and
|
||||
# [attempts to match](http://en.wikipedia.org/wiki/Bottom-up_parsing)
|
||||
# the token sequence against the rules below. When a match can be made, it
|
||||
# reduces into the [nonterminal](http://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols)
|
||||
# (the enclosing name at the top), and we proceed from there.
|
||||
#
|
||||
# If you run the `cake build:parser` command, Jison constructs a parse table
|
||||
# from our rules and saves it into `lib/parser.js`.
|
||||
|
||||
# The only dependency is on the **Jison.Parser**.
|
||||
Parser: require('jison').Parser
|
||||
|
||||
# DSL ===================================================================
|
||||
# Jison DSL
|
||||
# ---------
|
||||
|
||||
# Detect functions: [
|
||||
# Since we're going to be wrapped in a function by Jison in any case, if our
|
||||
# action immediately returns a value, we can optimize by removing the function
|
||||
# wrapper and just returning the value directly.
|
||||
unwrap: /function\s*\(\)\s*\{\s*return\s*([\s\S]*);\s*\}/
|
||||
|
||||
# Quickie DSL for Jison access.
|
||||
o: (pattern_string, func, options) ->
|
||||
if func
|
||||
func: if match: (func + '').match(unwrap) then match[1] else "($func())"
|
||||
[pattern_string, "$$ = $func;", options]
|
||||
else
|
||||
[pattern_string, '$$ = $1;', options]
|
||||
# Our handy DSL for Jison grammar generation, thanks to
|
||||
# [Tim Caswell](http://github.com/creationix). For every rule in the grammar,
|
||||
# we pass the pattern-defining string, the action to run, and extra options,
|
||||
# optionally. If no action is specified, we simply pass the value of the
|
||||
# previous nonterminal.
|
||||
o: (pattern_string, action, options) ->
|
||||
return [pattern_string, '$$ = $1;', options] unless action
|
||||
action: if match: (action + '').match(unwrap) then match[1] else "($action())"
|
||||
[pattern_string, "$$ = $action;", options]
|
||||
|
||||
# Precedence ===========================================================
|
||||
# Grammatical Rules
|
||||
# -----------------
|
||||
|
||||
# In all of the rules that follow, you'll see the name of the nonterminal as
|
||||
# the key to a list of alternative matches. With each match's action, the
|
||||
# dollar-sign variables are provided by Jison as references to the value of
|
||||
# their numeric position, so in this rule:
|
||||
#
|
||||
# "Expression UNLESS Expression"
|
||||
#
|
||||
# `$1` would be the value of the first `Expression`, `$2` would be the token
|
||||
# for the `UNLESS` terminal, and `$3` would be the value of the second
|
||||
# `Expression`.
|
||||
grammar: {
|
||||
|
||||
# The **Root** is the top-level node in the syntax tree. Since we parse bottom-up,
|
||||
# all parsing must end here.
|
||||
Root: [
|
||||
o "", -> new Expressions()
|
||||
o "TERMINATOR", -> new Expressions()
|
||||
o "Expressions"
|
||||
o "Block TERMINATOR"
|
||||
]
|
||||
|
||||
# Any list of expressions or method body, separated by line breaks or
|
||||
# semicolons.
|
||||
Expressions: [
|
||||
o "Expression", -> Expressions.wrap [$1]
|
||||
o "Expressions TERMINATOR Expression", -> $1.push $3
|
||||
o "Expressions TERMINATOR"
|
||||
]
|
||||
|
||||
# All the different types of expressions in our language. The basic unit of
|
||||
# CoffeeScript is the **Expression** -- you'll notice that there is no
|
||||
# "statement" nonterminal. Expressions serve as the building blocks
|
||||
# of many other rules, making them somewhat circular.
|
||||
Expression: [
|
||||
o "Value"
|
||||
o "Call"
|
||||
o "Code"
|
||||
o "Operation"
|
||||
o "Assign"
|
||||
o "If"
|
||||
o "Try"
|
||||
o "Throw"
|
||||
o "Return"
|
||||
o "While"
|
||||
o "For"
|
||||
o "Switch"
|
||||
o "Extends"
|
||||
o "Class"
|
||||
o "Splat"
|
||||
o "Existence"
|
||||
o "Comment"
|
||||
]
|
||||
|
||||
# An indented block of expressions. Note that the [Rewriter](rewriter.html)
|
||||
# will convert some postfix forms into blocks for us, by adjusting the
|
||||
# token stream.
|
||||
Block: [
|
||||
o "INDENT Expressions OUTDENT", -> $2
|
||||
o "INDENT OUTDENT", -> new Expressions()
|
||||
]
|
||||
|
||||
# A literal identifier, a variable name or property.
|
||||
Identifier: [
|
||||
o "IDENTIFIER", -> new LiteralNode yytext
|
||||
]
|
||||
|
||||
# Alphanumerics are separated from the other **Literal** matchers because
|
||||
# they can also serve as keys in object literals.
|
||||
AlphaNumeric: [
|
||||
o "NUMBER", -> new LiteralNode yytext
|
||||
o "STRING", -> new LiteralNode yytext
|
||||
]
|
||||
|
||||
# All of our immediate values. These can (in general) be passed straight
|
||||
# through and printed to JavaScript.
|
||||
Literal: [
|
||||
o "AlphaNumeric"
|
||||
o "JS", -> new LiteralNode yytext
|
||||
o "REGEX", -> new LiteralNode yytext
|
||||
o "BREAK", -> new LiteralNode yytext
|
||||
o "CONTINUE", -> new LiteralNode yytext
|
||||
o "TRUE", -> new LiteralNode true
|
||||
o "FALSE", -> new LiteralNode false
|
||||
o "YES", -> new LiteralNode true
|
||||
o "NO", -> new LiteralNode false
|
||||
o "ON", -> new LiteralNode true
|
||||
o "OFF", -> new LiteralNode false
|
||||
]
|
||||
|
||||
# Assignment of a variable, property, or index to a value.
|
||||
Assign: [
|
||||
o "Value ASSIGN Expression", -> new AssignNode $1, $3
|
||||
]
|
||||
|
||||
# Assignment when it happens within an object literal. The difference from
|
||||
# the ordinary **Assign** is that these allow numbers and strings as keys.
|
||||
AssignObj: [
|
||||
o "Identifier ASSIGN Expression", -> new AssignNode new ValueNode($1), $3, 'object'
|
||||
o "AlphaNumeric ASSIGN Expression", -> new AssignNode new ValueNode($1), $3, 'object'
|
||||
o "Comment"
|
||||
]
|
||||
|
||||
# A return statement from a function body.
|
||||
Return: [
|
||||
o "RETURN Expression", -> new ReturnNode $2
|
||||
o "RETURN", -> new ReturnNode new ValueNode new LiteralNode 'null'
|
||||
]
|
||||
|
||||
# A comment. Because CoffeeScript passes comments through to JavaScript, we
|
||||
# have to parse comments like any other construct, and identify all of the
|
||||
# positions in which they can occur in the grammar.
|
||||
Comment: [
|
||||
o "COMMENT", -> new CommentNode yytext
|
||||
]
|
||||
|
||||
# [The existential operator](http://jashkenas.github.com/coffee-script/#existence).
|
||||
Existence: [
|
||||
o "Expression ?", -> new ExistenceNode $1
|
||||
]
|
||||
|
||||
# The **Code** node is the function literal. It's defined by an indented block
|
||||
# of **Expressions** preceded by a function arrow, with an optional parameter
|
||||
# list.
|
||||
Code: [
|
||||
o "PARAM_START ParamList PARAM_END FuncGlyph Block", -> new CodeNode $2, $5, $4
|
||||
o "FuncGlyph Block", -> new CodeNode [], $2, $1
|
||||
]
|
||||
|
||||
# CoffeeScript has two different symbols for functions. `->` is for ordinary
|
||||
# functions, and `=>` is for functions bound to the current value of *this*.
|
||||
FuncGlyph: [
|
||||
o "->", -> 'func'
|
||||
o "=>", -> 'boundfunc'
|
||||
]
|
||||
|
||||
# The list of parameters that a function accepts can be of any length.
|
||||
ParamList: [
|
||||
o "", -> []
|
||||
o "Param", -> [$1]
|
||||
o "ParamList , Param", -> $1.concat [$3]
|
||||
]
|
||||
|
||||
# A single parameter in a function definition can be ordinary, or a splat
|
||||
# that hoovers up the remaining arguments.
|
||||
Param: [
|
||||
o "PARAM", -> new LiteralNode yytext
|
||||
o "Param . . .", -> new SplatNode $1
|
||||
]
|
||||
|
||||
# A splat that occurs outside of a parameter list.
|
||||
Splat: [
|
||||
o "Expression . . .", -> new SplatNode $1
|
||||
]
|
||||
|
||||
# The types of things that can be treated as values -- assigned to, invoked
|
||||
# as functions, indexed into, named as a class, etc.
|
||||
Value: [
|
||||
o "Identifier", -> new ValueNode $1
|
||||
o "Literal", -> new ValueNode $1
|
||||
o "Array", -> new ValueNode $1
|
||||
o "Object", -> new ValueNode $1
|
||||
o "Parenthetical", -> new ValueNode $1
|
||||
o "Range", -> new ValueNode $1
|
||||
o "This"
|
||||
o "Value Accessor", -> $1.push $2
|
||||
o "Invocation Accessor", -> new ValueNode $1, [$2]
|
||||
]
|
||||
|
||||
# The general group of accessors into an object, by property, by prototype
|
||||
# or by array index or slice.
|
||||
Accessor: [
|
||||
o "PROPERTY_ACCESS Identifier", -> new AccessorNode $2
|
||||
o "PROTOTYPE_ACCESS Identifier", -> new AccessorNode $2, 'prototype'
|
||||
o "SOAK_ACCESS Identifier", -> new AccessorNode $2, 'soak'
|
||||
o "Index"
|
||||
o "Slice", -> new SliceNode $1
|
||||
]
|
||||
|
||||
# Indexing into an object or array using bracket notation.
|
||||
Index: [
|
||||
o "INDEX_START Expression INDEX_END", -> new IndexNode $2
|
||||
o "SOAKED_INDEX_START Expression SOAKED_INDEX_END", -> new IndexNode $2, 'soak'
|
||||
]
|
||||
|
||||
# In CoffeeScript, an object literal is simply a list of assignments.
|
||||
Object: [
|
||||
o "{ AssignList }", -> new ObjectNode $2
|
||||
o "{ IndentedAssignList }", -> new ObjectNode $2
|
||||
]
|
||||
|
||||
# Class definitions have optional bodies of prototype property assignments,
|
||||
# and optional references to the superclass.
|
||||
Class: [
|
||||
o "CLASS Value", -> new ClassNode $2
|
||||
o "CLASS Value EXTENDS Value", -> new ClassNode $2, $4
|
||||
o "CLASS Value IndentedAssignList", -> new ClassNode $2, null, $3
|
||||
o "CLASS Value EXTENDS Value IndentedAssignList", -> new ClassNode $2, $4, $5
|
||||
]
|
||||
|
||||
# Assignment of properties within an object literal can be separated by
|
||||
# comma, as in JavaScript, or simply by newline.
|
||||
AssignList: [
|
||||
o "", -> []
|
||||
o "AssignObj", -> [$1]
|
||||
o "AssignList , AssignObj", -> $1.concat [$3]
|
||||
o "AssignList TERMINATOR AssignObj", -> $1.concat [$3]
|
||||
o "AssignList , TERMINATOR AssignObj", -> $1.concat [$4]
|
||||
]
|
||||
|
||||
# An **AssignList** within a block indentation.
|
||||
IndentedAssignList: [
|
||||
o "INDENT AssignList OUTDENT", -> $2
|
||||
]
|
||||
|
||||
# The three flavors of function call: normal, object instantiation with `new`,
|
||||
# and calling `super()`
|
||||
Call: [
|
||||
o "Invocation"
|
||||
o "NEW Invocation", -> $2.new_instance()
|
||||
o "Super"
|
||||
]
|
||||
|
||||
# Extending an object by setting its prototype chain to reference a parent
|
||||
# object.
|
||||
Extends: [
|
||||
o "Value EXTENDS Value", -> new ExtendsNode $1, $3
|
||||
]
|
||||
|
||||
# Ordinary function invocation, or a chained series of calls.
|
||||
Invocation: [
|
||||
o "Value Arguments", -> new CallNode $1, $2
|
||||
o "Invocation Arguments", -> new CallNode $1, $2
|
||||
]
|
||||
|
||||
# The list of arguments to a function call.
|
||||
Arguments: [
|
||||
o "CALL_START ArgList CALL_END", -> $2
|
||||
]
|
||||
|
||||
# Calling super.
|
||||
Super: [
|
||||
o "SUPER CALL_START ArgList CALL_END", -> new CallNode 'super', $3
|
||||
]
|
||||
|
||||
# A reference to the current *this* object, either naked or to a property.
|
||||
This: [
|
||||
o "@", -> new ValueNode new LiteralNode 'this'
|
||||
o "@ Identifier", -> new ValueNode new LiteralNode('this'), [new AccessorNode($2)]
|
||||
]
|
||||
|
||||
# The CoffeeScript range literal.
|
||||
Range: [
|
||||
o "[ Expression . . Expression ]", -> new RangeNode $2, $5
|
||||
o "[ Expression . . . Expression ]", -> new RangeNode $2, $6, true
|
||||
]
|
||||
|
||||
# The slice literal.
|
||||
Slice: [
|
||||
o "INDEX_START Expression . . Expression INDEX_END", -> new RangeNode $2, $5
|
||||
o "INDEX_START Expression . . . Expression INDEX_END", -> new RangeNode $2, $6, true
|
||||
]
|
||||
|
||||
# The array literal.
|
||||
Array: [
|
||||
o "[ ArgList ]", -> new ArrayNode $2
|
||||
]
|
||||
|
||||
# The **ArgList** is both the list of objects passed into a function call,
|
||||
# as well as the contents of an array literal
|
||||
# (i.e. comma-separated expressions). Newlines work as well.
|
||||
ArgList: [
|
||||
o "", -> []
|
||||
o "Expression", -> [$1]
|
||||
o "INDENT Expression", -> [$2]
|
||||
o "ArgList , Expression", -> $1.concat [$3]
|
||||
o "ArgList TERMINATOR Expression", -> $1.concat [$3]
|
||||
o "ArgList , TERMINATOR Expression", -> $1.concat [$4]
|
||||
o "ArgList , INDENT Expression", -> $1.concat [$4]
|
||||
o "ArgList OUTDENT"
|
||||
]
|
||||
|
||||
# Just simple, comma-separated, required arguments (no fancy syntax). We need
|
||||
# this to be separate from the **ArgList** for use in **Switch** blocks, where
|
||||
# having the newlines wouldn't make sense.
|
||||
SimpleArgs: [
|
||||
o "Expression"
|
||||
o "SimpleArgs , Expression", ->
|
||||
if $1 instanceof Array then $1.concat([$3]) else [$1].concat([$3])
|
||||
]
|
||||
|
||||
# The variants of *try/catch/finally* exception handling blocks.
|
||||
Try: [
|
||||
o "TRY Block Catch", -> new TryNode $2, $3[0], $3[1]
|
||||
o "TRY Block FINALLY Block", -> new TryNode $2, null, null, $4
|
||||
o "TRY Block Catch FINALLY Block", -> new TryNode $2, $3[0], $3[1], $5
|
||||
]
|
||||
|
||||
# A catch clause names its error and runs a block of code.
|
||||
Catch: [
|
||||
o "CATCH Identifier Block", -> [$2, $3]
|
||||
]
|
||||
|
||||
# Throw an exception object.
|
||||
Throw: [
|
||||
o "THROW Expression", -> new ThrowNode $2
|
||||
]
|
||||
|
||||
# Parenthetical expressions. Note that the **Parenthetical** is a **Value**,
|
||||
# not an **Expression**, so if you need to use an expression in a place
|
||||
# where only values are accepted, wrapping it in parentheses will always do
|
||||
# the trick.
|
||||
Parenthetical: [
|
||||
o "( Expression )", -> new ParentheticalNode $2
|
||||
]
|
||||
|
||||
# The condition portion of a while loop.
|
||||
WhileSource: [
|
||||
o "WHILE Expression", -> new WhileNode $2
|
||||
o "WHILE Expression WHEN Expression", -> new WhileNode $2, {filter : $4}
|
||||
]
|
||||
|
||||
# The while loop can either be normal, with a block of expressions to execute,
|
||||
# or postfix, with a single expression. There is no do..while.
|
||||
While: [
|
||||
o "WhileSource Block", -> $1.add_body $2
|
||||
o "Expression WhileSource", -> $2.add_body $1
|
||||
]
|
||||
|
||||
# Array, object, and range comprehensions, at the most generic level.
|
||||
# Comprehensions can either be normal, with a block of expressions to execute,
|
||||
# or postfix, with a single expression.
|
||||
For: [
|
||||
o "Expression FOR ForVariables ForSource", -> new ForNode $1, $4, $3[0], $3[1]
|
||||
o "FOR ForVariables ForSource Block", -> new ForNode $4, $3, $2[0], $2[1]
|
||||
]
|
||||
|
||||
# An array or range comprehension has variables for the current element and
|
||||
# (optional) reference to the current index. Or, *key, value*, in the case
|
||||
# of object comprehensions.
|
||||
ForVariables: [
|
||||
o "Identifier", -> [$1]
|
||||
o "Identifier , Identifier", -> [$1, $3]
|
||||
]
|
||||
|
||||
# The source of a comprehension is an array or object with an optional filter
|
||||
# clause. If it's an array comprehension, you can also choose to step through
|
||||
# in fixed-size increments.
|
||||
ForSource: [
|
||||
o "IN Expression", -> {source: $2}
|
||||
o "OF Expression", -> {source: $2, object: true}
|
||||
o "ForSource WHEN Expression", -> $1.filter: $3; $1
|
||||
o "ForSource BY Expression", -> $1.step: $3; $1
|
||||
]
|
||||
|
||||
# The CoffeeScript switch/when/else block replaces the JavaScript
|
||||
# switch/case/default by compiling into an if-else chain.
|
||||
Switch: [
|
||||
o "SWITCH Expression INDENT Whens OUTDENT", -> $4.rewrite_condition $2
|
||||
o "SWITCH Expression INDENT Whens ELSE Block OUTDENT", -> $4.rewrite_condition($2).add_else $6, true
|
||||
]
|
||||
|
||||
# The inner list of whens is left recursive. At code-generation time, the
|
||||
# IfNode will rewrite them into a proper chain.
|
||||
Whens: [
|
||||
o "When"
|
||||
o "Whens When", -> $1.push $2
|
||||
]
|
||||
|
||||
# An individual **When** clause, with action.
|
||||
When: [
|
||||
o "LEADING_WHEN SimpleArgs Block", -> new IfNode $2, $3, null, {statement: true}
|
||||
o "LEADING_WHEN SimpleArgs Block TERMINATOR", -> new IfNode $2, $3, null, {statement: true}
|
||||
o "Comment TERMINATOR When", -> $3.comment: $1; $3
|
||||
]
|
||||
|
||||
# The most basic form of *if* is a condition and an action. The following
|
||||
# if-related rules are broken up along these lines in order to avoid
|
||||
# ambiguity.
|
||||
IfStart: [
|
||||
o "IF Expression Block", -> new IfNode $2, $3
|
||||
o "IfStart ElsIf", -> $1.add_else $2
|
||||
]
|
||||
|
||||
# An **IfStart** can optionally be followed by an else block.
|
||||
IfBlock: [
|
||||
o "IfStart"
|
||||
o "IfStart ELSE Block", -> $1.add_else $3
|
||||
]
|
||||
|
||||
# An *else if* continuation of the *if* expression.
|
||||
ElsIf: [
|
||||
o "ELSE IF Expression Block", -> (new IfNode($3, $4)).force_statement()
|
||||
]
|
||||
|
||||
# The full complement of *if* expressions, including postfix one-liner
|
||||
# *if* and *unless*.
|
||||
If: [
|
||||
o "IfBlock"
|
||||
o "Expression IF Expression", -> new IfNode $3, Expressions.wrap([$1]), null, {statement: true}
|
||||
o "Expression UNLESS Expression", -> new IfNode $3, Expressions.wrap([$1]), null, {statement: true, invert: true}
|
||||
]
|
||||
|
||||
# Arithmetic and logical operators, working on one or more operands.
|
||||
# Here they are grouped by order of precedence. The actual precedence rules
|
||||
# are defined at the bottom of the page. It would be shorter if we could
|
||||
# combine most of these rules into a single generic *Operand OpSymbol Operand*
|
||||
# -type rule, but in order to make the precedence binding possible, separate
|
||||
# rules are necessary.
|
||||
Operation: [
|
||||
o "! Expression", -> new OpNode '!', $2
|
||||
o "!! Expression", -> new OpNode '!!', $2
|
||||
o("- Expression", (-> new OpNode('-', $2)), {prec: 'UMINUS'})
|
||||
o("+ Expression", (-> new OpNode('+', $2)), {prec: 'UPLUS'})
|
||||
o "NOT Expression", -> new OpNode 'not', $2
|
||||
o "~ Expression", -> new OpNode '~', $2
|
||||
o "-- Expression", -> new OpNode '--', $2
|
||||
o "++ Expression", -> new OpNode '++', $2
|
||||
o "DELETE Expression", -> new OpNode 'delete', $2
|
||||
o "TYPEOF Expression", -> new OpNode 'typeof', $2
|
||||
o "Expression --", -> new OpNode '--', $1, null, true
|
||||
o "Expression ++", -> new OpNode '++', $1, null, true
|
||||
|
||||
o "Expression * Expression", -> new OpNode '*', $1, $3
|
||||
o "Expression / Expression", -> new OpNode '/', $1, $3
|
||||
o "Expression % Expression", -> new OpNode '%', $1, $3
|
||||
|
||||
o "Expression + Expression", -> new OpNode '+', $1, $3
|
||||
o "Expression - Expression", -> new OpNode '-', $1, $3
|
||||
|
||||
o "Expression << Expression", -> new OpNode '<<', $1, $3
|
||||
o "Expression >> Expression", -> new OpNode '>>', $1, $3
|
||||
o "Expression >>> Expression", -> new OpNode '>>>', $1, $3
|
||||
o "Expression & Expression", -> new OpNode '&', $1, $3
|
||||
o "Expression | Expression", -> new OpNode '|', $1, $3
|
||||
o "Expression ^ Expression", -> new OpNode '^', $1, $3
|
||||
|
||||
o "Expression <= Expression", -> new OpNode '<=', $1, $3
|
||||
o "Expression < Expression", -> new OpNode '<', $1, $3
|
||||
o "Expression > Expression", -> new OpNode '>', $1, $3
|
||||
o "Expression >= Expression", -> new OpNode '>=', $1, $3
|
||||
|
||||
o "Expression == Expression", -> new OpNode '==', $1, $3
|
||||
o "Expression != Expression", -> new OpNode '!=', $1, $3
|
||||
o "Expression IS Expression", -> new OpNode 'is', $1, $3
|
||||
o "Expression ISNT Expression", -> new OpNode 'isnt', $1, $3
|
||||
|
||||
o "Expression && Expression", -> new OpNode '&&', $1, $3
|
||||
o "Expression || Expression", -> new OpNode '||', $1, $3
|
||||
o "Expression AND Expression", -> new OpNode 'and', $1, $3
|
||||
o "Expression OR Expression", -> new OpNode 'or', $1, $3
|
||||
o "Expression ? Expression", -> new OpNode '?', $1, $3
|
||||
|
||||
o "Expression -= Expression", -> new OpNode '-=', $1, $3
|
||||
o "Expression += Expression", -> new OpNode '+=', $1, $3
|
||||
o "Expression /= Expression", -> new OpNode '/=', $1, $3
|
||||
o "Expression *= Expression", -> new OpNode '*=', $1, $3
|
||||
o "Expression %= Expression", -> new OpNode '%=', $1, $3
|
||||
o "Expression ||= Expression", -> new OpNode '||=', $1, $3
|
||||
o "Expression &&= Expression", -> new OpNode '&&=', $1, $3
|
||||
o "Expression ?= Expression", -> new OpNode '?=', $1, $3
|
||||
|
||||
o "Expression INSTANCEOF Expression", -> new OpNode 'instanceof', $1, $3
|
||||
o "Expression IN Expression", -> new OpNode 'in', $1, $3
|
||||
]
|
||||
|
||||
}
|
||||
|
||||
# Precedence
|
||||
# ----------
|
||||
|
||||
# Operators at the top of this list have higher precedence than the ones lower
|
||||
# down. Following these rules is what makes `2 + 3 * 4` parse as:
|
||||
#
|
||||
# 2 + (3 * 4)
|
||||
#
|
||||
# And not:
|
||||
#
|
||||
# (2 + 3) * 4
|
||||
operators: [
|
||||
["left", '?']
|
||||
["nonassoc", 'UMINUS', 'UPLUS', 'NOT', '!', '!!', '~', '++', '--']
|
||||
@@ -37,434 +538,28 @@ operators: [
|
||||
["right", '->', '=>', 'UNLESS', 'IF', 'ELSE', 'WHILE']
|
||||
]
|
||||
|
||||
# Grammar ==============================================================
|
||||
# Wrapping Up
|
||||
# -----------
|
||||
|
||||
grammar: {
|
||||
|
||||
# All parsing will end in this rule, being the trunk of the AST.
|
||||
Root: [
|
||||
o "", -> new Expressions()
|
||||
o "TERMINATOR", -> new Expressions()
|
||||
o "Expressions", -> $1
|
||||
o "Block TERMINATOR", -> $1
|
||||
]
|
||||
|
||||
# Any list of expressions or method body, separated by line breaks or semis.
|
||||
Expressions: [
|
||||
o "Expression", -> Expressions.wrap([$1])
|
||||
o "Expressions TERMINATOR Expression", -> $1.push($3)
|
||||
o "Expressions TERMINATOR", -> $1
|
||||
]
|
||||
|
||||
# All types of expressions in our language. The basic unit of CoffeeScript
|
||||
# is the expression.
|
||||
Expression: [
|
||||
o "Value"
|
||||
o "Call"
|
||||
o "Code"
|
||||
o "Operation"
|
||||
o "Assign"
|
||||
o "If"
|
||||
o "Try"
|
||||
o "Throw"
|
||||
o "Return"
|
||||
o "While"
|
||||
o "For"
|
||||
o "Switch"
|
||||
o "Extends"
|
||||
o "Class"
|
||||
o "Splat"
|
||||
o "Existence"
|
||||
o "Comment"
|
||||
]
|
||||
|
||||
# A block of expressions. Note that the Rewriter will convert some postfix
|
||||
# forms into blocks for us, by altering the token stream.
|
||||
Block: [
|
||||
o "INDENT Expressions OUTDENT", -> $2
|
||||
o "INDENT OUTDENT", -> new Expressions()
|
||||
]
|
||||
|
||||
Identifier: [
|
||||
o "IDENTIFIER", -> new LiteralNode(yytext)
|
||||
]
|
||||
|
||||
AlphaNumeric: [
|
||||
o "NUMBER", -> new LiteralNode(yytext)
|
||||
o "STRING", -> new LiteralNode(yytext)
|
||||
]
|
||||
|
||||
# All hard-coded values. These can be printed straight to JavaScript.
|
||||
Literal: [
|
||||
o "AlphaNumeric", -> $1
|
||||
o "JS", -> new LiteralNode(yytext)
|
||||
o "REGEX", -> new LiteralNode(yytext)
|
||||
o "BREAK", -> new LiteralNode(yytext)
|
||||
o "CONTINUE", -> new LiteralNode(yytext)
|
||||
o "TRUE", -> new LiteralNode(true)
|
||||
o "FALSE", -> new LiteralNode(false)
|
||||
o "YES", -> new LiteralNode(true)
|
||||
o "NO", -> new LiteralNode(false)
|
||||
o "ON", -> new LiteralNode(true)
|
||||
o "OFF", -> new LiteralNode(false)
|
||||
]
|
||||
|
||||
# Assignment to a variable (or index).
|
||||
Assign: [
|
||||
o "Value ASSIGN Expression", -> new AssignNode($1, $3)
|
||||
]
|
||||
|
||||
# Assignment within an object literal (can be quoted).
|
||||
AssignObj: [
|
||||
o "Identifier ASSIGN Expression", -> new AssignNode(new ValueNode($1), $3, 'object')
|
||||
o "AlphaNumeric ASSIGN Expression", -> new AssignNode(new ValueNode($1), $3, 'object')
|
||||
o "Comment"
|
||||
]
|
||||
|
||||
# A return statement.
|
||||
Return: [
|
||||
o "RETURN Expression", -> new ReturnNode($2)
|
||||
o "RETURN", -> new ReturnNode(new ValueNode(new LiteralNode('null')))
|
||||
]
|
||||
|
||||
# A comment.
|
||||
Comment: [
|
||||
o "COMMENT", -> new CommentNode(yytext)
|
||||
]
|
||||
|
||||
# Arithmetic and logical operators
|
||||
# For Ruby's Operator precedence, see: [
|
||||
# https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
|
||||
Operation: [
|
||||
o "! Expression", -> new OpNode('!', $2)
|
||||
o "!! Expression", -> new OpNode('!!', $2)
|
||||
o("- Expression", (-> new OpNode('-', $2)), {prec: 'UMINUS'})
|
||||
o("+ Expression", (-> new OpNode('+', $2)), {prec: 'UPLUS'})
|
||||
o "NOT Expression", -> new OpNode('not', $2)
|
||||
o "~ Expression", -> new OpNode('~', $2)
|
||||
o "-- Expression", -> new OpNode('--', $2)
|
||||
o "++ Expression", -> new OpNode('++', $2)
|
||||
o "DELETE Expression", -> new OpNode('delete', $2)
|
||||
o "TYPEOF Expression", -> new OpNode('typeof', $2)
|
||||
o "Expression --", -> new OpNode('--', $1, null, true)
|
||||
o "Expression ++", -> new OpNode('++', $1, null, true)
|
||||
|
||||
o "Expression * Expression", -> new OpNode('*', $1, $3)
|
||||
o "Expression / Expression", -> new OpNode('/', $1, $3)
|
||||
o "Expression % Expression", -> new OpNode('%', $1, $3)
|
||||
|
||||
o "Expression + Expression", -> new OpNode('+', $1, $3)
|
||||
o "Expression - Expression", -> new OpNode('-', $1, $3)
|
||||
|
||||
o "Expression << Expression", -> new OpNode('<<', $1, $3)
|
||||
o "Expression >> Expression", -> new OpNode('>>', $1, $3)
|
||||
o "Expression >>> Expression", -> new OpNode('>>>', $1, $3)
|
||||
o "Expression & Expression", -> new OpNode('&', $1, $3)
|
||||
o "Expression | Expression", -> new OpNode('|', $1, $3)
|
||||
o "Expression ^ Expression", -> new OpNode('^', $1, $3)
|
||||
|
||||
o "Expression <= Expression", -> new OpNode('<=', $1, $3)
|
||||
o "Expression < Expression", -> new OpNode('<', $1, $3)
|
||||
o "Expression > Expression", -> new OpNode('>', $1, $3)
|
||||
o "Expression >= Expression", -> new OpNode('>=', $1, $3)
|
||||
|
||||
o "Expression == Expression", -> new OpNode('==', $1, $3)
|
||||
o "Expression != Expression", -> new OpNode('!=', $1, $3)
|
||||
o "Expression IS Expression", -> new OpNode('is', $1, $3)
|
||||
o "Expression ISNT Expression", -> new OpNode('isnt', $1, $3)
|
||||
|
||||
o "Expression && Expression", -> new OpNode('&&', $1, $3)
|
||||
o "Expression || Expression", -> new OpNode('||', $1, $3)
|
||||
o "Expression AND Expression", -> new OpNode('and', $1, $3)
|
||||
o "Expression OR Expression", -> new OpNode('or', $1, $3)
|
||||
o "Expression ? Expression", -> new OpNode('?', $1, $3)
|
||||
|
||||
o "Expression -= Expression", -> new OpNode('-=', $1, $3)
|
||||
o "Expression += Expression", -> new OpNode('+=', $1, $3)
|
||||
o "Expression /= Expression", -> new OpNode('/=', $1, $3)
|
||||
o "Expression *= Expression", -> new OpNode('*=', $1, $3)
|
||||
o "Expression %= Expression", -> new OpNode('%=', $1, $3)
|
||||
o "Expression ||= Expression", -> new OpNode('||=', $1, $3)
|
||||
o "Expression &&= Expression", -> new OpNode('&&=', $1, $3)
|
||||
o "Expression ?= Expression", -> new OpNode('?=', $1, $3)
|
||||
|
||||
o "Expression INSTANCEOF Expression", -> new OpNode('instanceof', $1, $3)
|
||||
o "Expression IN Expression", -> new OpNode('in', $1, $3)
|
||||
]
|
||||
|
||||
# The existence operator.
|
||||
Existence: [
|
||||
o "Expression ?", -> new ExistenceNode($1)
|
||||
]
|
||||
|
||||
# Function definition.
|
||||
Code: [
|
||||
o "PARAM_START ParamList PARAM_END FuncGlyph Block", -> new CodeNode($2, $5, $4)
|
||||
o "FuncGlyph Block", -> new CodeNode([], $2, $1)
|
||||
]
|
||||
|
||||
# The symbols to signify functions, and bound functions.
|
||||
FuncGlyph: [
|
||||
o "->", -> 'func'
|
||||
o "=>", -> 'boundfunc'
|
||||
]
|
||||
|
||||
# The parameters to a function definition.
|
||||
ParamList: [
|
||||
o "", -> []
|
||||
o "Param", -> [$1]
|
||||
o "ParamList , Param", -> $1.concat [$3]
|
||||
]
|
||||
|
||||
# A Parameter (or ParamSplat) in a function definition.
|
||||
Param: [
|
||||
o "PARAM", -> new LiteralNode(yytext)
|
||||
o "Param . . .", -> new SplatNode($1)
|
||||
]
|
||||
|
||||
# A regular splat.
|
||||
Splat: [
|
||||
o "Expression . . .", -> new SplatNode($1)
|
||||
]
|
||||
|
||||
# Expressions that can be treated as values.
|
||||
Value: [
|
||||
o "Identifier", -> new ValueNode($1)
|
||||
o "Literal", -> new ValueNode($1)
|
||||
o "Array", -> new ValueNode($1)
|
||||
o "Object", -> new ValueNode($1)
|
||||
o "Parenthetical", -> new ValueNode($1)
|
||||
o "Range", -> new ValueNode($1)
|
||||
o "This", -> $1
|
||||
o "Value Accessor", -> $1.push($2)
|
||||
o "Invocation Accessor", -> new ValueNode($1, [$2])
|
||||
]
|
||||
|
||||
# Accessing into an object or array, through dot or index notation.
|
||||
Accessor: [
|
||||
o "PROPERTY_ACCESS Identifier", -> new AccessorNode($2)
|
||||
o "PROTOTYPE_ACCESS Identifier", -> new AccessorNode($2, 'prototype')
|
||||
o "SOAK_ACCESS Identifier", -> new AccessorNode($2, 'soak')
|
||||
o "Index"
|
||||
o "Slice", -> new SliceNode($1)
|
||||
]
|
||||
|
||||
# Indexing into an object or array.
|
||||
Index: [
|
||||
o "INDEX_START Expression INDEX_END", -> new IndexNode($2)
|
||||
o "SOAKED_INDEX_START Expression SOAKED_INDEX_END", -> new IndexNode($2, 'soak')
|
||||
]
|
||||
|
||||
# An object literal.
|
||||
Object: [
|
||||
o "{ AssignList }", -> new ObjectNode($2)
|
||||
o "{ IndentedAssignList }", -> new ObjectNode($2)
|
||||
]
|
||||
|
||||
# A class literal.
|
||||
Class: [
|
||||
o "CLASS Value", -> new ClassNode($2)
|
||||
o "CLASS Value EXTENDS Value", -> new ClassNode($2, $4)
|
||||
o "CLASS Value IndentedAssignList", -> new ClassNode($2, null, $3)
|
||||
o "CLASS Value EXTENDS Value IndentedAssignList", -> new ClassNode($2, $4, $5)
|
||||
]
|
||||
|
||||
# Assignment within an object literal (comma or newline separated).
|
||||
AssignList: [
|
||||
o "", -> []
|
||||
o "AssignObj", -> [$1]
|
||||
o "AssignList , AssignObj", -> $1.concat [$3]
|
||||
o "AssignList TERMINATOR AssignObj", -> $1.concat [$3]
|
||||
o "AssignList , TERMINATOR AssignObj", -> $1.concat [$4]
|
||||
]
|
||||
|
||||
# A list of assignments in a block indentation.
|
||||
IndentedAssignList: [
|
||||
o "INDENT AssignList OUTDENT", -> $2
|
||||
]
|
||||
|
||||
# All flavors of function call (instantiation, super, and regular).
|
||||
Call: [
|
||||
o "Invocation", -> $1
|
||||
o "NEW Invocation", -> $2.new_instance()
|
||||
o "Super", -> $1
|
||||
]
|
||||
|
||||
# Extending an object's prototype.
|
||||
Extends: [
|
||||
o "Value EXTENDS Value", -> new ExtendsNode($1, $3)
|
||||
]
|
||||
|
||||
# A generic function invocation.
|
||||
Invocation: [
|
||||
o "Value Arguments", -> new CallNode($1, $2)
|
||||
o "Invocation Arguments", -> new CallNode($1, $2)
|
||||
]
|
||||
|
||||
# The list of arguments to a function invocation.
|
||||
Arguments: [
|
||||
o "CALL_START ArgList CALL_END", -> $2
|
||||
]
|
||||
|
||||
# Calling super.
|
||||
Super: [
|
||||
o "SUPER CALL_START ArgList CALL_END", -> new CallNode('super', $3)
|
||||
]
|
||||
|
||||
# This references, either naked or to a property.
|
||||
This: [
|
||||
o "@", -> new ValueNode(new LiteralNode('this'))
|
||||
o "@ Identifier", -> new ValueNode(new LiteralNode('this'), [new AccessorNode($2)])
|
||||
]
|
||||
|
||||
# The range literal.
|
||||
Range: [
|
||||
o "[ Expression . . Expression ]", -> new RangeNode($2, $5)
|
||||
o "[ Expression . . . Expression ]", -> new RangeNode($2, $6, true)
|
||||
]
|
||||
|
||||
# The slice literal.
|
||||
Slice: [
|
||||
o "INDEX_START Expression . . Expression INDEX_END", -> new RangeNode($2, $5)
|
||||
o "INDEX_START Expression . . . Expression INDEX_END", -> new RangeNode($2, $6, true)
|
||||
]
|
||||
|
||||
# The array literal.
|
||||
Array: [
|
||||
o "[ ArgList ]", -> new ArrayNode($2)
|
||||
]
|
||||
|
||||
# A list of arguments to a method call, or as the contents of an array.
|
||||
ArgList: [
|
||||
o "", -> []
|
||||
o "Expression", -> [$1]
|
||||
o "INDENT Expression", -> [$2]
|
||||
o "ArgList , Expression", -> $1.concat [$3]
|
||||
o "ArgList TERMINATOR Expression", -> $1.concat [$3]
|
||||
o "ArgList , TERMINATOR Expression", -> $1.concat [$4]
|
||||
o "ArgList , INDENT Expression", -> $1.concat [$4]
|
||||
o "ArgList OUTDENT", -> $1
|
||||
]
|
||||
|
||||
# Just simple, comma-separated, required arguments (no fancy syntax).
|
||||
SimpleArgs: [
|
||||
o "Expression", -> $1
|
||||
o "SimpleArgs , Expression", ->
|
||||
if $1 instanceof Array then $1.concat([$3]) else [$1].concat([$3])
|
||||
]
|
||||
|
||||
# Try/catch/finally exception handling blocks.
|
||||
Try: [
|
||||
o "TRY Block Catch", -> new TryNode($2, $3[0], $3[1])
|
||||
o "TRY Block FINALLY Block", -> new TryNode($2, null, null, $4)
|
||||
o "TRY Block Catch FINALLY Block", -> new TryNode($2, $3[0], $3[1], $5)
|
||||
]
|
||||
|
||||
# A catch clause.
|
||||
Catch: [
|
||||
o "CATCH Identifier Block", -> [$2, $3]
|
||||
]
|
||||
|
||||
# Throw an exception.
|
||||
Throw: [
|
||||
o "THROW Expression", -> new ThrowNode($2)
|
||||
]
|
||||
|
||||
# Parenthetical expressions.
|
||||
Parenthetical: [
|
||||
o "( Expression )", -> new ParentheticalNode($2)
|
||||
]
|
||||
|
||||
# The condition for a while loop.
|
||||
WhileSource: [
|
||||
o "WHILE Expression", -> new WhileNode($2)
|
||||
o "WHILE Expression WHEN Expression", -> new WhileNode($2, {filter : $4})
|
||||
]
|
||||
|
||||
# The while loop, in block and postfix forms. (There is no do..while.)
|
||||
While: [
|
||||
o "WhileSource Block", -> $1.add_body $2
|
||||
o "Expression WhileSource", -> $2.add_body $1
|
||||
]
|
||||
|
||||
# Array comprehensions, including guard and current index.
|
||||
# This looks a little confusing; check nodes.rb for the arguments to ForNode.
|
||||
For: [
|
||||
o "Expression FOR ForVariables ForSource", -> new ForNode($1, $4, $3[0], $3[1])
|
||||
o "FOR ForVariables ForSource Block", -> new ForNode($4, $3, $2[0], $2[1])
|
||||
]
|
||||
|
||||
# An array comprehension has variables for the current element and index.
|
||||
ForVariables: [
|
||||
o "Identifier", -> [$1]
|
||||
o "Identifier , Identifier", -> [$1, $3]
|
||||
]
|
||||
|
||||
# The source of the array comprehension can optionally be filtered.
|
||||
ForSource: [
|
||||
o "IN Expression", -> {source: $2}
|
||||
o "OF Expression", -> {source: $2, object: true}
|
||||
o "ForSource WHEN Expression", -> $1.filter: $3; $1
|
||||
o "ForSource BY Expression", -> $1.step: $3; $1
|
||||
]
|
||||
|
||||
# Switch/When blocks.
|
||||
Switch: [
|
||||
o "SWITCH Expression INDENT Whens OUTDENT", -> $4.rewrite_condition($2)
|
||||
o "SWITCH Expression INDENT Whens ELSE Block OUTDENT", -> $4.rewrite_condition($2).add_else($6, true)
|
||||
]
|
||||
|
||||
# The inner list of whens.
|
||||
Whens: [
|
||||
o "When", -> $1
|
||||
o "Whens When", -> $1.push $2
|
||||
]
|
||||
|
||||
# An individual when.
|
||||
When: [
|
||||
o "LEADING_WHEN SimpleArgs Block", -> new IfNode($2, $3, null, {statement: true})
|
||||
o "LEADING_WHEN SimpleArgs Block TERMINATOR", -> new IfNode($2, $3, null, {statement: true})
|
||||
o "Comment TERMINATOR When", -> $3.comment: $1; $3
|
||||
]
|
||||
|
||||
# The most basic form of "if".
|
||||
IfStart: [
|
||||
o "IF Expression Block", -> new IfNode($2, $3)
|
||||
o "IfStart ElsIfs", -> $1.add_else($2)
|
||||
]
|
||||
|
||||
IfBlock: [
|
||||
o "IfStart", -> $1
|
||||
o "IfStart ELSE Block", -> $1.add_else($3)
|
||||
]
|
||||
|
||||
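# Multiple elsifs can be chained together.
# NOTE(review): the second alternative refers to `ElsIf`, which has no rule of
# its own in this part of the grammar. If it is not defined elsewhere, it is
# recorded as a terminal token when the parser is built -- confirm the intent.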
|
||||
ElsIfs: [
|
||||
o "ELSE IF Expression Block", -> (new IfNode($3, $4)).force_statement()
|
||||
o "ElsIfs ElsIf", -> $1.add_else($2)
|
||||
]
|
||||
|
||||
# The full complement of if blocks, including postfix one-liner ifs and unlesses.
|
||||
If: [
|
||||
o "IfBlock", -> $1
|
||||
o "Expression IF Expression", -> new IfNode($3, Expressions.wrap([$1]), null, {statement: true})
|
||||
o "Expression UNLESS Expression", -> new IfNode($3, Expressions.wrap([$1]), null, {statement: true, invert: true})
|
||||
]
|
||||
|
||||
}
|
||||
|
||||
# Helpers ==============================================================
|
||||
|
||||
# Make the Jison parser.
|
||||
bnf: {}
|
||||
# Finally, now that we have our **grammar** and our **operators**, we can create
|
||||
# our **Jison.Parser**. We do this by processing all of our rules, recording all
|
||||
# terminals (every symbol which does not appear as the name of a rule above)
|
||||
# as "tokens".
|
||||
tokens: []
|
||||
for name, non_terminal of grammar
|
||||
bnf[name]: for option in non_terminal
|
||||
for part in option[0].split(" ")
|
||||
if !grammar[part]
|
||||
tokens.push(part)
|
||||
if name == "Root"
|
||||
option[1] = "return ${option[1]}"
|
||||
option
|
||||
tokens: tokens.join(" ")
|
||||
exports.parser: new Parser({tokens: tokens, bnf: bnf, operators: operators.reverse(), startSymbol: 'Root'}, {debug: false})
|
||||
for name, alternatives of grammar
|
||||
grammar[name]: for alt in alternatives
|
||||
for token in alt[0].split ' '
|
||||
tokens.push token unless grammar[token]
|
||||
alt[1] = "return ${alt[1]}" if name is 'Root'
|
||||
alt
|
||||
|
||||
# Initialize the **Parser** with our list of terminal **tokens**, our **grammar**
|
||||
# rules, and the name of the root. Reverse the operators because Jison orders
|
||||
# precedence from low to high, and we have it high to low
|
||||
# (as in [Yacc](http://dinosaur.compilertools.net/yacc/index.html)).
|
||||
exports.parser: new Parser {
|
||||
tokens: tokens.join ' '
|
||||
bnf: grammar
|
||||
operators: operators.reverse()
|
||||
startSymbol: 'Root'
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user