waypoint -- it's beginning to parser

This commit is contained in:
Jeremy Ashkenas
2010-02-07 15:15:36 -05:00
parent 7ec0a8d653
commit 56499984ca
8 changed files with 839 additions and 926 deletions

View File

@@ -1,4 +1,3 @@
sys: require 'sys'
Rewriter: require('./rewriter').Rewriter
# The lexer reads a stream of CoffeeScript and divvies it up into tagged
@@ -70,7 +69,6 @@ lex::tokenize: (code) ->
while this.i < this.code.length
this.chunk: this.code.slice(this.i)
this.extract_next_token()
# sys.puts "original stream: " + this.tokens if process.ENV['VERBOSE']
this.close_indentation()
(new Rewriter()).rewrite this.tokens
@@ -157,7 +155,7 @@ lex::comment_token: ->
return false unless comment: this.match COMMENT, 1
this.line += comment.match(MULTILINER).length
this.token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
this.token "\n", "\n"
this.token 'TERMINATOR', "\n"
this.i += comment.length
true
@@ -187,7 +185,7 @@ lex::outdent_token: (move_out) ->
last_indent: this.indents.pop()
this.token 'OUTDENT', last_indent
move_out -= last_indent
this.token "\n", "\n"
this.token 'TERMINATOR', "\n"
true
# Matches and consumes non-meaningful whitespace.
@@ -200,7 +198,7 @@ lex::whitespace_token: ->
# Multiple newlines get merged together.
# Use a trailing \ to escape newlines.
lex::newline_token: (newlines) ->
# Emit a line-ending token unless this.value() is already "\n" (presumably the
# value of the most recent token -- confirm against lex::value), so that runs of
# newlines collapse into one. The `newlines` argument is not read here.
this.token "\n", "\n" unless this.value() is "\n"
this.token 'TERMINATOR', "\n" unless this.value() is "\n"
# Always returns true, whether or not a token was emitted.
true
# Tokens to explicitly escape newlines are removed once their job is done.
@@ -217,6 +215,7 @@ lex::literal_token: ->
this.tag_parameters() if value and value.match(CODE)
value ||= this.chunk.substr(0, 1)
tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
tag: 'TERMINATOR' if value == ';'
if this.value() isnt this.spaced and CALLABLE.indexOf(this.tag()) >= 0
tag: 'CALL_START' if value is '('
tag: 'INDEX_START' if value is '['
@@ -244,12 +243,12 @@ lex::value: (index, val) ->
tok[1]
# Count the occurrences of a character in a string.
# `string` is the text to scan and `letter` is the character to look for.
# Returns how many times `letter` occurs in `string` (0 when it is absent).
lex::count: (string, letter) ->
  # An empty needle matches at every index, so the scan below would never end.
  return 0 if letter is ''
  num: 0
  pos: string.indexOf(letter)
  while pos isnt -1
    # Tally into the same variable that is initialised above and returned below.
    num += 1
    # Resume the search just past the match that was counted.
    pos: string.indexOf(letter, pos + 1)
  num
# Attempt to match a string against the current chunk, returning the indexed

View File

@@ -1,4 +1,5 @@
# Node constructor: keeps the `arguments` object it was called with on `values`.
exports.Node: -> this.values: arguments
exports.Node: -> @values: arguments
# NOTE(review): when invoked as `Node.wrap(values)`, `@` is the object `wrap` is
# called on, so this stores `values` on that object and evaluates to `values`
# itself rather than building a new node -- confirm that is intended.
exports.Node.wrap: (values) -> @values: values
# Expressions and LiteralNode are both bound to the same Node function.
exports.Expressions : exports.Node
exports.LiteralNode : exports.Node

View File

@@ -1,4 +1,5 @@
Parser: require('jison').Parser
process.mixin require './nodes'
# DSL ===================================================================
@@ -45,55 +46,49 @@ grammar: {
# All parsing will end in this rule, being the trunk of the AST.
Root: [
o "", -> new Expressions()
o "Terminator", -> new Expressions()
o "TERMINATOR", -> new Expressions()
o "Expressions"
o "Block Terminator"
o "Block TERMINATOR"
]
# Any list of expressions or method body, separated by line breaks or semis.
Expressions: [
o "Expression", -> Expressions.wrap([$1])
o "Expressions Terminator Expression", -> $1.push($3)
o "Expressions Terminator"
o "Expressions TERMINATOR Expression", -> $1.push($3)
o "Expressions TERMINATOR"
]
# All types of expressions in our language. The basic unit of CoffeeScript
# is the expression.
Expression: [
o "Value"
o "Call"
o "Code"
o "Operation"
o "Assign"
o "If"
o "Try"
o "Throw"
o "Return"
o "While"
o "For"
o "Switch"
o "Extends"
o "Splat"
o "Existence"
o "Comment"
# o "Call"
# o "Code"
# o "Operation"
# o "Assign"
# o "If"
# o "Try"
# o "Throw"
# o "Return"
# o "While"
# o "For"
# o "Switch"
# o "Extends"
# o "Splat"
# o "Existence"
# o "Comment"
]
# A block of expressions. Note that the Rewriter will convert some postfix
# forms into blocks for us, by altering the token stream.
Block: [
o "INDENT Expressions OUTDENT", -> $2
o "INDENT OUTDENT", -> new Expressions()
]
# Tokens that can terminate an expression.
Terminator: [
o "\n"
o ";"
]
# # A block of expressions. Note that the Rewriter will convert some postfix
# # forms into blocks for us, by altering the token stream.
# Block: [
# o "INDENT Expressions OUTDENT", -> $2
# o "INDENT OUTDENT", -> new Expressions()
# ]
# All hard-coded values. These can be printed straight to JavaScript.
Literal: [
o "NUMBER", -> new LiteralNode($1)
o "NUMBER", -> new LiteralNode(yytext)
o "STRING", -> new LiteralNode($1)
o "JS", -> new LiteralNode($1)
o "REGEX", -> new LiteralNode($1)
@@ -108,387 +103,387 @@ grammar: {
o "OFF", -> new LiteralNode(false)
]
# Assignment to a variable (or index).
Assign: [
o "Value ASSIGN Expression", -> new AssignNode($1, $3)
]
# Assignment within an object literal (can be quoted).
AssignObj: [
o "IDENTIFIER ASSIGN Expression", -> new AssignNode(new ValueNode($1), $3, 'object')
o "STRING ASSIGN Expression", -> new AssignNode(new ValueNode(new LiteralNode($1)), $3, 'object')
o "Comment"
]
# A return statement.
Return: [
o "RETURN Expression", -> new ReturnNode($2)
o "RETURN", -> new ReturnNode(new ValueNode(new LiteralNode('null')))
]
# A comment.
Comment: [
o "COMMENT", -> new CommentNode($1)
]
# Arithmetic and logical operators
# For Ruby's Operator precedence, see:
# https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
Operation: [
o "! Expression", -> new OpNode($1, $2)
o "!! Expression", -> new OpNode($1, $2)
o "- Expression", -> new OpNode($1, $2)
o "+ Expression", -> new OpNode($1, $2)
o "NOT Expression", -> new OpNode($1, $2)
o "~ Expression", -> new OpNode($1, $2)
o "-- Expression", -> new OpNode($1, $2)
o "++ Expression", -> new OpNode($1, $2)
o "DELETE Expression", -> new OpNode($1, $2)
o "TYPEOF Expression", -> new OpNode($1, $2)
o "Expression --", -> new OpNode($2, $1, null, true)
o "Expression ++", -> new OpNode($2, $1, null, true)
o "Expression * Expression", -> new OpNode($2, $1, $3)
o "Expression / Expression", -> new OpNode($2, $1, $3)
o "Expression % Expression", -> new OpNode($2, $1, $3)
o "Expression + Expression", -> new OpNode($2, $1, $3)
o "Expression - Expression", -> new OpNode($2, $1, $3)
o "Expression << Expression", -> new OpNode($2, $1, $3)
o "Expression >> Expression", -> new OpNode($2, $1, $3)
o "Expression >>> Expression", -> new OpNode($2, $1, $3)
o "Expression & Expression", -> new OpNode($2, $1, $3)
o "Expression | Expression", -> new OpNode($2, $1, $3)
o "Expression ^ Expression", -> new OpNode($2, $1, $3)
o "Expression <= Expression", -> new OpNode($2, $1, $3)
o "Expression < Expression", -> new OpNode($2, $1, $3)
o "Expression > Expression", -> new OpNode($2, $1, $3)
o "Expression >= Expression", -> new OpNode($2, $1, $3)
o "Expression == Expression", -> new OpNode($2, $1, $3)
o "Expression != Expression", -> new OpNode($2, $1, $3)
o "Expression IS Expression", -> new OpNode($2, $1, $3)
o "Expression ISNT Expression", -> new OpNode($2, $1, $3)
o "Expression && Expression", -> new OpNode($2, $1, $3)
o "Expression || Expression", -> new OpNode($2, $1, $3)
o "Expression AND Expression", -> new OpNode($2, $1, $3)
o "Expression OR Expression", -> new OpNode($2, $1, $3)
o "Expression ? Expression", -> new OpNode($2, $1, $3)
o "Expression -= Expression", -> new OpNode($2, $1, $3)
o "Expression += Expression", -> new OpNode($2, $1, $3)
o "Expression /= Expression", -> new OpNode($2, $1, $3)
o "Expression *= Expression", -> new OpNode($2, $1, $3)
o "Expression %= Expression", -> new OpNode($2, $1, $3)
o "Expression ||= Expression", -> new OpNode($2, $1, $3)
o "Expression &&= Expression", -> new OpNode($2, $1, $3)
o "Expression ?= Expression", -> new OpNode($2, $1, $3)
o "Expression INSTANCEOF Expression", -> new OpNode($2, $1, $3)
o "Expression IN Expression", -> new OpNode($2, $1, $3)
]
# Try abbreviated expressions to make the grammar build faster:
# UnaryOp: [
# o "!"
# o "!!"
# o "NOT"
# o "~"
# o "--"
# o "++"
# o "DELETE"
# o "TYPEOF"
# # Assignment to a variable (or index).
# Assign: [
# o "Value ASSIGN Expression", -> new AssignNode($1, $3)
# ]
#
# BinaryOp: [
# o "*"
# o "/"
# o "%"
# o "+"
# o "-"
# o "<<"
# o ">>"
# o ">>>"
# o "&"
# o "|"
# o "^"
# o "<="
# o "<"
# o ">"
# o ">="
# o "=="
# o "!="
# o "IS"
# o "ISNT"
# o "&&"
# o "||"
# o "AND"
# o "OR"
# o "?"
# o "-="
# o "+="
# o "/="
# o "*="
# o "%="
# o "||="
# o "&&="
# o "?="
# o "INSTANCEOF"
# o "IN"
# # Assignment within an object literal (can be quoted).
# AssignObj: [
# o "IDENTIFIER ASSIGN Expression", -> new AssignNode(new ValueNode($1), $3, 'object')
# o "STRING ASSIGN Expression", -> new AssignNode(new ValueNode(new LiteralNode($1)), $3, 'object')
# o "Comment"
# ]
#
# # A return statement.
# Return: [
# o "RETURN Expression", -> new ReturnNode($2)
# o "RETURN", -> new ReturnNode(new ValueNode(new LiteralNode('null')))
# ]
#
# # A comment.
# Comment: [
# o "COMMENT", -> new CommentNode($1)
# ]
#
# # Arithmetic and logical operators
# # For Ruby's Operator precedence, see:
# # https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
# Operation: [
# o "Expression BinaryOp Expression", -> new OpNode($2, $1, $3)
# o "UnaryOp Expression", -> new OpNode($1, $2)
# o "! Expression", -> new OpNode($1, $2)
# o "!! Expression", -> new OpNode($1, $2)
# o "- Expression", -> new OpNode($1, $2)
# o "+ Expression", -> new OpNode($1, $2)
# o "NOT Expression", -> new OpNode($1, $2)
# o "~ Expression", -> new OpNode($1, $2)
# o "-- Expression", -> new OpNode($1, $2)
# o "++ Expression", -> new OpNode($1, $2)
# o "DELETE Expression", -> new OpNode($1, $2)
# o "TYPEOF Expression", -> new OpNode($1, $2)
# o "Expression --", -> new OpNode($2, $1, null, true)
# o "Expression ++", -> new OpNode($2, $1, null, true)
#
# o "Expression * Expression", -> new OpNode($2, $1, $3)
# o "Expression / Expression", -> new OpNode($2, $1, $3)
# o "Expression % Expression", -> new OpNode($2, $1, $3)
#
# o "Expression + Expression", -> new OpNode($2, $1, $3)
# o "Expression - Expression", -> new OpNode($2, $1, $3)
#
# o "Expression << Expression", -> new OpNode($2, $1, $3)
# o "Expression >> Expression", -> new OpNode($2, $1, $3)
# o "Expression >>> Expression", -> new OpNode($2, $1, $3)
#
# o "Expression & Expression", -> new OpNode($2, $1, $3)
# o "Expression | Expression", -> new OpNode($2, $1, $3)
# o "Expression ^ Expression", -> new OpNode($2, $1, $3)
#
# o "Expression <= Expression", -> new OpNode($2, $1, $3)
# o "Expression < Expression", -> new OpNode($2, $1, $3)
# o "Expression > Expression", -> new OpNode($2, $1, $3)
# o "Expression >= Expression", -> new OpNode($2, $1, $3)
#
# o "Expression == Expression", -> new OpNode($2, $1, $3)
# o "Expression != Expression", -> new OpNode($2, $1, $3)
# o "Expression IS Expression", -> new OpNode($2, $1, $3)
# o "Expression ISNT Expression", -> new OpNode($2, $1, $3)
#
# o "Expression && Expression", -> new OpNode($2, $1, $3)
# o "Expression || Expression", -> new OpNode($2, $1, $3)
# o "Expression AND Expression", -> new OpNode($2, $1, $3)
# o "Expression OR Expression", -> new OpNode($2, $1, $3)
# o "Expression ? Expression", -> new OpNode($2, $1, $3)
#
# o "Expression -= Expression", -> new OpNode($2, $1, $3)
# o "Expression += Expression", -> new OpNode($2, $1, $3)
# o "Expression /= Expression", -> new OpNode($2, $1, $3)
# o "Expression *= Expression", -> new OpNode($2, $1, $3)
# o "Expression %= Expression", -> new OpNode($2, $1, $3)
# o "Expression ||= Expression", -> new OpNode($2, $1, $3)
# o "Expression &&= Expression", -> new OpNode($2, $1, $3)
# o "Expression ?= Expression", -> new OpNode($2, $1, $3)
#
# o "Expression INSTANCEOF Expression", -> new OpNode($2, $1, $3)
# o "Expression IN Expression", -> new OpNode($2, $1, $3)
# ]
#
# # Try abbreviated expressions to make the grammar build faster:
#
# # UnaryOp: [
# # o "!"
# # o "!!"
# # o "NOT"
# # o "~"
# # o "--"
# # o "++"
# # o "DELETE"
# # o "TYPEOF"
# # ]
# #
# # BinaryOp: [
# # o "*"
# # o "/"
# # o "%"
# # o "+"
# # o "-"
# # o "<<"
# # o ">>"
# # o ">>>"
# # o "&"
# # o "|"
# # o "^"
# # o "<="
# # o "<"
# # o ">"
# # o ">="
# # o "=="
# # o "!="
# # o "IS"
# # o "ISNT"
# # o "&&"
# # o "||"
# # o "AND"
# # o "OR"
# # o "?"
# # o "-="
# # o "+="
# # o "/="
# # o "*="
# # o "%="
# # o "||="
# # o "&&="
# # o "?="
# # o "INSTANCEOF"
# # o "IN"
# # ]
# #
# # Operation: [
# # o "Expression BinaryOp Expression", -> new OpNode($2, $1, $3)
# # o "UnaryOp Expression", -> new OpNode($1, $2)
# # ]
#
# # The existence operator.
# Existence: [
# o "Expression ?", -> new ExistenceNode($1)
# ]
#
# # Function definition.
# Code: [
# o "PARAM_START ParamList PARAM_END FuncGlyph Block", -> new CodeNode($2, $5, $4)
# o "FuncGlyph Block", -> new CodeNode([], $2, $1)
# ]
#
# # The symbols to signify functions, and bound functions.
# FuncGlyph: [
# o "->", -> 'func'
# o "=>", -> 'boundfunc'
# ]
#
# # The parameters to a function definition.
# ParamList: [
# o "Param", -> [$1]
# o "ParamList , Param", -> $1.push($3)
# ]
#
# # A Parameter (or ParamSplat) in a function definition.
# Param: [
# o "PARAM"
# o "PARAM . . .", -> new SplatNode($1)
# ]
#
# # A regular splat.
# Splat: [
# o "Expression . . .", -> new SplatNode($1)
# ]
# The existence operator.
Existence: [
o "Expression ?", -> new ExistenceNode($1)
]
# Function definition.
Code: [
o "PARAM_START ParamList PARAM_END FuncGlyph Block", -> new CodeNode($2, $5, $4)
o "FuncGlyph Block", -> new CodeNode([], $2, $1)
]
# The symbols to signify functions, and bound functions.
FuncGlyph: [
o "->", -> 'func'
o "=>", -> 'boundfunc'
]
# The parameters to a function definition.
ParamList: [
o "Param", -> [$1]
o "ParamList , Param", -> $1.push($3)
]
# A Parameter (or ParamSplat) in a function definition.
Param: [
o "PARAM"
o "PARAM . . .", -> new SplatNode($1)
]
# A regular splat.
Splat: [
o "Expression . . .", -> new SplatNode($1)
]
# Expressions that can be treated as values.
Value: [
o "IDENTIFIER", -> new ValueNode($1)
o "IDENTIFIER", -> new ValueNode(yytext)
o "Literal", -> new ValueNode($1)
o "Array", -> new ValueNode($1)
o "Object", -> new ValueNode($1)
o "Parenthetical", -> new ValueNode($1)
o "Range", -> new ValueNode($1)
o "Value Accessor", -> $1.push($2)
o "Invocation Accessor", -> new ValueNode($1, [$2])
# o "Array", -> new ValueNode($1)
# o "Object", -> new ValueNode($1)
# o "Parenthetical", -> new ValueNode($1)
# o "Range", -> new ValueNode($1)
# o "Value Accessor", -> $1.push($2)
# o "Invocation Accessor", -> new ValueNode($1, [$2])
]
# Accessing into an object or array, through dot or index notation.
Accessor: [
o "PROPERTY_ACCESS IDENTIFIER", -> new AccessorNode($2)
o "PROTOTYPE_ACCESS IDENTIFIER", -> new AccessorNode($2, 'prototype')
o "SOAK_ACCESS IDENTIFIER", -> new AccessorNode($2, 'soak')
o "Index"
o "Slice", -> new SliceNode($1)
]
# Indexing into an object or array.
Index: [
o "INDEX_START Expression INDEX_END", -> new IndexNode($2)
]
# An object literal.
Object: [
o "{ AssignList }", -> new ObjectNode($2)
]
# Assignment within an object literal (comma or newline separated).
AssignList: [
o "", -> []
o "AssignObj", -> [$1]
o "AssignList , AssignObj", -> $1.push $3
o "AssignList Terminator AssignObj", -> $1.push $3
o "AssignList , Terminator AssignObj", -> $1.push $4
o "INDENT AssignList OUTDENT", -> $2
]
# All flavors of function call (instantiation, super, and regular).
Call: [
o "Invocation", -> $1
o "NEW Invocation", -> $2.new_instance()
o "Super", -> $1
]
# Extending an object's prototype.
Extends: [
o "Value EXTENDS Value", -> new ExtendsNode($1, $3)
]
# A generic function invocation.
Invocation: [
o "Value Arguments", -> new CallNode($1, $2)
o "Invocation Arguments", -> new CallNode($1, $2)
]
# The list of arguments to a function invocation.
Arguments: [
o "CALL_START ArgList CALL_END", -> $2
]
# Calling super.
Super: [
o "SUPER CALL_START ArgList CALL_END", -> new CallNode('super', $3)
]
# The range literal.
Range: [
o "[ Expression . . Expression ]", -> new RangeNode($2, $5)
o "[ Expression . . . Expression ]", -> new RangeNode($2, $6, true)
]
# The slice literal.
Slice: [
o "INDEX_START Expression . . Expression INDEX_END", -> new RangeNode($2, $5)
o "INDEX_START Expression . . . Expression INDEX_END", -> new RangeNode($2, $6, true)
]
# The array literal.
Array: [
o "[ ArgList ]", -> new ArrayNode($2)
]
# A list of arguments to a method call, or as the contents of an array.
ArgList: [
o "", -> []
o "Expression", -> val
o "INDENT Expression", -> [$2]
o "ArgList , Expression", -> $1.push $3
o "ArgList Terminator Expression", -> $1.push $3
o "ArgList , Terminator Expression", -> $1.push $4
o "ArgList , INDENT Expression", -> $1.push $4
o "ArgList OUTDENT", -> $1
]
# Just simple, comma-separated, required arguments (no fancy syntax).
SimpleArgs: [
o "Expression", -> $1
o "SimpleArgs , Expression", ->
([$1].push($3)).reduce (a, b) -> a.concat(b)
]
# Try/catch/finally exception handling blocks.
Try: [
o "TRY Block Catch", -> new TryNode($2, $3[0], $3[1])
o "TRY Block FINALLY Block", -> new TryNode($2, nil, nil, $4)
o "TRY Block Catch FINALLY Block", -> new TryNode($2, $3[0], $3[1], $5)
]
# A catch clause.
Catch: [
o "CATCH IDENTIFIER Block", -> [$2, $3]
]
# Throw an exception.
Throw: [
o "THROW Expression", -> new ThrowNode($2)
]
# Parenthetical expressions.
Parenthetical: [
o "( Expression )", -> new ParentheticalNode($2)
]
# The while loop. (there is no do..while).
While: [
o "WHILE Expression Block", -> new WhileNode($2, $3)
o "WHILE Expression", -> new WhileNode($2, nil)
o "Expression WHILE Expression", -> new WhileNode($3, Expressions.wrap($1))
]
# Array comprehensions, including guard and current index.
# Looks a little confusing, check nodes.rb for the arguments to ForNode.
For: [
o "Expression FOR ForVariables ForSource", -> new ForNode($1, $4, $3[0], $3[1])
o "FOR ForVariables ForSource Block", -> new ForNode($4, $3, $2[0], $2[1])
]
# An array comprehension has variables for the current element and index.
ForVariables: [
o "IDENTIFIER", -> [$1]
o "IDENTIFIER , IDENTIFIER", -> [$1, $3]
]
# The source of the array comprehension can optionally be filtered.
ForSource: [
o "IN Expression", -> {source: $2}
o "OF Expression", -> {source: $2, object: true}
o "ForSource WHEN Expression", -> $1.filter: $3; $1
o "ForSource BY Expression", -> $1.step: $3; $1
]
# Switch/When blocks.
Switch: [
o "SWITCH Expression INDENT Whens OUTDENT", -> $4.rewrite_condition($2)
o "SWITCH Expression INDENT Whens ELSE Block OUTDENT", -> $4.rewrite_condition($2).add_else($6)
]
# The inner list of whens.
Whens: [
o "When", -> $1
o "Whens When", -> $1.push $2
]
# An individual when.
When: [
o "LEADING_WHEN SimpleArgs Block", -> new IfNode($2, $3, nil, {statement: true})
o "LEADING_WHEN SimpleArgs Block Terminator", -> new IfNode($2, $3, nil, {statement: true})
o "Comment Terminator When", -> $3.add_comment($1)
]
# The most basic form of "if".
IfBlock: [
o "IF Expression Block", -> new IfNode($2, $3)
]
# An elsif portion of an if-else block.
ElsIf: [
o "ELSE IfBlock", -> $2.force_statement()
]
# Multiple elsifs can be chained together.
ElsIfs: [
o "ElsIf", -> $1
o "ElsIfs ElsIf", -> $1.add_else($2)
]
# Terminating else bodies are strictly optional.
ElseBody: [
o "", -> null
o "ELSE Block", -> $2
]
# All the alternatives for ending an if-else block.
IfEnd: [
o "ElseBody", -> $1
o "ElsIfs ElseBody", -> $1.add_else($2)
]
# The full complement of if blocks, including postfix one-liner ifs and unlesses.
If: [
o "IfBlock IfEnd", -> $1.add_else($2)
o "Expression IF Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true})
o "Expression UNLESS Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true, invert: true})
]
# # Accessing into an object or array, through dot or index notation.
# Accessor: [
# o "PROPERTY_ACCESS IDENTIFIER", -> new AccessorNode($2)
# o "PROTOTYPE_ACCESS IDENTIFIER", -> new AccessorNode($2, 'prototype')
# o "SOAK_ACCESS IDENTIFIER", -> new AccessorNode($2, 'soak')
# o "Index"
# o "Slice", -> new SliceNode($1)
# ]
#
# # Indexing into an object or array.
# Index: [
# o "INDEX_START Expression INDEX_END", -> new IndexNode($2)
# ]
#
# # An object literal.
# Object: [
# o "{ AssignList }", -> new ObjectNode($2)
# ]
#
# # Assignment within an object literal (comma or newline separated).
# AssignList: [
# o "", -> []
# o "AssignObj", -> [$1]
# o "AssignList , AssignObj", -> $1.push $3
# o "AssignList TERMINATOR AssignObj", -> $1.push $3
# o "AssignList , TERMINATOR AssignObj", -> $1.push $4
# o "INDENT AssignList OUTDENT", -> $2
# ]
#
# # All flavors of function call (instantiation, super, and regular).
# Call: [
# o "Invocation", -> $1
# o "NEW Invocation", -> $2.new_instance()
# o "Super", -> $1
# ]
#
# # Extending an object's prototype.
# Extends: [
# o "Value EXTENDS Value", -> new ExtendsNode($1, $3)
# ]
#
# # A generic function invocation.
# Invocation: [
# o "Value Arguments", -> new CallNode($1, $2)
# o "Invocation Arguments", -> new CallNode($1, $2)
# ]
#
# # The list of arguments to a function invocation.
# Arguments: [
# o "CALL_START ArgList CALL_END", -> $2
# ]
#
# # Calling super.
# Super: [
# o "SUPER CALL_START ArgList CALL_END", -> new CallNode('super', $3)
# ]
#
# # The range literal.
# Range: [
# o "[ Expression . . Expression ]", -> new RangeNode($2, $5)
# o "[ Expression . . . Expression ]", -> new RangeNode($2, $6, true)
# ]
#
# # The slice literal.
# Slice: [
# o "INDEX_START Expression . . Expression INDEX_END", -> new RangeNode($2, $5)
# o "INDEX_START Expression . . . Expression INDEX_END", -> new RangeNode($2, $6, true)
# ]
#
# # The array literal.
# Array: [
# o "[ ArgList ]", -> new ArrayNode($2)
# ]
#
# # A list of arguments to a method call, or as the contents of an array.
# ArgList: [
# o "", -> []
# o "Expression", -> val
# o "INDENT Expression", -> [$2]
# o "ArgList , Expression", -> $1.push $3
# o "ArgList TERMINATOR Expression", -> $1.push $3
# o "ArgList , TERMINATOR Expression", -> $1.push $4
# o "ArgList , INDENT Expression", -> $1.push $4
# o "ArgList OUTDENT", -> $1
# ]
#
# # Just simple, comma-separated, required arguments (no fancy syntax).
# SimpleArgs: [
# o "Expression", -> $1
# o "SimpleArgs , Expression", ->
# ([$1].push($3)).reduce (a, b) -> a.concat(b)
# ]
#
# # Try/catch/finally exception handling blocks.
# Try: [
# o "TRY Block Catch", -> new TryNode($2, $3[0], $3[1])
# o "TRY Block FINALLY Block", -> new TryNode($2, nil, nil, $4)
# o "TRY Block Catch FINALLY Block", -> new TryNode($2, $3[0], $3[1], $5)
# ]
#
# # A catch clause.
# Catch: [
# o "CATCH IDENTIFIER Block", -> [$2, $3]
# ]
#
# # Throw an exception.
# Throw: [
# o "THROW Expression", -> new ThrowNode($2)
# ]
#
# # Parenthetical expressions.
# Parenthetical: [
# o "( Expression )", -> new ParentheticalNode($2)
# ]
#
# # The while loop. (there is no do..while).
# While: [
# o "WHILE Expression Block", -> new WhileNode($2, $3)
# o "WHILE Expression", -> new WhileNode($2, nil)
# o "Expression WHILE Expression", -> new WhileNode($3, Expressions.wrap($1))
# ]
#
# # Array comprehensions, including guard and current index.
# # Looks a little confusing, check nodes.rb for the arguments to ForNode.
# For: [
# o "Expression FOR ForVariables ForSource", -> new ForNode($1, $4, $3[0], $3[1])
# o "FOR ForVariables ForSource Block", -> new ForNode($4, $3, $2[0], $2[1])
# ]
#
# # An array comprehension has variables for the current element and index.
# ForVariables: [
# o "IDENTIFIER", -> [$1]
# o "IDENTIFIER , IDENTIFIER", -> [$1, $3]
# ]
#
# # The source of the array comprehension can optionally be filtered.
# ForSource: [
# o "IN Expression", -> {source: $2}
# o "OF Expression", -> {source: $2, object: true}
# o "ForSource WHEN Expression", -> $1.filter: $3; $1
# o "ForSource BY Expression", -> $1.step: $3; $1
# ]
#
# # Switch/When blocks.
# Switch: [
# o "SWITCH Expression INDENT Whens OUTDENT", -> $4.rewrite_condition($2)
# o "SWITCH Expression INDENT Whens ELSE Block OUTDENT", -> $4.rewrite_condition($2).add_else($6)
# ]
#
# # The inner list of whens.
# Whens: [
# o "When", -> $1
# o "Whens When", -> $1.push $2
# ]
#
# # An individual when.
# When: [
# o "LEADING_WHEN SimpleArgs Block", -> new IfNode($2, $3, nil, {statement: true})
# o "LEADING_WHEN SimpleArgs Block TERMINATOR", -> new IfNode($2, $3, nil, {statement: true})
# o "Comment TERMINATOR When", -> $3.add_comment($1)
# ]
#
# # The most basic form of "if".
# IfBlock: [
# o "IF Expression Block", -> new IfNode($2, $3)
# ]
#
# # An elsif portion of an if-else block.
# ElsIf: [
# o "ELSE IfBlock", -> $2.force_statement()
# ]
#
# # Multiple elsifs can be chained together.
# ElsIfs: [
# o "ElsIf", -> $1
# o "ElsIfs ElsIf", -> $1.add_else($2)
# ]
#
# # Terminating else bodies are strictly optional.
# ElseBody: [
# o "", -> null
# o "ELSE Block", -> $2
# ]
#
# # All the alternatives for ending an if-else block.
# IfEnd: [
# o "ElseBody", -> $1
# o "ElsIfs ElseBody", -> $1.add_else($2)
# ]
#
# # The full complement of if blocks, including postfix one-liner ifs and unlesses.
# If: [
# o "IfBlock IfEnd", -> $1.add_else($2)
# o "Expression IF Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true})
# o "Expression UNLESS Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true, invert: true})
# ]
}
@@ -506,14 +501,14 @@ for name, non_terminal of grammar
option[1] = "return " + option[1]
option
tokens: tokens.join(" ")
parser: new Parser({tokens: tokens, bnf: bnf, operators: operators}, {debug: false})
parser: new Parser({tokens: tokens, bnf: bnf, operators: operators, startSymbol: 'Root'}, {debug: false})
# Thin wrapper around the real lexer
parser.lexer: {
lex: ->
token: this.tokens[this.pos] or [""]
this.pos += 1
# this.yylineno: token and token[1] and token[1][1]
this.yylineno: token[2]
this.yytext: token[1]
token[0]
setInput: (tokens) ->

View File

@@ -18,7 +18,7 @@ EXPRESSION_CLOSE: ['CATCH', 'WHEN', 'ELSE', 'FINALLY'].concat(EXPRESSION_TAIL)
# Tokens pairs that, in immediate succession, indicate an implicit call.
IMPLICIT_FUNC: ['IDENTIFIER', 'SUPER', ')', 'CALL_END', ']', 'INDEX_END']
IMPLICIT_END: ['IF', 'UNLESS', 'FOR', 'WHILE', "\n", 'OUTDENT']
IMPLICIT_END: ['IF', 'UNLESS', 'FOR', 'WHILE', 'TERMINATOR', 'OUTDENT']
IMPLICIT_CALL: ['IDENTIFIER', 'NUMBER', 'STRING', 'JS', 'REGEX', 'NEW', 'PARAM_START',
'TRY', 'DELETE', 'TYPEOF', 'SWITCH', 'ARGUMENTS',
'TRUE', 'FALSE', 'YES', 'NO', 'ON', 'OFF', '!', '!!', 'NOT',
@@ -33,7 +33,7 @@ for pair in BALANCED_PAIRS
# Single-line flavors of block expressions that have unclosed endings.
# The grammar can't disambiguate them, so we insert the implicit indentation.
SINGLE_LINERS: ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN']
SINGLE_CLOSERS: ["\n", 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'PARAM_START']
SINGLE_CLOSERS: ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'PARAM_START']
# Rewrite the token stream in multiple passes, one logical filter at
# a time. This could certainly be changed into a single pass through the
@@ -55,11 +55,11 @@ re::rewrite: (tokens) ->
# Allow the return value of the block to tell us how many tokens to move
# forwards (or backwards) in the stream, to make sure we don't miss anything
# as the stream changes length under our feet.
re::scan_tokens: (yield) ->
re::scan_tokens: (block) ->
i: 0
while true
# Stop at the first missing (falsy) entry, i.e. once i has run off the end of the stream.
break unless this.tokens[i]
# The callback receives (previous token, current token, next token, index) and
# returns how far to move; the neighbours are undefined at the stream edges.
move: yield(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i)
move: block(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i)
# A move of 0 makes the next pass look at the same index again.
i += move
true
@@ -77,12 +77,12 @@ re::adjust_comments: ->
this.tokens.splice(i + 2, 1)
this.tokens.splice(i - 2, 1)
return 0
else if prev[0] is "\n" and after[0] is 'INDENT'
else if prev[0] is 'TERMINATOR' and after[0] is 'INDENT'
this.tokens.splice(i + 2, 1)
this.tokens[i - 1]: after
return 1
else if prev[0] isnt "\n" and prev[0] isnt 'INDENT' and prev[0] isnt 'OUTDENT'
this.tokens.splice(i, 0, ["\n", "\n"])
else if prev[0] isnt 'TERMINATOR' and prev[0] isnt 'INDENT' and prev[0] isnt 'OUTDENT'
this.tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]])
return 2
else
return 1
@@ -90,13 +90,13 @@ re::adjust_comments: ->
# Leading newlines would introduce an ambiguity in the grammar, so we
# dispatch them here.
re::remove_leading_newlines: ->
  # Guard the lookup: an empty token stream has no first token to inspect,
  # and indexing into it directly would throw.
  first: this.tokens[0]
  # Drop the first token only when it is tagged as a line terminator.
  this.tokens.shift() if first and first[0] is 'TERMINATOR'
# Some blocks occur in the middle of expressions -- when we're expecting
# this, remove their trailing newlines.
re::remove_mid_expression_newlines: ->
this.scan_tokens (prev, token, post, i) =>
return 1 unless post and EXPRESSION_CLOSE.indexOf(post[0]) >= 0 and token[0] is "\n"
return 1 unless post and EXPRESSION_CLOSE.indexOf(post[0]) >= 0 and token[0] is 'TERMINATOR'
this.tokens.splice(i, 1)
return 0