a smarter lexer brings us down to three shift/reduces

Jeremy Ashkenas
2009-12-16 20:48:37 -05:00
parent 2753648d65
commit 5153ef7c94
4 changed files with 455 additions and 569 deletions

code.cs (35 lines changed)

@@ -1,29 +1,34 @@
# TODO: Add range indexing: array[5..7] => array.slice(5, 7)
# Functions:
square: x => x * x.
# square: x => x * x.
#
# sum: x, y => x + y.
#
# odd: x => x % 2 is 0.
#
# even: x => x % 2 aint 0.
#
# run_loop: =>
#   fire_events( e => e.stopPropagation(). )
#   listen()
#   wait().
#
# # Objects:
# dense_object_literal: {one: 1, two: 2, three: 3}
sum: x, y => x + y.
spaced_out_multiline_object: {
odd: x => x % 2 is 0.
even: x => x % 2 aint 0.
run_loop: =>
  fire_events( e => e.stopPropagation(). )
  listen()
  wait().
# Objects:
object_literal: {one: 1, two: 2, three: 3}
multiline_object: {
  pi: 3.14159
  list: [1, 2, 3, 4]
  three: new Idea()
  inner_obj: {
    freedom: => _.freedom().
  }
}
# Arrays:

grammar.y

@@ -29,9 +29,9 @@ prechigh
nonassoc "."
preclow
-# We expect 8 shift/reduce errors for optional syntax.
+# We expect 3 shift/reduce errors for optional syntax.
# There used to be 252 -- greatly improved.
-expect 8
+expect 3
rule
@@ -177,8 +177,8 @@ rule
;
Object:
-ObjectStart ObjectEnd { result = ObjectNode.new([]) }
-| ObjectStart AssignList ObjectEnd { result = ObjectNode.new(val[1]) }
+"{" "}" { result = ObjectNode.new([]) }
+| "{" AssignList "}" { result = ObjectNode.new(val[1]) }
;
AssignList:
@@ -195,12 +195,12 @@ rule
;
Invocation:
-Value ParenStart ArgList ParenEnd { result = CallNode.new(val[0], val[2]) }
+Value "(" ArgList ")" { result = CallNode.new(val[0], val[2]) }
;
# An Array.
Array:
-ArrayStart ArgList ArrayEnd { result = ArrayNode.new(val[1]) }
+"[" ArgList "]" { result = ArrayNode.new(val[1]) }
;
# A list of arguments to a method call.
@@ -236,7 +236,7 @@ rule
;
Parenthetical:
-ParenStart Expressions ParenEnd { result = ParentheticalNode.new(val[1]) }
+"(" Expressions ")" { result = ParentheticalNode.new(val[1]) }
;
While:
@@ -275,36 +275,6 @@ rule
CASE Expression Then Expressions { result = IfNode.new(val[1], val[3]) }
;
-ObjectStart:
-"{" { result = nil }
-| "{" "\n" { result = nil }
-;
-ObjectEnd:
-"}" { result = nil }
-| "\n" "}" { result = nil }
-;
-ParenStart:
-"(" { result = nil }
-| "(" "\n" { result = nil }
-;
-ParenEnd:
-")" { result = nil }
-| "\n" ")" { result = nil }
-;
-ArrayStart:
-"[" { result = nil }
-| "[" "\n" { result = nil }
-;
-ArrayEnd:
-"]" { result = nil }
-| "\n" "]" { result = nil }
-;
end
---- header
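
The six rules deleted above existed only to accept an optional newline on either side of each bracket token, and those optional "\n" alternatives are what the parser had to resolve with extra shift/reduce decisions; moving the newline handling into the lexer is what lets expect drop from 8 to 3. As a hypothetical illustration in Ruby of what the parser now receives for a small multiline object, using [tag, value] pairs shaped like the ones the lexer pushes onto @tokens (the tag names below are invented for the example, not the project's real ones):

# Hypothetical token pairs for the source "{\n  one: 1\n}".
# Before this commit, the ObjectStart/ObjectEnd rules had to absorb the
# stray newlines inside the grammar itself.
before = [
  ['{', '{'], ["\n", "\n"],
  [:IDENTIFIER, 'one'], [':', ':'], [:NUMBER, '1'],
  ["\n", "\n"], ['}', '}']
]

# After this commit, the lexer never emits the newline that follows '{'
# and pops the one that precedes '}', so the plain "{" and "}" rules above
# are enough.
after = [
  ['{', '{'],
  [:IDENTIFIER, 'one'], [':', ':'], [:NUMBER, '1'],
  ['}', '}']
]

p before.map(&:first)  # => ["{", "\n", :IDENTIFIER, ":", :NUMBER, "\n", "}"]
p after.map(&:first)   # => ["{", :IDENTIFIER, ":", :NUMBER, "}"]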

lexer.rb

@@ -22,6 +22,9 @@ class Lexer
JS_CLEANER = /(\A`|`\Z)/
+EXP_START = ['{', '(', '[']
+EXP_END = ['}', ')', ']']
# This is how to implement a very simple scanner.
# Scan one character at a time until you find something to parse.
def tokenize(code)
@@ -107,6 +110,8 @@ class Lexer
value = @chunk[OPERATOR, 1]
tag_parameters if value && value.match(CODE)
value ||= @chunk[0,1]
+skip_following_newlines if EXP_START.include?(value)
+remove_leading_newlines if EXP_END.include?(value)
@tokens << [value, value]
@i += value.length
end
@@ -123,4 +128,13 @@ class Lexer
end
end
+def skip_following_newlines
+  newlines = @code[(@i+1)..-1][NEWLINE, 1]
+  @i += newlines.length if newlines
+end
+
+def remove_leading_newlines
+  @tokens.pop if @tokens.last[1] == "\n"
+end
end
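
To see the two new hooks in isolation, here is a minimal standalone sketch in Ruby. It is not the project's Lexer class: it handles only one-character tokens, the NEWLINE pattern is assumed, and only the constant and method names mirror the diff. After an opening '{', '(' or '[' it skips the newlines that follow, and before a closing '}', ')' or ']' it pops a newline token it has already emitted, so a bracketed construct spread over several lines yields the same token stream as the single-line version.

# Minimal sketch of the newline normalization added in this commit.
# Assumptions: one-character tokens only, no strings, comments or numbers;
# the real Lexer does far more work per iteration of its scan loop.
class NewlineNormalizingScanner
  EXP_START = ['{', '(', '[']
  EXP_END   = ['}', ')', ']']
  NEWLINE   = /\A(\n+)/   # assumed; the real lexer's NEWLINE may differ

  def tokenize(code)
    @code, @i, @tokens = code, 0, []
    while @i < @code.length
      value = @code[@i, 1]
      skip_following_newlines if EXP_START.include?(value)
      remove_leading_newlines if EXP_END.include?(value)
      @tokens << [value, value]
      @i += 1
    end
    @tokens
  end

  # After an opener, jump past any newlines so they are never tokenized.
  def skip_following_newlines
    newlines = @code[(@i + 1)..-1][NEWLINE, 1]
    @i += newlines.length if newlines
  end

  # Before a closer, drop a newline token that was already emitted.
  def remove_leading_newlines
    @tokens.pop if @tokens.last && @tokens.last[1] == "\n"
  end
end

# "[1,2]" and "[\n1,2\n]" now produce identical token streams:
p NewlineNormalizingScanner.new.tokenize("[\n1,2\n]").map(&:first)
# => ["[", "1", ",", "2", "]"]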

parser.rb (931 lines changed)

File diff suppressed because it is too large.