a smarter lexer brings us down to three shift/reduces

Jeremy Ashkenas
2009-12-16 20:48:37 -05:00
parent 2753648d65
commit 5153ef7c94
4 changed files with 455 additions and 569 deletions

code.cs (35 lines changed)

@@ -1,29 +1,34 @@
# TODO: Add range indexing: array[5..7] => array.slice(5, 7)
# Functions:
square: x => x * x.
# square: x => x * x.
#
# sum: x, y => x + y.
#
# odd: x => x % 2 is 0.
#
# even: x => x % 2 aint 0.
#
# run_loop: =>
#   fire_events( e => e.stopPropagation(). )
#   listen()
#   wait().
#
# # Objects:
# dense_object_literal: {one: 1, two: 2, three: 3}
sum: x, y => x + y.
spaced_out_multiline_object: {
odd: x => x % 2 is 0.
even: x => x % 2 aint 0.
run_loop: =>
  fire_events( e => e.stopPropagation(). )
  listen()
  wait().
# Objects:
object_literal: {one: 1, two: 2, three: 3}
multiline_object: {
  pi: 3.14159
  list: [1, 2, 3, 4]
  three: new Idea()
  inner_obj: {
    freedom: => _.freedom().
  }
}
# Arrays:

grammar.y

@@ -29,9 +29,9 @@ prechigh
nonassoc "."
preclow
-# We expect 8 shift/reduce errors for optional syntax.
+# We expect 3 shift/reduce errors for optional syntax.
# There used to be 252 -- greatly improved.
-expect 8
+expect 3
rule
@@ -177,8 +177,8 @@ rule
;
Object:
-ObjectStart ObjectEnd { result = ObjectNode.new([]) }
-| ObjectStart AssignList ObjectEnd { result = ObjectNode.new(val[1]) }
+"{" "}" { result = ObjectNode.new([]) }
+| "{" AssignList "}" { result = ObjectNode.new(val[1]) }
;
AssignList:
@@ -195,12 +195,12 @@ rule
;
Invocation:
-Value ParenStart ArgList ParenEnd { result = CallNode.new(val[0], val[2]) }
+Value "(" ArgList ")" { result = CallNode.new(val[0], val[2]) }
;
# An Array.
Array:
-ArrayStart ArgList ArrayEnd { result = ArrayNode.new(val[1]) }
+"[" ArgList "]" { result = ArrayNode.new(val[1]) }
;
# A list of arguments to a method call.
@@ -236,7 +236,7 @@ rule
;
Parenthetical:
-ParenStart Expressions ParenEnd { result = ParentheticalNode.new(val[1]) }
+"(" Expressions ")" { result = ParentheticalNode.new(val[1]) }
;
While:
@@ -275,36 +275,6 @@ rule
CASE Expression Then Expressions { result = IfNode.new(val[1], val[3]) }
;
-ObjectStart:
-"{" { result = nil }
-| "{" "\n" { result = nil }
-;
-ObjectEnd:
-"}" { result = nil }
-| "\n" "}" { result = nil }
-;
-ParenStart:
-"(" { result = nil }
-| "(" "\n" { result = nil }
-;
-ParenEnd:
-")" { result = nil }
-| "\n" ")" { result = nil }
-;
-ArrayStart:
-"[" { result = nil }
-| "[" "\n" { result = nil }
-;
-ArrayEnd:
-"]" { result = nil }
-| "\n" "]" { result = nil }
-;
end
---- header
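
The six rules deleted above existed only to accept an optional newline on either side of each bracket token, and those optional "\n" alternatives are what the parser had to resolve with extra shift/reduce decisions; moving the newline handling into the lexer is what lets expect drop from 8 to 3. As a hypothetical illustration in Ruby of what the parser now receives for a small multiline object, using [tag, value] pairs shaped like the ones the lexer pushes onto @tokens (the tag names below are invented for the example, not the project's real ones):

# Hypothetical token pairs for the source "{\n  one: 1\n}".
# Before this commit, the ObjectStart/ObjectEnd rules had to absorb the
# stray newlines inside the grammar itself.
before = [
  ['{', '{'], ["\n", "\n"],
  [:IDENTIFIER, 'one'], [':', ':'], [:NUMBER, '1'],
  ["\n", "\n"], ['}', '}']
]

# After this commit, the lexer never emits the newline that follows '{'
# and pops the one that precedes '}', so the plain "{" and "}" rules above
# are enough.
after = [
  ['{', '{'],
  [:IDENTIFIER, 'one'], [':', ':'], [:NUMBER, '1'],
  ['}', '}']
]

p before.map(&:first)  # => ["{", "\n", :IDENTIFIER, ":", :NUMBER, "\n", "}"]
p after.map(&:first)   # => ["{", :IDENTIFIER, ":", :NUMBER, "}"]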

lexer.rb

@@ -22,6 +22,9 @@ class Lexer
JS_CLEANER = /(\A`|`\Z)/
+EXP_START = ['{', '(', '[']
+EXP_END = ['}', ')', ']']
# This is how to implement a very simple scanner.
# Scan one character at a time until you find something to parse.
def tokenize(code)
@@ -107,6 +110,8 @@ class Lexer
value = @chunk[OPERATOR, 1]
tag_parameters if value && value.match(CODE)
value ||= @chunk[0,1]
+skip_following_newlines if EXP_START.include?(value)
+remove_leading_newlines if EXP_END.include?(value)
@tokens << [value, value]
@i += value.length
end
@@ -123,4 +128,13 @@ class Lexer
end
end
+def skip_following_newlines
+  newlines = @code[(@i+1)..-1][NEWLINE, 1]
+  @i += newlines.length if newlines
+end
+
+def remove_leading_newlines
+  @tokens.pop if @tokens.last[1] == "\n"
+end
end
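
To see the two new hooks in isolation, here is a minimal standalone sketch in Ruby. It is not the project's Lexer class: it handles only one-character tokens, the NEWLINE pattern is assumed, and only the constant and method names mirror the diff. After an opening '{', '(' or '[' it skips the newlines that follow, and before a closing '}', ')' or ']' it pops a newline token it has already emitted, so a bracketed construct spread over several lines yields the same token stream as the single-line version.

# Minimal sketch of the newline normalization added in this commit.
# Assumptions: one-character tokens only, no strings, comments or numbers;
# the real Lexer does far more work per iteration of its scan loop.
class NewlineNormalizingScanner
  EXP_START = ['{', '(', '[']
  EXP_END   = ['}', ')', ']']
  NEWLINE   = /\A(\n+)/   # assumed; the real lexer's NEWLINE may differ

  def tokenize(code)
    @code, @i, @tokens = code, 0, []
    while @i < @code.length
      value = @code[@i, 1]
      skip_following_newlines if EXP_START.include?(value)
      remove_leading_newlines if EXP_END.include?(value)
      @tokens << [value, value]
      @i += 1
    end
    @tokens
  end

  # After an opener, jump past any newlines so they are never tokenized.
  def skip_following_newlines
    newlines = @code[(@i + 1)..-1][NEWLINE, 1]
    @i += newlines.length if newlines
  end

  # Before a closer, drop a newline token that was already emitted.
  def remove_leading_newlines
    @tokens.pop if @tokens.last && @tokens.last[1] == "\n"
  end
end

# "[1,2]" and "[\n1,2\n]" now produce identical token streams:
p NewlineNormalizingScanner.new.tokenize("[\n1,2\n]").map(&:first)
# => ["[", "1", ",", "2", "]"]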

parser.rb (931 lines changed)

File diff suppressed because it is too large.