rewriter is halfway done, and working

This commit is contained in:
Jeremy Ashkenas
2010-01-30 17:24:48 -05:00
parent 84feab3492
commit 557cdbba71
5 changed files with 259 additions and 57 deletions

View File

@@ -216,7 +216,7 @@ lex::literal_token: ->
this.tag_parameters() if value and value.match(CODE)
value ||= this.chunk.substr(0, 1)
tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
if this.value() and this.value().spaced and CALLABLE.indexOf(this.tag() >= 0)
if this.value() and !this.value().spaced and CALLABLE.indexOf(this.tag() >= 0)
tag: 'CALL_START' if value is '('
tag: 'INDEX_START' if value is '['
this.token tag, value

View File

@@ -41,12 +41,12 @@ SINGLE_CLOSERS: ["\n", 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'P
re::rewrite: (tokens) ->
this.tokens: tokens
this.adjust_comments()
# this.remove_leading_newlines()
# this.remove_mid_expression_newlines()
# this.move_commas_outside_outdents()
# this.close_open_calls_and_indexes()
this.remove_leading_newlines()
this.remove_mid_expression_newlines()
this.move_commas_outside_outdents()
this.close_open_calls_and_indexes()
# this.add_implicit_parentheses()
# this.add_implicit_indentation()
this.add_implicit_indentation()
# this.ensure_balance(BALANCED_PAIRS)
# this.rewrite_closing_parens()
this.tokens
@@ -56,7 +56,7 @@ re::rewrite: (tokens) ->
# forwards (or backwards) in the stream, to make sure we don't miss anything
# as the stream changes length under our feet.
re::scan_tokens: (yield) ->
i = 0
i: 0
while true
break unless this.tokens[i]
move: yield(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i)
@@ -66,7 +66,7 @@ re::scan_tokens: (yield) ->
# Massage newlines and indentations so that comments don't have to be
# correctly indented, or appear on their own line.
re::adjust_comments: ->
this.scan_tokens (prev, token, post, i) ->
this.scan_tokens (prev, token, post, i) =>
return 1 unless token[0] is 'COMMENT'
before: this.tokens[i - 2]
after: this.tokens[i + 2]
@@ -87,6 +87,86 @@ re::adjust_comments: ->
else
return 1
# Leading newlines would introduce an ambiguity in the grammar, so we
# dispatch them here. If the first token of the stream is a newline it is
# dropped; an empty token stream is left untouched instead of raising on
# the missing first token.
re::remove_leading_newlines: ->
  first: this.tokens[0]
  this.tokens.shift() if first and first[0] is "\n"
# Some blocks occur in the middle of expressions. When a newline token is
# immediately followed by a token whose tag is listed in EXPRESSION_CLOSE,
# splice the newline out of the stream and return 0; in every other case
# return 1.
re::remove_mid_expression_newlines: ->
  this.scan_tokens (before, current, following, index) =>
    if current[0] is "\n" and following and EXPRESSION_CLOSE.indexOf(following[0]) >= 0
      this.tokens.splice(index, 1)
      return 0
    return 1
# Make sure that we don't accidentally break trailing commas, which need
# to go on the outside of expression closers. Whenever a ',' token is
# directly followed by an 'OUTDENT', the two are swapped so the stream
# reads OUTDENT, ',' instead of ',', OUTDENT.
re::move_commas_outside_outdents: ->
  this.scan_tokens (prev, token, post, i) =>
    # prev is undefined for the very first token, so check it before indexing.
    if prev and prev[0] is ',' and token[0] is 'OUTDENT'
      # Replace the pair in one splice: the stream length is unchanged, and
      # the comma now sits at index i, ready to hop over a further OUTDENT.
      this.tokens.splice(i - 1, 2, token, prev)
    return 1
# We've tagged the opening parenthesis of a method call, and the opening
# bracket of an indexing operation. Match them with their close.
#
# Two stacks of counters are kept, one for parentheses and one for brackets.
# Each CALL_START / INDEX_START pushes a fresh counter; plain '(' and '['
# tokens increment the top counter and their closers decrement it. A ')' or
# ']' seen while the top counter is zero closes the tagged opener, so its
# tag is rewritten to CALL_END / INDEX_END and the counter is popped.
re::close_open_calls_and_indexes: ->
  parens: [0]
  brackets: [0]
  this.scan_tokens (prev, token, post, i) =>
    switch token[0]
      when 'CALL_START' then parens.push(0)
      when 'INDEX_START' then brackets.push(0)
      # Negative indexes do not address the end of a JavaScript array, so
      # the top of each stack is always reached through its length.
      when '(' then parens[parens.length - 1] += 1
      when '[' then brackets[brackets.length - 1] += 1
      when ')'
        if parens[parens.length - 1] is 0
          parens.pop()
          token[0]: 'CALL_END'
        else
          parens[parens.length - 1] -= 1
      when ']'
        if brackets[brackets.length - 1] is 0
          brackets.pop()
          token[0]: 'INDEX_END'
        else
          brackets[brackets.length - 1] -= 1
    return 1
# Because our grammar is LALR(1), it can't handle some single-line
# expressions that lack ending delimiters. Use the lexer to add the implicit
# blocks, so it doesn't need to.
# ')' can close a single-line block, but we need to make sure it's balanced.
#
# For every token tagged in SINGLE_LINERS that is not already followed by an
# INDENT (an ELSE directly followed by IF is left alone), an ['INDENT', 2]
# token is inserted after it. The stream is then walked forward to the first
# closer -- the end of the stream, a tag in SINGLE_CLOSERS, or a ')' with no
# matching '(' inside the block -- and an ['OUTDENT', 2] is inserted there.
# A THEN starter is removed from the stream afterwards.
re::add_implicit_indentation: ->
  this.scan_tokens (prev, token, post, i) =>
    # NOTE(review): post is undefined when a single-liner is the final token
    # of the stream -- confirm the lexer never produces that.
    return 1 unless SINGLE_LINERS.indexOf(token[0]) >= 0 and post[0] isnt 'INDENT' and
      not (token[0] is 'ELSE' and post[0] is 'IF')
    starter: token[0]
    this.tokens.splice(i + 1, 0, ['INDENT', 2])
    idx: i + 1
    parens: 0
    while true
      idx += 1
      tok: this.tokens[idx]
      # An ELSE never closes the block opened by another ELSE.
      if (not tok or SINGLE_CLOSERS.indexOf(tok[0]) >= 0 or
          (tok[0] is ')' and parens is 0)) and
          not (starter is 'ELSE' and tok[0] is 'ELSE')
        # Keep a trailing comma outside the block: the OUTDENT goes before it.
        insertion: if this.tokens[idx - 1][0] is "," then idx - 1 else idx
        this.tokens.splice(insertion, 0, ['OUTDENT', 2])
        break
      # Track parentheses opened inside the block so only an unbalanced ')'
      # is treated as a closer.
      parens += 1 if tok[0] is '('
      parens -= 1 if tok[0] is ')'
    return 1 unless token[0] is 'THEN'
    # The THEN itself is dropped now that INDENT marks the block; 0 is
    # returned after the removal, as in the other splicing passes.
    this.tokens.splice(i, 1)
    return 0