WIP: Optimization: compile a single regex for each grammar rule

We compile one giant regex per rule out of the individual regexes of its patterns by OR-ing together one capture group per pattern. We then use the index of the matched capture group to determine which pattern actually matched, and shift the capture indices of the matched subtree so that it appears to start at index 0, which makes them line up with the capture indices declared on the pattern. More complex patterns are still broken, but basic patterns and patterns with captures work.
This commit is contained in:
Nathan Sobo
2012-08-06 18:32:55 -06:00
parent a02af2a51f
commit d520d6c859
2 changed files with 39 additions and 6 deletions

View File

@@ -13,7 +13,7 @@ describe "TextMateGrammar", ->
describe ".getLineTokens(line, currentRule)", ->
describe "when the entire line matches a single pattern with no capture groups", ->
it "returns a single token with the correct scope", ->
fit "returns a single token with the correct scope", ->
{tokens} = grammar.getLineTokens("return")
expect(tokens.length).toBe 1
@@ -21,7 +21,7 @@ describe "TextMateGrammar", ->
expect(token.scopes).toEqual ['source.coffee', 'keyword.control.coffee']
describe "when the entire line matches a single pattern with capture groups", ->
it "returns a single token with the correct scope", ->
fit "returns a single token with the correct scope", ->
{tokens} = grammar.getLineTokens("new foo.bar.Baz")
expect(tokens.length).toBe 3
@@ -31,7 +31,7 @@ describe "TextMateGrammar", ->
expect(className).toEqual value: 'foo.bar.Baz', scopes: ['source.coffee', 'meta.class.instance.constructor', 'entity.name.type.instance.coffee']
describe "when the line matches multiple patterns", ->
it "returns multiple tokens, filling in regions that don't match patterns with tokens in the grammar's global scope", ->
fit "returns multiple tokens, filling in regions that don't match patterns with tokens in the grammar's global scope", ->
{tokens} = grammar.getLineTokens(" return new foo.bar.Baz ")
expect(tokens.length).toBe 7

View File

@@ -21,6 +21,9 @@ class TextMateGrammar
for name, data of repository
@repository[name] = new Rule(this, data)
for rule in [@initialRule, _.values(@repository)...]
rule.compileRegex()
getLineTokens: (line, stack=[@initialRule]) ->
stack = new Array(stack...)
tokens = []
@@ -66,16 +69,40 @@ class Rule
@patterns.push(@endPattern) if @endPattern
@patterns.push((patterns.map (pattern) => new Pattern(grammar, pattern))...)
getNextTokens: (stack, line, position) ->
{ match, pattern } = @getNextMatch(line, position)
return {} unless match
# Builds one combined regex for this rule: the source of every regex returned
# by @getRegexPatternPairs() is wrapped in its own capture group and the groups
# are joined with `|`. @patternsByCaptureIndex records, for each wrapping
# group's index, the pattern that contributed it.
# NOTE(review): with no pairs the combined source is `()`; confirm callers never hit that.
# NOTE(review): numbered backreferences inside an individual regex presumably
# point at the wrong group once embedded here — confirm.
compileRegex: ->
regexComponents = []
@patternsByCaptureIndex = {}
# Wrapping groups are numbered from 1 (presumably index 0 is the overall match).
currentCaptureIndex = 1
for [regex, pattern] in @getRegexPatternPairs()
regexComponents.push(regex.source)
@patternsByCaptureIndex[currentCaptureIndex] = pattern
# Advance past this wrapping group plus the regex's own groups
# (assumes getCaptureCount() counts the groups inside `regex` — TODO confirm).
currentCaptureIndex += 1 + regex.getCaptureCount()
@regex = new OnigRegExp('(' + regexComponents.join(')|(') + ')')
# Collects the pairs returned by getRegexPatternPairs for each entry of
# @patterns, in order, and returns them as one flat array.
# `included` accumulates the rules already visited (it is mutated in place);
# a rule that is already in it returns [] so that rules reaching each other
# through includes do not recurse forever.
getRegexPatternPairs: (included=[]) ->
return [] if _.include(included, this)
included.push(this)
regexPatternPairs = []
for pattern in @patterns
regexPatternPairs.push(pattern.getRegexPatternPairs(included)...)
regexPatternPairs
# Runs this rule's combined regex on `line` from `position` and lets the
# pattern registered for the matched capture index handle the match.
# Returns {} when the regex yields no capture tree; otherwise returns
# { nextTokens, tokensStartPosition, tokensEndPosition }.
getNextTokens: (stack, line, position) ->
return {} unless tree = @regex.getCaptureTree(line, position)
# presumably the first child capture is the wrapping group of the alternative that matched — TODO confirm
match = tree.captures[0]
pattern = @patternsByCaptureIndex[match.index]
# Rebase the subtree's indices so the matched wrapping group becomes index 0.
@adjustCaptureTreeIndices(match, match.index)
nextTokens = pattern.handleMatch(stack, match)
tokensStartPosition = match.position
tokensEndPosition = tokensStartPosition + match.text.length
{ nextTokens, tokensStartPosition, tokensEndPosition }
# Subtracts startIndex from the index of `tree` and, recursively, from the
# index of every capture nested under it. Mutates the tree in place.
adjustCaptureTreeIndices: (tree, startIndex) ->
tree.index -= startIndex
# A node with no `captures` property is treated as having no children.
for capture in tree.captures ? []
@adjustCaptureTreeIndices(capture, startIndex)
getNextMatch: (line, position) ->
nextMatch = null
matchedPattern = null
@@ -108,6 +135,12 @@ class Pattern
endPattern = new Pattern(@grammar, { match: end, captures: endCaptures ? captures, popRule: true})
@pushRule = new Rule(@grammar, { @scopeName, patterns, endPattern })
# Returns the [regex, pattern] pairs this pattern contributes.
# A pattern with @include delegates to the rule that the include resolves to,
# passing `included` through; any other pattern contributes exactly one pair
# made of its own @regex and itself.
getRegexPatternPairs: (included) ->
if @include
@grammar.ruleForInclude(@include).getRegexPatternPairs(included)
else
[[@regex, this]]
getNextMatch: (line, position) ->
if @include
rule = @grammar.ruleForInclude(@include)