From 89ef3f0f2038cc7fe56087954592e904be46aa50 Mon Sep 17 00:00:00 2001 From: Corey Johnson & Nathan Sobo Date: Mon, 5 Nov 2012 16:03:19 -0700 Subject: [PATCH] Build Token objects in TextMateGrammar. Tokenization-related renames. --- spec/app/text-mate-grammar-spec.coffee | 54 +++++++++++++------------- src/app/language-mode.coffee | 5 +-- src/app/screen-line.coffee | 8 ++-- src/app/text-mate-grammar.coffee | 26 ++++++++----- src/app/tokenized-buffer.coffee | 24 +++++------- 5 files changed, 58 insertions(+), 59 deletions(-) diff --git a/spec/app/text-mate-grammar-spec.coffee b/spec/app/text-mate-grammar-spec.coffee index 79b888390..e694eb302 100644 --- a/spec/app/text-mate-grammar-spec.coffee +++ b/spec/app/text-mate-grammar-spec.coffee @@ -10,10 +10,10 @@ describe "TextMateGrammar", -> beforeEach -> grammar = TextMateBundle.grammarForFilePath("hello.coffee") - describe ".getLineTokens(line, currentRule)", -> + describe ".tokenizeLine(line, { ruleStack, tabLength })", -> describe "when the entire line matches a single pattern with no capture groups", -> it "returns a single token with the correct scope", -> - {tokens} = grammar.getLineTokens("return") + {tokens} = grammar.tokenizeLine("return") expect(tokens.length).toBe 1 [token] = tokens @@ -21,7 +21,7 @@ describe "TextMateGrammar", -> describe "when the entire line matches a single pattern with capture groups", -> it "returns a single token with the correct scope", -> - {tokens} = grammar.getLineTokens("new foo.bar.Baz") + {tokens} = grammar.tokenizeLine("new foo.bar.Baz") expect(tokens.length).toBe 3 [newOperator, whitespace, className] = tokens @@ -32,12 +32,12 @@ describe "TextMateGrammar", -> describe "when the line doesn't match any patterns", -> it "returns the entire line as a single simple token with the grammar's scope", -> textGrammar = TextMateBundle.grammarForFilePath('foo.txt') - {tokens} = textGrammar.getLineTokens("abc def") + {tokens} = textGrammar.tokenizeLine("abc def") expect(tokens.length).toBe 1 describe "when the line matches multiple patterns", -> it "returns multiple tokens, filling in regions that don't match patterns with tokens in the grammar's global scope", -> - {tokens} = grammar.getLineTokens(" return new foo.bar.Baz ") + {tokens} = grammar.tokenizeLine(" return new foo.bar.Baz ") expect(tokens.length).toBe 7 @@ -51,7 +51,7 @@ describe "TextMateGrammar", -> describe "when the line matches a pattern with optional capture groups", -> it "only returns tokens for capture groups that matched", -> - {tokens} = grammar.getLineTokens("class Quicksort") + {tokens} = grammar.tokenizeLine("class Quicksort") expect(tokens.length).toBe 3 expect(tokens[0].value).toBe "class" expect(tokens[1].value).toBe " " @@ -59,7 +59,7 @@ describe "TextMateGrammar", -> describe "when the line matches a rule with nested capture groups and lookahead capture groups beyond the scope of the overall match", -> it "creates distinct tokens for nested captures and does not return tokens beyond the scope of the overall capture", -> - {tokens} = grammar.getLineTokens(" destroy: ->") + {tokens} = grammar.tokenizeLine(" destroy: ->") expect(tokens.length).toBe 6 expect(tokens[0]).toEqual(value: ' ', scopes: ["source.coffee", "meta.function.coffee"]) expect(tokens[1]).toEqual(value: 'destro', scopes: ["source.coffee", "meta.function.coffee", "entity.name.function.coffee"]) @@ -71,13 +71,13 @@ describe "TextMateGrammar", -> describe "when the line matches a pattern that includes a rule", -> it "returns tokens based on the included rule", -> - {tokens} = grammar.getLineTokens("7777777") + {tokens} = grammar.tokenizeLine("7777777") expect(tokens.length).toBe 1 expect(tokens[0]).toEqual value: '7777777', scopes: ['source.coffee', 'constant.numeric.coffee'] describe "when the line is an interpolated string", -> it "returns the correct tokens", -> - {tokens} = grammar.getLineTokens('"the value is #{@x} my friend"') + {tokens} = grammar.tokenizeLine('"the value is #{@x} my friend"') expect(tokens[0]).toEqual value: '"', scopes: ["source.coffee","string.quoted.double.coffee","punctuation.definition.string.begin.coffee"] expect(tokens[1]).toEqual value: "the value is ", scopes: ["source.coffee","string.quoted.double.coffee"] @@ -89,7 +89,7 @@ describe "TextMateGrammar", -> describe "when the line has an interpolated string inside an interpolated string", -> it "returns the correct tokens", -> - {tokens} = grammar.getLineTokens('"#{"#{@x}"}"') + {tokens} = grammar.tokenizeLine('"#{"#{@x}"}"') expect(tokens[0]).toEqual value: '"', scopes: ["source.coffee","string.quoted.double.coffee","punctuation.definition.string.begin.coffee"] expect(tokens[1]).toEqual value: '#{', scopes: ["source.coffee","string.quoted.double.coffee","source.coffee.embedded.source","punctuation.section.embedded.coffee"] @@ -103,26 +103,26 @@ describe "TextMateGrammar", -> describe "when the line is empty", -> it "returns a single token which has the global scope", -> - {tokens} = grammar.getLineTokens('') + {tokens} = grammar.tokenizeLine('') expect(tokens[0]).toEqual value: '', scopes: ["source.coffee"] describe "when the line matches no patterns", -> it "does not infinitely loop", -> grammar = TextMateBundle.grammarForFilePath("sample.txt") - {tokens} = grammar.getLineTokens('hoo') + {tokens} = grammar.tokenizeLine('hoo') expect(tokens.length).toBe 1 expect(tokens[0]).toEqual value: 'hoo', scopes: ["text.plain", "meta.paragraph.text"] describe "when the line matches a pattern with a 'contentName'", -> it "creates tokens using the content of contentName as the token name", -> grammar = TextMateBundle.grammarForFilePath("sample.txt") - {tokens} = grammar.getLineTokens('ok, cool') + {tokens} = grammar.tokenizeLine('ok, cool') expect(tokens[0]).toEqual value: 'ok, cool', scopes: ["text.plain", "meta.paragraph.text"] describe "when the line matches a pattern with no `name` or `contentName`", -> it "creates tokens without adding a new scope", -> grammar = TextMateBundle.grammarsByFileType["rb"] - {tokens} = grammar.getLineTokens('%w|oh \\look|') + {tokens} = grammar.tokenizeLine('%w|oh \\look|') expect(tokens.length).toBe 5 expect(tokens[0]).toEqual value: '%w|', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"] expect(tokens[1]).toEqual value: 'oh ', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"] @@ -131,7 +131,7 @@ describe "TextMateGrammar", -> describe "when the line matches a begin/end pattern", -> it "returns tokens based on the beginCaptures, endCaptures and the child scope", -> - {tokens} = grammar.getLineTokens("'''single-quoted heredoc'''") + {tokens} = grammar.tokenizeLine("'''single-quoted heredoc'''") expect(tokens.length).toBe 3 @@ -140,9 +140,9 @@ describe "TextMateGrammar", -> expect(tokens[2]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.end.coffee'] describe "when the pattern spans multiple lines", -> - it "uses the currentRule returned by the first line to parse the second line", -> - {tokens: firstTokens, stack} = grammar.getLineTokens("'''single-quoted") - {tokens: secondTokens, stack} = grammar.getLineTokens("heredoc'''", stack) + it "uses the ruleStack returned by the first line to parse the second line", -> + {tokens: firstTokens, ruleStack} = grammar.tokenizeLine("'''single-quoted") + {tokens: secondTokens, ruleStack} = grammar.tokenizeLine("heredoc'''", {ruleStack}) expect(firstTokens.length).toBe 2 expect(secondTokens.length).toBe 2 @@ -155,7 +155,7 @@ describe "TextMateGrammar", -> describe "when the pattern contains sub-patterns", -> it "returns tokens within the begin/end scope based on the sub-patterns", -> - {tokens} = grammar.getLineTokens('"""heredoc with character escape \\t"""') + {tokens} = grammar.tokenizeLine('"""heredoc with character escape \\t"""') expect(tokens.length).toBe 4 @@ -167,7 +167,7 @@ describe "TextMateGrammar", -> describe "when the end pattern contains a back reference", -> it "constructs the end rule based on its back-references to captures in the begin rule", -> grammar = TextMateBundle.grammarsByFileType["rb"] - {tokens} = grammar.getLineTokens('%w|oh|,') + {tokens} = grammar.tokenizeLine('%w|oh|,') expect(tokens.length).toBe 4 expect(tokens[0]).toEqual value: '%w|', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"] expect(tokens[1]).toEqual value: 'oh', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"] @@ -176,7 +176,7 @@ describe "TextMateGrammar", -> it "allows the rule containing that end pattern to be pushed to the stack multiple times", -> grammar = TextMateBundle.grammarsByFileType["rb"] - {tokens} = grammar.getLineTokens('%Q+matz had some #{%Q-crazy ideas-} for ruby syntax+ # damn.') + {tokens} = grammar.tokenizeLine('%Q+matz had some #{%Q-crazy ideas-} for ruby syntax+ # damn.') expect(tokens[0]).toEqual value: '%Q+', scopes: ["source.ruby","string.quoted.other.literal.upper.ruby","punctuation.definition.string.begin.ruby"] expect(tokens[1]).toEqual value: 'matz had some ', scopes: ["source.ruby","string.quoted.other.literal.upper.ruby"] expect(tokens[2]).toEqual value: '#{', scopes: ["source.ruby","string.quoted.other.literal.upper.ruby","source.ruby.embedded.source","punctuation.section.embedded.ruby"] @@ -193,7 +193,7 @@ describe "TextMateGrammar", -> describe "when the pattern includes rules from another grammar", -> it "parses tokens inside the begin/end patterns based on the included grammar's rules", -> grammar = TextMateBundle.grammarsByFileType["html.erb"] - {tokens} = grammar.getLineTokens("
<%= User.find(2).full_name %>
") + {tokens} = grammar.tokenizeLine("
<%= User.find(2).full_name %>
") expect(tokens[0]).toEqual value: '<', scopes: ["text.html.ruby","meta.tag.block.any.html","punctuation.definition.tag.begin.html"] expect(tokens[1]).toEqual value: 'div', scopes: ["text.html.ruby","meta.tag.block.any.html","entity.name.tag.block.any.html"] @@ -232,9 +232,9 @@ describe "TextMateGrammar", -> } ] - {tokens, stack} = grammar.getLineTokens("// a singleLineComment") - expect(stack.length).toBe 1 - expect(stack[0].scopeName).toBe "source.imaginaryLanguage" + {tokens, ruleStack} = grammar.tokenizeLine("// a singleLineComment") + expect(ruleStack.length).toBe 1 + expect(ruleStack[0].scopeName).toBe "source.imaginaryLanguage" expect(tokens.length).toBe 2 expect(tokens[0].value).toBe "//" @@ -242,5 +242,5 @@ describe "TextMateGrammar", -> it "does not loop infinitley (regression)", -> grammar = TextMateBundle.grammarForFilePath("hello.js") - {tokens, stack} = grammar.getLineTokens("// line comment") - {tokens, stack} = grammar.getLineTokens(" // second line comment with a single leading space", stack) + {tokens, ruleStack} = grammar.tokenizeLine("// line comment") + {tokens, ruleStack} = grammar.tokenizeLine(" // second line comment with a single leading space", ruleStack) diff --git a/src/app/language-mode.coffee b/src/app/language-mode.coffee index 8e1956d1a..e6446f2dc 100644 --- a/src/app/language-mode.coffee +++ b/src/app/language-mode.coffee @@ -168,6 +168,5 @@ class LanguageMode if desiredIndentLevel < currentIndentLevel @editSession.setIndentationForBufferRow(bufferRow, desiredIndentLevel) - getLineTokens: (line, stack) -> - {tokens, stack} = @grammar.getLineTokens(line, stack) - + tokenizeLine: (line, stack) -> + {tokens, stack} = @grammar.tokenizeLine(line, stack) diff --git a/src/app/screen-line.coffee b/src/app/screen-line.coffee index 72ec91018..b9227ef1c 100644 --- a/src/app/screen-line.coffee +++ b/src/app/screen-line.coffee @@ -2,14 +2,14 @@ _ = require 'underscore' module.exports = class ScreenLine - constructor: ({@tokens, @stack, @bufferRows, @startBufferColumn, @fold, @foldable}) -> + constructor: ({@tokens, @ruleStack, @bufferRows, @startBufferColumn, @fold, @foldable}) -> @bufferRows ?= 1 @startBufferColumn ?= 0 @foldable ?= false @text = _.pluck(@tokens, 'value').join('') copy: -> - new ScreenLine({@tokens, @stack, @bufferRows, @startBufferColumn, @fold, @foldable}) + new ScreenLine({@tokens, @ruleStack, @bufferRows, @startBufferColumn, @fold, @foldable}) clipScreenColumn: (column, options={}) -> { skipAtomicTokens } = options @@ -73,13 +73,13 @@ class ScreenLine tokens: leftTokens bufferRows: 0 startBufferColumn: @startBufferColumn - stack: @stack + ruleStack: @ruleStack foldable: @foldable ) rightFragment = new ScreenLine( tokens: rightTokens startBufferColumn: @startBufferColumn + column - stack: @stack + ruleStack: @ruleStack ) [leftFragment, rightFragment] diff --git a/src/app/text-mate-grammar.coffee b/src/app/text-mate-grammar.coffee index 1a460700c..99f482d9a 100644 --- a/src/app/text-mate-grammar.coffee +++ b/src/app/text-mate-grammar.coffee @@ -1,6 +1,7 @@ _ = require 'underscore' fs = require 'fs' plist = require 'plist' +Token = require 'token' module.exports = class TextMateGrammar @@ -27,7 +28,8 @@ class TextMateGrammar for name, data of repository @repository[name] = new Rule(this, data) - getLineTokens: (line, ruleStack=[@initialRule]) -> + tokenizeLine: (line, {ruleStack, tabLength}={}) -> + ruleStack ?= [@initialRule] ruleStack = new Array(ruleStack...) # clone ruleStack tokens = [] position = 0 @@ -36,28 +38,31 @@ class TextMateGrammar scopes = scopesFromStack(ruleStack) if line.length == 0 - tokens = [{value: "", scopes: scopes}] - return { tokens, scopes } + tokens = [new Token(value: "", scopes: scopes)] + return { tokens, ruleStack } break if position == line.length if match = _.last(ruleStack).getNextTokens(ruleStack, line, position) { nextTokens, tokensStartPosition, tokensEndPosition } = match if position < tokensStartPosition # unmatched text before next tokens - tokens.push + tokens.push(new Token( value: line[position...tokensStartPosition] scopes: scopes + )) tokens.push(nextTokens...) position = tokensEndPosition else # push filler token for unmatched text at end of line - tokens.push + tokens.push(new Token( value: line[position...line.length] scopes: scopes + )) break - { tokens, stack: ruleStack } + tokens = _.flatten(tokens.map (token) -> token.breakOutTabCharacters(tabLength)) + { tokens, ruleStack } ruleForInclude: (name) -> if name[0] == "#" @@ -158,7 +163,6 @@ class Pattern getIncludedPatterns: (included) -> if @include rule = @grammar.ruleForInclude(@include) - # console.log "Could not find rule for include #{@include} in #{@grammar.name} grammar" unless rule rule?.getIncludedPatterns(included) ? [] else [this] @@ -175,7 +179,7 @@ class Pattern if zeroLengthMatch tokens = [] else - tokens = [{ value: line[start...end], scopes: scopes }] + tokens = [new Token(value: line[start...end], scopes: scopes)] if @pushRule stack.push(@pushRule.getRuleToPush(line, captureIndices)) else if @popRule @@ -201,18 +205,20 @@ class Pattern continue if childCaptureStart > previousChildCaptureEnd - tokens.push + tokens.push(new Token( value: line[previousChildCaptureEnd...childCaptureStart] scopes: scopes + )) captureTokens = @getTokensForCaptureIndices(line, captureIndices, scopes) tokens.push(captureTokens...) previousChildCaptureEnd = childCaptureEnd if parentCaptureEnd > previousChildCaptureEnd - tokens.push + tokens.push(new Token( value: line[previousChildCaptureEnd...parentCaptureEnd] scopes: scopes + )) tokens diff --git a/src/app/tokenized-buffer.coffee b/src/app/tokenized-buffer.coffee index e52acdac6..e7f564426 100644 --- a/src/app/tokenized-buffer.coffee +++ b/src/app/tokenized-buffer.coffee @@ -53,24 +53,18 @@ class TokenizedBuffer @trigger("change", {oldRange, newRange}) buildScreenLinesForRows: (startRow, endRow, startingStack) -> - stack = startingStack + ruleStack = startingStack for row in [startRow..endRow] - screenLine = @buildScreenLineForRow(row, stack) - stack = screenLine.stack + screenLine = @buildScreenLineForRow(row, ruleStack) + ruleStack = screenLine.ruleStack screenLine - buildScreenLineForRow: (row, stack) -> + buildScreenLineForRow: (row, ruleStack) -> line = @buffer.lineForRow(row) - {tokens, stack} = @languageMode.getLineTokens(line, stack) - tokenObjects = [] - for tokenProperties in tokens - token = new Token(tokenProperties) - tokenObjects.push(token.breakOutTabCharacters(@tabLength)...) - text = _.pluck(tokenObjects, 'value').join('') - new ScreenLine( - tokens: tokenObjects - stack: stack - ) + + val = @languageMode.tokenizeLine(line, {ruleStack, @tabLength}) + console.log val, line unless val.ruleStack + new ScreenLine(val) lineForScreenRow: (row) -> @screenLines[row] @@ -79,7 +73,7 @@ class TokenizedBuffer @screenLines[startRow..endRow] stackForRow: (row) -> - @screenLines[row]?.stack + @screenLines[row]?.ruleStack scopesForPosition: (position) -> position = Point.fromObject(position)