From a66120aed2e725cddf5b31e95043ae1413736639 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 May 2018 16:09:34 -0700 Subject: [PATCH 1/7] :arrow_up: text-buffer, tree-sitter --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 8f63730c2..e007bc635 100644 --- a/package.json +++ b/package.json @@ -71,8 +71,8 @@ "service-hub": "^0.7.4", "sinon": "1.17.4", "temp": "^0.8.3", - "text-buffer": "13.14.2", - "tree-sitter": "^0.11.2", + "text-buffer": "13.14.3", + "tree-sitter": "0.12.1-0", "typescript-simple": "1.0.0", "underscore-plus": "^1.6.6", "winreg": "^1.2.1", From aced30da1f12eece514ba70cc22d63bebbb0da2a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 15 May 2018 17:53:47 -0700 Subject: [PATCH 2/7] Start work on async parsing --- package.json | 2 +- spec/tree-sitter-language-mode-spec.js | 76 +++++++++------ src/tree-sitter-language-mode.js | 126 ++++++++++--------------- 3 files changed, 100 insertions(+), 104 deletions(-) diff --git a/package.json b/package.json index e007bc635..06c73c488 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,7 @@ "sinon": "1.17.4", "temp": "^0.8.3", "text-buffer": "13.14.3", - "tree-sitter": "0.12.1-0", + "tree-sitter": "0.12.1-1", "typescript-simple": "1.0.0", "underscore-plus": "^1.6.6", "winreg": "^1.2.1", diff --git a/spec/tree-sitter-language-mode-spec.js b/spec/tree-sitter-language-mode-spec.js index a788fac47..43e87d886 100644 --- a/spec/tree-sitter-language-mode-spec.js +++ b/spec/tree-sitter-language-mode-spec.js @@ -20,7 +20,7 @@ describe('TreeSitterLanguageMode', () => { }) describe('highlighting', () => { - it('applies the most specific scope mapping to each node in the syntax tree', () => { + it('applies the most specific scope mapping to each node in the syntax tree', async () => { const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { parser: 'tree-sitter-javascript', scopes: { @@ -31,8 +31,11 @@ describe('TreeSitterLanguageMode', () => { } }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) buffer.setText('aa.bbb = cc(d.eee());') + await languageMode.reparsePromise + expectTokensToEqual(editor, [[ {text: 'aa.', scopes: ['source']}, {text: 'bbb', scopes: ['source', 'property']}, @@ -44,7 +47,7 @@ describe('TreeSitterLanguageMode', () => { ]]) }) - it('can start or end multiple scopes at the same position', () => { + it('can start or end multiple scopes at the same position', async () => { const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { parser: 'tree-sitter-javascript', scopes: { @@ -57,8 +60,11 @@ describe('TreeSitterLanguageMode', () => { } }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) buffer.setText('a = bb.ccc();') + await languageMode.reparsePromise + expectTokensToEqual(editor, [[ {text: 'a', scopes: ['source', 'variable']}, {text: ' = ', scopes: ['source']}, @@ -70,7 +76,7 @@ describe('TreeSitterLanguageMode', () => { ]]) }) - it('can resume highlighting on a line that starts with whitespace', () => { + it('can resume highlighting on a line that starts with whitespace', async () => { const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { parser: 'tree-sitter-javascript', scopes: { @@ -80,8 +86,11 @@ describe('TreeSitterLanguageMode', () => { } }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) buffer.setText('a\n .b();') + await languageMode.reparsePromise + expectTokensToEqual(editor, [ [ {text: 'a', scopes: ['variable']}, @@ -95,7 +104,7 @@ describe('TreeSitterLanguageMode', () => { ]) }) - it('correctly skips over tokens with zero size', () => { + it('correctly skips over tokens with zero size', async () => { const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { parser: 'tree-sitter-c', scopes: { @@ -107,10 +116,11 @@ describe('TreeSitterLanguageMode', () => { const languageMode = new TreeSitterLanguageMode({buffer, grammar}) buffer.setLanguageMode(languageMode) buffer.setText('int main() {\n int a\n int b;\n}'); + await languageMode.reparsePromise editor.screenLineForScreenRow(0) expect( - languageMode.document.rootNode.descendantForPosition(Point(1, 2), Point(1, 6)).toString() + languageMode.tree.rootNode.descendantForPosition(Point(1, 2), Point(1, 6)).toString() ).toBe('(declaration (primitive_type) (identifier) (MISSING))') expectTokensToEqual(editor, [ @@ -139,7 +149,7 @@ describe('TreeSitterLanguageMode', () => { ]) }) - it('updates lines\' highlighting when they are affected by distant changes', () => { + it('updates lines\' highlighting when they are affected by distant changes', async () => { const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { parser: 'tree-sitter-javascript', scopes: { @@ -148,10 +158,12 @@ describe('TreeSitterLanguageMode', () => { } }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) + buffer.setText('a(\nb,\nc\n') + await languageMode.reparsePromise // missing closing paren - buffer.setText('a(\nb,\nc\n') expectTokensToEqual(editor, [ [{text: 'a(', scopes: []}], [{text: 'b,', scopes: []}], @@ -160,6 +172,7 @@ describe('TreeSitterLanguageMode', () => { ]) buffer.append(')') + await languageMode.reparsePromise expectTokensToEqual(editor, [ [ {text: 'a', scopes: ['function']}, @@ -171,7 +184,7 @@ describe('TreeSitterLanguageMode', () => { ]) }) - it('handles edits after tokens that end between CR and LF characters (regression)', () => { + it('handles edits after tokens that end between CR and LF characters (regression)', async () => { const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { parser: 'tree-sitter-javascript', scopes: { @@ -181,13 +194,14 @@ describe('TreeSitterLanguageMode', () => { } }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) - + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) buffer.setText([ '// abc', '', 'a("b").c' ].join('\r\n')) + await languageMode.reparsePromise expectTokensToEqual(editor, [ [{text: '// abc', scopes: ['comment']}], @@ -201,6 +215,7 @@ describe('TreeSitterLanguageMode', () => { ]) buffer.insert([2, 0], ' ') + await languageMode.reparsePromise expectTokensToEqual(editor, [ [{text: '// abc', scopes: ['comment']}], [{text: '', scopes: []}], @@ -220,7 +235,7 @@ describe('TreeSitterLanguageMode', () => { editor.displayLayer.reset({foldCharacter: '…'}) }) - it('can fold nodes that start and end with specified tokens', () => { + it('can fold nodes that start and end with specified tokens', async () => { const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { parser: 'tree-sitter-javascript', folds: [ @@ -235,7 +250,8 @@ describe('TreeSitterLanguageMode', () => { ] }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) buffer.setText(dedent ` module.exports = class A { @@ -246,6 +262,7 @@ describe('TreeSitterLanguageMode', () => { } } `) + await languageMode.reparsePromise editor.screenLineForScreenRow(0) @@ -275,7 +292,7 @@ describe('TreeSitterLanguageMode', () => { `) }) - it('can fold nodes of specified types', () => { + it('can fold nodes of specified types', async () => { const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { parser: 'tree-sitter-javascript', folds: [ @@ -296,7 +313,8 @@ describe('TreeSitterLanguageMode', () => { ] }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) buffer.setText(dedent ` const element1 = { world `) + await languageMode.reparsePromise editor.screenLineForScreenRow(0) @@ -336,7 +355,7 @@ describe('TreeSitterLanguageMode', () => { `) }) - it('can fold entire nodes when no start or end parameters are specified', () => { + it('can fold entire nodes when no start or end parameters are specified', async () => { const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { parser: 'tree-sitter-javascript', folds: [ @@ -346,7 +365,8 @@ describe('TreeSitterLanguageMode', () => { ] }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) buffer.setText(dedent ` /** * Important @@ -355,6 +375,7 @@ describe('TreeSitterLanguageMode', () => { Also important */ `) + await languageMode.reparsePromise editor.screenLineForScreenRow(0) @@ -379,7 +400,7 @@ describe('TreeSitterLanguageMode', () => { `) }) - it('tries each folding strategy for a given node in the order specified', () => { + it('tries each folding strategy for a given node in the order specified', async () => { const grammar = new TreeSitterGrammar(atom.grammars, cGrammarPath, { parser: 'tree-sitter-c', folds: [ @@ -405,8 +426,8 @@ describe('TreeSitterLanguageMode', () => { ] }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) - + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) buffer.setText(dedent ` #ifndef FOO_H_ #define FOO_H_ @@ -430,6 +451,7 @@ describe('TreeSitterLanguageMode', () => { #endif `) + await languageMode.reparsePromise editor.screenLineForScreenRow(0) @@ -504,8 +526,6 @@ describe('TreeSitterLanguageMode', () => { ] }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) - buffer.setText(dedent ` def ab(): print 'a' @@ -515,6 +535,7 @@ describe('TreeSitterLanguageMode', () => { print 'c' print 'd' `) + buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) editor.screenLineForScreenRow(0) @@ -537,9 +558,8 @@ describe('TreeSitterLanguageMode', () => { parser: 'tree-sitter-javascript' }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) - buffer.setText('foo({bar: baz});') + buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) editor.screenLineForScreenRow(0) expect(editor.scopeDescriptorForBufferPosition([0, 6]).getScopesArray()).toEqual([ @@ -562,13 +582,13 @@ describe('TreeSitterLanguageMode', () => { scopes: {'program': 'source'} }) - buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) buffer.setText(dedent ` function a (b, c, d) { eee.f() g() } `) + buffer.setLanguageMode(new TreeSitterLanguageMode({buffer, grammar})) editor.screenLineForScreenRow(0) diff --git a/src/tree-sitter-language-mode.js b/src/tree-sitter-language-mode.js index 0d2fab8cf..3ec6a037a 100644 --- a/src/tree-sitter-language-mode.js +++ b/src/tree-sitter-language-mode.js @@ -1,4 +1,4 @@ -const {Document} = require('tree-sitter') +const Parser = require('tree-sitter') const {Point, Range} = require('text-buffer') const {Emitter, Disposable} = require('event-kit') const ScopeDescriptor = require('./scope-descriptor') @@ -14,13 +14,20 @@ class TreeSitterLanguageMode { this.buffer = buffer this.grammar = grammar this.config = config - this.document = new Document() - this.document.setInput(new TreeSitterTextBufferInput(buffer)) - this.document.setLanguage(grammar.languageModule) - this.document.parse() + this.parser = new Parser() + this.parser.setLanguage(grammar.languageModule) + this.tree = this.parser.parseTextBufferSync(this.buffer.buffer) this.rootScopeDescriptor = new ScopeDescriptor({scopes: [this.grammar.id]}) this.emitter = new Emitter() this.isFoldableCache = [] + this.hasQueuedParse = false + this.buffer.onDidChangeText(async () => { + if (!this.reparsePromise) { + this.reparsePromise = this.reparse().then(() => { + this.reparsePromise = null + }) + } + }) // TODO: Remove this once TreeSitterLanguageMode implements its own auto-indentation system. This // is temporarily needed in order to delegate to the TextMateLanguageMode's auto-indent system. @@ -36,7 +43,7 @@ class TreeSitterLanguageMode { const oldEndRow = oldRange.end.row const newEndRow = newRange.end.row this.isFoldableCache.splice(startRow, oldEndRow - startRow, ...new Array(newEndRow - startRow)) - this.document.edit({ + this.tree.edit({ startIndex: this.buffer.characterIndexForPosition(oldRange.start), lengthRemoved: oldText.length, lengthAdded: newText.length, @@ -50,8 +57,10 @@ class TreeSitterLanguageMode { Section - Highlighting */ - buildHighlightIterator () { - const invalidatedRanges = this.document.parse() + async reparse () { + const tree = await this.parser.parseTextBuffer(this.buffer.buffer, this.tree) + const invalidatedRanges = tree.getChangedRanges(this.tree) + this.tree = tree for (let i = 0, n = invalidatedRanges.length; i < n; i++) { const range = invalidatedRanges[i] const startRow = range.start.row @@ -61,6 +70,9 @@ class TreeSitterLanguageMode { } this.emitter.emit('did-change-highlighting', range) } + } + + buildHighlightIterator () { return new TreeSitterHighlightIterator(this) } @@ -139,7 +151,7 @@ class TreeSitterLanguageMode { getFoldableRangesAtIndentLevel (goalLevel) { let result = [] - let stack = [{node: this.document.rootNode, level: 0}] + let stack = [{node: this.tree.rootNode, level: 0}] while (stack.length > 0) { const {node, level} = stack.pop() @@ -183,7 +195,7 @@ class TreeSitterLanguageMode { } getFoldableRangeContainingPoint (point, tabLength, existenceOnly = false) { - let node = this.document.rootNode.descendantForPosition(this.buffer.clipPosition(point)) + let node = this.tree.rootNode.descendantForPosition(this.buffer.clipPosition(point)) while (node) { if (existenceOnly && node.startPosition.row < point.row) break if (node.endPosition.row > point.row) { @@ -273,7 +285,7 @@ class TreeSitterLanguageMode { getRangeForSyntaxNodeContainingRange (range) { const startIndex = this.buffer.characterIndexForPosition(range.start) const endIndex = this.buffer.characterIndexForPosition(range.end) - let node = this.document.rootNode.descendantForIndex(startIndex, endIndex - 1) + let node = this.tree.rootNode.descendantForIndex(startIndex, endIndex - 1) while (node && node.startIndex === startIndex && node.endIndex === endIndex) { node = node.parent } @@ -305,7 +317,7 @@ class TreeSitterLanguageMode { scopeDescriptorForPosition (point) { point = Point.fromObject(point) const result = [] - let node = this.document.rootNode.descendantForPosition(point) + let node = this.tree.rootNode.descendantForPosition(point) // Don't include anonymous token types like '(' because they prevent scope chains // from being parsed as CSS selectors by the `slick` parser. Other css selector @@ -331,17 +343,17 @@ class TreeSitterLanguageMode { } class TreeSitterHighlightIterator { - constructor (layer, document) { + constructor (layer) { this.layer = layer + this.treeCursor = this.layer.tree.walk() // Conceptually, the iterator represents a single position in the text. It stores this // position both as a character index and as a `Point`. This position corresponds to a // leaf node of the syntax tree, which either contains or follows the iterator's - // textual position. The `currentNode` property represents that leaf node, and + // textual position. The `treeCursor` property points at that leaf node, and // `currentChildIndex` represents the child index of that leaf node within its parent. this.currentIndex = null this.currentPosition = null - this.currentNode = null this.currentChildIndex = null // In order to determine which selectors match its current node, the iterator maintains @@ -358,6 +370,8 @@ class TreeSitterHighlightIterator { } seek (targetPosition) { + while (this.treeCursor.gotoParent()) {} + const containingTags = [] this.closeTags.length = 0 @@ -367,33 +381,28 @@ class TreeSitterHighlightIterator { this.currentPosition = targetPosition this.currentIndex = this.layer.buffer.characterIndexForPosition(targetPosition) - var node = this.layer.document.rootNode var childIndex = -1 var nodeContainsTarget = true for (;;) { - this.currentNode = node this.currentChildIndex = childIndex if (!nodeContainsTarget) break - this.containingNodeTypes.push(node.type) + this.containingNodeTypes.push(this.treeCursor.nodeType) this.containingNodeChildIndices.push(childIndex) const scopeName = this.currentScopeName() if (scopeName) { const id = this.layer.grammar.idForScope(scopeName) - if (this.currentIndex === node.startIndex) { + if (this.currentIndex === this.treeCursor.startIndex) { this.openTags.push(id) } else { containingTags.push(id) } } - node = node.firstChildForIndex(this.currentIndex) - if (node) { - if (node.startIndex > this.currentIndex) nodeContainsTarget = false - childIndex = node.childIndex - } else { - break - } + const nextChildIndex = this.treeCursor.gotoFirstChildForIndex(this.currentIndex) + if (nextChildIndex == null) break + if (this.treeCursor.startIndex > this.currentIndex) nodeContainsTarget = false + childIndex = nextChildIndex } return containingTags @@ -403,42 +412,35 @@ class TreeSitterHighlightIterator { this.closeTags.length = 0 this.openTags.length = 0 - if (!this.currentNode) { - this.currentPosition = {row: Infinity, column: Infinity} - return false - } - do { - if (this.currentIndex < this.currentNode.startIndex) { - this.currentIndex = this.currentNode.startIndex - this.currentPosition = this.currentNode.startPosition + if (this.currentIndex < this.treeCursor.startIndex) { + this.currentIndex = this.treeCursor.startIndex + this.currentPosition = this.treeCursor.startPosition this.pushOpenTag() this.descendLeft() - } else if (this.currentIndex < this.currentNode.endIndex) { + } else if (this.currentIndex < this.treeCursor.endIndex) { while (true) { - this.currentIndex = this.currentNode.endIndex - this.currentPosition = this.currentNode.endPosition + this.currentIndex = this.treeCursor.endIndex + this.currentPosition = this.treeCursor.endPosition this.pushCloseTag() - const {nextSibling} = this.currentNode - if (nextSibling && nextSibling.endIndex > this.currentIndex) { - this.currentNode = nextSibling + if (this.treeCursor.gotoNextSibling()) { this.currentChildIndex++ - if (this.currentIndex === nextSibling.startIndex) { + if (this.currentIndex === this.treeCursor.startIndex) { this.pushOpenTag() this.descendLeft() } break } else { - this.currentNode = this.currentNode.parent this.currentChildIndex = last(this.containingNodeChildIndices) - if (!this.currentNode) break + if (!this.treeCursor.gotoParent()) break } } - } else { - this.currentNode = this.currentNode.nextSibling + } else if (!this.treeCursor.gotoNextSibling()) { + this.currentPosition = {row: Infinity, column: Infinity} + break } - } while (this.closeTags.length === 0 && this.openTags.length === 0 && this.currentNode) + } while (this.closeTags.length === 0 && this.openTags.length === 0) return true } @@ -458,9 +460,7 @@ class TreeSitterHighlightIterator { // Private methods descendLeft () { - let child - while ((child = this.currentNode.firstChild) && this.currentIndex === child.startIndex) { - this.currentNode = child + while (this.treeCursor.gotoFirstChild()) { this.currentChildIndex = 0 this.pushOpenTag() } @@ -470,7 +470,7 @@ class TreeSitterHighlightIterator { return this.layer.grammar.scopeMap.get( this.containingNodeTypes, this.containingNodeChildIndices, - this.currentNode.isNamed + this.treeCursor.nodeIsNamed ) } @@ -482,37 +482,13 @@ class TreeSitterHighlightIterator { } pushOpenTag () { - this.containingNodeTypes.push(this.currentNode.type) + this.containingNodeTypes.push(this.treeCursor.nodeType) this.containingNodeChildIndices.push(this.currentChildIndex) const scopeName = this.currentScopeName() if (scopeName) this.openTags.push(this.layer.grammar.idForScope(scopeName)) } } -class TreeSitterTextBufferInput { - constructor (buffer) { - this.buffer = buffer - this.position = {row: 0, column: 0} - this.isBetweenCRLF = false - } - - seek (offset, position) { - this.position = position - this.isBetweenCRLF = this.position.column > this.buffer.lineLengthForRow(this.position.row) - } - - read () { - const endPosition = this.buffer.clipPosition(new Point(this.position.row + 1000, 0)) - let text = this.buffer.getTextInRange([this.position, endPosition]) - if (this.isBetweenCRLF) { - text = text.slice(1) - this.isBetweenCRLF = false - } - this.position = endPosition - return text - } -} - function last (array) { return array[array.length - 1] } From f6d2d5729944abbff8a2db1ab2a35d223e998b8c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 22 May 2018 11:12:40 -0700 Subject: [PATCH 3/7] Reparse again if there were changes since the last parse started --- package.json | 2 +- spec/tree-sitter-language-mode-spec.js | 61 ++++++++++++++++++++++++++ src/tree-sitter-language-mode.js | 52 +++++++++++++++------- 3 files changed, 98 insertions(+), 17 deletions(-) diff --git a/package.json b/package.json index 06c73c488..298d9c726 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,7 @@ "sinon": "1.17.4", "temp": "^0.8.3", "text-buffer": "13.14.3", - "tree-sitter": "0.12.1-1", + "tree-sitter": "0.12.4", "typescript-simple": "1.0.0", "underscore-plus": "^1.6.6", "winreg": "^1.2.1", diff --git a/spec/tree-sitter-language-mode-spec.js b/spec/tree-sitter-language-mode-spec.js index 43e87d886..7beb6a5e4 100644 --- a/spec/tree-sitter-language-mode-spec.js +++ b/spec/tree-sitter-language-mode-spec.js @@ -228,6 +228,67 @@ describe('TreeSitterLanguageMode', () => { ] ]) }) + + describe('when the buffer changes during a parse', () => { + it('immediately parses again when the current parse completes', async () => { + const grammar = new TreeSitterGrammar(atom.grammars, jsGrammarPath, { + parser: 'tree-sitter-javascript', + scopes: { + 'identifier': 'variable', + 'call_expression > identifier': 'function', + 'new_expression > call_expression > identifier': 'constructor' + } + }) + const languageMode = new TreeSitterLanguageMode({buffer, grammar}) + buffer.setLanguageMode(languageMode) + + buffer.setText('abc;'); + await languageMode.reparsePromise + expectTokensToEqual(editor, [ + [ + {text: 'abc', scopes: ['variable']}, + {text: ';', scopes: []} + ], + ]) + + buffer.setTextInRange([[0, 3], [0, 3]], '()'); + expectTokensToEqual(editor, [ + [ + {text: 'abc()', scopes: ['variable']}, + {text: ';', scopes: []} + ], + ]) + + buffer.setTextInRange([[0, 0], [0, 0]], 'new '); + expectTokensToEqual(editor, [ + [ + {text: 'new ', scopes: []}, + {text: 'abc()', scopes: ['variable']}, + {text: ';', scopes: []} + ], + ]) + + await languageMode.reparsePromise + expect(languageMode.reparsePromise).not.toBeNull() + expectTokensToEqual(editor, [ + [ + {text: 'new ', scopes: []}, + {text: 'abc', scopes: ['function']}, + {text: '();', scopes: []} + ], + ]) + + await languageMode.reparsePromise + expect(languageMode.reparsePromise).toBeNull() + expectTokensToEqual(editor, [ + [ + {text: 'new ', scopes: []}, + {text: 'abc', scopes: ['constructor']}, + {text: '();', scopes: []} + ], + ]) + }) + }) }) describe('folding', () => { diff --git a/src/tree-sitter-language-mode.js b/src/tree-sitter-language-mode.js index 3ec6a037a..56047a4f4 100644 --- a/src/tree-sitter-language-mode.js +++ b/src/tree-sitter-language-mode.js @@ -21,11 +21,12 @@ class TreeSitterLanguageMode { this.emitter = new Emitter() this.isFoldableCache = [] this.hasQueuedParse = false - this.buffer.onDidChangeText(async () => { - if (!this.reparsePromise) { - this.reparsePromise = this.reparse().then(() => { - this.reparsePromise = null - }) + this.changeListsSinceCurrentParse = [] + this.buffer.onDidChangeText(async ({changes}) => { + if (this.reparsePromise) { + this.changeListsSinceCurrentParse.push(changes) + } else { + this.reparsePromise = this.reparse() } }) @@ -38,29 +39,35 @@ class TreeSitterLanguageMode { return this.grammar.id } - bufferDidChange ({oldRange, newRange, oldText, newText}) { + bufferDidChange (change) { + const {oldRange, newRange} = change const startRow = oldRange.start.row const oldEndRow = oldRange.end.row const newEndRow = newRange.end.row this.isFoldableCache.splice(startRow, oldEndRow - startRow, ...new Array(newEndRow - startRow)) - this.tree.edit({ - startIndex: this.buffer.characterIndexForPosition(oldRange.start), - lengthRemoved: oldText.length, - lengthAdded: newText.length, - startPosition: oldRange.start, - extentRemoved: oldRange.getExtent(), - extentAdded: newRange.getExtent() - }) + this.tree.edit(this.treeEditForBufferChange(change)) } /* Section - Highlighting */ + treeEditForBufferChange ({oldRange, newRange, oldText, newText}) { + const startIndex = this.buffer.characterIndexForPosition(oldRange.start) + return { + startIndex, + oldEndIndex: startIndex + oldText.length, + newEndIndex: startIndex + newText.length, + startPosition: oldRange.start, + oldEndPosition: oldRange.end, + newEndPosition: newRange.end + } + } + async reparse () { const tree = await this.parser.parseTextBuffer(this.buffer.buffer, this.tree) - const invalidatedRanges = tree.getChangedRanges(this.tree) - this.tree = tree + const invalidatedRanges = this.tree.getChangedRanges(tree) + for (let i = 0, n = invalidatedRanges.length; i < n; i++) { const range = invalidatedRanges[i] const startRow = range.start.row @@ -70,6 +77,19 @@ class TreeSitterLanguageMode { } this.emitter.emit('did-change-highlighting', range) } + + this.tree = tree + if (this.changeListsSinceCurrentParse.length > 0) { + for (const changeList of this.changeListsSinceCurrentParse) { + for (let i = changeList.length - 1; i >= 0; i--) { + this.tree.edit(this.treeEditForBufferChange(changeList[i])) + } + } + this.changeListsSinceCurrentParse.length = 0 + this.reparsePromise = this.reparse() + } else { + this.reparsePromise = null + } } buildHighlightIterator () { From 3548abe541157a3da2a705d5fdfc917a052fc345 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 22 May 2018 15:54:59 -0700 Subject: [PATCH 4/7] Fix bug w/ empty node handling, comment TreeSitterHighlightIterator --- package.json | 2 +- src/tree-sitter-language-mode.js | 57 ++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/package.json b/package.json index 298d9c726..6142cb5a7 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,7 @@ "sinon": "1.17.4", "temp": "^0.8.3", "text-buffer": "13.14.3", - "tree-sitter": "0.12.4", + "tree-sitter": "0.12.5", "typescript-simple": "1.0.0", "underscore-plus": "^1.6.6", "winreg": "^1.2.1", diff --git a/src/tree-sitter-language-mode.js b/src/tree-sitter-language-mode.js index 56047a4f4..26583579f 100644 --- a/src/tree-sitter-language-mode.js +++ b/src/tree-sitter-language-mode.js @@ -367,6 +367,12 @@ class TreeSitterHighlightIterator { this.layer = layer this.treeCursor = this.layer.tree.walk() + // In order to determine which selectors match its current node, the iterator maintains + // a list of the current node's ancestors. Because the selectors can use the `:nth-child` + // pseudo-class, each node's child index is also stored. + this.containingNodeTypes = [] + this.containingNodeChildIndices = [] + // Conceptually, the iterator represents a single position in the text. It stores this // position both as a character index and as a `Point`. This position corresponds to a // leaf node of the syntax tree, which either contains or follows the iterator's @@ -376,12 +382,6 @@ class TreeSitterHighlightIterator { this.currentPosition = null this.currentChildIndex = null - // In order to determine which selectors match its current node, the iterator maintains - // a list of the current node's ancestors. Because the selectors can use the `:nth-child` - // pseudo-class, each node's child index is also stored. - this.containingNodeTypes = [] - this.containingNodeChildIndices = [] - // At any given position, the iterator exposes the list of class names that should be // *ended* at its current position and the list of class names that should be *started* // at its current position. @@ -401,6 +401,9 @@ class TreeSitterHighlightIterator { this.currentPosition = targetPosition this.currentIndex = this.layer.buffer.characterIndexForPosition(targetPosition) + // Descend from the root of the tree to the smallest node that spans the given position. + // Keep track of any nodes along the way that are associated with syntax highlighting + // tags. These tags must be returned. var childIndex = -1 var nodeContainsTarget = true for (;;) { @@ -432,30 +435,56 @@ class TreeSitterHighlightIterator { this.closeTags.length = 0 this.openTags.length = 0 + // Step forward through the leaves of the tree to find the next place where one or more + // syntax highlighting tags begin, end, or both. do { + // If the iterator is before the beginning of the current node, advance it to the + // beginning of then node and then walk down into the node's children, marking + // open tags as needed. if (this.currentIndex < this.treeCursor.startIndex) { this.currentIndex = this.treeCursor.startIndex this.currentPosition = this.treeCursor.startPosition this.pushOpenTag() this.descendLeft() + + // If the iterator is within the current node, advance it to the end of the node + // and then walk up the tree until the next sibling is found, marking close tags + // as needed. + // } else if (this.currentIndex < this.treeCursor.endIndex) { - while (true) { + /* eslint-disable no-labels */ + ascendingLoop: + do { this.currentIndex = this.treeCursor.endIndex this.currentPosition = this.treeCursor.endPosition this.pushCloseTag() - if (this.treeCursor.gotoNextSibling()) { + // Stop walking upward when we reach a node with a next sibling. + while (this.treeCursor.gotoNextSibling()) { this.currentChildIndex++ - if (this.currentIndex === this.treeCursor.startIndex) { + + // If the next sibling has a size of zero (e.g. something like an `automatic_semicolon`, + // an `indent`, or a `MISSING` node inserted by the parser during error recovery), + // then skip it. These nodes play no role in syntax highlighting. + if (this.treeCursor.endIndex === this.currentIndex) continue + + // If the next sibling starts right at the end of the current node (i.e. there is + // no whitespace in between), then before returning, also mark any open tags associated + // with this point in the tree. + if (this.treeCursor.startIndex === this.currentIndex) { this.pushOpenTag() this.descendLeft() } - break - } else { - this.currentChildIndex = last(this.containingNodeChildIndices) - if (!this.treeCursor.gotoParent()) break + + break ascendingLoop } - } + + this.currentChildIndex = last(this.containingNodeChildIndices) + } while (this.treeCursor.gotoParent()) + /* eslint-disable no-labels */ + + // If the iterator is at the end of a node, advance to the node's next sibling. If + // it has no next sibing, then the iterator has reached the end of the tree. } else if (!this.treeCursor.gotoNextSibling()) { this.currentPosition = {row: Infinity, column: Infinity} break From d4d57c2c8eb4ec4b02b830db89a78a596a21eb63 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 May 2018 08:56:13 -0700 Subject: [PATCH 5/7] :racehorse: Parse asynchronously when opening buffers --- src/project.js | 34 ++++++++++++++++++++------------ src/tree-sitter-language-mode.js | 25 ++++++++++++++++++----- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/project.js b/src/project.js index 7b8234b1c..8c98224d0 100644 --- a/src/project.js +++ b/src/project.js @@ -662,27 +662,35 @@ class Project extends Model { // * `text` The {String} text to use as a buffer. // // Returns a {Promise} that resolves to the {TextBuffer}. - buildBuffer (absoluteFilePath) { + async buildBuffer (absoluteFilePath) { const params = {shouldDestroyOnFileDelete: this.shouldDestroyBufferOnFileDelete} - let promise + let buffer if (absoluteFilePath != null) { if (this.loadPromisesByPath[absoluteFilePath] == null) { this.loadPromisesByPath[absoluteFilePath] = - TextBuffer.load(absoluteFilePath, params).catch(error => { - delete this.loadPromisesByPath[absoluteFilePath] - throw error - }) + TextBuffer.load(absoluteFilePath, params) + .then(result => { + delete this.loadPromisesByPath[absoluteFilePath] + return result + }) + .catch(error => { + delete this.loadPromisesByPath[absoluteFilePath] + throw error + }) } - promise = this.loadPromisesByPath[absoluteFilePath] + buffer = await this.loadPromisesByPath[absoluteFilePath] } else { - promise = Promise.resolve(new TextBuffer(params)) + buffer = new TextBuffer(params) } - return promise.then(buffer => { - delete this.loadPromisesByPath[absoluteFilePath] - this.addBuffer(buffer) - return buffer - }) + + this.grammarRegistry.autoAssignLanguageMode(buffer) + if (buffer.languageMode.initialize) { + await buffer.languageMode.initialize() + } + + this.addBuffer(buffer) + return buffer } addBuffer (buffer, options = {}) { diff --git a/src/tree-sitter-language-mode.js b/src/tree-sitter-language-mode.js index 26583579f..51466e2f7 100644 --- a/src/tree-sitter-language-mode.js +++ b/src/tree-sitter-language-mode.js @@ -16,7 +16,7 @@ class TreeSitterLanguageMode { this.config = config this.parser = new Parser() this.parser.setLanguage(grammar.languageModule) - this.tree = this.parser.parseTextBufferSync(this.buffer.buffer) + this.tree = null this.rootScopeDescriptor = new ScopeDescriptor({scopes: [this.grammar.id]}) this.emitter = new Emitter() this.isFoldableCache = [] @@ -35,11 +35,22 @@ class TreeSitterLanguageMode { this.regexesByPattern = {} } + async initialize () { + this.tree = await this.parser.parseTextBuffer(this.buffer.buffer) + } + + ensureParseTree () { + if (!this.tree) { + this.tree = this.parser.parseTextBufferSync(this.buffer.buffer) + } + } + getLanguageId () { return this.grammar.id } bufferDidChange (change) { + this.ensureParseTree() const {oldRange, newRange} = change const startRow = oldRange.start.row const oldEndRow = oldRange.end.row @@ -93,7 +104,8 @@ class TreeSitterLanguageMode { } buildHighlightIterator () { - return new TreeSitterHighlightIterator(this) + this.ensureParseTree() + return new TreeSitterHighlightIterator(this, this.tree.walk()) } onDidChangeHighlighting (callback) { @@ -170,6 +182,7 @@ class TreeSitterLanguageMode { } getFoldableRangesAtIndentLevel (goalLevel) { + this.ensureParseTree() let result = [] let stack = [{node: this.tree.rootNode, level: 0}] while (stack.length > 0) { @@ -215,6 +228,7 @@ class TreeSitterLanguageMode { } getFoldableRangeContainingPoint (point, tabLength, existenceOnly = false) { + this.ensureParseTree() let node = this.tree.rootNode.descendantForPosition(this.buffer.clipPosition(point)) while (node) { if (existenceOnly && node.startPosition.row < point.row) break @@ -335,8 +349,8 @@ class TreeSitterLanguageMode { } scopeDescriptorForPosition (point) { + this.ensureParseTree() point = Point.fromObject(point) - const result = [] let node = this.tree.rootNode.descendantForPosition(point) // Don't include anonymous token types like '(' because they prevent scope chains @@ -345,6 +359,7 @@ class TreeSitterLanguageMode { // selectors. if (!node.isNamed) node = node.parent + const result = [] while (node) { result.push(node.type) node = node.parent @@ -363,9 +378,9 @@ class TreeSitterLanguageMode { } class TreeSitterHighlightIterator { - constructor (layer) { + constructor (layer, treeCursor) { this.layer = layer - this.treeCursor = this.layer.tree.walk() + this.treeCursor = treeCursor // In order to determine which selectors match its current node, the iterator maintains // a list of the current node's ancestors. Because the selectors can use the `:nth-child` From 7a2667406b08c5e4e1cf8351df1ce650d5403fb2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 May 2018 08:58:12 -0700 Subject: [PATCH 6/7] Rename out-of-date property: layer -> languageMode --- src/tree-sitter-language-mode.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/tree-sitter-language-mode.js b/src/tree-sitter-language-mode.js index 51466e2f7..599874c37 100644 --- a/src/tree-sitter-language-mode.js +++ b/src/tree-sitter-language-mode.js @@ -378,8 +378,8 @@ class TreeSitterLanguageMode { } class TreeSitterHighlightIterator { - constructor (layer, treeCursor) { - this.layer = layer + constructor (languageMode, treeCursor) { + this.languageMode = languageMode this.treeCursor = treeCursor // In order to determine which selectors match its current node, the iterator maintains @@ -414,7 +414,7 @@ class TreeSitterHighlightIterator { this.containingNodeTypes.length = 0 this.containingNodeChildIndices.length = 0 this.currentPosition = targetPosition - this.currentIndex = this.layer.buffer.characterIndexForPosition(targetPosition) + this.currentIndex = this.languageMode.buffer.characterIndexForPosition(targetPosition) // Descend from the root of the tree to the smallest node that spans the given position. // Keep track of any nodes along the way that are associated with syntax highlighting @@ -429,7 +429,7 @@ class TreeSitterHighlightIterator { const scopeName = this.currentScopeName() if (scopeName) { - const id = this.layer.grammar.idForScope(scopeName) + const id = this.languageMode.grammar.idForScope(scopeName) if (this.currentIndex === this.treeCursor.startIndex) { this.openTags.push(id) } else { @@ -531,7 +531,7 @@ class TreeSitterHighlightIterator { } currentScopeName () { - return this.layer.grammar.scopeMap.get( + return this.languageMode.grammar.scopeMap.get( this.containingNodeTypes, this.containingNodeChildIndices, this.treeCursor.nodeIsNamed @@ -540,7 +540,7 @@ class TreeSitterHighlightIterator { pushCloseTag () { const scopeName = this.currentScopeName() - if (scopeName) this.closeTags.push(this.layer.grammar.idForScope(scopeName)) + if (scopeName) this.closeTags.push(this.languageMode.grammar.idForScope(scopeName)) this.containingNodeTypes.pop() this.containingNodeChildIndices.pop() } @@ -549,7 +549,7 @@ class TreeSitterHighlightIterator { this.containingNodeTypes.push(this.treeCursor.nodeType) this.containingNodeChildIndices.push(this.currentChildIndex) const scopeName = this.currentScopeName() - if (scopeName) this.openTags.push(this.layer.grammar.idForScope(scopeName)) + if (scopeName) this.openTags.push(this.languageMode.grammar.idForScope(scopeName)) } } From 53dfa83ae9fe0c13f560f9bba73c1f5131b19af2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 May 2018 15:58:33 -0700 Subject: [PATCH 7/7] Allow some synchronous parsing to avoid unnecessary re-renders --- package.json | 2 +- src/tree-sitter-language-mode.js | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 6142cb5a7..8b1e7c125 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,7 @@ "sinon": "1.17.4", "temp": "^0.8.3", "text-buffer": "13.14.3", - "tree-sitter": "0.12.5", + "tree-sitter": "0.12.6", "typescript-simple": "1.0.0", "underscore-plus": "^1.6.6", "winreg": "^1.2.1", diff --git a/src/tree-sitter-language-mode.js b/src/tree-sitter-language-mode.js index 599874c37..f15ad39a6 100644 --- a/src/tree-sitter-language-mode.js +++ b/src/tree-sitter-language-mode.js @@ -76,7 +76,9 @@ class TreeSitterLanguageMode { } async reparse () { - const tree = await this.parser.parseTextBuffer(this.buffer.buffer, this.tree) + const tree = await this.parser.parseTextBuffer(this.buffer.buffer, this.tree, { + syncOperationLimit: 1000 + }) const invalidatedRanges = this.tree.getChangedRanges(tree) for (let i = 0, n = invalidatedRanges.length; i < n; i++) {