diff --git a/spec/grammar-registry-spec.js b/spec/grammar-registry-spec.js index bcd57f3a2..b4afc34fb 100644 --- a/spec/grammar-registry-spec.js +++ b/spec/grammar-registry-spec.js @@ -472,6 +472,19 @@ describe('GrammarRegistry', () => { expect(grammar.name).toBe('C++') }) + it('does not apply content regexes from grammars without filetype or first line matches', () => { + atom.config.set('core.useTreeSitterParsers', true) + grammarRegistry.loadGrammarSync(require.resolve('language-c/grammars/tree-sitter-cpp.cson')) + + let grammar = grammarRegistry.selectGrammar('', dedent ` + class Foo + # this is ruby, not C++ + end + `) + + expect(grammar.name).toBe('Null Grammar') + }) + it('recognizes shell scripts with shebang lines', () => { atom.config.set('core.useTreeSitterParsers', true) grammarRegistry.loadGrammarSync(require.resolve('language-shellscript/grammars/shell-unix-bash.cson')) @@ -485,6 +498,14 @@ describe('GrammarRegistry', () => { expect(grammar.name).toBe('Shell Script') expect(grammar instanceof TreeSitterGrammar).toBeTruthy() + grammar = grammarRegistry.selectGrammar('test.h', dedent ` + # vim: set ft=bash + + echo "hi" + `) + expect(grammar.name).toBe('Shell Script') + expect(grammar instanceof TreeSitterGrammar).toBeTruthy() + atom.config.set('core.useTreeSitterParsers', false) grammar = grammarRegistry.selectGrammar('test.h', dedent ` #!/bin/bash diff --git a/src/grammar-registry.js b/src/grammar-registry.js index 101a38007..60d656afa 100644 --- a/src/grammar-registry.js +++ b/src/grammar-registry.js @@ -208,25 +208,35 @@ class GrammarRegistry { contents = fs.readFileSync(filePath, 'utf8') } + // Initially identify matching grammars based on the filename and the first + // line of the file. let score = this.getGrammarPathScore(grammar, filePath) - if (score > 0 && !grammar.bundledPackage) { - score += 0.125 - } + if (this.grammarMatchesPrefix(grammar, contents)) score += 0.5 - if (grammar instanceof TreeSitterGrammar) { - if (!this.config.get('core.useTreeSitterParsers')) return -Infinity + // If multiple grammars match by one of the above criteria, break ties. + if (score > 0) { - if (grammar.contentRegExp) { - if (grammar.contentRegExp.test(contents)) { - score += 0.25 + // Prefer either TextMate or Tree-sitter grammars based on the user's settings. + if (grammar instanceof TreeSitterGrammar) { + if (this.config.get('core.useTreeSitterParsers')) { + score += 0.1 } else { - score -= 0.25 + return -Infinity } } - if (score > 0) score += 0.5 - } else if (this.grammarMatchesPrefix(grammar, contents)) { - score += 0.25 + // Prefer grammars with matching content regexes. Prefer a grammar with no content regex + // over one with a non-matching content regex. + if (grammar.contentRegex) { + if (grammar.contentRegex.test(contents)) { + score += 0.05 + } else { + score -= 0.05 + } + } + + // Prefer grammars that the user has manually installed over bundled grammars. + if (!grammar.bundledPackage) score += 0.01 } return score @@ -282,8 +292,13 @@ class GrammarRegistry { } } - const lines = contents.split('\n') - return grammar.firstLineRegex.testSync(lines.slice(0, numberOfNewlinesInRegex + 1).join('\n')) + const prefix = contents.split('\n').slice(0, numberOfNewlinesInRegex + 1).join('\n') + if (grammar.firstLineRegex.testSync) { + return grammar.firstLineRegex.testSync(prefix) + } else { + + return grammar.firstLineRegex.test(prefix) + } } else { return false } @@ -395,7 +410,7 @@ class GrammarRegistry { // * `injectionPoint` An {Object} with the following keys: // * `type` The {String} type of syntax node that may embed other languages // * `language` A {Function} that is called with syntax nodes of the specified `type` and - // returns a {String} that will be tested against other grammars' `injectionRegExp` in + // returns a {String} that will be tested against other grammars' `injectionRegex` in // order to determine what language should be embedded. // * `content` A {Function} that is called with syntax nodes of the specified `type` and // returns another syntax node or array of syntax nodes that contain the embedded source code. @@ -542,12 +557,22 @@ class GrammarRegistry { } treeSitterGrammarForLanguageString (languageString) { + let longestMatchLength = 0 + let grammarWithLongestMatch = null for (const id in this.treeSitterGrammarsById) { const grammar = this.treeSitterGrammarsById[id] - if (grammar.injectionRegExp && grammar.injectionRegExp.test(languageString)) { - return grammar + if (grammar.injectionRegex) { + const match = languageString.match(grammar.injectionRegex) + if (match) { + const {length} = match[0] + if (length > longestMatchLength) { + grammarWithLongestMatch = grammar + longestMatchLength = length + } + } } } + return grammarWithLongestMatch } normalizeLanguageId (languageId) { diff --git a/src/tree-sitter-grammar.js b/src/tree-sitter-grammar.js index 594b8639c..a1bbbbe90 100644 --- a/src/tree-sitter-grammar.js +++ b/src/tree-sitter-grammar.js @@ -9,8 +9,12 @@ class TreeSitterGrammar { this.id = params.id this.name = params.name this.legacyScopeName = params.legacyScopeName - if (params.contentRegExp) this.contentRegExp = new RegExp(params.contentRegExp) - if (params.injectionRegExp) this.injectionRegExp = new RegExp(params.injectionRegExp) + + // TODO - Remove the `RegExp` spelling and only support `Regex`, once all of the existing + // Tree-sitter grammars are updated to spell it `Regex`. + this.contentRegex = buildRegex(params.contentRegex || params.contentRegExp) + this.injectionRegex = buildRegex(params.injectionRegex || params.injectionRegExp) + this.firstLineRegex = buildRegex(params.firstLineRegex) this.folds = params.folds || [] this.folds.forEach(normalizeFoldSpecification) @@ -36,7 +40,7 @@ class TreeSitterGrammar { } this.scopeMap = new SyntaxScopeMap(scopeSelectors) - this.fileTypes = params.fileTypes + this.fileTypes = params.fileTypes || [] this.injectionPoints = params.injectionPoints || [] // TODO - When we upgrade to a new enough version of node, use `require.resolve` @@ -125,3 +129,11 @@ function normalizeFoldSpecification (spec) { if (spec.start) normalizeFoldSpecification(spec.start) if (spec.end) normalizeFoldSpecification(spec.end) } + +function buildRegex (value) { + // Allow multiple alternatives to be specified via an array, for + // readability of the grammar file + if (Array.isArray(value)) value = value.map(_ => `(${_})`).join('|') + if (typeof value === 'string') return new RegExp(value) + return null +} diff --git a/src/tree-sitter-language-mode.js b/src/tree-sitter-language-mode.js index 9d42386e4..b9ecd3969 100644 --- a/src/tree-sitter-language-mode.js +++ b/src/tree-sitter-language-mode.js @@ -534,7 +534,7 @@ class LanguageLayer { } updateInjections (grammar) { - if (grammar.injectionRegExp) { + if (grammar.injectionRegex) { if (!this.currentParsePromise) this.currentParsePromise = Promise.resolve() this.currentParsePromise = this.currentParsePromise.then(async () => { await this._populateInjections(MAX_RANGE, null)