mirror of
https://github.com/atom/atom.git
synced 2026-04-28 03:01:47 -04:00
Make Tree-sitter grammars' contentRegExp work as intended
This commit is contained in:
@@ -283,32 +283,6 @@ describe('GrammarRegistry', () => {
|
||||
expect(atom.grammars.selectGrammar('/hu.git/config').name).toBe('Null Grammar')
|
||||
})
|
||||
|
||||
describe('when the grammar has a contentRegExp field', () => {
|
||||
it('favors grammars whose contentRegExp matches a prefix of the file\'s content', () => {
|
||||
atom.grammars.addGrammar({
|
||||
id: 'javascript-1',
|
||||
fileTypes: ['js']
|
||||
})
|
||||
atom.grammars.addGrammar({
|
||||
id: 'flow-javascript',
|
||||
contentRegExp: new RegExp('//.*@flow'),
|
||||
fileTypes: ['js']
|
||||
})
|
||||
atom.grammars.addGrammar({
|
||||
id: 'javascript-2',
|
||||
fileTypes: ['js']
|
||||
})
|
||||
|
||||
const selectedGrammar = atom.grammars.selectGrammar('test.js', dedent`
|
||||
// Copyright EvilCorp
|
||||
// @flow
|
||||
|
||||
module.exports = function () { return 1 + 1 }
|
||||
`)
|
||||
expect(selectedGrammar.id).toBe('flow-javascript')
|
||||
})
|
||||
})
|
||||
|
||||
it("uses the filePath's shebang line if the grammar cannot be determined by the extension or basename", async () => {
|
||||
await atom.packages.activatePackage('language-javascript')
|
||||
await atom.packages.activatePackage('language-ruby')
|
||||
@@ -442,6 +416,84 @@ describe('GrammarRegistry', () => {
|
||||
expect(grammar instanceof TreeSitterGrammar).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('tree-sitter grammars with content regexes', () => {
|
||||
it('recognizes C++ header files', () => {
|
||||
atom.config.set('core.useTreeSitterParsers', true)
|
||||
grammarRegistry.loadGrammarSync(require.resolve('language-c/grammars/tree-sitter-c.cson'))
|
||||
grammarRegistry.loadGrammarSync(require.resolve('language-c/grammars/tree-sitter-cpp.cson'))
|
||||
grammarRegistry.loadGrammarSync(require.resolve('language-coffee-script/grammars/coffeescript.cson'))
|
||||
|
||||
let grammar = grammarRegistry.selectGrammar('test.h', dedent `
|
||||
#include <string.h>
|
||||
|
||||
typedef struct {
|
||||
void verb();
|
||||
} Noun;
|
||||
`)
|
||||
expect(grammar.name).toBe('C')
|
||||
|
||||
grammar = grammarRegistry.selectGrammar('test.h', dedent `
|
||||
#include <string>
|
||||
|
||||
class Noun {
|
||||
public:
|
||||
void verb();
|
||||
};
|
||||
`)
|
||||
expect(grammar.name).toBe('C++')
|
||||
|
||||
// The word `class` only indicates C++ in `.h` files, not in all files.
|
||||
grammar = grammarRegistry.selectGrammar('test.coffee', dedent `
|
||||
module.exports =
|
||||
class Noun
|
||||
verb: -> true
|
||||
`)
|
||||
expect(grammar.name).toBe('CoffeeScript')
|
||||
})
|
||||
|
||||
it('recognizes shell scripts with shebang lines', () => {
|
||||
atom.config.set('core.useTreeSitterParsers', true)
|
||||
grammarRegistry.loadGrammarSync(require.resolve('language-shellscript/grammars/shell-unix-bash.cson'))
|
||||
grammarRegistry.loadGrammarSync(require.resolve('language-shellscript/grammars/tree-sitter-bash.cson'))
|
||||
|
||||
let grammar = grammarRegistry.selectGrammar('test.h', dedent `
|
||||
#!/bin/bash
|
||||
|
||||
echo "hi"
|
||||
`)
|
||||
expect(grammar.name).toBe('Shell Script')
|
||||
expect(grammar instanceof TreeSitterGrammar).toBeTruthy()
|
||||
|
||||
atom.config.set('core.useTreeSitterParsers', false)
|
||||
grammar = grammarRegistry.selectGrammar('test.h', dedent `
|
||||
#!/bin/bash
|
||||
|
||||
echo "hi"
|
||||
`)
|
||||
expect(grammar.name).toBe('Shell Script')
|
||||
expect(grammar instanceof TreeSitterGrammar).toBeFalsy()
|
||||
})
|
||||
|
||||
it('recognizes JavaScript files that use Flow', () => {
|
||||
atom.config.set('core.useTreeSitterParsers', true)
|
||||
grammarRegistry.loadGrammarSync(require.resolve('language-javascript/grammars/tree-sitter-javascript.cson'))
|
||||
grammarRegistry.loadGrammarSync(require.resolve('language-typescript/grammars/tree-sitter-flow.cson'))
|
||||
|
||||
let grammar = grammarRegistry.selectGrammar('test.js', dedent`
|
||||
// Copyright something
|
||||
// @flow
|
||||
|
||||
module.exports = function () { return 1 + 1 }
|
||||
`)
|
||||
expect(grammar.name).toBe('Flow JavaScript')
|
||||
|
||||
grammar = grammarRegistry.selectGrammar('test.js', dedent`
|
||||
module.exports = function () { return 1 + 1 }
|
||||
`)
|
||||
expect(grammar.name).toBe('JavaScript')
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('.removeGrammar(grammar)', () => {
|
||||
|
||||
@@ -1032,6 +1032,7 @@ describe('PackageManager', () => {
|
||||
})
|
||||
|
||||
it('loads any tree-sitter grammars defined in the package', async () => {
|
||||
atom.config.set('core.useTreeSitterParsers', true)
|
||||
await atom.packages.activatePackage('package-with-tree-sitter-grammar')
|
||||
const grammar = atom.grammars.selectGrammar('test.somelang')
|
||||
expect(grammar.name).toBe('Some Language')
|
||||
|
||||
@@ -10,7 +10,6 @@ const Token = require('./token')
|
||||
const fs = require('fs-plus')
|
||||
const {Point, Range} = require('text-buffer')
|
||||
|
||||
const GRAMMAR_TYPE_BONUS = 1000
|
||||
const PATH_SPLIT_REGEX = new RegExp('[/.]')
|
||||
|
||||
// Extended: This class holds the grammars used for tokenizing.
|
||||
@@ -213,12 +212,23 @@ class GrammarRegistry {
|
||||
if (score > 0 && !grammar.bundledPackage) {
|
||||
score += 0.125
|
||||
}
|
||||
if (this.grammarMatchesContents(grammar, contents)) {
|
||||
score += 0.25
|
||||
}
|
||||
|
||||
if (score > 0 && this.isGrammarPreferredType(grammar)) {
|
||||
score += GRAMMAR_TYPE_BONUS
|
||||
if (grammar instanceof TreeSitterGrammar) {
|
||||
if (this.config.get('core.useTreeSitterParsers')) {
|
||||
score += 0.05
|
||||
} else {
|
||||
score = -Infinity
|
||||
}
|
||||
|
||||
if (grammar.contentRegExp) {
|
||||
if (grammar.contentRegExp.test(contents)) {
|
||||
score += 0.25
|
||||
} else {
|
||||
score -= 0.25
|
||||
}
|
||||
}
|
||||
} else if (this.grammarMatchesPrefix(grammar, contents)) {
|
||||
score += 0.25
|
||||
}
|
||||
|
||||
return score
|
||||
@@ -256,12 +266,8 @@ class GrammarRegistry {
|
||||
return pathScore
|
||||
}
|
||||
|
||||
grammarMatchesContents (grammar, contents) {
|
||||
if (contents == null) return false
|
||||
|
||||
if (grammar.contentRegExp) { // TreeSitter grammars
|
||||
return grammar.contentRegExp.test(contents)
|
||||
} else if (grammar.firstLineRegex) { // FirstMate grammars
|
||||
grammarMatchesPrefix (grammar, contents) {
|
||||
if (contents && grammar.firstLineRegex) {
|
||||
let escaped = false
|
||||
let numberOfNewlinesInRegex = 0
|
||||
for (let character of grammar.firstLineRegex.source) {
|
||||
@@ -511,12 +517,6 @@ class GrammarRegistry {
|
||||
return this.textmateRegistry.scopeForId(id)
|
||||
}
|
||||
|
||||
isGrammarPreferredType (grammar) {
|
||||
return this.config.get('core.useTreeSitterParsers')
|
||||
? grammar instanceof TreeSitterGrammar
|
||||
: grammar instanceof FirstMate.Grammar
|
||||
}
|
||||
|
||||
normalizeLanguageId (languageId) {
|
||||
if (this.config.get('core.useTreeSitterParsers')) {
|
||||
return this.treeSitterLanguageIdsByTextMateScopeName.get(languageId) || languageId
|
||||
|
||||
Reference in New Issue
Block a user