Add initial support for replacing anchors

2026-04-28 03:01:47 -04:00 · 2012-12-19 21:44:21 -08:00
parent 880edcd408
commit 9f6d1f987f
6 changed files with 79 additions and 18 deletions
--- a/native/v8_extensions/onig_scanner.mm
+++ b/native/v8_extensions/onig_scanner.mm
@@ -57,10 +57,7 @@ class OnigScannerUserData : public CefBase {
      bool useCachedResult = false;
      OnigResult *result = NULL;

-      // In Oniguruma, \G is based on the start position of the match, so the result
-      // changes based on the start position. So it can't be cached.
-      BOOL containsBackslashG = [regExp.expression rangeOfString:@"\\G"].location != NSNotFound;
-      if (useCachedResults && index <= maxCachedIndex && ! containsBackslashG) {
+      if (useCachedResults && index <= maxCachedIndex) {
        result = cachedResults[index];
        useCachedResult = (result == NULL || [result locationAt:0] >= startLocation);
      }
@@ -158,4 +155,4 @@ bool OnigScanner::Execute(const CefString& name,
  return false;
 }

-} // namespace v8_extensions
+} // namespace v8_extensions
--- a/spec/app/tokenized-buffer-spec.coffee
+++ b/spec/app/tokenized-buffer-spec.coffee
@@ -14,7 +14,7 @@ describe "TokenizedBuffer", ->

  fullyTokenize = (tokenizedBuffer) ->
    advanceClock() while tokenizedBuffer.firstInvalidRow()?
-    changeHandler.reset()
+    changeHandler?.reset()

  describe "when the buffer contains soft-tabs", ->
    beforeEach ->
@@ -326,3 +326,21 @@ describe "TokenizedBuffer", ->

        expect(tokenizedBuffer.lineForScreenRow(2).text).toBe "#{tabAsSpaces} buy()#{tabAsSpaces}while supply > demand"

+  describe "when a Git commit message file is tokenized", ->
+    beforeEach ->
+      editSession =  fixturesProject.buildEditSessionForPath('COMMIT_EDITMSG', autoIndent: false)
+      buffer = editSession.buffer
+      tokenizedBuffer = editSession.displayBuffer.tokenizedBuffer
+      editSession.setVisible(true)
+      fullyTokenize(tokenizedBuffer) # advances the clock until no invalid rows remain
+
+    afterEach ->
+      editSession.destroy()
+
+    it "correctly parses the number sign of the first comment line", ->
+      commentLine = tokenizedBuffer.lineForScreenRow(1) # row 1 is the second line of the fixture
+      expect(commentLine.text).toBe "# Please enter the commit message for your changes. Lines starting"
+      { tokens } = commentLine
+
+      expect(tokens[0].value).toBe "#" # the leading token must be the '#' alone
+      expect(tokens[0].scopes).toEqual ["text.git-commit", "meta.scope.metadata.git-commit", "comment.line.number-sign.git-commit", "punctuation.definition.comment.git-commit"]
--- a/spec/fixtures/COMMIT_EDITMSG
+++ b/spec/fixtures/COMMIT_EDITMSG
@@ -0,0 +1,2 @@
+longggggggggggggggggggggggggggggggggggggggggggggggg
+# Please enter the commit message for your changes. Lines starting
--- a/src/app/language-mode.coffee
+++ b/src/app/language-mode.coffee
@@ -186,5 +186,5 @@ class LanguageMode
    if desiredIndentLevel < currentIndentLevel
      @editSession.setIndentationForBufferRow(bufferRow, desiredIndentLevel)

-  tokenizeLine: (line, stack) ->
-    {tokens, stack} = @grammar.tokenizeLine(line, stack)
+  tokenizeLine: (line, stack, firstLine) ->
+    {tokens, stack} = @grammar.tokenizeLine(line, stack, firstLine)
--- a/src/app/text-mate-grammar.coffee
+++ b/src/app/text-mate-grammar.coffee
@@ -29,8 +29,7 @@ class TextMateGrammar
      data = {patterns: [data], tempName: name} if data.begin? or data.match?
      @repository[name] = new Rule(this, data)

-  tokenizeLine: (line, ruleStack=[@initialRule]) ->
-    ruleStack ?= [@initialRule]
+  tokenizeLine: (line, ruleStack=[@initialRule], firstLine=false) ->
    ruleStack = new Array(ruleStack...) # clone ruleStack
    tokens = []
    position = 0
@@ -44,7 +43,7 @@ class TextMateGrammar

      break if position == line.length

-      if match = _.last(ruleStack).getNextTokens(ruleStack, line, position)
+      if match = _.last(ruleStack).getNextTokens(ruleStack, line, position, firstLine)
        { nextTokens, tokensStartPosition, tokensEndPosition } = match
        if position < tokensStartPosition # unmatched text before next tokens
          tokens.push(new Token(
@@ -79,6 +78,7 @@ class Rule
  patterns: null
  allPatterns: null
  createEndPattern: null
+  anchor: -1

  constructor: (@grammar, {@scopeName, patterns, @endPattern}) ->
    patterns ?= []
@@ -95,14 +95,28 @@ class Rule
      @allPatterns.push(pattern.getIncludedPatterns(included)...)
    @allPatterns

-  getScanner: ->
-    @scanner ?= new OnigScanner(_.pluck(@getIncludedPatterns(), 'regexSource'))
+  getScanner: (position, firstLine) -> # returns the OnigScanner for this rule's included patterns
+    return @scanner if @scanner # reuse the cached scanner (only stored when no pattern is anchored)

-  getNextTokens: (stack, line, position) ->
+    anchored = false # becomes true once any included pattern reports an anchor
+    regexes = []
+    @getIncludedPatterns().forEach (pattern) =>
+      if pattern.anchored
+        anchored = true
+        regex = pattern.replaceAnchor(firstLine, position, @anchor) # rewritten per call from firstLine, position and @anchor
+      else
+        regex = pattern.regexSource
+      regexes.push regex if regex # NOTE(review): skipping a falsy regex shifts later indices relative to getIncludedPatterns() — confirm callers tolerate this
+
+    regexScanner = new OnigScanner(regexes)
+    @scanner = regexScanner unless anchored # anchored regexes depend on the call's arguments, so they are not cached
+    regexScanner
+
+  getNextTokens: (stack, line, position, firstLine) ->
    patterns = @getIncludedPatterns()

    # Add a `\n` to appease patterns that contain '\n' explicitly
-    return null unless result = @getScanner().findNextMatch(line + "\n", position)
+    return null unless result = @getScanner(position, firstLine).findNextMatch("#{line}\n", position)
    { index, captureIndices } = result
    # Since the `\n' (added above) is not part of the line, truncate captures to the line's actual length
    lineLength = line.length
@@ -130,6 +144,7 @@ class Pattern
  scopeName: null
  captures: null
  backReferences: null
+  anchored: false

  constructor: (@grammar, { name, contentName, @include, match, begin, end, captures, beginCaptures, endCaptures, patterns, @popRule, hasBackReferences}) ->
    @scopeName = name ? contentName # TODO: We need special treatment of contentName
@@ -144,6 +159,34 @@ class Pattern
      @captures = beginCaptures ? captures
      endPattern = new Pattern(@grammar, { match: end, captures: endCaptures ? captures, popRule: true})
      @pushRule = new Rule(@grammar, { @scopeName, patterns, endPattern })
+    @anchored = @hasAnchor()
+
+  hasAnchor: -> # true when @regexSource contains a backslash-escaped A, G or z
+    return false unless @regexSource # patterns without a regex source have no anchors
+    escape = false # whether the previous character was an unescaped backslash
+    for character in @regexSource.split('')
+      return true if escape and (character is 'A' or character is 'G' or character is 'z')
+      escape = not escape and character is '\\' # a backslash following a backslash clears the flag
+    false
+
+  # Returns @regexSource with its \A, \G and \z escapes rewritten for a scan that starts at offset.
+  replaceAnchor: (firstLine, offset, anchor) ->
+    escaped = []
+    placeholder = '\uFFFF' # stands in for an anchor that must not match — assumes U+FFFF never occurs in a line
+    escape = false
+    for character in @regexSource.split('')
+      if escape
+        switch character
+          when 'A' then escaped.push(if firstLine then '\\A' else placeholder) # keep \A only on the first line
+          when 'G' then escaped.push(if offset is anchor then '\\G' else placeholder) # keep \G only at the anchor position
+          when 'z' then escaped.push('$(?!\n)(?<!\n)')
+          else escaped.push("\\#{character}") # any other escape is passed through unchanged
+        escape = false
+      else if character is '\\'
+        escape = true
+      else
+        escaped.push(character)
+    escaped.join('')

  resolveBackReferences: (line, beginCaptureIndices) ->
    beginCaptures = []
@@ -180,7 +223,9 @@ class Pattern
      else
        tokens = [new Token(value: line[start...end], scopes: scopes)]
    if @pushRule
-      stack.push(@pushRule.getRuleToPush(line, captureIndices))
+      ruleToPush = @pushRule.getRuleToPush(line, captureIndices)
+      ruleToPush.anchor = captureIndices[1]
+      stack.push(ruleToPush)
    else if @popRule
      stack.pop()

@@ -226,4 +271,3 @@ shiftCapture = (captureIndices) ->

 scopesFromStack = (stack) ->
  _.compact(_.pluck(stack, "scopeName"))
-
--- a/src/app/tokenized-buffer.coffee
+++ b/src/app/tokenized-buffer.coffee
@@ -134,7 +134,7 @@ class TokenizedBuffer

  buildTokenizedScreenLineForRow: (row, ruleStack) ->
    line = @buffer.lineForRow(row)
-    { tokens, ruleStack } = @languageMode.tokenizeLine(line, ruleStack)
+    { tokens, ruleStack } = @languageMode.tokenizeLine(line, ruleStack, row is 0) # third argument flags buffer row 0 as the first line
    new ScreenLine({tokens, ruleStack, @tabLength})

  lineForScreenRow: (row) ->