Handle invisibles at the token level to fix char width measurement

Fixes #3188
2026-04-28 03:01:47 -04:00 · 2014-08-08 18:03:58 -06:00
parent fca9ed07e6
commit 2daf70f0e5
4 changed files with 84 additions and 20 deletions
--- a/spec/tokenized-buffer-spec.coffee
+++ b/spec/tokenized-buffer-spec.coffee
@@ -1,4 +1,5 @@
 TokenizedBuffer = require '../src/tokenized-buffer'
+TextBuffer = require 'text-buffer'
 _ = require 'underscore-plus'

 describe "TokenizedBuffer", ->
@@ -12,6 +13,9 @@ describe "TokenizedBuffer", ->
    waitsForPromise ->
      atom.packages.activatePackage('language-javascript')

+  afterEach ->
+    tokenizedBuffer?.destroy()
+
  startTokenizing = (tokenizedBuffer) ->
    tokenizedBuffer.setVisible(true)

@@ -584,6 +588,20 @@ describe "TokenizedBuffer", ->
      atom.config.set('editor.tabLength', 0)
      expect(tokenizedBuffer.tokenForPosition([0,0]).value).toBe '  '

+  describe "when the editor.showInvisibles and editor.invisibles config values change", ->
+    # The buffer is created inside the spec: it needs text that mixes tabs and
+    # spaces in leading, interior and trailing positions.
+    it "updates the tokens with the appropriate invisible characters", ->
+      buffer = new TextBuffer(text: "  \t a line with tabs\tand \tspaces \t ")
+      tokenizedBuffer = new TokenizedBuffer({buffer})
+      fullyTokenize(tokenizedBuffer)
+
+      atom.config.set('editor.invisibles', space: 'S', tab: 'T')
+      atom.config.set('editor.showInvisibles', true)
+      fullyTokenize(tokenizedBuffer)
+
+      expect(tokenizedBuffer.lineForScreenRow(0).text).toBe "SST Sa line with tabsTand T spacesSTS"
+
  describe "leading and trailing whitespace", ->
    beforeEach ->
      buffer = atom.project.bufferForPathSync('sample.js')
--- a/src/token.coffee
+++ b/src/token.coffee
@@ -142,8 +142,7 @@ class Token
      classes = 'hard-tab'
      classes += ' indent-guide' if hasIndentGuide
      classes += ' invisible-character' if invisibles.tab
-      value = if invisibles.tab then @value.replace(StartCharacterRegex, invisibles.tab) else @value
-      html = "<span class='#{classes}'>#{@escapeString(value)}</span>"
+      html = "<span class='#{classes}'>#{@escapeString(@value)}</span>"
    else
      startIndex = 0
      endIndex = @value.length
@@ -156,7 +155,6 @@ class Token
        classes += ' indent-guide' if hasIndentGuide
        classes += ' invisible-character' if invisibles.space

-        match[0] = match[0].replace(CharacterRegex, invisibles.space) if invisibles.space
        leadingHtml = "<span class='#{classes}'>#{match[0]}</span>"

        startIndex = match[0].length
@@ -167,7 +165,6 @@ class Token
        classes += ' indent-guide' if hasIndentGuide and not @hasLeadingWhitespace and tokenIsOnlyWhitespace
        classes += ' invisible-character' if invisibles.space

-        match[0] = match[0].replace(CharacterRegex, invisibles.space) if invisibles.space
        trailingHtml = "<span class='#{classes}'>#{match[0]}</span>"

        endIndex = match.index
--- a/src/tokenized-buffer.coffee
+++ b/src/tokenized-buffer.coffee
@@ -21,29 +21,35 @@ class TokenizedBuffer extends Model

  constructor: ({@buffer, @tabLength}) ->
    @tabLength ?= atom.config.getPositiveInt('editor.tabLength', 2)
+    @setShowInvisibles(atom.config.get('editor.showInvisibles'))
+    @setInvisibles(atom.config.get('editor.invisibles'))

    @subscribe atom.syntax, 'grammar-added grammar-updated', (grammar) =>
      if grammar.injectionSelector?
-        @resetTokenizedLines() if @hasTokenForSelector(grammar.injectionSelector)
+        @retokenizeLines() if @hasTokenForSelector(grammar.injectionSelector)
      else
        newScore = grammar.getScore(@buffer.getPath(), @buffer.getText())
        @setGrammar(grammar, newScore) if newScore > @currentGrammarScore

-    @on 'grammar-changed grammar-updated', => @resetTokenizedLines()
+    @on 'grammar-changed grammar-updated', => @retokenizeLines()
    @subscribe @buffer, "changed", (e) => @handleBufferChange(e)
    @subscribe @buffer, "path-changed", =>
      @bufferPath = @buffer.getPath()
      @reloadGrammar()

-    @subscribe @$tabLength.changes, (tabLength) =>
-      lastRow = @buffer.getLastRow()
-      @tokenizedLines = @buildPlaceholderTokenizedLinesForRows(0, lastRow)
-      @invalidateRow(0)
-      @emit "changed", { start: 0, end: lastRow, delta: 0 }
+    @subscribe @$tabLength.changes, (tabLength) => @retokenizeLines()

    @subscribe atom.config.observe 'editor.tabLength', callNow: false, =>
      @setTabLength(atom.config.getPositiveInt('editor.tabLength', 2))

+    @subscribe atom.config.observe 'editor.showInvisibles', callNow: false, (showInvisibles) =>
+      @setShowInvisibles(showInvisibles)
+      @retokenizeLines()
+
+    @subscribe atom.config.observe 'editor.invisibles', callNow: false, (invisibles) =>
+      @setInvisibles(invisibles)
+      @retokenizeLines()
+
    @reloadGrammar()

  serializeParams: ->
@@ -59,7 +65,7 @@ class TokenizedBuffer extends Model
    @unsubscribe(@grammar) if @grammar
    @grammar = grammar
    @currentGrammarScore = score ? grammar.getScore(@buffer.getPath(), @buffer.getText())
-    @subscribe @grammar, 'grammar-updated', => @resetTokenizedLines()
+    @subscribe @grammar, 'grammar-updated', => @retokenizeLines()
    @emit 'grammar-changed', grammar

  reloadGrammar: ->
@@ -74,11 +80,13 @@ class TokenizedBuffer extends Model
        return true if selector.matches(token.scopes)
    false

-  resetTokenizedLines: ->
-    @tokenizedLines = @buildPlaceholderTokenizedLinesForRows(0, @buffer.getLastRow())
+  retokenizeLines: ->
+    lastRow = @buffer.getLastRow()
+    @tokenizedLines = @buildPlaceholderTokenizedLinesForRows(0, lastRow)
    @invalidRows = []
    @invalidateRow(0)
    @fullyTokenized = false
+    @emit "changed", {start: 0, end: lastRow, delta: 0}

  setVisible: (@visible) ->
    @tokenizeInBackground() if @visible
@@ -94,6 +102,17 @@ class TokenizedBuffer extends Model
  # tabLength - A {Number} that defines the new tab length.
  setTabLength: (@tabLength) ->

+  setShowInvisibles: (@showInvisibles) -> # Stores the flag only; it does not retokenize by itself.
+
+  # Sets the invisible-character map, filling absent keys with the defaults below.
+  # A new object is built so the caller's `invisibles` argument is never mutated.
+  setInvisibles: (invisibles={}) ->
+    @invisibles = _.defaults {}, invisibles,
+      eol: '\u00ac'
+      space: '\u00b7'
+      tab: '\u00bb'
+      cr: '\u00a4'
+
  tokenizeInBackground: ->
    return if not @visible or @pendingChunk or not @isAlive()
    @pendingChunk = true
@@ -206,15 +225,17 @@ class TokenizedBuffer extends Model
    tokens = [new Token(value: line, scopes: [@grammar.scopeName])]
    tabLength = @getTabLength()
    indentLevel = @indentLevelForRow(row)
-    new TokenizedLine({tokens, tabLength, indentLevel})
+    invisibles = @invisibles if @showInvisibles
+    new TokenizedLine({tokens, tabLength, indentLevel, invisibles})

  buildTokenizedTokenizedLineForRow: (row, ruleStack) ->
    line = @buffer.lineForRow(row)
    lineEnding = @buffer.lineEndingForRow(row)
    tabLength = @getTabLength()
    indentLevel = @indentLevelForRow(row)
-    { tokens, ruleStack } = @grammar.tokenizeLine(line, ruleStack, row is 0)
-    new TokenizedLine({tokens, ruleStack, tabLength, lineEnding, indentLevel})
+    invisibles = @invisibles if @showInvisibles
+    {tokens, ruleStack} = @grammar.tokenizeLine(line, ruleStack, row is 0)
+    new TokenizedLine({tokens, ruleStack, tabLength, lineEnding, indentLevel, invisibles})

  # FIXME: benogle says: These are actually buffer rows as all buffer rows are
  # accounted for in @tokenizedLines
--- a/src/tokenized-line.coffee
+++ b/src/tokenized-line.coffee
@@ -1,10 +1,14 @@
 _ = require 'underscore-plus'

+NonWhitespaceRegex = /\S/
+LeadingWhitespaceRegex = /^\s*/
+TrailingWhitespaceRegex = /\s*$/
+RepeatedSpaceRegex = /[ ]/g
 idCounter = 1

 module.exports =
 class TokenizedLine
-  constructor: ({tokens, @lineEnding, @ruleStack, @startBufferColumn, @fold, @tabLength, @indentLevel}) ->
+  constructor: ({tokens, @lineEnding, @ruleStack, @startBufferColumn, @fold, @tabLength, @indentLevel, @invisibles}) ->
    @startBufferColumn ?= 0
    @tokens = @breakOutAtomicTokens(tokens)
    @text = @buildText()
@@ -12,6 +16,7 @@ class TokenizedLine

    @id = idCounter++
    @markLeadingAndTrailingWhitespaceTokens()
+    @substituteInvisibleCharacters() if @invisibles

  buildText: ->
    text = ""
@@ -133,8 +138,8 @@ class TokenizedLine
    outputTokens

  markLeadingAndTrailingWhitespaceTokens: ->
-    firstNonWhitespacePosition = @text.search(/\S/)
-    firstTrailingWhitespacePosition = @text.search(/\s*$/)
+    firstNonWhitespacePosition = @text.search(NonWhitespaceRegex)
+    firstTrailingWhitespacePosition = @text.search(TrailingWhitespaceRegex)
    lineIsWhitespaceOnly = firstTrailingWhitespacePosition is 0
    position = 0
    for token, i in @tokens
@@ -143,6 +148,29 @@ class TokenizedLine
      token.hasTrailingWhitespace = @lineEnding? and (position + token.value.length > firstTrailingWhitespacePosition)
      position += token.value.length

+  substituteInvisibleCharacters: ->
+    # Rewrites token values in place so whitespace shows as the configured invisibles.
+    {tab, space} = @invisibles
+    changedText = false
+    # A replacer function inserts `space` literally, even if it contains `$` sequences.
+    showSpaces = (whitespace) -> whitespace.replace RepeatedSpaceRegex, -> space
+    for token in @tokens
+      if token.isHardTab
+        if tab
+          # Only the first characters are replaced; the rest of the value is kept.
+          token.value = tab + token.value.substring(tab.length)
+          changedText = true
+      else if space
+        if token.hasLeadingWhitespace
+          token.value = token.value.replace LeadingWhitespaceRegex, showSpaces
+          changedText = true
+        if token.hasTrailingWhitespace
+          token.value = token.value.replace TrailingWhitespaceRegex, showSpaces
+          changedText = true
+
+    # Token values changed, so the cached line text must be rebuilt from them.
+    @text = @buildText() if changedText
+
  isComment: ->
    for token in @tokens
      continue if token.scopes.length is 1