Merge pull request #3565 from atom/ks-support-variation-sequences

Support variation sequences
2026-01-23 13:58:08 -05:00 · 2014-09-17 10:58:43 -07:00
parent b6faffe2db 146e8c2a0b
commit e4d50f4b38
4 changed files with 114 additions and 58 deletions
--- a/spec/editor-spec.coffee
+++ b/spec/editor-spec.coffee
@@ -3343,7 +3343,7 @@ describe "Editor", ->
        editor2.destroy()
        expect(editor.shouldPromptToSave()).toBeTruthy()

-  describe "when the edit session contains surrogate pair characters", ->
+  describe "when the editor contains surrogate pair characters", ->
    it "correctly backspaces over them", ->
      editor.setText('\uD835\uDF97\uD835\uDF97\uD835\uDF97')
      editor.moveToBottom()
@@ -3384,6 +3384,47 @@ describe "Editor", ->
      editor.moveLeft()
      expect(editor.getCursorBufferPosition()).toEqual [0, 0]

+  describe "when the editor contains variation sequence character pairs", ->
+    it "correctly backspaces over them", ->
+      editor.setText('\u2714\uFE0E\u2714\uFE0E\u2714\uFE0E')
+      editor.moveToBottom()
+      editor.backspace()
+      expect(editor.getText()).toBe '\u2714\uFE0E\u2714\uFE0E'
+      editor.backspace()
+      expect(editor.getText()).toBe '\u2714\uFE0E'
+      editor.backspace()
+      expect(editor.getText()).toBe ''
+
+    it "correctly deletes over them", ->
+      editor.setText('\u2714\uFE0E\u2714\uFE0E\u2714\uFE0E')
+      editor.moveToTop()
+      editor.delete()
+      expect(editor.getText()).toBe '\u2714\uFE0E\u2714\uFE0E'
+      editor.delete()
+      expect(editor.getText()).toBe '\u2714\uFE0E'
+      editor.delete()
+      expect(editor.getText()).toBe ''
+
+    it "correctly moves over them", ->
+      editor.setText('\u2714\uFE0E\u2714\uFE0E\u2714\uFE0E\n')
+      editor.moveToTop()
+      editor.moveRight()
+      expect(editor.getCursorBufferPosition()).toEqual [0, 2]
+      editor.moveRight()
+      expect(editor.getCursorBufferPosition()).toEqual [0, 4]
+      editor.moveRight()
+      expect(editor.getCursorBufferPosition()).toEqual [0, 6]
+      editor.moveRight()
+      expect(editor.getCursorBufferPosition()).toEqual [1, 0]
+      editor.moveLeft()
+      expect(editor.getCursorBufferPosition()).toEqual [0, 6]
+      editor.moveLeft()
+      expect(editor.getCursorBufferPosition()).toEqual [0, 4]
+      editor.moveLeft()
+      expect(editor.getCursorBufferPosition()).toEqual [0, 2]
+      editor.moveLeft()
+      expect(editor.getCursorBufferPosition()).toEqual [0, 0]
+
  describe ".setIndentationForBufferRow", ->
    describe "when the editor uses soft tabs but the row has hard tabs", ->
      it "only replaces whitespace characters", ->
--- a/spec/text-utils-spec.coffee
+++ b/spec/text-utils-spec.coffee
@@ -1,30 +1,32 @@
 textUtils = require '../src/text-utils'

 describe 'text utilities', ->
-  describe '.getCharacterCount(string)', ->
-    it 'returns the number of full characters in the string', ->
-      expect(textUtils.getCharacterCount('abc')).toBe 3
-      expect(textUtils.getCharacterCount('a\uD835\uDF97b\uD835\uDF97c')).toBe 5
-      expect(textUtils.getCharacterCount('\uD835\uDF97')).toBe 1
-      expect(textUtils.getCharacterCount('\uD835')).toBe 1
-      expect(textUtils.getCharacterCount('\uDF97')).toBe 1
+  describe '.hasPairedCharacter(string)', ->
+    it 'returns true when the string contains a surrogate pair or variation sequence', ->
+      expect(textUtils.hasPairedCharacter('abc')).toBe false
+      expect(textUtils.hasPairedCharacter('a\uD835\uDF97b\uD835\uDF97c')).toBe true
+      expect(textUtils.hasPairedCharacter('\uD835\uDF97')).toBe true
+      expect(textUtils.hasPairedCharacter('\u2714\uFE0E')).toBe true
+      expect(textUtils.hasPairedCharacter('\uD835')).toBe false
+      expect(textUtils.hasPairedCharacter('\uDF97')).toBe false
+      expect(textUtils.hasPairedCharacter('\uFE0E')).toBe false
+      expect(textUtils.hasPairedCharacter('\uFE0E\uFE0E')).toBe false

-  describe '.hasSurrogatePair(string)', ->
-    it 'returns true when the string contains a surrogate pair', ->
-      expect(textUtils.hasSurrogatePair('abc')).toBe false
-      expect(textUtils.hasSurrogatePair('a\uD835\uDF97b\uD835\uDF97c')).toBe true
-      expect(textUtils.hasSurrogatePair('\uD835\uDF97')).toBe true
-      expect(textUtils.hasSurrogatePair('\uD835')).toBe false
-      expect(textUtils.hasSurrogatePair('\uDF97')).toBe false
-
-  describe '.isSurrogatePair(string, index)', ->
-    it 'returns true when the index is the start of a high/low surrogate pair', ->
-      expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 0)).toBe false
-      expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 1)).toBe true
-      expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 2)).toBe false
-      expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 3)).toBe false
-      expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 4)).toBe true
-      expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 5)).toBe false
-      expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 6)).toBe false
-      expect(textUtils.isSurrogatePair('\uD835')).toBe false
-      expect(textUtils.isSurrogatePair('\uDF97')).toBe false
+  describe '.isPairedCharacter(string, index)', ->
+    it 'returns true when the index is the start of a high/low surrogate pair or variation sequence', ->
+      expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 0)).toBe false
+      expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 1)).toBe true
+      expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 2)).toBe false
+      expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 3)).toBe false
+      expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 4)).toBe true
+      expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 5)).toBe false
+      expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 6)).toBe false
+      expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 0)).toBe false
+      expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 1)).toBe true
+      expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 2)).toBe false
+      expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 3)).toBe false
+      expect(textUtils.isPairedCharacter('\uD835')).toBe false
+      expect(textUtils.isPairedCharacter('\uDF97')).toBe false
+      expect(textUtils.isPairedCharacter('\uFE0E')).toBe false
+      expect(textUtils.isPairedCharacter('\uFE0E')).toBe false
+      expect(textUtils.isPairedCharacter('\uFE0E\uFE0E')).toBe false
--- a/src/text-utils.coffee
+++ b/src/text-utils.coffee
@@ -4,34 +4,47 @@ isHighSurrogate = (string, index) ->
 isLowSurrogate = (string, index) ->
  0xDC00 <= string.charCodeAt(index) <= 0xDFFF

+isVariationSelector = (string, index) ->
+  0xFE00 <= string.charCodeAt(index) <= 0xFE0F
+
 # Is the character at the given index the start of a high/low surrogate pair?
 #
-# string - The {String} to check for a surrogate pair.
-# index - The {Number} index to look for a surrogate pair at.
+# * `string` The {String} to check for a surrogate pair.
+# * `index`  The {Number} index to look for a surrogate pair at.
 #
 # Return a {Boolean}.
 isSurrogatePair = (string, index=0) ->
  isHighSurrogate(string, index) and isLowSurrogate(string, index + 1)

-# Get the number of characters in the string accounting for surrogate pairs.
+# Is the character at the given index the start of a variation sequence?
 #
-# This method counts high/low surrogate pairs as a single character and will
-# always returns a value less than or equal to `string.length`.
+# * `string` The {String} to check for a variation sequence.
+# * `index`  The {Number} index to look for a variation sequence at.
 #
-# string - The {String} to count the number of full characters in.
-#
-# Returns a {Number}.
-getCharacterCount = (string) ->
-  count = string.length
-  count-- for index in [0...string.length] when isSurrogatePair(string, index)
-  count
+# Returns a {Boolean}.
+isVariationSequence = (string, index=0) ->
+  not isVariationSelector(string, index) and isVariationSelector(string, index + 1)

-# Does the given string contain at least one surrogate pair?
+# Is the character at the given index the start of a high/low surrogate pair
+# or a variation sequence?
 #
-# string - The {String} to check for the presence of surrogate pairs.
+# * `string` The {String} to check for a surrogate pair or variation sequence.
+# * `index`  The {Number} index to look for a surrogate pair or variation sequence at.
+#
+# Returns a {Boolean}.
+isPairedCharacter = (string, index=0) ->
+  isSurrogatePair(string, index) or isVariationSequence(string, index)
+
+# Does the given string contain at least one surrogate pair or variation sequence?
+#
+# * `string` The {String} to check for the presence of paired characters.
 #
 # Returns a {Boolean}.
-hasSurrogatePair = (string) ->
-  string.length isnt getCharacterCount(string)
+hasPairedCharacter = (string) ->
+  index = 0
+  while index < string.length
+    return true if isPairedCharacter(string, index)
+    index++
+  false

-module.exports = {getCharacterCount, isSurrogatePair, hasSurrogatePair}
+module.exports = {isPairedCharacter, hasPairedCharacter}
--- a/src/token.coffee
+++ b/src/token.coffee
@@ -12,7 +12,7 @@ MaxTokenLength = 20000
 module.exports =
 class Token
  value: null
-  hasSurrogatePair: false
+  hasPairedCharacter: false
  scopes: null
  isAtomic: null
  isHardTab: null
@@ -23,7 +23,7 @@ class Token
  constructor: ({@value, @scopes, @isAtomic, @bufferDelta, @isHardTab}) ->
    @screenDelta = @value.length
    @bufferDelta ?= @screenDelta
-    @hasSurrogatePair = textUtils.hasSurrogatePair(@value)
+    @hasPairedCharacter = textUtils.hasPairedCharacter(@value)

  isEqual: (other) ->
    @value == other.value and _.isEqual(@scopes, other.scopes) and !!@isAtomic == !!other.isAtomic
@@ -57,11 +57,11 @@ class Token
    WhitespaceRegexesByTabLength[tabLength] ?= new RegExp("([ ]{#{tabLength}})|(\t)|([^\t]+)", "g")

  breakOutAtomicTokens: (tabLength, breakOutLeadingSoftTabs, startColumn) ->
-    if @hasSurrogatePair
+    if @hasPairedCharacter
      outputTokens = []
      column = startColumn

-      for token in @breakOutSurrogatePairs()
+      for token in @breakOutPairedCharacters()
        if token.isAtomic
          outputTokens.push(token)
        else
@@ -98,27 +98,27 @@ class Token

      outputTokens

-  breakOutSurrogatePairs: ->
+  breakOutPairedCharacters: ->
    outputTokens = []
    index = 0
-    nonSurrogatePairStart = 0
+    nonPairStart = 0

    while index < @value.length
-      if textUtils.isSurrogatePair(@value, index)
-        if nonSurrogatePairStart isnt index
-          outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes}))
-        outputTokens.push(@buildSurrogatePairToken(@value, index))
+      if textUtils.isPairedCharacter(@value, index)
+        if nonPairStart isnt index
+          outputTokens.push(new Token({value: @value[nonPairStart...index], @scopes}))
+        outputTokens.push(@buildPairedCharacterToken(@value, index))
        index += 2
-        nonSurrogatePairStart = index
+        nonPairStart = index
      else
        index++

-    if nonSurrogatePairStart isnt index
-      outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes}))
+    if nonPairStart isnt index
+      outputTokens.push(new Token({value: @value[nonPairStart...index], @scopes}))

    outputTokens

-  buildSurrogatePairToken: (value, index) ->
+  buildPairedCharacterToken: (value, index) ->
    new Token(
      value: value[index..index + 1]
      scopes: @scopes