diff --git a/spec/editor-spec.coffee b/spec/editor-spec.coffee index 6ff0aa304..27a0dc099 100644 --- a/spec/editor-spec.coffee +++ b/spec/editor-spec.coffee @@ -3343,7 +3343,7 @@ describe "Editor", -> editor2.destroy() expect(editor.shouldPromptToSave()).toBeTruthy() - describe "when the edit session contains surrogate pair characters", -> + describe "when the editor contains surrogate pair characters", -> it "correctly backspaces over them", -> editor.setText('\uD835\uDF97\uD835\uDF97\uD835\uDF97') editor.moveToBottom() @@ -3384,6 +3384,47 @@ describe "Editor", -> editor.moveLeft() expect(editor.getCursorBufferPosition()).toEqual [0, 0] + describe "when the editor contains variation sequence character pairs", -> + it "correctly backspaces over them", -> + editor.setText('\u2714\uFE0E\u2714\uFE0E\u2714\uFE0E') + editor.moveToBottom() + editor.backspace() + expect(editor.getText()).toBe '\u2714\uFE0E\u2714\uFE0E' + editor.backspace() + expect(editor.getText()).toBe '\u2714\uFE0E' + editor.backspace() + expect(editor.getText()).toBe '' + + it "correctly deletes over them", -> + editor.setText('\u2714\uFE0E\u2714\uFE0E\u2714\uFE0E') + editor.moveToTop() + editor.delete() + expect(editor.getText()).toBe '\u2714\uFE0E\u2714\uFE0E' + editor.delete() + expect(editor.getText()).toBe '\u2714\uFE0E' + editor.delete() + expect(editor.getText()).toBe '' + + it "correctly moves over them", -> + editor.setText('\u2714\uFE0E\u2714\uFE0E\u2714\uFE0E\n') + editor.moveToTop() + editor.moveRight() + expect(editor.getCursorBufferPosition()).toEqual [0, 2] + editor.moveRight() + expect(editor.getCursorBufferPosition()).toEqual [0, 4] + editor.moveRight() + expect(editor.getCursorBufferPosition()).toEqual [0, 6] + editor.moveRight() + expect(editor.getCursorBufferPosition()).toEqual [1, 0] + editor.moveLeft() + expect(editor.getCursorBufferPosition()).toEqual [0, 6] + editor.moveLeft() + expect(editor.getCursorBufferPosition()).toEqual [0, 4] + editor.moveLeft() + 
expect(editor.getCursorBufferPosition()).toEqual [0, 2] + editor.moveLeft() + expect(editor.getCursorBufferPosition()).toEqual [0, 0] + describe ".setIndentationForBufferRow", -> describe "when the editor uses soft tabs but the row has hard tabs", -> it "only replaces whitespace characters", -> diff --git a/spec/text-utils-spec.coffee b/spec/text-utils-spec.coffee index 36ac0b356..89cf34aca 100644 --- a/spec/text-utils-spec.coffee +++ b/spec/text-utils-spec.coffee @@ -1,30 +1,32 @@ textUtils = require '../src/text-utils' describe 'text utilities', -> - describe '.getCharacterCount(string)', -> - it 'returns the number of full characters in the string', -> - expect(textUtils.getCharacterCount('abc')).toBe 3 - expect(textUtils.getCharacterCount('a\uD835\uDF97b\uD835\uDF97c')).toBe 5 - expect(textUtils.getCharacterCount('\uD835\uDF97')).toBe 1 - expect(textUtils.getCharacterCount('\uD835')).toBe 1 - expect(textUtils.getCharacterCount('\uDF97')).toBe 1 + describe '.hasPairedCharacter(string)', -> + it 'returns true when the string contains a surrogate pair or variation sequence', -> + expect(textUtils.hasPairedCharacter('abc')).toBe false + expect(textUtils.hasPairedCharacter('a\uD835\uDF97b\uD835\uDF97c')).toBe true + expect(textUtils.hasPairedCharacter('\uD835\uDF97')).toBe true + expect(textUtils.hasPairedCharacter('\u2714\uFE0E')).toBe true + expect(textUtils.hasPairedCharacter('\uD835')).toBe false + expect(textUtils.hasPairedCharacter('\uDF97')).toBe false + expect(textUtils.hasPairedCharacter('\uFE0E')).toBe false + expect(textUtils.hasPairedCharacter('\uFE0E\uFE0E')).toBe false - describe '.hasSurrogatePair(string)', -> - it 'returns true when the string contains a surrogate pair', -> - expect(textUtils.hasSurrogatePair('abc')).toBe false - expect(textUtils.hasSurrogatePair('a\uD835\uDF97b\uD835\uDF97c')).toBe true - expect(textUtils.hasSurrogatePair('\uD835\uDF97')).toBe true - expect(textUtils.hasSurrogatePair('\uD835')).toBe false - 
expect(textUtils.hasSurrogatePair('\uDF97')).toBe false - - describe '.isSurrogatePair(string, index)', -> - it 'returns true when the index is the start of a high/low surrogate pair', -> - expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 0)).toBe false - expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 1)).toBe true - expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 2)).toBe false - expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 3)).toBe false - expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 4)).toBe true - expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 5)).toBe false - expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 6)).toBe false - expect(textUtils.isSurrogatePair('\uD835')).toBe false - expect(textUtils.isSurrogatePair('\uDF97')).toBe false + describe '.isPairedCharacter(string, index)', -> + it 'returns true when the index is the start of a high/low surrogate pair or variation sequence', -> + expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 0)).toBe false + expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 1)).toBe true + expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 2)).toBe false + expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 3)).toBe false + expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 4)).toBe true + expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 5)).toBe false + expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 6)).toBe false + expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 0)).toBe false + expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 1)).toBe true + expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 2)).toBe false + expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 3)).toBe false + expect(textUtils.isPairedCharacter('\uD835')).toBe false + expect(textUtils.isPairedCharacter('\uDF97')).toBe false + 
expect(textUtils.isPairedCharacter('\uFE0E')).toBe false + expect(textUtils.isPairedCharacter('\uFE0E')).toBe false + expect(textUtils.isPairedCharacter('\uFE0E\uFE0E')).toBe false diff --git a/src/text-utils.coffee b/src/text-utils.coffee index a043d7c73..90bf220ff 100644 --- a/src/text-utils.coffee +++ b/src/text-utils.coffee @@ -4,34 +4,47 @@ isHighSurrogate = (string, index) -> isLowSurrogate = (string, index) -> 0xDC00 <= string.charCodeAt(index) <= 0xDFFF +isVariationSelector = (string, index) -> + 0xFE00 <= string.charCodeAt(index) <= 0xFE0F + # Is the character at the given index the start of a high/low surrogate pair? # -# string - The {String} to check for a surrogate pair. -# index - The {Number} index to look for a surrogate pair at. +# * `string` The {String} to check for a surrogate pair. +# * `index` The {Number} index to look for a surrogate pair at. # # Return a {Boolean}. isSurrogatePair = (string, index=0) -> isHighSurrogate(string, index) and isLowSurrogate(string, index + 1) -# Get the number of characters in the string accounting for surrogate pairs. +# Is the character at the given index the start of a variation sequence? # -# This method counts high/low surrogate pairs as a single character and will -# always returns a value less than or equal to `string.length`. +# * `string` The {String} to check for a variation sequence. +# * `index` The {Number} index to look for a variation sequence at. # -# string - The {String} to count the number of full characters in. -# -# Returns a {Number}. -getCharacterCount = (string) -> - count = string.length - count-- for index in [0...string.length] when isSurrogatePair(string, index) - count +# Return a {Boolean}. +isVariationSequence = (string, index=0) -> + not isVariationSelector(string, index) and isVariationSelector(string, index + 1) -# Does the given string contain at least one surrogate pair? +# Is the character at the given index the start of a high/low surrogate pair +# or a variation sequence? 
# -# string - The {String} to check for the presence of surrogate pairs. +# * `string` The {String} to check for a surrogate pair or variation sequence. +# * `index` The {Number} index to look for a surrogate pair or variation sequence at. +# +# Return a {Boolean}. +isPairedCharacter = (string, index=0) -> + isSurrogatePair(string, index) or isVariationSequence(string, index) + +# Does the given string contain at least one surrogate pair or variation sequence? +# +# * `string` The {String} to check for the presence of paired characters. # # Returns a {Boolean}. -hasSurrogatePair = (string) -> - string.length isnt getCharacterCount(string) +hasPairedCharacter = (string) -> + index = 0 + while index < string.length + return true if isPairedCharacter(string, index) + index++ + false -module.exports = {getCharacterCount, isSurrogatePair, hasSurrogatePair} +module.exports = {isPairedCharacter, hasPairedCharacter} diff --git a/src/token.coffee b/src/token.coffee index 5366f33cc..a36117ea8 100644 --- a/src/token.coffee +++ b/src/token.coffee @@ -12,7 +12,7 @@ MaxTokenLength = 20000 module.exports = class Token value: null - hasSurrogatePair: false + hasPairedCharacter: false scopes: null isAtomic: null isHardTab: null @@ -23,7 +23,7 @@ class Token constructor: ({@value, @scopes, @isAtomic, @bufferDelta, @isHardTab}) -> @screenDelta = @value.length @bufferDelta ?= @screenDelta - @hasSurrogatePair = textUtils.hasSurrogatePair(@value) + @hasPairedCharacter = textUtils.hasPairedCharacter(@value) isEqual: (other) -> @value == other.value and _.isEqual(@scopes, other.scopes) and !!@isAtomic == !!other.isAtomic @@ -57,11 +57,11 @@ class Token WhitespaceRegexesByTabLength[tabLength] ?= new RegExp("([ ]{#{tabLength}})|(\t)|([^\t]+)", "g") breakOutAtomicTokens: (tabLength, breakOutLeadingSoftTabs, startColumn) -> - if @hasSurrogatePair + if @hasPairedCharacter outputTokens = [] column = startColumn - for token in @breakOutSurrogatePairs() + for token in @breakOutPairedCharacters() if token.isAtomic
outputTokens.push(token) else @@ -98,27 +98,27 @@ class Token outputTokens - breakOutSurrogatePairs: -> + breakOutPairedCharacters: -> outputTokens = [] index = 0 - nonSurrogatePairStart = 0 + nonPairStart = 0 while index < @value.length - if textUtils.isSurrogatePair(@value, index) - if nonSurrogatePairStart isnt index - outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes})) - outputTokens.push(@buildSurrogatePairToken(@value, index)) + if textUtils.isPairedCharacter(@value, index) + if nonPairStart isnt index + outputTokens.push(new Token({value: @value[nonPairStart...index], @scopes})) + outputTokens.push(@buildPairedCharacterToken(@value, index)) index += 2 - nonSurrogatePairStart = index + nonPairStart = index else index++ - if nonSurrogatePairStart isnt index - outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes})) + if nonPairStart isnt index + outputTokens.push(new Token({value: @value[nonPairStart...index], @scopes})) outputTokens - buildSurrogatePairToken: (value, index) -> + buildPairedCharacterToken: (value, index) -> new Token( value: value[index..index + 1] scopes: @scopes