Merge pull request #3565 from atom/ks-support-variation-sequences

Support variation sequences
This commit is contained in:
Kevin Sawicki
2014-09-17 10:58:43 -07:00
4 changed files with 114 additions and 58 deletions

View File

@@ -3343,7 +3343,7 @@ describe "Editor", ->
editor2.destroy()
expect(editor.shouldPromptToSave()).toBeTruthy()
describe "when the edit session contains surrogate pair characters", ->
describe "when the editor contains surrogate pair characters", ->
it "correctly backspaces over them", ->
editor.setText('\uD835\uDF97\uD835\uDF97\uD835\uDF97')
editor.moveToBottom()
@@ -3384,6 +3384,47 @@ describe "Editor", ->
editor.moveLeft()
expect(editor.getCursorBufferPosition()).toEqual [0, 0]
# Verifies that the editor treats a base character followed by a variation
# selector as one unit for backspace, delete and cursor movement.
# '\u2714\uFE0E' is U+2714 followed by the variation selector U+FE0E; the pair
# occupies two UTF-16 code units, which is why the expected buffer columns
# below advance in steps of 2.
describe "when the editor contains variation sequence character pairs", ->
it "correctly backspaces over them", ->
# Three pairs; each backspace from the end must remove one whole pair.
editor.setText('\u2714\uFE0E\u2714\uFE0E\u2714\uFE0E')
editor.moveToBottom()
editor.backspace()
expect(editor.getText()).toBe '\u2714\uFE0E\u2714\uFE0E'
editor.backspace()
expect(editor.getText()).toBe '\u2714\uFE0E'
editor.backspace()
expect(editor.getText()).toBe ''
it "correctly deletes over them", ->
# Three pairs; each forward delete from the start must remove one whole pair.
editor.setText('\u2714\uFE0E\u2714\uFE0E\u2714\uFE0E')
editor.moveToTop()
editor.delete()
expect(editor.getText()).toBe '\u2714\uFE0E\u2714\uFE0E'
editor.delete()
expect(editor.getText()).toBe '\u2714\uFE0E'
editor.delete()
expect(editor.getText()).toBe ''
it "correctly moves over them", ->
# The trailing newline gives the cursor a second row to wrap onto.
editor.setText('\u2714\uFE0E\u2714\uFE0E\u2714\uFE0E\n')
editor.moveToTop()
# Moving right: one pair per step (columns 2, 4, 6), then onto row 1.
editor.moveRight()
expect(editor.getCursorBufferPosition()).toEqual [0, 2]
editor.moveRight()
expect(editor.getCursorBufferPosition()).toEqual [0, 4]
editor.moveRight()
expect(editor.getCursorBufferPosition()).toEqual [0, 6]
editor.moveRight()
expect(editor.getCursorBufferPosition()).toEqual [1, 0]
# Moving left retraces the same positions in reverse.
editor.moveLeft()
expect(editor.getCursorBufferPosition()).toEqual [0, 6]
editor.moveLeft()
expect(editor.getCursorBufferPosition()).toEqual [0, 4]
editor.moveLeft()
expect(editor.getCursorBufferPosition()).toEqual [0, 2]
editor.moveLeft()
expect(editor.getCursorBufferPosition()).toEqual [0, 0]
describe ".setIndentationForBufferRow", ->
describe "when the editor uses soft tabs but the row has hard tabs", ->
it "only replaces whitespace characters", ->

View File

@@ -1,30 +1,32 @@
textUtils = require '../src/text-utils'
describe 'text utilities', ->
describe '.getCharacterCount(string)', ->
it 'returns the number of full characters in the string', ->
expect(textUtils.getCharacterCount('abc')).toBe 3
expect(textUtils.getCharacterCount('a\uD835\uDF97b\uD835\uDF97c')).toBe 5
expect(textUtils.getCharacterCount('\uD835\uDF97')).toBe 1
expect(textUtils.getCharacterCount('\uD835')).toBe 1
expect(textUtils.getCharacterCount('\uDF97')).toBe 1
describe '.hasPairedCharacter(string)', ->
it 'returns true when the string contains a surrogate pair or variation sequence', ->
expect(textUtils.hasPairedCharacter('abc')).toBe false
expect(textUtils.hasPairedCharacter('a\uD835\uDF97b\uD835\uDF97c')).toBe true
expect(textUtils.hasPairedCharacter('\uD835\uDF97')).toBe true
expect(textUtils.hasPairedCharacter('\u2714\uFE0E')).toBe true
expect(textUtils.hasPairedCharacter('\uD835')).toBe false
expect(textUtils.hasPairedCharacter('\uDF97')).toBe false
expect(textUtils.hasPairedCharacter('\uFE0E')).toBe false
expect(textUtils.hasPairedCharacter('\uFE0E\uFE0E')).toBe false
describe '.hasSurrogatePair(string)', ->
it 'returns true when the string contains a surrogate pair', ->
expect(textUtils.hasSurrogatePair('abc')).toBe false
expect(textUtils.hasSurrogatePair('a\uD835\uDF97b\uD835\uDF97c')).toBe true
expect(textUtils.hasSurrogatePair('\uD835\uDF97')).toBe true
expect(textUtils.hasSurrogatePair('\uD835')).toBe false
expect(textUtils.hasSurrogatePair('\uDF97')).toBe false
describe '.isSurrogatePair(string, index)', ->
it 'returns true when the index is the start of a high/low surrogate pair', ->
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 0)).toBe false
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 1)).toBe true
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 2)).toBe false
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 3)).toBe false
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 4)).toBe true
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 5)).toBe false
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 6)).toBe false
expect(textUtils.isSurrogatePair('\uD835')).toBe false
expect(textUtils.isSurrogatePair('\uDF97')).toBe false
# Spec for textUtils.isPairedCharacter: the index must point at the FIRST code
# unit of a surrogate pair or of a base-character + variation-selector pair.
describe '.isPairedCharacter(string, index)', ->
  it 'returns true when the index is the start of a high/low surrogate pair or variation sequence', ->
    # Surrogate pairs embedded between plain characters; index 6 is past the end.
    expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 0)).toBe false
    expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 1)).toBe true
    expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 2)).toBe false
    expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 3)).toBe false
    expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 4)).toBe true
    expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 5)).toBe false
    expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 6)).toBe false

    # Variation sequence after a plain character; index 3 is past the end.
    expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 0)).toBe false
    expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 1)).toBe true
    expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 2)).toBe false
    expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 3)).toBe false

    # Unpaired halves (index defaults to 0) are never a paired character.
    expect(textUtils.isPairedCharacter('\uD835')).toBe false
    expect(textUtils.isPairedCharacter('\uDF97')).toBe false
    expect(textUtils.isPairedCharacter('\uFE0E')).toBe false
    # A base character with no selector after it (this replaces a line that
    # duplicated the lone-selector assertion above).
    expect(textUtils.isPairedCharacter('\u2714')).toBe false
    # A selector followed by a selector has no base character.
    expect(textUtils.isPairedCharacter('\uFE0E\uFE0E')).toBe false

View File

@@ -4,34 +4,47 @@ isHighSurrogate = (string, index) ->
isLowSurrogate = (string, index) ->
  # Low (trailing) surrogates are the UTF-16 code units 0xDC00 through 0xDFFF.
  # charCodeAt yields NaN for an out-of-range index, which fails both tests.
  codeUnit = string.charCodeAt(index)
  codeUnit >= 0xDC00 and codeUnit <= 0xDFFF
isVariationSelector = (string, index) ->
  # Variation selectors are the code units 0xFE00 through 0xFE0F.
  # charCodeAt yields NaN for an out-of-range index, which fails both tests.
  codeUnit = string.charCodeAt(index)
  codeUnit >= 0xFE00 and codeUnit <= 0xFE0F
# Is the character at the given index the start of a high/low surrogate pair?
#
# string - The {String} to check for a surrogate pair.
# index - The {Number} index to look for a surrogate pair at.
# * `string` The {String} to check for a surrogate pair.
# * `index` The {Number} index to look for a surrogate pair at.
#
# Returns a {Boolean}.
isSurrogatePair = (string, index=0) ->
  # A pair is a high surrogate at `index` directly followed by a low surrogate.
  startsWithHigh = isHighSurrogate(string, index)
  startsWithHigh and isLowSurrogate(string, index + 1)
# Get the number of characters in the string accounting for surrogate pairs.
# Is the character at the given index the start of a variation sequence?
#
# This method counts high/low surrogate pairs as a single character and will
# always return a value less than or equal to `string.length`.
# * `string` The {String} to check for a variation sequence.
# * `index` The {Number} index to look for a variation sequence at.
#
# string - The {String} to count the number of full characters in.
#
# Returns a {Number}.
getCharacterCount = (string) ->
  # Every surrogate pair takes two code units but counts as one character,
  # so subtract one from the code-unit length per pair found.
  pairCount = 0
  for index in [0...string.length]
    pairCount++ if isSurrogatePair(string, index)
  string.length - pairCount
# Returns a {Boolean}.
isVariationSequence = (string, index=0) ->
  # The code unit at `index` must be a base character, not a selector itself...
  return false if isVariationSelector(string, index)
  # ...and the code unit right after it must be a variation selector.
  isVariationSelector(string, index + 1)
# Does the given string contain at least one surrogate pair?
# Is the character at the given index the start of a high/low surrogate pair
# or a variation sequence?
#
# string - The {String} to check for the presence of surrogate pairs.
# * `string` The {String} to check for a surrogate pair or variation sequence.
# * `index` The {Number} index to look for a surrogate pair or variation sequence at.
#
# Returns a {Boolean}.
isPairedCharacter = (string, index=0) ->
  # Either kind of two-code-unit character qualifies; check surrogates first.
  return true if isSurrogatePair(string, index)
  isVariationSequence(string, index)
# Does the given string contain at least one surrogate pair or variation sequence?
#
# * `string` The {String} to check for the presence of paired characters.
#
# Returns a {Boolean}.
hasSurrogatePair = (string) ->
  # The full-character count differs from the code-unit length only when at
  # least one surrogate pair was collapsed into a single character.
  characterCount = getCharacterCount(string)
  characterCount isnt string.length
hasPairedCharacter = (string) ->
  # Scan every code-unit position and stop at the first paired character.
  for index in [0...string.length]
    return true if isPairedCharacter(string, index)
  false
module.exports = {getCharacterCount, isSurrogatePair, hasSurrogatePair}
module.exports = {isPairedCharacter, hasPairedCharacter}

View File

@@ -12,7 +12,7 @@ MaxTokenLength = 20000
module.exports =
class Token
value: null
hasSurrogatePair: false
hasPairedCharacter: false
scopes: null
isAtomic: null
isHardTab: null
@@ -23,7 +23,7 @@ class Token
constructor: ({@value, @scopes, @isAtomic, @bufferDelta, @isHardTab}) ->
@screenDelta = @value.length
@bufferDelta ?= @screenDelta
@hasSurrogatePair = textUtils.hasSurrogatePair(@value)
@hasPairedCharacter = textUtils.hasPairedCharacter(@value)
isEqual: (other) ->
@value == other.value and _.isEqual(@scopes, other.scopes) and !!@isAtomic == !!other.isAtomic
@@ -57,11 +57,11 @@ class Token
WhitespaceRegexesByTabLength[tabLength] ?= new RegExp("([ ]{#{tabLength}})|(\t)|([^\t]+)", "g")
breakOutAtomicTokens: (tabLength, breakOutLeadingSoftTabs, startColumn) ->
if @hasSurrogatePair
if @hasPairedCharacter
outputTokens = []
column = startColumn
for token in @breakOutSurrogatePairs()
for token in @breakOutPairedCharacters()
if token.isAtomic
outputTokens.push(token)
else
@@ -98,27 +98,27 @@ class Token
outputTokens
breakOutSurrogatePairs: ->
breakOutPairedCharacters: ->
outputTokens = []
index = 0
nonSurrogatePairStart = 0
nonPairStart = 0
while index < @value.length
if textUtils.isSurrogatePair(@value, index)
if nonSurrogatePairStart isnt index
outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes}))
outputTokens.push(@buildSurrogatePairToken(@value, index))
if textUtils.isPairedCharacter(@value, index)
if nonPairStart isnt index
outputTokens.push(new Token({value: @value[nonPairStart...index], @scopes}))
outputTokens.push(@buildPairedCharacterToken(@value, index))
index += 2
nonSurrogatePairStart = index
nonPairStart = index
else
index++
if nonSurrogatePairStart isnt index
outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes}))
if nonPairStart isnt index
outputTokens.push(new Token({value: @value[nonPairStart...index], @scopes}))
outputTokens
buildSurrogatePairToken: (value, index) ->
buildPairedCharacterToken: (value, index) ->
new Token(
value: value[index..index + 1]
scopes: @scopes