Add support for Unicode variation sequences

These are character pairs that should be treated as tokens with a
buffer delta of 2 and a screen delta of 1.
This commit is contained in:
Kevin Sawicki
2014-09-17 09:26:52 -07:00
parent b6faffe2db
commit 878da262d2
3 changed files with 73 additions and 41 deletions

View File

@@ -6,25 +6,32 @@ describe 'text utilities', ->
expect(textUtils.getCharacterCount('abc')).toBe 3
expect(textUtils.getCharacterCount('a\uD835\uDF97b\uD835\uDF97c')).toBe 5
expect(textUtils.getCharacterCount('\uD835\uDF97')).toBe 1
expect(textUtils.getCharacterCount('\u2714\uFE0E')).toBe 1
expect(textUtils.getCharacterCount('\uD835')).toBe 1
expect(textUtils.getCharacterCount('\uDF97')).toBe 1
describe '.hasSurrogatePair(string)', ->
describe '.hasPairedCharacter(string)', ->
it 'returns true when the string contains a surrogate pair', ->
expect(textUtils.hasSurrogatePair('abc')).toBe false
expect(textUtils.hasSurrogatePair('a\uD835\uDF97b\uD835\uDF97c')).toBe true
expect(textUtils.hasSurrogatePair('\uD835\uDF97')).toBe true
expect(textUtils.hasSurrogatePair('\uD835')).toBe false
expect(textUtils.hasSurrogatePair('\uDF97')).toBe false
expect(textUtils.hasPairedCharacter('abc')).toBe false
expect(textUtils.hasPairedCharacter('a\uD835\uDF97b\uD835\uDF97c')).toBe true
expect(textUtils.hasPairedCharacter('\uD835\uDF97')).toBe true
expect(textUtils.hasPairedCharacter('\u2714\uFE0E')).toBe true
expect(textUtils.hasPairedCharacter('\uD835')).toBe false
expect(textUtils.hasPairedCharacter('\uDF97')).toBe false
expect(textUtils.hasPairedCharacter('\uFE0E')).toBe false
describe '.isSurrogatePair(string, index)', ->
describe '.isPairedCharacter(string, index)', ->
it 'returns true when the index is the start of a high/low surrogate pair', ->
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 0)).toBe false
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 1)).toBe true
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 2)).toBe false
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 3)).toBe false
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 4)).toBe true
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 5)).toBe false
expect(textUtils.isSurrogatePair('a\uD835\uDF97b\uD835\uDF97c', 6)).toBe false
expect(textUtils.isSurrogatePair('\uD835')).toBe false
expect(textUtils.isSurrogatePair('\uDF97')).toBe false
expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 0)).toBe false
expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 1)).toBe true
expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 2)).toBe false
expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 3)).toBe false
expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 4)).toBe true
expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 5)).toBe false
expect(textUtils.isPairedCharacter('a\uD835\uDF97b\uD835\uDF97c', 6)).toBe false
expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 0)).toBe false
expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 1)).toBe true
expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 2)).toBe false
expect(textUtils.isPairedCharacter('a\u2714\uFE0E', 3)).toBe false
expect(textUtils.isPairedCharacter('\uD835')).toBe false
expect(textUtils.isPairedCharacter('\uDF97')).toBe false