mirror of
https://github.com/atom/atom.git
synced 2026-02-08 13:45:09 -05:00
☠☕ Decaffeinate src/text-utils.coffee
This commit is contained in:
@@ -1,121 +0,0 @@
|
||||
# Is the given character code a high (leading) surrogate?
#
# * `charCode` The character code {Number} to test.
#
# Returns a {Boolean}: true when `charCode` lies in 0xD800..0xDBFF.
isHighSurrogate = (charCode) ->
  0xD800 <= charCode <= 0xDBFF
|
||||
|
||||
# Is the given character code a low (trailing) surrogate?
#
# * `charCode` The character code {Number} to test.
#
# Returns a {Boolean}: true when `charCode` lies in 0xDC00..0xDFFF.
isLowSurrogate = (charCode) ->
  0xDC00 <= charCode <= 0xDFFF
|
||||
|
||||
# Is the given character code a variation selector?
#
# * `charCode` The character code {Number} to test.
#
# Returns a {Boolean}: true when `charCode` lies in 0xFE00..0xFE0F.
isVariationSelector = (charCode) ->
  0xFE00 <= charCode <= 0xFE0F
|
||||
|
||||
# Is the given character code a combining character?
#
# * `charCode` The character code {Number} to test.
#
# Returns a {Boolean}: true when `charCode` falls in any of the five
# inclusive ranges listed below.
isCombiningCharacter = (charCode) ->
  0x0300 <= charCode <= 0x036F or
  0x1AB0 <= charCode <= 0x1AFF or
  0x1DC0 <= charCode <= 0x1DFF or
  0x20D0 <= charCode <= 0x20FF or
  0xFE20 <= charCode <= 0xFE2F
|
||||
|
||||
# Are the given character codes a high/low surrogate pair?
#
# * `charCodeA` The first character code {Number}.
# * `charCodeB` The second character code {Number}.
#
# Returns a {Boolean}: true only when `charCodeA` is a high surrogate and
# `charCodeB` is a low surrogate.
isSurrogatePair = (charCodeA, charCodeB) ->
  isHighSurrogate(charCodeA) and isLowSurrogate(charCodeB)
|
||||
|
||||
# Are the given character codes a variation sequence?
#
# * `charCodeA` The first character code {Number}.
# * `charCodeB` The second character code {Number}.
#
# Returns a {Boolean}: true only when `charCodeB` is a variation selector and
# `charCodeA` is not one itself.
isVariationSequence = (charCodeA, charCodeB) ->
  not isVariationSelector(charCodeA) and isVariationSelector(charCodeB)
|
||||
|
||||
# Are the given character codes a combined character pair?
#
# * `charCodeA` The first character code {Number}.
# * `charCodeB` The second character code {Number}.
#
# Returns a {Boolean}: true only when `charCodeB` is a combining character
# and `charCodeA` is not one itself.
isCombinedCharacter = (charCodeA, charCodeB) ->
  not isCombiningCharacter(charCodeA) and isCombiningCharacter(charCodeB)
|
||||
|
||||
# Is the character at the given index the start of a high/low surrogate pair,
# a variation sequence, or a combined character?
#
# * `string` The {String} to check for a surrogate pair, variation sequence,
#   or combined character.
# * `index` The {Number} index to look for a surrogate pair, variation
#   sequence, or combined character. Defaults to 0.
#
# Returns a {Boolean}.
isPairedCharacter = (string, index=0) ->
  # `charCodeAt` yields NaN past the end of the string; every range check
  # below is false for NaN, so the last index never reports a pair.
  charCodeA = string.charCodeAt(index)
  charCodeB = string.charCodeAt(index + 1)
  isSurrogatePair(charCodeA, charCodeB) or
  isVariationSequence(charCodeA, charCodeB) or
  isCombinedCharacter(charCodeA, charCodeB)
|
||||
|
||||
# Is the given character code in the 0x3000..0x30FF range this file treats as
# Japanese kana?
isJapaneseKanaCharacter = (charCode) ->
  0x3000 <= charCode <= 0x30FF

# Is the given character code in the 0x4E00..0x9FFF CJK unified ideograph
# range?
isCJKUnifiedIdeograph = (charCode) ->
  0x4E00 <= charCode <= 0x9FFF

# Is the given character code a full-width form (0xFF01..0xFF5E or
# 0xFFE0..0xFFE6)?
isFullWidthForm = (charCode) ->
  0xFF01 <= charCode <= 0xFF5E or
  0xFFE0 <= charCode <= 0xFFE6

# Is the first code unit of the given character double-width?
#
# * `character` The {String} whose first code unit is inspected.
#
# Returns a {Boolean}: true for kana, CJK unified ideographs and full-width
# forms as defined by the helpers above.
isDoubleWidthCharacter = (character) ->
  charCode = character.charCodeAt(0)

  isJapaneseKanaCharacter(charCode) or
  isCJKUnifiedIdeograph(charCode) or
  isFullWidthForm(charCode)
|
||||
|
||||
# Is the first code unit of the given character a half-width form?
#
# * `character` The {String} whose first code unit is inspected.
#
# Returns a {Boolean}: true when the code unit lies in 0xFF65..0xFFDC or
# 0xFFE8..0xFFEE.
isHalfWidthCharacter = (character) ->
  charCode = character.charCodeAt(0)

  0xFF65 <= charCode <= 0xFFDC or
  0xFFE8 <= charCode <= 0xFFEE
|
||||
|
||||
# Is the first code unit of the given character Korean?
#
# * `character` The {String} whose first code unit is inspected.
#
# Returns a {Boolean}: true when the code unit falls in any of the five
# inclusive ranges listed below.
isKoreanCharacter = (character) ->
  charCode = character.charCodeAt(0)

  0xAC00 <= charCode <= 0xD7A3 or
  0x1100 <= charCode <= 0x11FF or
  0x3130 <= charCode <= 0x318F or
  0xA960 <= charCode <= 0xA97F or
  0xD7B0 <= charCode <= 0xD7FF
|
||||
|
||||
# Is the given character a CJK character?
#
# * `character` The {String} to test.
#
# Returns a {Boolean}: true when the character is double-width, half-width or
# Korean according to the respective helpers.
isCJKCharacter = (character) ->
  isDoubleWidthCharacter(character) or
  isHalfWidthCharacter(character) or
  isKoreanCharacter(character)
|
||||
|
||||
# Does `character` start a word, i.e. is it a non-blank directly preceded by
# a space or tab?
#
# * `previousCharacter` The {String} character before `character`.
# * `character` The {String} character being tested.
#
# Returns a {Boolean}.
isWordStart = (previousCharacter, character) ->
  (previousCharacter is ' ' or previousCharacter is '\t') and
  (character isnt ' ' and character isnt '\t')
|
||||
|
||||
# Is the position before `character` a wrap boundary?
#
# * `previousCharacter` The {String} character before `character`.
# * `character` The {String} character being tested.
#
# Returns a {Boolean}: true when `character` starts a word or is a CJK
# character.
isWrapBoundary = (previousCharacter, character) ->
  isWordStart(previousCharacter, character) or isCJKCharacter(character)
|
||||
|
||||
# Does the given string contain at least one surrogate pair, variation
# sequence, or combined character?
#
# * `string` The {String} to check for the presence of paired characters.
#
# Returns a {Boolean}.
hasPairedCharacter = (string) ->
  for index in [0...string.length]
    return true if isPairedCharacter(string, index)
  false
|
||||
|
||||
# Public API of this module; the remaining helpers are not exported.
module.exports = {
  isPairedCharacter, hasPairedCharacter,
  isDoubleWidthCharacter, isHalfWidthCharacter, isKoreanCharacter,
  isWrapBoundary
}
|
||||
130
src/text-utils.js
Normal file
130
src/text-utils.js
Normal file
@@ -0,0 +1,130 @@
|
||||
// Is the given character code a high (leading) surrogate?
//
// * `charCode` The character code {Number} to test.
//
// Returns a {Boolean}: true when `charCode` lies in 0xD800..0xDBFF.
const isHighSurrogate = (charCode) =>
  charCode >= 0xD800 && charCode <= 0xDBFF
|
||||
|
||||
// Is the given character code a low (trailing) surrogate?
//
// * `charCode` The character code {Number} to test.
//
// Returns a {Boolean}: true when `charCode` lies in 0xDC00..0xDFFF.
const isLowSurrogate = (charCode) =>
  charCode >= 0xDC00 && charCode <= 0xDFFF
|
||||
|
||||
// Is the given character code a variation selector?
//
// * `charCode` The character code {Number} to test.
//
// Returns a {Boolean}: true when `charCode` lies in 0xFE00..0xFE0F.
const isVariationSelector = (charCode) =>
  charCode >= 0xFE00 && charCode <= 0xFE0F
|
||||
|
||||
// Is the given character code a combining character?
//
// * `charCode` The character code {Number} to test.
//
// Returns a {Boolean}: true when `charCode` falls in any of the five
// inclusive ranges listed below.
const isCombiningCharacter = (charCode) =>
  (charCode >= 0x0300 && charCode <= 0x036F) ||
  (charCode >= 0x1AB0 && charCode <= 0x1AFF) ||
  (charCode >= 0x1DC0 && charCode <= 0x1DFF) ||
  (charCode >= 0x20D0 && charCode <= 0x20FF) ||
  (charCode >= 0xFE20 && charCode <= 0xFE2F)
|
||||
|
||||
// Are the given character codes a high/low surrogate pair?
//
// * `charCodeA` The first character code {Number}.
// * `charCodeB` The second character code {Number}.
//
// Returns a {Boolean}: true only when `charCodeA` is a high surrogate and
// `charCodeB` is a low surrogate.
const isSurrogatePair = (charCodeA, charCodeB) =>
  isHighSurrogate(charCodeA) && isLowSurrogate(charCodeB)
|
||||
|
||||
// Are the given character codes a variation sequence?
//
// * `charCodeA` The first character code {Number}.
// * `charCodeB` The second character code {Number}.
//
// Returns a {Boolean}: true only when `charCodeB` is a variation selector and
// `charCodeA` is not one itself.
const isVariationSequence = (charCodeA, charCodeB) =>
  !isVariationSelector(charCodeA) && isVariationSelector(charCodeB)
|
||||
|
||||
// Are the given character codes a combined character pair?
//
// * `charCodeA` The first character code {Number}.
// * `charCodeB` The second character code {Number}.
//
// Returns a {Boolean}: true only when `charCodeB` is a combining character
// and `charCodeA` is not one itself.
const isCombinedCharacter = (charCodeA, charCodeB) =>
  !isCombiningCharacter(charCodeA) && isCombiningCharacter(charCodeB)
|
||||
|
||||
// Is the character at the given index the start of a high/low surrogate pair,
// a variation sequence, or a combined character?
//
// * `string` The {String} to check for a surrogate pair, variation sequence,
//   or combined character.
// * `index` The {Number} index to look for a surrogate pair, variation
//   sequence, or combined character. Defaults to 0.
//
// Returns a {Boolean}.
const isPairedCharacter = (string, index = 0) => {
  // `charCodeAt` yields NaN past the end of the string; every range check
  // below is false for NaN, so the last index never reports a pair.
  const charCodeA = string.charCodeAt(index)
  const charCodeB = string.charCodeAt(index + 1)

  return (
    isSurrogatePair(charCodeA, charCodeB) ||
    isVariationSequence(charCodeA, charCodeB) ||
    isCombinedCharacter(charCodeA, charCodeB)
  )
}
|
||||
|
||||
// Is the given character code in the 0x3000..0x30FF range this file treats as
// Japanese kana?
const isJapaneseKanaCharacter = (charCode) =>
  charCode >= 0x3000 && charCode <= 0x30FF

// Is the given character code in the 0x4E00..0x9FFF CJK unified ideograph
// range?
const isCJKUnifiedIdeograph = (charCode) =>
  charCode >= 0x4E00 && charCode <= 0x9FFF

// Is the given character code a full-width form (0xFF01..0xFF5E or
// 0xFFE0..0xFFE6)?
const isFullWidthForm = (charCode) =>
  (charCode >= 0xFF01 && charCode <= 0xFF5E) ||
  (charCode >= 0xFFE0 && charCode <= 0xFFE6)

// Is the first code unit of the given character double-width?
//
// * `character` The {String} whose first code unit is inspected.
//
// Returns a {Boolean}: true for kana, CJK unified ideographs and full-width
// forms as defined by the helpers above. An empty string yields false because
// `charCodeAt(0)` is then NaN.
const isDoubleWidthCharacter = (character) => {
  const charCode = character.charCodeAt(0)

  return (
    isJapaneseKanaCharacter(charCode) ||
    isCJKUnifiedIdeograph(charCode) ||
    isFullWidthForm(charCode)
  )
}
|
||||
|
||||
// Is the first code unit of the given character a half-width form?
//
// * `character` The {String} whose first code unit is inspected.
//
// Returns a {Boolean}: true when the code unit lies in 0xFF65..0xFFDC or
// 0xFFE8..0xFFEE. An empty string yields false because `charCodeAt(0)` is
// then NaN.
const isHalfWidthCharacter = (character) => {
  const charCode = character.charCodeAt(0)

  return (
    (charCode >= 0xFF65 && charCode <= 0xFFDC) ||
    (charCode >= 0xFFE8 && charCode <= 0xFFEE)
  )
}
|
||||
|
||||
// Is the first code unit of the given character Korean?
//
// * `character` The {String} whose first code unit is inspected.
//
// Returns a {Boolean}: true when the code unit falls in any of the five
// inclusive ranges listed below. An empty string yields false because
// `charCodeAt(0)` is then NaN.
const isKoreanCharacter = (character) => {
  const charCode = character.charCodeAt(0)

  return (
    (charCode >= 0xAC00 && charCode <= 0xD7A3) ||
    (charCode >= 0x1100 && charCode <= 0x11FF) ||
    (charCode >= 0x3130 && charCode <= 0x318F) ||
    (charCode >= 0xA960 && charCode <= 0xA97F) ||
    (charCode >= 0xD7B0 && charCode <= 0xD7FF)
  )
}
|
||||
|
||||
// Is the given character a CJK character?
//
// * `character` The {String} to test.
//
// Returns a {Boolean}: true when the character is double-width, half-width or
// Korean according to the respective helpers.
const isCJKCharacter = (character) =>
  isDoubleWidthCharacter(character) ||
  isHalfWidthCharacter(character) ||
  isKoreanCharacter(character)
|
||||
|
||||
// Does `character` start a word, i.e. is it a non-blank directly preceded by
// a space or tab?
//
// * `previousCharacter` The {String} character before `character`.
// * `character` The {String} character being tested.
//
// Returns a {Boolean}.
const isWordStart = (previousCharacter, character) =>
  (previousCharacter === ' ' || previousCharacter === '\t') &&
  character !== ' ' &&
  character !== '\t'
|
||||
|
||||
// Is the position before `character` a wrap boundary?
//
// * `previousCharacter` The {String} character before `character`.
// * `character` The {String} character being tested.
//
// Returns a {Boolean}: true when `character` starts a word or is a CJK
// character.
const isWrapBoundary = (previousCharacter, character) =>
  isWordStart(previousCharacter, character) || isCJKCharacter(character)
|
||||
|
||||
// Does the given string contain at least one surrogate pair, variation
// sequence, or combined character?
//
// * `string` The {String} to check for the presence of paired characters.
//
// Returns a {Boolean}.
const hasPairedCharacter = (string) => {
  for (let index = 0; index < string.length; index++) {
    if (isPairedCharacter(string, index)) return true
  }
  return false
}
|
||||
|
||||
module.exports = {
|
||||
isPairedCharacter,
|
||||
hasPairedCharacter,
|
||||
isDoubleWidthCharacter,
|
||||
isHalfWidthCharacter,
|
||||
isKoreanCharacter,
|
||||
isWrapBoundary
|
||||
}
|
||||
Reference in New Issue
Block a user