Files
atom/src/token.coffee
2014-02-06 10:40:45 -08:00

193 lines
6.0 KiB
CoffeeScript

_ = require 'underscore-plus'
textUtils = require './text-utils'
WhitespaceRegexesByTabLength = {}
LeadingWhitespaceRegex = /^[ ]+/
TrailingWhitespaceRegex = /[ ]+$/
EscapeRegex = /[&"'<>]/g
CharacterRegex = /./g
StartCharacterRegex = /^./
StartDotRegex = /^\.?/
WhitespaceRegex = /\S/
MaxTokenLength = 20000
# Represents a single unit of text as selected by a grammar.
module.exports =
class Token
value: null
hasSurrogatePair: false
scopes: null
isAtomic: null
isHardTab: null
constructor: ({@value, @scopes, @isAtomic, @bufferDelta, @isHardTab}) ->
@screenDelta = @value.length
@bufferDelta ?= @screenDelta
@hasSurrogatePair = textUtils.hasSurrogatePair(@value)
isEqual: (other) ->
@value == other.value and _.isEqual(@scopes, other.scopes) and !!@isAtomic == !!other.isAtomic
isBracket: ->
/^meta\.brace\b/.test(_.last(@scopes))
splitAt: (splitIndex) ->
value1 = @value.substring(0, splitIndex)
value2 = @value.substring(splitIndex)
[new Token(value: value1, scopes: @scopes), new Token(value: value2, scopes: @scopes)]
whitespaceRegexForTabLength: (tabLength) ->
WhitespaceRegexesByTabLength[tabLength] ?= new RegExp("([ ]{#{tabLength}})|(\t)|([^\t]+)", "g")
breakOutAtomicTokens: (tabLength, breakOutLeadingWhitespace) ->
if @hasSurrogatePair
outputTokens = []
for token in @breakOutSurrogatePairs()
if token.isAtomic
outputTokens.push(token)
else
outputTokens.push(token.breakOutAtomicTokens(tabLength, breakOutLeadingWhitespace)...)
breakOutLeadingWhitespace = token.isOnlyWhitespace() if breakOutLeadingWhitespace
outputTokens
else
return [this] if @isAtomic
if breakOutLeadingWhitespace
return [this] unless /^[ ]|\t/.test(@value)
else
return [this] unless /\t/.test(@value)
outputTokens = []
regex = @whitespaceRegexForTabLength(tabLength)
while match = regex.exec(@value)
[fullMatch, softTab, hardTab] = match
if softTab and breakOutLeadingWhitespace
outputTokens.push(@buildSoftTabToken(tabLength, false))
else if hardTab
breakOutLeadingWhitespace = false
outputTokens.push(@buildHardTabToken(tabLength, true))
else
breakOutLeadingWhitespace = false
value = match[0]
outputTokens.push(new Token({value, @scopes}))
outputTokens
breakOutSurrogatePairs: ->
outputTokens = []
index = 0
nonSurrogatePairStart = 0
while index < @value.length
if textUtils.isSurrogatePair(@value, index)
if nonSurrogatePairStart isnt index
outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes}))
outputTokens.push(@buildSurrogatePairToken(@value, index))
index += 2
nonSurrogatePairStart = index
else
index++
if nonSurrogatePairStart isnt index
outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes}))
outputTokens
buildSurrogatePairToken: (value, index) ->
new Token(
value: value[index..index + 1]
scopes: @scopes
isAtomic: true
)
buildHardTabToken: (tabLength) ->
@buildTabToken(tabLength, true)
buildSoftTabToken: (tabLength) ->
@buildTabToken(tabLength, false)
buildTabToken: (tabLength, isHardTab) ->
new Token(
value: _.multiplyString(" ", tabLength)
scopes: @scopes
bufferDelta: if isHardTab then 1 else tabLength
isAtomic: true
isHardTab: isHardTab
)
isOnlyWhitespace: ->
not WhitespaceRegex.test(@value)
matchesScopeSelector: (selector) ->
targetClasses = selector.replace(StartDotRegex, '').split('.')
_.any @scopes, (scope) ->
scopeClasses = scope.split('.')
_.isSubset(targetClasses, scopeClasses)
getValueAsHtml: ({invisibles, hasLeadingWhitespace, hasTrailingWhitespace, hasIndentGuide})->
invisibles ?= {}
if @isHardTab
classes = 'hard-tab'
classes += ' indent-guide' if hasIndentGuide
classes += ' invisible-character' if invisibles.tab
value = if invisibles.tab then @value.replace(StartCharacterRegex, invisibles.tab) else @value
html = "<span class='#{classes}'>#{@escapeString(value)}</span>"
else
startIndex = 0
endIndex = @value.length
leadingHtml = ''
trailingHtml = ''
if hasLeadingWhitespace and match = LeadingWhitespaceRegex.exec(@value)
classes = 'leading-whitespace'
classes += ' indent-guide' if hasIndentGuide
classes += ' invisible-character' if invisibles.space
match[0] = match[0].replace(CharacterRegex, invisibles.space) if invisibles.space
leadingHtml = "<span class='#{classes}'>#{match[0]}</span>"
startIndex = match[0].length
if hasTrailingWhitespace and match = TrailingWhitespaceRegex.exec(@value)
classes = 'trailing-whitespace'
classes += ' indent-guide' if hasIndentGuide and not hasLeadingWhitespace
classes += ' invisible-character' if invisibles.space
match[0] = match[0].replace(CharacterRegex, invisibles.space) if invisibles.space
trailingHtml = "<span class='#{classes}'>#{match[0]}</span>"
endIndex = match.index
html = leadingHtml
if @value.length > MaxTokenLength
while startIndex < endIndex
html += "<span>" + @escapeString(@value, startIndex, startIndex + MaxTokenLength) + "</span>"
startIndex += MaxTokenLength
else
html += @escapeString(@value, startIndex, endIndex)
html += trailingHtml
html
escapeString: (str, startIndex, endIndex) ->
strLength = str.length
startIndex ?= 0
endIndex ?= strLength
str = str.slice(startIndex, endIndex) if startIndex > 0 or endIndex < strLength
str.replace(EscapeRegex, @escapeStringReplace)
escapeStringReplace: (match) ->
switch match
when '&' then '&amp;'
when '"' then '&quot;'
when "'" then '&#39;'
when '<' then '&lt;'
when '>' then '&gt;'
else match