_ = require 'underscore-plus' textUtils = require './text-utils' WhitespaceRegexesByTabLength = {} LeadingWhitespaceRegex = /^[ ]+/ TrailingWhitespaceRegex = /[ ]+$/ EscapeRegex = /[&"'<>]/g CharacterRegex = /./g StartCharacterRegex = /^./ StartDotRegex = /^\.?/ WhitespaceRegex = /\S/ MaxTokenLength = 20000 # Represents a single unit of text as selected by a grammar. module.exports = class Token value: null hasSurrogatePair: false scopes: null isAtomic: null isHardTab: null constructor: ({@value, @scopes, @isAtomic, @bufferDelta, @isHardTab}) -> @screenDelta = @value.length @bufferDelta ?= @screenDelta @hasSurrogatePair = textUtils.hasSurrogatePair(@value) isEqual: (other) -> @value == other.value and _.isEqual(@scopes, other.scopes) and !!@isAtomic == !!other.isAtomic isBracket: -> /^meta\.brace\b/.test(_.last(@scopes)) splitAt: (splitIndex) -> value1 = @value.substring(0, splitIndex) value2 = @value.substring(splitIndex) [new Token(value: value1, scopes: @scopes), new Token(value: value2, scopes: @scopes)] whitespaceRegexForTabLength: (tabLength) -> WhitespaceRegexesByTabLength[tabLength] ?= new RegExp("([ ]{#{tabLength}})|(\t)|([^\t]+)", "g") breakOutAtomicTokens: (tabLength, breakOutLeadingWhitespace) -> if @hasSurrogatePair outputTokens = [] for token in @breakOutSurrogatePairs() if token.isAtomic outputTokens.push(token) else outputTokens.push(token.breakOutAtomicTokens(tabLength, breakOutLeadingWhitespace)...) breakOutLeadingWhitespace = token.isOnlyWhitespace() if breakOutLeadingWhitespace outputTokens else return [this] if @isAtomic if breakOutLeadingWhitespace return [this] unless /^[ ]|\t/.test(@value) else return [this] unless /\t/.test(@value) outputTokens = [] regex = @whitespaceRegexForTabLength(tabLength) while match = regex.exec(@value) [fullMatch, softTab, hardTab] = match if softTab and breakOutLeadingWhitespace outputTokens.push(@buildSoftTabToken(tabLength, false)) else if hardTab breakOutLeadingWhitespace = false outputTokens.push(@buildHardTabToken(tabLength, true)) else breakOutLeadingWhitespace = false value = match[0] outputTokens.push(new Token({value, @scopes})) outputTokens breakOutSurrogatePairs: -> outputTokens = [] index = 0 nonSurrogatePairStart = 0 while index < @value.length if textUtils.isSurrogatePair(@value, index) if nonSurrogatePairStart isnt index outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes})) outputTokens.push(@buildSurrogatePairToken(@value, index)) index += 2 nonSurrogatePairStart = index else index++ if nonSurrogatePairStart isnt index outputTokens.push(new Token({value: @value[nonSurrogatePairStart...index], @scopes})) outputTokens buildSurrogatePairToken: (value, index) -> new Token( value: value[index..index + 1] scopes: @scopes isAtomic: true ) buildHardTabToken: (tabLength) -> @buildTabToken(tabLength, true) buildSoftTabToken: (tabLength) -> @buildTabToken(tabLength, false) buildTabToken: (tabLength, isHardTab) -> new Token( value: _.multiplyString(" ", tabLength) scopes: @scopes bufferDelta: if isHardTab then 1 else tabLength isAtomic: true isHardTab: isHardTab ) isOnlyWhitespace: -> not WhitespaceRegex.test(@value) matchesScopeSelector: (selector) -> targetClasses = selector.replace(StartDotRegex, '').split('.') _.any @scopes, (scope) -> scopeClasses = scope.split('.') _.isSubset(targetClasses, scopeClasses) getValueAsHtml: ({invisibles, hasLeadingWhitespace, hasTrailingWhitespace, hasIndentGuide})-> invisibles ?= {} if @isHardTab classes = 'hard-tab' classes += ' indent-guide' if hasIndentGuide classes += ' invisible-character' if invisibles.tab value = if invisibles.tab then @value.replace(StartCharacterRegex, invisibles.tab) else @value html = "#{@escapeString(value)}" else startIndex = 0 endIndex = @value.length leadingHtml = '' trailingHtml = '' if hasLeadingWhitespace and match = LeadingWhitespaceRegex.exec(@value) classes = 'leading-whitespace' classes += ' indent-guide' if hasIndentGuide classes += ' invisible-character' if invisibles.space match[0] = match[0].replace(CharacterRegex, invisibles.space) if invisibles.space leadingHtml = "#{match[0]}" startIndex = match[0].length if hasTrailingWhitespace and match = TrailingWhitespaceRegex.exec(@value) classes = 'trailing-whitespace' classes += ' indent-guide' if hasIndentGuide and not hasLeadingWhitespace classes += ' invisible-character' if invisibles.space match[0] = match[0].replace(CharacterRegex, invisibles.space) if invisibles.space trailingHtml = "#{match[0]}" endIndex = match.index html = leadingHtml if @value.length > MaxTokenLength while startIndex < endIndex html += "" + @escapeString(@value, startIndex, startIndex + MaxTokenLength) + "" startIndex += MaxTokenLength else html += @escapeString(@value, startIndex, endIndex) html += trailingHtml html escapeString: (str, startIndex, endIndex) -> strLength = str.length startIndex ?= 0 endIndex ?= strLength str = str.slice(startIndex, endIndex) if startIndex > 0 or endIndex < strLength str.replace(EscapeRegex, @escapeStringReplace) escapeStringReplace: (match) -> switch match when '&' then '&' when '"' then '"' when "'" then ''' when '<' then '<' when '>' then '>' else match