Files
meteor/packages/jsparse/lexer.js
2013-07-25 18:54:40 -07:00

421 lines
24 KiB
JavaScript

var regexEscape = function (str) {
return str.replace(/[\][^$\\.*+?(){}|]/g, '\\$&');
};
// Adapted from source code of http://xregexp.com/plugins/#unicode
var unicodeCategories = {
Ll: "0061-007A00B500DF-00F600F8-00FF01010103010501070109010B010D010F01110113011501170119011B011D011F01210123012501270129012B012D012F01310133013501370138013A013C013E014001420144014601480149014B014D014F01510153015501570159015B015D015F01610163016501670169016B016D016F0171017301750177017A017C017E-0180018301850188018C018D019201950199-019B019E01A101A301A501A801AA01AB01AD01B001B401B601B901BA01BD-01BF01C601C901CC01CE01D001D201D401D601D801DA01DC01DD01DF01E101E301E501E701E901EB01ED01EF01F001F301F501F901FB01FD01FF02010203020502070209020B020D020F02110213021502170219021B021D021F02210223022502270229022B022D022F02310233-0239023C023F0240024202470249024B024D024F-02930295-02AF037103730377037B-037D039003AC-03CE03D003D103D5-03D703D903DB03DD03DF03E103E303E503E703E903EB03ED03EF-03F303F503F803FB03FC0430-045F04610463046504670469046B046D046F04710473047504770479047B047D047F0481048B048D048F04910493049504970499049B049D049F04A104A304A504A704A904AB04AD04AF04B104B304B504B704B904BB04BD04BF04C204C404C604C804CA04CC04CE04CF04D104D304D504D704D904DB04DD04DF04E104E304E504E704E904EB04ED04EF04F104F304F504F704F904FB04FD04FF05010503050505070509050B050D050F05110513051505170519051B051D051F05210523052505270561-05871D00-1D2B1D6B-1D771D79-1D9A1E011E031E051E071E091E0B1E0D1E0F1E111E131E151E171E191E1B1E1D1E1F1E211E231E251E271E291E2B1E2D1E2F1E311E331E351E371E391E3B1E3D1E3F1E411E431E451E471E491E4B1E4D1E4F1E511E531E551E571E591E5B1E5D1E5F1E611E631E651E671E691E6B1E6D1E6F1E711E731E751E771E791E7B1E7D1E7F1E811E831E851E871E891E8B1E8D1E8F1E911E931E95-1E9D1E9F1EA11EA31EA51EA71EA91EAB1EAD1EAF1EB11EB31EB51EB71EB91EBB1EBD1EBF1EC11EC31EC51EC71EC91ECB1ECD1ECF1ED11ED31ED51ED71ED91EDB1EDD1EDF1EE11EE31EE51EE71EE91EEB1EED1EEF1EF11EF31EF51EF71EF91EFB1EFD1EFF-1F071F10-1F151F20-1F271F30-1F371F40-1F451F50-1F571F60-1F671F70-1F7D1F80-1F871F90-1F971FA0-1FA71FB0-1FB41FB61FB71FBE1FC2-1FC41FC61FC71FD0-1FD31FD61FD71FE0-1FE71FF2-1FF41FF61FF7210A210E210F2113212F21342139213C213D2146-2149214E21842C30-2C5E2C612C652C662C682C6A2C6C2C712C732C742C76-2C7B2C812C832C852C872C892C8B2C8D2C8F2C912C932C952C972C992C9B2C9D2C9F2CA12CA32CA52CA72CA92CAB2CAD2CAF2CB12CB32CB52CB72CB92CBB2CBD2CBF2CC12CC32CC52CC72CC92CCB2CCD2CCF2CD12CD32CD52CD72CD92CDB2CDD2CDF2CE12CE32CE42CEC2CEE2CF32D00-2D252D272D2DA641A643A645A647A649A64BA64DA64FA651A653A655A657A659A65BA65DA65FA661A663A665A667A669A66BA66DA681A683A685A687A689A68BA68DA68FA691A693A695A697A723A725A727A729A72BA72DA72F-A731A733A735A737A739A73BA73DA73FA741A743A745A747A749A74BA74DA74FA751A753A755A757A759A75BA75DA75FA761A763A765A767A769A76BA76DA76FA771-A778A77AA77CA77FA781A783A785A787A78CA78EA791A793A7A1A7A3A7A5A7A7A7A9A7FAFB00-FB06FB13-FB17FF41-FF5A",
Lm: "02B0-02C102C6-02D102E0-02E402EC02EE0374037A0559064006E506E607F407F507FA081A0824082809710E460EC610FC17D718431AA71C78-1C7D1D2C-1D6A1D781D9B-1DBF2071207F2090-209C2C7C2C7D2D6F2E2F30053031-3035303B309D309E30FC-30FEA015A4F8-A4FDA60CA67FA717-A71FA770A788A7F8A7F9A9CFAA70AADDAAF3AAF4FF70FF9EFF9F",
Lo: "00AA00BA01BB01C0-01C3029405D0-05EA05F0-05F20620-063F0641-064A066E066F0671-06D306D506EE06EF06FA-06FC06FF07100712-072F074D-07A507B107CA-07EA0800-08150840-085808A008A2-08AC0904-0939093D09500958-09610972-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A390A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE00AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B830B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E450E810E820E840E870E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC40EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066106E-10701075-1081108E10D0-10FA10FD-1248124A-124D1250-12561258125A-125D1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-13151318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-17311740-17511760-176C176E-17701780-17B317DC1820-18421844-18771880-18A818AA18B0-18F51900-191C1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541B05-1B331B45-1B4B1B83-1BA01BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C771CE9-1CEC1CEE-1CF11CF51CF62135-21382D30-2D672D80-2D962DA0-2DA62DA8-2DAE2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE3006303C3041-3096309F30A1-30FA30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCCA000-A014A016-A48CA4D0-A4F7A500-A60BA610-A61FA62AA62BA66EA6A0-A6E5A7FB-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2AA00-AA28AA40-AA42AA44-AA4BAA60-AA6FAA71-AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADBAADCAAE0-AAEAAAF2AB01-AB06AB09-AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3DFD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF66-FF6FFF71-FF9DFFA0-FFBEFFC2-FFC7FFCA-FFCFFFD2-FFD7FFDA-FFDC",
Lt: "01C501C801CB01F21F88-1F8F1F98-1F9F1FA8-1FAF1FBC1FCC1FFC",
Lu: "0041-005A00C0-00D600D8-00DE01000102010401060108010A010C010E01100112011401160118011A011C011E01200122012401260128012A012C012E01300132013401360139013B013D013F0141014301450147014A014C014E01500152015401560158015A015C015E01600162016401660168016A016C016E017001720174017601780179017B017D018101820184018601870189-018B018E-0191019301940196-0198019C019D019F01A001A201A401A601A701A901AC01AE01AF01B1-01B301B501B701B801BC01C401C701CA01CD01CF01D101D301D501D701D901DB01DE01E001E201E401E601E801EA01EC01EE01F101F401F6-01F801FA01FC01FE02000202020402060208020A020C020E02100212021402160218021A021C021E02200222022402260228022A022C022E02300232023A023B023D023E02410243-02460248024A024C024E03700372037603860388-038A038C038E038F0391-03A103A3-03AB03CF03D2-03D403D803DA03DC03DE03E003E203E403E603E803EA03EC03EE03F403F703F903FA03FD-042F04600462046404660468046A046C046E04700472047404760478047A047C047E0480048A048C048E04900492049404960498049A049C049E04A004A204A404A604A804AA04AC04AE04B004B204B404B604B804BA04BC04BE04C004C104C304C504C704C904CB04CD04D004D204D404D604D804DA04DC04DE04E004E204E404E604E804EA04EC04EE04F004F204F404F604F804FA04FC04FE05000502050405060508050A050C050E05100512051405160518051A051C051E05200522052405260531-055610A0-10C510C710CD1E001E021E041E061E081E0A1E0C1E0E1E101E121E141E161E181E1A1E1C1E1E1E201E221E241E261E281E2A1E2C1E2E1E301E321E341E361E381E3A1E3C1E3E1E401E421E441E461E481E4A1E4C1E4E1E501E521E541E561E581E5A1E5C1E5E1E601E621E641E661E681E6A1E6C1E6E1E701E721E741E761E781E7A1E7C1E7E1E801E821E841E861E881E8A1E8C1E8E1E901E921E941E9E1EA01EA21EA41EA61EA81EAA1EAC1EAE1EB01EB21EB41EB61EB81EBA1EBC1EBE1EC01EC21EC41EC61EC81ECA1ECC1ECE1ED01ED21ED41ED61ED81EDA1EDC1EDE1EE01EE21EE41EE61EE81EEA1EEC1EEE1EF01EF21EF41EF61EF81EFA1EFC1EFE1F08-1F0F1F18-1F1D1F28-1F2F1F38-1F3F1F48-1F4D1F591F5B1F5D1F5F1F68-1F6F1FB8-1FBB1FC8-1FCB1FD8-1FDB1FE8-1FEC1FF8-1FFB21022107210B-210D2110-211221152119-211D212421262128212A-212D2130-2133213E213F214521832C00-2C2E2C602C62-2C642C672C692C6B2C6D-2C702C722C752C7E-2C802C822C842C862C882C8A2C8C2C8E2C902C922C942C962C982C9A2C9C2C9E2CA02CA22CA42CA62CA82CAA2CAC2CAE2CB02CB22CB42CB62CB82CBA2CBC2CBE2CC02CC22CC42CC62CC82CCA2CCC2CCE2CD02CD22CD42CD62CD82CDA2CDC2CDE2CE02CE22CEB2CED2CF2A640A642A644A646A648A64AA64CA64EA650A652A654A656A658A65AA65CA65EA660A662A664A666A668A66AA66CA680A682A684A686A688A68AA68CA68EA690A692A694A696A722A724A726A728A72AA72CA72EA732A734A736A738A73AA73CA73EA740A742A744A746A748A74AA74CA74EA750A752A754A756A758A75AA75CA75EA760A762A764A766A768A76AA76CA76EA779A77BA77DA77EA780A782A784A786A78BA78DA790A792A7A0A7A2A7A4A7A6A7A8A7AAFF21-FF3A",
Mc: "0903093B093E-09400949-094C094E094F0982098309BE-09C009C709C809CB09CC09D70A030A3E-0A400A830ABE-0AC00AC90ACB0ACC0B020B030B3E0B400B470B480B4B0B4C0B570BBE0BBF0BC10BC20BC6-0BC80BCA-0BCC0BD70C01-0C030C41-0C440C820C830CBE0CC0-0CC40CC70CC80CCA0CCB0CD50CD60D020D030D3E-0D400D46-0D480D4A-0D4C0D570D820D830DCF-0DD10DD8-0DDF0DF20DF30F3E0F3F0F7F102B102C10311038103B103C105610571062-10641067-106D108310841087-108C108F109A-109C17B617BE-17C517C717C81923-19261929-192B193019311933-193819B0-19C019C819C91A19-1A1B1A551A571A611A631A641A6D-1A721B041B351B3B1B3D-1B411B431B441B821BA11BA61BA71BAA1BAC1BAD1BE71BEA-1BEC1BEE1BF21BF31C24-1C2B1C341C351CE11CF21CF3302E302FA823A824A827A880A881A8B4-A8C3A952A953A983A9B4A9B5A9BAA9BBA9BD-A9C0AA2FAA30AA33AA34AA4DAA7BAAEBAAEEAAEFAAF5ABE3ABE4ABE6ABE7ABE9ABEAABEC",
Mn: "0300-036F0483-04870591-05BD05BF05C105C205C405C505C70610-061A064B-065F067006D6-06DC06DF-06E406E706E806EA-06ED07110730-074A07A6-07B007EB-07F30816-0819081B-08230825-08270829-082D0859-085B08E4-08FE0900-0902093A093C0941-0948094D0951-095709620963098109BC09C1-09C409CD09E209E30A010A020A3C0A410A420A470A480A4B-0A4D0A510A700A710A750A810A820ABC0AC1-0AC50AC70AC80ACD0AE20AE30B010B3C0B3F0B41-0B440B4D0B560B620B630B820BC00BCD0C3E-0C400C46-0C480C4A-0C4D0C550C560C620C630CBC0CBF0CC60CCC0CCD0CE20CE30D41-0D440D4D0D620D630DCA0DD2-0DD40DD60E310E34-0E3A0E47-0E4E0EB10EB4-0EB90EBB0EBC0EC8-0ECD0F180F190F350F370F390F71-0F7E0F80-0F840F860F870F8D-0F970F99-0FBC0FC6102D-10301032-10371039103A103D103E10581059105E-10601071-1074108210851086108D109D135D-135F1712-17141732-1734175217531772177317B417B517B7-17BD17C617C9-17D317DD180B-180D18A91920-19221927192819321939-193B1A171A181A561A58-1A5E1A601A621A65-1A6C1A73-1A7C1A7F1B00-1B031B341B36-1B3A1B3C1B421B6B-1B731B801B811BA2-1BA51BA81BA91BAB1BE61BE81BE91BED1BEF-1BF11C2C-1C331C361C371CD0-1CD21CD4-1CE01CE2-1CE81CED1CF41DC0-1DE61DFC-1DFF20D0-20DC20E120E5-20F02CEF-2CF12D7F2DE0-2DFF302A-302D3099309AA66FA674-A67DA69FA6F0A6F1A802A806A80BA825A826A8C4A8E0-A8F1A926-A92DA947-A951A980-A982A9B3A9B6-A9B9A9BCAA29-AA2EAA31AA32AA35AA36AA43AA4CAAB0AAB2-AAB4AAB7AAB8AABEAABFAAC1AAECAAEDAAF6ABE5ABE8ABEDFB1EFE00-FE0FFE20-FE26",
Nd: "0030-00390660-066906F0-06F907C0-07C90966-096F09E6-09EF0A66-0A6F0AE6-0AEF0B66-0B6F0BE6-0BEF0C66-0C6F0CE6-0CEF0D66-0D6F0E50-0E590ED0-0ED90F20-0F291040-10491090-109917E0-17E91810-18191946-194F19D0-19D91A80-1A891A90-1A991B50-1B591BB0-1BB91C40-1C491C50-1C59A620-A629A8D0-A8D9A900-A909A9D0-A9D9AA50-AA59ABF0-ABF9FF10-FF19",
Nl: "16EE-16F02160-21822185-218830073021-30293038-303AA6E6-A6EF",
Pc: "005F203F20402054FE33FE34FE4D-FE4FFF3F"
};
var unicodeClass = function (abbrev) {
return '[' +
unicodeCategories[abbrev].replace(/[0-9A-F]{4}/ig, "\\u$&") + ']';
};
// See ECMA-262 spec, 3rd edition, section 7
// Section 7.2
// Match one or more characters of whitespace, excluding line terminators.
// Do this by matching reluctantly, stopping at a non-dot (line terminator
// or end of string) or a non-whitespace.
// We are taking advantage of the fact that we are parsing JS from JS in
// regexes like this by "passing through" the spec's definition of whitespace,
// which is the same in regexes and the lexical grammar.
var rWhiteSpace = /[^\S\u000A\u000D\u2028\u2029]+/g;
// Section 7.3
// Match one line terminator. Same as (?!.)[\s\S] but more explicit.
var rLineTerminator = /[\u000A\u000D\u2028\u2029]/g;
// Section 7.4
// Match one multi-line comment.
// [\s\S] is shorthand for any character, including newlines.
// The *? reluctant qualifier makes this easy.
var rMultiLineComment = /\/\*[\s\S]*?\*\//g;
// Match one single-line comment, not including the line terminator.
var rSingleLineComment = /\/\/.*/g;
// Section 7.6
// Match one or more characters that can start an identifier.
// This is IdentifierStart+.
var rIdentifierPrefix = new RegExp(
"([a-zA-Z$_]+|\\\\u[0-9a-fA-F]{4}|" +
[unicodeClass('Lu'), unicodeClass('Ll'), unicodeClass('Lt'),
unicodeClass('Lm'), unicodeClass('Lo'), unicodeClass('Nl')].join('|') +
")+", 'g');
// Match one or more characters that can continue an identifier.
// This is (IdentifierPart and not IdentifierStart)+.
// To match a full identifier, match rIdentifierPrefix, then
// match rIdentifierMiddle followed by rIdentifierPrefix until they both fail.
var rIdentifierMiddle = new RegExp(
"([0-9]|" + [unicodeClass('Mn'), unicodeClass('Mc'), unicodeClass('Nd'),
unicodeClass('Pc')].join('|') + ")+", 'g');
// Section 7.7
// Match one punctuator (except for division punctuators).
var rPunctuator = new RegExp(
regexEscape("{ } ( ) [ ] . ; , < > <= >= == != === !== + - * % ++ -- << >> "+
">>> & | ^ ! ~ && || ? : = += -= *= %= <<= >>= >>>= &= |= ^=")
// sort from longest to shortest so that we don't match '==' for '===' and
// '*' for '*=', etc.
.split(' ').sort(function (a,b) { return b.length - a.length; })
.join('|'), 'g');
var rDivPunctuator = /\/=?/g;
// Section 7.8.3
var rHexLiteral = /0[xX][0-9a-fA-F]+(?!\w)/g;
var rOctLiteral = /0[0-7]+(?!\w)/g; // deprecated
var rDecLiteral =
/(((0|[1-9][0-9]*)(\.[0-9]*)?)|\.[0-9]+)([Ee][+-]?[0-9]+)?(?!\w)/g;
// Section 7.8.4
var rStringQuote = /["']/g;
// Match one or more characters besides quotes, backslashes, or line ends
var rStringMiddle = /(?=.)[^"'\\]+?((?!.)|(?=["'\\]))/g;
// Match one escape sequence, including the backslash.
var rEscapeSequence =
/\\(['"\\bfnrtv]|0(?![0-9])|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|(?=.)[^ux0-9])/g;
// Match one ES5 line continuation
var rLineContinuation =
/\\(\r\n|[\u000A\u000D\u2028\u2029])/g;
// Section 7.8.5
// Match one regex literal, including slashes, not including flags.
// Support unescaped '/' in character classes, per 5th ed.
// For example: `/[/]/` will match the string `"/"`.
//
// Explanation of regex:
// - Match `/` not followed by `/` or `*`
// - Match one or more of any of these:
// - Backslash followed by one non-newline
// - One non-newline, not `[` or `\` or `/`
// - A character class, beginning with `[` and ending with `]`.
// In the middle is zero or more of any of these:
// - Backslash followed by one non-newline
// - One non-newline, not `]` or `\`
// - Match closing `/`
var rRegexLiteral =
/\/(?![*\/])(\\.|(?=.)[^\[\/\\]|\[(\\.|(?=.)[^\]\\])*\])+\//g;
var rRegexFlags = /[a-zA-Z]*/g;
var rDecider =
/((?=.)\s)|(\/[\/\*]?)|([\][{}();,<>=!+*%&|^~?:-]|\.(?![0-9]))|([\d.])|(["'])|(.)|([\S\s])/g;
var keywordLookup = {
' break': 'KEYWORD',
' case': 'KEYWORD',
' catch': 'KEYWORD',
' continue': 'KEYWORD',
' debugger': 'KEYWORD',
' default': 'KEYWORD',
' delete': 'KEYWORD',
' do': 'KEYWORD',
' else': 'KEYWORD',
' finally': 'KEYWORD',
' for': 'KEYWORD',
' function': 'KEYWORD',
' if': 'KEYWORD',
' in': 'KEYWORD',
' instanceof': 'KEYWORD',
' new': 'KEYWORD',
' return': 'KEYWORD',
' switch': 'KEYWORD',
' this': 'KEYWORD',
' throw': 'KEYWORD',
' try': 'KEYWORD',
' typeof': 'KEYWORD',
' var': 'KEYWORD',
' void': 'KEYWORD',
' while': 'KEYWORD',
' with': 'KEYWORD',
' false': 'BOOLEAN',
' true': 'BOOLEAN',
' null': 'NULL'
};
var makeSet = function (array) {
var s = {};
for (var i = 0, N = array.length; i < N; i++)
s[array[i]] = true;
return s;
};
var nonTokenTypes = makeSet('WHITESPACE COMMENT NEWLINE EOF ERROR'.split(' '));
var punctuationBeforeDivision = makeSet('] ) } ++ --'.split(' '));
var keywordsBeforeDivision = makeSet('this'.split(' '));
var guessIsDivisionPermittedAfterToken = function (tok) {
// Figure out if a '/' character should be interpreted as division
// rather than the start of a regular expression when it follows the
// token, which must be a token lexeme per isToken().
// The beginning of section 7 of the spec briefly
// explains what's going on; basically the lexical grammar can't
// distinguish, for example, `e/f/g` (division) from `e=/f/g`
// (assignment of a regular expression), among many other variations.
//
// THIS IS ONLY A HEURISTIC, though it will rarely fail.
// Here are the two cases I know of where help from the parser is needed:
// - if (foo)
// /ba/.test("banana") && console.log("matches");
// (Close paren of a control structure before a statement starting with
// a regex literal. Starting a statement with a regex literal is
// unusual, of course, because it's hard to have a side effect.)
// - ++ /foo/.abc
// (Prefix `++` or `--` before an expression starting with a regex
// literal. This will run but I can't see any use for it.)
switch (tok.type()) {
case "PUNCTUATION":
// few punctuators can end an expression, but e.g. `)`
return !! punctuationBeforeDivision[tok.text()];
case "KEYWORD":
// few keywords can end an expression, but e.g. `this`
return !! keywordsBeforeDivision[tok.text()];
case "IDENTIFIER":
return true;
default: // literal
return true;
}
};
////////// PUBLIC API
var Lexeme = function (pos, type, text) {
this._pos = pos;
this._type = type;
this._text = text;
};
Lexeme.prototype.startPos = function () {
return this._pos;
};
Lexeme.prototype.endPos = function () {
return this._pos + this._text.length;
};
Lexeme.prototype.type = function () {
return this._type;
};
Lexeme.prototype.text = function () {
return this._text;
};
Lexeme.prototype.isToken = function () {
return ! nonTokenTypes[this._type];
};
Lexeme.prototype.isError = function () {
return this._type === "ERROR";
};
Lexeme.prototype.isEOF = function () {
return this._type === "EOF";
};
Lexeme.prototype.prev = function () {
return this._prev;
};
Lexeme.prototype.next = function () {
return this._next;
};
Lexeme.prototype.toString = function () {
return this.isError() ? "ERROR" :
this.isEOF() ? "EOF" : "`" + this.text() + "`";
};
// Create a Lexer for the given string of JavaScript code.
//
// A lexer keeps a pointer `pos` into the string that is
// advanced when you ask for the next lexeme with `next()`.
//
// XXXXX UPDATE DOCS
// Properties:
// code: Original JavaScript code string.
// pos: Current index into the string. You can assign to it
// to continue lexing from a different position. After
// calling next(), it is the ending index of the most
// recent lexeme.
// lastPos: The starting index of the most recent lexeme.
// Equal to `pos - text.length`.
// text: Text of the last lexeme as a string.
// type: Type of the last lexeme, as returned by `next()`.
// divisionPermitted: Whether a '/' character should be interpreted
// as division rather than the start of a regular expression.
// This flag is set automatically during lexing based on the
// previous token (i.e. the most recent token lexeme), but
// it is technically only a heuristic.
// Thie flag can be read and set manually to affect the
// parsing of the next token.
JSLexer = function (code) {
this.code = code;
this.pos = 0;
this.divisionPermitted = false;
this.lastLexeme = null;
};
JSLexer.Lexeme = Lexeme;
// XXXX UPDATE DOCS
// Return the type of the next of lexeme starting at `pos`, and advance
// `pos` to the end of the lexeme. The text of the lexeme is available
// in `text`. The text is always the substring of `code` between the
// old and new values of `pos`. An "EOF" lexeme terminates
// the stream. "ERROR" lexemes indicate a bad input string. Out of all
// lexemes, only "EOF" has empty text, and it always has empty text.
// All others contain at least one character from the source code.
//
// Lexeme types:
// Literals: BOOLEAN, NULL, REGEX, NUMBER, STRING
// Whitespace-like: WHITESPACE, COMMENT, NEWLINE, EOF
// Other Tokens: IDENTIFIER, KEYWORD, PUNCTUATION
// ... and ERROR
JSLexer.prototype.next = function () {
var self = this;
var code = self.code;
var origPos = self.pos;
var divisionPermitted = self.divisionPermitted;
if (origPos > code.length)
throw new Error("out of range");
// Running regexes inside this function will move this local
// `pos` forward.
// When we commit to emitting a lexeme, we'll set self.pos
// based on it.
var pos = origPos;
// Emit a lexeme. Always called as `return lexeme(type)`.
var lexeme = function (type) {
// If `pos` hasn't moved, we consider this an error.
// This means that grammar cases that only run one regex
// or an alternation ('||') of regexes don't need to
// check for failure.
// This also guarantees that only EOF lexemes are empty.
if (pos === origPos && type !== 'EOF') {
type = 'ERROR';
pos = origPos + 1;
}
self.pos = pos;
var lex = new JSLexer.Lexeme(origPos, type, code.substring(origPos, pos));
if (self.lastLexeme) {
self.lastLexeme._next = lex;
lex._prev = self.lastLexeme;
}
self.lastLexeme = lex;
if (lex.isToken())
self.divisionPermitted = guessIsDivisionPermittedAfterToken(lex);
return lex;
};
if (pos === code.length)
return lexeme('EOF');
// Result of the regex match in the most recent call to `run`.
var match = null;
// Run a regex starting from `pos`, recording the end of the matched
// string in `pos` and the match data in `match`. The regex must have
// the 'g' (global) flag. If it doesn't match at `pos`, set `match`
// to null. The caller should expect the regex to match at `pos`, as
// failure is too expensive to run in a tight loop.
var run = function (regex) {
// Cause regex matching to start at `pos`.
regex.lastIndex = pos;
match = regex.exec(code);
// Simulate "sticky" matching by throwing out the match if it
// didn't match exactly at `pos`. If it didn't, we may have
// just searched the entire string.
if (match && (match.index !== pos))
match = null;
// Record the end position of the match back into `pos`.
// Avoid an IE7 bug where lastIndex is incremented when
// the match has 0 length.
if (match && match[0].length !== 0)
pos = regex.lastIndex;
return match;
};
// Decide which case of the grammar we are in based on one or two
// characters, then roll back `pos`.
run(rDecider);
pos = origPos;
// Grammar cases
if (match[1]) { // \s
run(rWhiteSpace);
return lexeme('WHITESPACE');
}
if (match[2]) { // one of //, /*, /
if (match[2] === '//') {
run(rSingleLineComment);
return lexeme('COMMENT');
}
if (match[2] === '/*') {
run(rMultiLineComment);
return lexeme(match ? 'COMMENT' : 'ERROR');
}
if (match[2] === '/') {
if (divisionPermitted) {
run(rDivPunctuator);
return lexeme('PUNCTUATION');
} else {
run(rRegexLiteral);
if (! match)
return lexeme('ERROR');
run(rRegexFlags);
return lexeme('REGEX');
}
}
}
if (match[3]) { // any other punctuation char
run(rPunctuator);
return lexeme(match ? 'PUNCTUATION' : 'ERROR');
}
if (match[4]) { // 0-9
run(rDecLiteral) || run(rHexLiteral) || run(rOctLiteral);
return lexeme(match ? 'NUMBER' : 'ERROR');
}
if (match[5]) { // " or '
run(rStringQuote);
var quote = match[0];
do {
run(rStringMiddle) || run(rEscapeSequence) ||
run(rLineContinuation) || run(rStringQuote);
} while (match && match[0] !== quote);
if (! (match && match[0] === quote))
return lexeme('ERROR');
return lexeme('STRING');
}
if (match[7]) { // non-dot (line terminator)
run(rLineTerminator);
return lexeme('NEWLINE');
}
// dot (any non-line-terminator)
run(rIdentifierPrefix);
// Use non-short-circuiting bitwise OR, '|', to always try
// both regexes in sequence, returning false only if neither
// matched.
while ((!! run(rIdentifierMiddle)) |
(!! run(rIdentifierPrefix))) { /*continue*/ }
var word = code.substring(origPos, pos);
return lexeme(keywordLookup[' '+word] || 'IDENTIFIER');
};
JSLexer.prettyOffset = function (code, pos) {
var codeUpToPos = code.substring(0, pos);
var startOfLine = codeUpToPos.lastIndexOf('\n') + 1;
var indexInLine = pos - startOfLine; // 0-based
var lineNum = codeUpToPos.replace(/[^\n]+/g, '').length + 1; // 1-based
return "line " + lineNum + ", offset " + indexInLine;
};