From 99e8bacdb3f940d1be9886bd819a7acd2ec3ca67 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 5 Sep 2012 23:34:32 -0700 Subject: [PATCH 01/86] initial commit of jsparse package and demo --- .../jsparse-demo/.meteor/.gitignore | 1 + .../unfinished/jsparse-demo/.meteor/packages | 7 + .../unfinished/jsparse-demo/jsparse-demo.css | 132 +++ .../unfinished/jsparse-demo/jsparse-demo.html | 29 + .../unfinished/jsparse-demo/jsparse-demo.js | 153 +++ packages/jsparse/lexer.js | 346 +++++++ packages/jsparse/package.js | 9 + packages/jsparse/parser.js | 935 ++++++++++++++++++ 8 files changed, 1612 insertions(+) create mode 100644 examples/unfinished/jsparse-demo/.meteor/.gitignore create mode 100644 examples/unfinished/jsparse-demo/.meteor/packages create mode 100644 examples/unfinished/jsparse-demo/jsparse-demo.css create mode 100644 examples/unfinished/jsparse-demo/jsparse-demo.html create mode 100644 examples/unfinished/jsparse-demo/jsparse-demo.js create mode 100644 packages/jsparse/lexer.js create mode 100644 packages/jsparse/package.js create mode 100644 packages/jsparse/parser.js diff --git a/examples/unfinished/jsparse-demo/.meteor/.gitignore b/examples/unfinished/jsparse-demo/.meteor/.gitignore new file mode 100644 index 0000000000..4083037423 --- /dev/null +++ b/examples/unfinished/jsparse-demo/.meteor/.gitignore @@ -0,0 +1 @@ +local diff --git a/examples/unfinished/jsparse-demo/.meteor/packages b/examples/unfinished/jsparse-demo/.meteor/packages new file mode 100644 index 0000000000..0c508c3289 --- /dev/null +++ b/examples/unfinished/jsparse-demo/.meteor/packages @@ -0,0 +1,7 @@ +# Meteor packages used by this project, one per line. +# +# 'meteor add' and 'meteor remove' will edit this file for you, +# but you can also edit it by hand. 
+ +autopublish +jsparse diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.css b/examples/unfinished/jsparse-demo/jsparse-demo.css new file mode 100644 index 0000000000..2ddd6a37fa --- /dev/null +++ b/examples/unfinished/jsparse-demo/jsparse-demo.css @@ -0,0 +1,132 @@ + +* { padding: 0; margin: 0; } +html, body { height: 100%; } + +#topbar { + position: absolute; + width: 100%; + top: 0; + height: 40px; + overflow: auto; + border-bottom: 1px solid #555; + background: #cfc; +} + +#topbarinner { + padding: 10px; + font-family: sans-serif; +} + +#main { + position: absolute; + width: 100%; + top: 40px; + bottom: 0; +} + +#inputarea textarea { + position: absolute; + height: 100%; + left: 0; + right: 50%; + font-family: monospace; + font-size: 100%; +} + +#output { + position: absolute; + height: 100%; + left: 50%; + right: 0; + overflow: auto; + + font-family: monospace; +} + +#inputarea textarea, #output { + line-height: 130%; +} + +.lex { border: 1px solid #333; } + +.lex_keyword { background: #0f0; } +.lex_identifier { background: #ff0; } +.lex_punctuation { background: #0ff; } +.lex_error { background: #f00; } +.lex_whitespace { background: #fcc; } +.lex_comment { background: #ccc; } + +.lex_regex { background: #f0f; } +.lex_null { background: #dac; } +.lex_boolean { background: #faf; } +.lex_number { background: #c3f; } +.lex_string { background: #fc3; } + +.parseerror { + background: #f99; + border: 1px solid blue; + cursor: pointer; +} +.parseerrormessage { color: #c00; } + +.box { + display: inline-block; + margin: 5px; + margin-top: 0; + background: #fff; +} + +#output > .box { + margin-top: 5px; +} + +.box.named { + border: 1px solid #888; + border-radius: 5px; + cursor: pointer; + overflow: hidden; +} + +.box.head { + font-family: sans-serif; + font-size: 70%; + font-weight: bold; + display: block; + margin-left: 0; + margin-right: 0; + background: #ccc; + color: #000; + padding-left: 5px; + padding-right: 5px; + border-bottom: 1px solid #888; +} + 
+.box.head:last-child { + margin: 0; +} + +.box.token { + background: #ddd; + /*border: 1px solid #999;*/ + border: 1px solid #00f; + cursor: pointer; + font-family: monospace; + font-weight: bold; + font-size: 120%; + padding: 1px; +} + +.box.named[mousehover] { + background: #cdf; + border: 1px solid #448; +} + +.box.token[mousehover] { + background: #ace; + border: 1px solid #448; +} + +.box.named[mousehover] > .box.head { + background: #58b; + border-bottom: 1px solid #448; +} \ No newline at end of file diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.html b/examples/unfinished/jsparse-demo/jsparse-demo.html new file mode 100644 index 0000000000..077952c415 --- /dev/null +++ b/examples/unfinished/jsparse-demo/jsparse-demo.html @@ -0,0 +1,29 @@ + + jsparser + + + + {{> page}} + + + diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js new file mode 100644 index 0000000000..3561937343 --- /dev/null +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -0,0 +1,153 @@ + + +if (Meteor.is_client) { + Meteor.startup(function () { + if (! Session.get("input")) + Session.set("input", "var x = 3;"); + }); + + Template.page.input = function () { + return Session.get("input") || ''; + }; + + Template.page.output = function () { + var input = Session.get("input") || ""; + + // LEXER + /* + if (! input) + return ""; + + var L = new Lexer(input); + var html = ""; + while (L.next() !== 'EOF') { + if (L.type === "NEWLINE") { + html += '
'; + } else { + var text = Handlebars._escape(L.text || ' '); + text = text.replace(/(?!.)\s/g, '
'); // for multiline comments + text = text.replace(/\s/g, ' '); + html += '' + + text + ''; + if (L.type === "ERROR") + break; + } + }*/ + + // PARSER + var html; + var ast = null; + var lexer = new Lexer(input); + try { + ast = parse(new Tokenizer(lexer)) || []; + } catch (parseError) { + var errorPos = lexer.lastPos; + var errorLen = lexer.text.length; + + html = Handlebars._escape(input.substring(0, errorPos)); + html += Spark.setDataContext( + {errorPos: errorPos, + errorLen: errorLen}, + '' + + Handlebars._escape(input.substring(errorPos, errorPos + errorLen) || + '') + + ''); + html = html.replace(/(?!.)\s/g, '
'); + html += '
' + + Handlebars._escape(parseError.toString()) + '
'; + } + if (ast) { + var curPos = 0; + var unclosedInfos = []; + var toHtml = function (obj) { + if (_.isArray(obj)) { + var head = obj[0] || 'nothing'; + var rest = obj.slice(1); + var info = { startPos: curPos }; + var html = Spark.setDataContext( + info, + '
' + + Handlebars._escape(head) + '
' + + _.map(rest, toHtml).join('') + '
'); + unclosedInfos.push(info); + return html; + } else if (obj.text) { + // token + _.each(unclosedInfos, function (info) { + info.endPos = curPos; + }); + curPos = obj.pos + obj.text.length; + unclosedInfos.length = 0; + return Spark.setDataContext( + obj, + '
' + + Handlebars._escape(obj.text) + '
'); + } else { + // other? + return '
' + + Handlebars._escape(JSON.stringify(obj)) + '
'; + } + }; + html = toHtml(ast); + curPos = lexer.pos; + _.each(unclosedInfos, function (info) { + info.endPos = curPos; + }); + } + + return new Handlebars.SafeString(html); + }; + + Template.page.events({ + 'keyup #inputarea textarea': function (event) { + var input = event.currentTarget.value; + Session.set("input", input); + }, + 'mouseover .box.named, mouseover .box.token': function (event) { + event.currentTarget.setAttribute('mousehover', 'mousehover'); + event.stopImmediatePropagation(); + }, + 'mouseout .box.named, mouseout .box.token': function (event) { + event.currentTarget.removeAttribute('mousehover'); + event.stopImmediatePropagation(); + }, + 'click .box.token': function (event) { + var token = this; + var startPos = token.pos; + var endPos = startPos + token.text.length; + selectInputText(startPos, endPos); + return false; + }, + 'click .box.named': function (event) { + selectInputText(this.startPos, this.endPos); + return false; + }, + 'click .parseerror': function (event) { + var startPos = this.errorPos; + var endPos = startPos + this.errorLen; + selectInputText(startPos, endPos); + return false; + } + }); + + Template.page.preserve(['#inputarea textarea']); + + var selectTextInArea = function (e, start, end){ + e.focus(); + if (e.setSelectionRange) { + e.setSelectionRange(start, end); + } else if (e.createTextRange) { + var r = e.createTextRange(); + r.collapse(true); + r.moveEnd('character', end); + r.moveStart('character', start); + r.select(); + } + }; + + var selectInputText = function(start, end) { + var textarea = DomUtils.find(document, '#inputarea textarea'); + selectTextInArea(textarea, start, end); + }; + +} diff --git a/packages/jsparse/lexer.js b/packages/jsparse/lexer.js new file mode 100644 index 0000000000..5736050fa0 --- /dev/null +++ b/packages/jsparse/lexer.js @@ -0,0 +1,346 @@ +////////// HELPERS + +var regexEscape = function (str) { + return str.replace(/[\][^$\\.*+?(){}|]/g, '\\$&'); +}; + +// Adapted from source code of 
http://xregexp.com/plugins/#unicode +var unicodeCategories = { + Ll: "0061-007A00B500DF-00F600F8-00FF01010103010501070109010B010D010F01110113011501170119011B011D011F01210123012501270129012B012D012F01310133013501370138013A013C013E014001420144014601480149014B014D014F01510153015501570159015B015D015F01610163016501670169016B016D016F0171017301750177017A017C017E-0180018301850188018C018D019201950199-019B019E01A101A301A501A801AA01AB01AD01B001B401B601B901BA01BD-01BF01C601C901CC01CE01D001D201D401D601D801DA01DC01DD01DF01E101E301E501E701E901EB01ED01EF01F001F301F501F901FB01FD01FF02010203020502070209020B020D020F02110213021502170219021B021D021F02210223022502270229022B022D022F02310233-0239023C023F0240024202470249024B024D024F-02930295-02AF037103730377037B-037D039003AC-03CE03D003D103D5-03D703D903DB03DD03DF03E103E303E503E703E903EB03ED03EF-03F303F503F803FB03FC0430-045F04610463046504670469046B046D046F04710473047504770479047B047D047F0481048B048D048F04910493049504970499049B049D049F04A104A304A504A704A904AB04AD04AF04B104B304B504B704B904BB04BD04BF04C204C404C604C804CA04CC04CE04CF04D104D304D504D704D904DB04DD04DF04E104E304E504E704E904EB04ED04EF04F104F304F504F704F904FB04FD04FF05010503050505070509050B050D050F05110513051505170519051B051D051F05210523052505270561-05871D00-1D2B1D6B-1D771D79-1D9A1E011E031E051E071E091E0B1E0D1E0F1E111E131E151E171E191E1B1E1D1E1F1E211E231E251E271E291E2B1E2D1E2F1E311E331E351E371E391E3B1E3D1E3F1E411E431E451E471E491E4B1E4D1E4F1E511E531E551E571E591E5B1E5D1E5F1E611E631E651E671E691E6B1E6D1E6F1E711E731E751E771E791E7B1E7D1E7F1E811E831E851E871E891E8B1E8D1E8F1E911E931E95-1E9D1E9F1EA11EA31EA51EA71EA91EAB1EAD1EAF1EB11EB31EB51EB71EB91EBB1EBD1EBF1EC11EC31EC51EC71EC91ECB1ECD1ECF1ED11ED31ED51ED71ED91EDB1EDD1EDF1EE11EE31EE51EE71EE91EEB1EED1EEF1EF11EF31EF51EF71EF91EFB1EFD1EFF-1F071F10-1F151F20-1F271F30-1F371F40-1F451F50-1F571F60-1F671F70-1F7D1F80-1F871F90-1F971FA0-1FA71FB0-1FB41FB61FB71FBE1FC2-1FC41FC61FC71FD0-1FD31FD61FD71FE0-1FE71FF2-1FF41FF61FF7210A210E210F2113212F21342139213C213D2146-21
49214E21842C30-2C5E2C612C652C662C682C6A2C6C2C712C732C742C76-2C7B2C812C832C852C872C892C8B2C8D2C8F2C912C932C952C972C992C9B2C9D2C9F2CA12CA32CA52CA72CA92CAB2CAD2CAF2CB12CB32CB52CB72CB92CBB2CBD2CBF2CC12CC32CC52CC72CC92CCB2CCD2CCF2CD12CD32CD52CD72CD92CDB2CDD2CDF2CE12CE32CE42CEC2CEE2CF32D00-2D252D272D2DA641A643A645A647A649A64BA64DA64FA651A653A655A657A659A65BA65DA65FA661A663A665A667A669A66BA66DA681A683A685A687A689A68BA68DA68FA691A693A695A697A723A725A727A729A72BA72DA72F-A731A733A735A737A739A73BA73DA73FA741A743A745A747A749A74BA74DA74FA751A753A755A757A759A75BA75DA75FA761A763A765A767A769A76BA76DA76FA771-A778A77AA77CA77FA781A783A785A787A78CA78EA791A793A7A1A7A3A7A5A7A7A7A9A7FAFB00-FB06FB13-FB17FF41-FF5A", + Lm: "02B0-02C102C6-02D102E0-02E402EC02EE0374037A0559064006E506E607F407F507FA081A0824082809710E460EC610FC17D718431AA71C78-1C7D1D2C-1D6A1D781D9B-1DBF2071207F2090-209C2C7C2C7D2D6F2E2F30053031-3035303B309D309E30FC-30FEA015A4F8-A4FDA60CA67FA717-A71FA770A788A7F8A7F9A9CFAA70AADDAAF3AAF4FF70FF9EFF9F", + Lo: 
"00AA00BA01BB01C0-01C3029405D0-05EA05F0-05F20620-063F0641-064A066E066F0671-06D306D506EE06EF06FA-06FC06FF07100712-072F074D-07A507B107CA-07EA0800-08150840-085808A008A2-08AC0904-0939093D09500958-09610972-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A390A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE00AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B830B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E450E810E820E840E870E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC40EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066106E-10701075-1081108E10D0-10FA10FD-1248124A-124D1250-12561258125A-125D1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-13151318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-17311740-17511760-176C176E-17701780-17B317DC1820-18421844-18771880-18A818AA18B0-18F51900-191C1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541B05-1B331B45-1B4B1B83-1BA01BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C771CE9-1CEC1CEE-1CF11CF51CF62135-21382D30-2D672D80-2D962DA0-2DA62DA8-2DAE2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE3006303C3041-3096309F30A1-30FA30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCCA000-A014A016-A48CA4D0-A4F7A500-A60BA610-A61FA62AA62BA66EA6A0-A6E5A7FB-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2AA00-AA28AA40-AA42AA44-AA4BAA60-AA6FAA71-AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADBAADCAAE0-AAEAAAF2AB01-AB06AB
09-AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3DFD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF66-FF6FFF71-FF9DFFA0-FFBEFFC2-FFC7FFCA-FFCFFFD2-FFD7FFDA-FFDC", + Lt: "01C501C801CB01F21F88-1F8F1F98-1F9F1FA8-1FAF1FBC1FCC1FFC", + Lu: "0041-005A00C0-00D600D8-00DE01000102010401060108010A010C010E01100112011401160118011A011C011E01200122012401260128012A012C012E01300132013401360139013B013D013F0141014301450147014A014C014E01500152015401560158015A015C015E01600162016401660168016A016C016E017001720174017601780179017B017D018101820184018601870189-018B018E-0191019301940196-0198019C019D019F01A001A201A401A601A701A901AC01AE01AF01B1-01B301B501B701B801BC01C401C701CA01CD01CF01D101D301D501D701D901DB01DE01E001E201E401E601E801EA01EC01EE01F101F401F6-01F801FA01FC01FE02000202020402060208020A020C020E02100212021402160218021A021C021E02200222022402260228022A022C022E02300232023A023B023D023E02410243-02460248024A024C024E03700372037603860388-038A038C038E038F0391-03A103A3-03AB03CF03D2-03D403D803DA03DC03DE03E003E203E403E603E803EA03EC03EE03F403F703F903FA03FD-042F04600462046404660468046A046C046E04700472047404760478047A047C047E0480048A048C048E04900492049404960498049A049C049E04A004A204A404A604A804AA04AC04AE04B004B204B404B604B804BA04BC04BE04C004C104C304C504C704C904CB04CD04D004D204D404D604D804DA04DC04DE04E004E204E404E604E804EA04EC04EE04F004F204F404F604F804FA04FC04FE05000502050405060508050A050C050E05100512051405160518051A051C051E05200522052405260531-055610A0-10C510C710CD1E001E021E041E061E081E0A1E0C1E0E1E101E121E141E161E181E1A1E1C1E1E1E201E221E241E261E281E2A1E2C1E2E1E301E321E341E361E381E3A1E3C1E3E1E401E421E441E461E481E4A1E4C1E4E1E501E521E541E561E581E5A1E5C1E5E1E601E621E641E661E681E6A1E6C1E6E1E701E721E741E761E781E7A1E7C1E7E1E801E821E841E861E881E8A1E8C1E8E1E901E921E941E9E1EA01EA21EA41EA61EA81EAA1EAC1EAE1EB01EB21EB41EB61EB81EBA1EBC1EBE1EC01EC21EC41EC61EC81ECA1ECC1ECE1ED01ED21ED41ED61ED81EDA1EDC1EDE1EE01EE21EE4
1EE61EE81EEA1EEC1EEE1EF01EF21EF41EF61EF81EFA1EFC1EFE1F08-1F0F1F18-1F1D1F28-1F2F1F38-1F3F1F48-1F4D1F591F5B1F5D1F5F1F68-1F6F1FB8-1FBB1FC8-1FCB1FD8-1FDB1FE8-1FEC1FF8-1FFB21022107210B-210D2110-211221152119-211D212421262128212A-212D2130-2133213E213F214521832C00-2C2E2C602C62-2C642C672C692C6B2C6D-2C702C722C752C7E-2C802C822C842C862C882C8A2C8C2C8E2C902C922C942C962C982C9A2C9C2C9E2CA02CA22CA42CA62CA82CAA2CAC2CAE2CB02CB22CB42CB62CB82CBA2CBC2CBE2CC02CC22CC42CC62CC82CCA2CCC2CCE2CD02CD22CD42CD62CD82CDA2CDC2CDE2CE02CE22CEB2CED2CF2A640A642A644A646A648A64AA64CA64EA650A652A654A656A658A65AA65CA65EA660A662A664A666A668A66AA66CA680A682A684A686A688A68AA68CA68EA690A692A694A696A722A724A726A728A72AA72CA72EA732A734A736A738A73AA73CA73EA740A742A744A746A748A74AA74CA74EA750A752A754A756A758A75AA75CA75EA760A762A764A766A768A76AA76CA76EA779A77BA77DA77EA780A782A784A786A78BA78DA790A792A7A0A7A2A7A4A7A6A7A8A7AAFF21-FF3A", + Mc: "0903093B093E-09400949-094C094E094F0982098309BE-09C009C709C809CB09CC09D70A030A3E-0A400A830ABE-0AC00AC90ACB0ACC0B020B030B3E0B400B470B480B4B0B4C0B570BBE0BBF0BC10BC20BC6-0BC80BCA-0BCC0BD70C01-0C030C41-0C440C820C830CBE0CC0-0CC40CC70CC80CCA0CCB0CD50CD60D020D030D3E-0D400D46-0D480D4A-0D4C0D570D820D830DCF-0DD10DD8-0DDF0DF20DF30F3E0F3F0F7F102B102C10311038103B103C105610571062-10641067-106D108310841087-108C108F109A-109C17B617BE-17C517C717C81923-19261929-192B193019311933-193819B0-19C019C819C91A19-1A1B1A551A571A611A631A641A6D-1A721B041B351B3B1B3D-1B411B431B441B821BA11BA61BA71BAA1BAC1BAD1BE71BEA-1BEC1BEE1BF21BF31C24-1C2B1C341C351CE11CF21CF3302E302FA823A824A827A880A881A8B4-A8C3A952A953A983A9B4A9B5A9BAA9BBA9BD-A9C0AA2FAA30AA33AA34AA4DAA7BAAEBAAEEAAEFAAF5ABE3ABE4ABE6ABE7ABE9ABEAABEC", + Mn: 
"0300-036F0483-04870591-05BD05BF05C105C205C405C505C70610-061A064B-065F067006D6-06DC06DF-06E406E706E806EA-06ED07110730-074A07A6-07B007EB-07F30816-0819081B-08230825-08270829-082D0859-085B08E4-08FE0900-0902093A093C0941-0948094D0951-095709620963098109BC09C1-09C409CD09E209E30A010A020A3C0A410A420A470A480A4B-0A4D0A510A700A710A750A810A820ABC0AC1-0AC50AC70AC80ACD0AE20AE30B010B3C0B3F0B41-0B440B4D0B560B620B630B820BC00BCD0C3E-0C400C46-0C480C4A-0C4D0C550C560C620C630CBC0CBF0CC60CCC0CCD0CE20CE30D41-0D440D4D0D620D630DCA0DD2-0DD40DD60E310E34-0E3A0E47-0E4E0EB10EB4-0EB90EBB0EBC0EC8-0ECD0F180F190F350F370F390F71-0F7E0F80-0F840F860F870F8D-0F970F99-0FBC0FC6102D-10301032-10371039103A103D103E10581059105E-10601071-1074108210851086108D109D135D-135F1712-17141732-1734175217531772177317B417B517B7-17BD17C617C9-17D317DD180B-180D18A91920-19221927192819321939-193B1A171A181A561A58-1A5E1A601A621A65-1A6C1A73-1A7C1A7F1B00-1B031B341B36-1B3A1B3C1B421B6B-1B731B801B811BA2-1BA51BA81BA91BAB1BE61BE81BE91BED1BEF-1BF11C2C-1C331C361C371CD0-1CD21CD4-1CE01CE2-1CE81CED1CF41DC0-1DE61DFC-1DFF20D0-20DC20E120E5-20F02CEF-2CF12D7F2DE0-2DFF302A-302D3099309AA66FA674-A67DA69FA6F0A6F1A802A806A80BA825A826A8C4A8E0-A8F1A926-A92DA947-A951A980-A982A9B3A9B6-A9B9A9BCAA29-AA2EAA31AA32AA35AA36AA43AA4CAAB0AAB2-AAB4AAB7AAB8AABEAABFAAC1AAECAAEDAAF6ABE5ABE8ABEDFB1EFE00-FE0FFE20-FE26", + Nd: "0030-00390660-066906F0-06F907C0-07C90966-096F09E6-09EF0A66-0A6F0AE6-0AEF0B66-0B6F0BE6-0BEF0C66-0C6F0CE6-0CEF0D66-0D6F0E50-0E590ED0-0ED90F20-0F291040-10491090-109917E0-17E91810-18191946-194F19D0-19D91A80-1A891A90-1A991B50-1B591BB0-1BB91C40-1C491C50-1C59A620-A629A8D0-A8D9A900-A909A9D0-A9D9AA50-AA59ABF0-ABF9FF10-FF19", + Nl: "16EE-16F02160-21822185-218830073021-30293038-303AA6E6-A6EF", + Pc: "005F203F20402054FE33FE34FE4D-FE4FFF3F" +}; + +var unicodeClass = function (abbrev) { + return '[' + + unicodeCategories[abbrev].replace(/[0-9A-F]{4}/ig, "\\u$&") + ']'; +}; + +// See ECMA-262 spec, 3rd edition, section 7 + +// Section 7.2 +// Match one or more 
characters of whitespace, excluding line terminators. +// Do this by matching reluctantly, stopping at a non-dot (line terminator +// or end of string) or a non-whitespace. +// We are taking advantage of the fact that we are parsing JS from JS in +// regexes like this by "passing through" the spec's definition of whitespace, +// which is the same in regexes and the lexical grammar. +var rWhiteSpace = /(?=.)\s+?((?!.)|(?=\S))/g; +// Section 7.3 +// Match one line terminator. Same as (?!.)[\s\S] but more explicit. +var rLineTerminator = /[\u000A\u000D\u2028\u2029]/g; +// Section 7.4 +// Match one multi-line comment. +// [\s\S] is shorthand for any character, including newlines. +// The *? reluctant qualifier makes this easy. +var rMultiLineComment = /\/\*[\s\S]*?\*\//g; +// Match one single-line comment, not including the line terminator. +var rSingleLineComment = /\/\/.*/g; +// Section 7.6 +// Match one or more characters that can start an identifier. +// This is IdentifierStart+. +var rIdentifierPrefix = new RegExp( + "([a-zA-Z$_]+|\\\\u[0-9a-fA-F]{4}|" + + [unicodeClass('Lu'), unicodeClass('Ll'), unicodeClass('Lt'), + unicodeClass('Lm'), unicodeClass('Lo'), unicodeClass('Nl')].join('|') + + ")+", 'g'); +// Match one or more characters that can continue an identifier. +// This is (IdentifierPart and not IdentifierStart)+. +// To match a full identifier, match rIdentifierPrefix, then +// match rIdentifierMiddle followed by rIdentifierPrefix until they both fail. +var rIdentifierMiddle = new RegExp( + "([0-9]|" + [unicodeClass('Mn'), unicodeClass('Mc'), unicodeClass('Nd'), + unicodeClass('Pc')].join('|') + ")+", 'g'); +// Section 7.7 +// Match one punctuator (except for division punctuators). +var rPunctuator = new RegExp( + regexEscape("{ } ( ) [ ] . ; , < > <= >= == != === !== + - * % ++ -- << >> "+ + ">>> & | ^ ! ~ && || ? : = += -= *= %= <<= >>= >>>= &= |= ^=") + // sort from longest to shortest so that we don't match '==' for '===' and + // '*' for '*=', etc. 
+ .split(' ').sort(function (a,b) { return b.length - a.length; }) + .join('|'), 'g'); +// Match one division punctuator, `/` or `/=`. The `=` is optional so that +// `/=` is taken as a single token instead of stopping at the bare `/`. +var rDivPunctuator = /\/=?/g; +// Section 7.8.3 +// Not anchored with `$`: a hex literal can occur anywhere in the code. +var rHexLiteral = /0[xX][0-9a-fA-F]+/g; +var rOctLiteral = /0[0-7]+/g; // deprecated +// A leading `0` followed by a digit or `x` is not a decimal literal, so +// decline to match and let the hex and octal patterns have a turn. +var rDecLiteral = + /(((0(?![0-9xX])|[1-9][0-9]*)(\.[0-9]*)?)|\.[0-9]+)([Ee][+-]?[0-9]+)?/g; +// Section 7.8.4 +var rStringQuote = /["']/g; +// Match one or more characters besides quotes, backslashes, or line ends +var rStringMiddle = /(?=.)[^"'\\]+?((?!.)|(?=["'\\]))/g; +// Match one escape sequence, including the backslash. +var rEscapeSequence = + /\\(['"\\bfnrtv]|0(?![0-9])|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|(?=.)[^ux0-9])/g; +// Section 7.8.5 +// Match one regex literal, including slashes, not including flags. +// XXX Add support for unescaped '/' in character class, allowed by 5th ed. +var rRegexLiteral = /\/(?![*\/])(\\.|(?=.)[^\\])+?\//g; +var rRegexFlags = /[a-zA-Z]*/g; + +// Classify the next lexeme from its first one or two characters. +// Groups: 1 whitespace, 2 slash forms, 3 punctuation, 4 number start, +// 5 quote, 6 any other non-line-terminator, 7 line terminator. +// A `.` directly followed by a digit starts a number (group 4), not a +// punctuator (group 3). +var rDecider = + /((?=.)\s)|(\/[\/\*]?)|(\.(?!\d)|[\][{}();,<>=!+*%&|^~?:-])|(\.?\d)|(["'])|(.)|([\S\s])/g; + +var keywordLookup = { + ' break': 'KEYWORD', + ' case': 'KEYWORD', + ' catch': 'KEYWORD', + ' continue': 'KEYWORD', + ' debugger': 'KEYWORD', + ' default': 'KEYWORD', + ' delete': 'KEYWORD', + ' do': 'KEYWORD', + ' else': 'KEYWORD', + ' finally': 'KEYWORD', + ' for': 'KEYWORD', + ' function': 'KEYWORD', + ' if': 'KEYWORD', + ' in': 'KEYWORD', + ' instanceof': 'KEYWORD', + ' new': 'KEYWORD', + ' return': 'KEYWORD', + ' switch': 'KEYWORD', + ' this': 'KEYWORD', + ' throw': 'KEYWORD', + ' try': 'KEYWORD', + ' typeof': 'KEYWORD', + ' var': 'KEYWORD', + ' void': 'KEYWORD', + ' while': 'KEYWORD', + ' with': 'KEYWORD', + + ' false': 'BOOLEAN', + ' true': 'BOOLEAN', + + ' null': 'NULL' +}; + +var makeSet = function (array) { + var s = {}; + for (var i = 0, N = array.length; i < N; i++) + s[array[i]] = true; + return s; +}; + +var nonTokenTypes = makeSet('WHITESPACE COMMENT NEWLINE EOF ERROR'.split(' ')); + +var punctuationBeforeDivision = makeSet('] ) } ++ --'.split(' ')); +var 
keywordsBeforeDivision = makeSet('this'.split(' ')); + +var guessIsDivisionPermittedAfterToken = function (type, text) { + // Figure out if a '/' character should be interpreted as division + // rather than the start of a regular expression when it follows the + // token (type,text), which must be a token lexeme per + // Lexer.isToken. The beginning of section 7 of the spec briefly + // explains what's going on; basically the lexical grammar can't + // distinguish, for example, `e/f/g` (division) from `e=/f/g` + // (assignment of a regular expression), among many other variations. + // + // THIS IS ONLY A HEURISTIC, though it will rarely fail. + // Here are the two cases I know of: + // - if (foo) + // /ba/.test("banana") && console.log("matches"); + // (Close paren of a control structure before a statement starting with + // a regex literal. Starting a statement with a regex literal is + // unusual, of course, because it's hard to have a side effect.) + // - ++ /foo/.abc + // (Prefix `++` or `--` before an expression starting with a regex + // literal. This will run but I can't see any use for it.) + switch (type) { + case "PUNCTUATION": + // few punctuators can end an expression, but e.g. `)` + return !! punctuationBeforeDivision[text]; + case "KEYWORD": + // few keywords can end an expression, but e.g. `this` + return !! keywordsBeforeDivision[text]; + case "IDENTIFIER": + return true; + default: // literal + return true; + } +}; + +////////// PUBLIC API + +// Create a Lexer for the given string of JavaScript code. +// +// A lexer keeps a pointer `pos` into the string that is +// advanced when you ask for the next lexeme with `next()`. +// +// Properties: +// code: Original JavaScript code string. +// pos: Current index into the string. You can assign to it +// to continue lexing from a different position. After +// calling next(), it is the ending index of the most +// recent lexeme. +// lastPos: The starting index of the most recent lexeme. 
+// Equal to `pos - text.length`. +// text: Text of the last lexeme as a string. +// type: Type of the last lexeme, as returned by `next()`. +// divisionPermitted: Whether a '/' character should be interpreted +// as division rather than the start of a regular expression. +// This flag is set automatically during lexing based on the +// previous token (i.e. the most recent token lexeme), but +// it is technically only a heuristic. +// This flag can be read and set manually to affect the +// parsing of the next token. + +var Lexer = function (code) { + this.code = code; + this.pos = 0; + this.lastPos = 0; + this.text = ""; + this.type = null; + this.divisionPermitted = false; +}; + +// A lexeme type is a token type unless it is listed in `nonTokenTypes`. +Lexer.isToken = function (type) { + return ! nonTokenTypes[type]; +}; + +// Return the type of the next lexeme starting at `pos`, and advance +// `pos` to the end of the lexeme. The text of the lexeme is available +// in `text`. The text is always the substring of `code` between the +// old and new values of `pos`. An "EOF" lexeme terminates +// the stream. "ERROR" lexemes indicate a bad input string. Out of all +// lexemes, only "EOF" has empty text, and it always has empty text. +// All others contain at least one character from the source code. +// +// Lexeme types: +// Literals: BOOLEAN, NULL, REGEX, NUMBER, STRING +// Whitespace-like: WHITESPACE, COMMENT, NEWLINE, EOF +// Other Tokens: IDENTIFIER, KEYWORD, PUNCTUATION +// ... and ERROR + +Lexer.prototype.next = function () { + var self = this; + var code = self.code; + var origPos = self.pos; + var divisionPermitted = self.divisionPermitted; + + if (origPos > code.length) + throw new Error("out of range"); + + // Running regexes inside this function will move this local + // `pos` forward. + // When we commit to emitting a lexeme, we'll set self.pos + // based on it. + var pos = origPos; + + // Emit a lexeme. Always called as `return lexeme(type)`. 
+ var lexeme = function (type) { + // If `pos` hasn't moved, we consider this an error. + // This means that grammar cases that only run one regex + // or an alternation ('||') of regexes don't need to + // check for failure. + // This also guarantees that only EOF lexemes are empty. + if (pos === origPos && type !== 'EOF') { + type = 'ERROR'; + pos = origPos + 1; + } + self.lastPos = origPos; + self.pos = pos; + self.text = code.substring(origPos, pos); + self.type = type; + if (Lexer.isToken(type)) + self.divisionPermitted = guessIsDivisionPermittedAfterToken(type, self.text); + return type; + }; + + if (pos === code.length) + return lexeme('EOF'); + + // Result of the regex match in the most recent call to `run`. + var match = null; + + // Run a regex starting from `pos`, recording the end of the matched + // string in `pos` and the match data in `match`. The regex must have + // the 'g' (global) flag. If it doesn't match at `pos`, set `match` + // to null. The caller should expect the regex to match at `pos`, as + // failure is too expensive to run in a tight loop. + var run = function (regex) { + // Cause regex matching to start at `pos`. + regex.lastIndex = pos; + match = regex.exec(code); + // Simulate "sticky" matching by throwing out the match if it + // didn't match exactly at `pos`. If it didn't, we may have + // just searched the entire string. + if (match && (match.index !== pos)) + match = null; + // Record the end position of the match back into `pos`. + if (match) + pos = regex.lastIndex; + return match; + }; + + // Decide which case of the grammar we are in based on one or two + // characters, then roll back `pos`. + run(rDecider); + pos = origPos; + + // Grammar cases + if (match[1]) { // \s + run(rWhiteSpace); + return lexeme('WHITESPACE'); + } + if (match[2]) { // one of //, /*, / + if (match[2] === '//') { + run(rSingleLineComment); + return lexeme('COMMENT'); + } + if (match[2] === '/*') { + run(rMultiLineComment); + return lexeme(match ? 
'COMMENT' : 'ERROR'); + } + if (match[2] === '/') { + if (divisionPermitted) { + run(rDivPunctuator); + return lexeme('PUNCTUATION'); + } else { + run(rRegexLiteral); + if (! match) + return lexeme('ERROR'); + run(rRegexFlags); + return lexeme('REGEX'); + } + } + } + if (match[3]) { // any other punctuation char + run(rPunctuator); + return lexeme(match ? 'PUNCTUATION' : 'ERROR'); + } + if (match[4]) { // 0-9 + run(rDecLiteral) || run(rHexLiteral) || run(rOctLiteral); + return lexeme(match ? 'NUMBER' : 'ERROR'); + } + if (match[5]) { // " or ' + run(rStringQuote); + var quote = match[0]; + do { + run(rStringMiddle) || run(rEscapeSequence) || run(rStringQuote); + } while (match && match[0] !== quote); + if (! (match && match[0] === quote)) + return lexeme('ERROR'); + return lexeme('STRING'); + } + if (match[7]) { // non-dot (line terminator) + run(rLineTerminator); + return lexeme('NEWLINE'); + } + // dot (any non-line-terminator) + run(rIdentifierPrefix); + // Use non-short-circuiting OR, '|', to allow matching + // both regexes in sequence, returning false only if neither + // matched. 
+ // Coerce each result to a boolean first: `run` returns a match array or + // null, and `array | null` evaluates to 0 here, which ended the loop after + // a single pass and cut identifiers such as `foo1bar2` short. + while ((!! run(rIdentifierMiddle)) | (!! run(rIdentifierPrefix))) {/*continue*/} + var word = code.substring(origPos, pos); + return lexeme(keywordLookup[' '+word] || 'IDENTIFIER'); +}; diff --git a/packages/jsparse/package.js b/packages/jsparse/package.js new file mode 100644 index 0000000000..a3b4119909 --- /dev/null +++ b/packages/jsparse/package.js @@ -0,0 +1,9 @@ +Package.describe({ + summary: "Full-featured JavaScript parser" +}); + +Package.on_use(function (api) { + api.add_files([ + 'parser.js', + 'lexer.js'], ['client', 'server']); +}); diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js new file mode 100644 index 0000000000..861cdc159d --- /dev/null +++ b/packages/jsparse/parser.js @@ -0,0 +1,935 @@ + +// NOTES + +// push/pop lexer position +// need to support [no LineTerminator here] +// need to be able to look back at original whitespace later, +// find all the whitespace before a token +// "token" means anything but whitespace, newline, or comment +// multiline comments produce virtual newlines +// maybe conform to the spec's token input to the syntactic grammar? + +// XXX track line/col position, for errors and maybe token info +// XXX implement `required(parser, prev)` + +var isArray = function (obj) { + return obj && (typeof obj === 'object') && (typeof obj.length === 'number'); +}; + +Tokenizer = function (codeOrLexer) { + // XXX rethink codeOrLexer later + this.lexer = (codeOrLexer instanceof Lexer ? codeOrLexer : + new Lexer(codeOrLexer)); + this.peekType = null; + this.peekText = null; + this.tokenType = null; + this.tokenText = null; + this.lastPos = 0; + this.pos = 0; + this.isLineTerminatorHere = false; + + // load peekType and peekText + this.consume(); +}; + +_.extend(Tokenizer.prototype, { + // consumes the token (peekType, peekText) and moves + // it into (type, text), loading the next token + // into (peekType, peekText). A token is a lexeme + // besides WHITESPACE, COMMENT, and NEWLINE. 
+ consume: function () { + var self = this; + var lexer = self.lexer; + self.type = self.peekType; + self.text = self.peekText; + self.lastPos = self.pos; + self.isLineTerminatorHere = false; + do { + lexer.next(); + if (lexer.type === "ERROR") + throw new Error("Bad token at position " + lexer.lastPos + + ", text `" + lexer.text + "`"); + else if (lexer.type === "NEWLINE") + self.isLineTerminatorHere = true; + else if (lexer.type === "COMMENT" && ! /^.*$/.test(lexer.text)) + // multiline comments containing line terminators count + // as line terminators. + self.isLineTerminatorHere = true; + } while (lexer.type !== "EOF" && ! Lexer.isToken(lexer.type)); + self.peekType = lexer.type; + self.peekText = lexer.text; + self.pos = lexer.lastPos; + } +}); + +// A parser that consume()s has to succeed. +// Similarly, a parser that fails can't have consumed. + +// mutates the parser; don't describe an existing parser. +var describe = function (description, parser) { + parser.description = description; + return parser; +}; + +// Call this as `throw parseError(...)`. +// `expected` is a parser, `after` is a string. +var parseError = function (t, expected, after) { + var str = (expected.description ? "Expected " + expected.description : + // all parsers that might error should have descriptions, + // but just in case: + "Unexpected token"); + if (after) + str += " after " + (after.text ? "`" + after.text + "`" : after); + var pos = t.lastPos; + str += " at position " + pos; + str += ", found " + (t.peekText ? "`" + t.peekText + "`" : "EOF"); + var e = new Error(str); + return e; +}; + +///// TERMINAL PARSER CONSTRUCTORS + +var _tokenClassImpl = function (type, text, dontConsume) { + var textSet = (text ? makeSet(text.split(' ')) : null); + var description = (text ? 
text.split(' ').join(', ') : type); + return describe( + description, + function (t) { + if (t.peekType == type && (!text || textSet[t.peekText])) { + if (dontConsume) + return []; + var ret = {text: t.peekText, pos: t.pos}; + t.consume(); + return ret; + } + return null; + }); +}; + +var _tokenImpl = function (text, dontConsume) { + if (/\w/.test(text)) + return _tokenClassImpl('KEYWORD', text, dontConsume); + return _tokenClassImpl('PUNCTUATION', text, dontConsume); +}; + +var tokenClass = function (type, text) { + if (type === "ERROR" || type === "EOF") + throw new Error("Can't create EOF or ERROR tokens, can only look ahead"); + return _tokenClassImpl(type, text); +}; + +var token = function (text) { + return _tokenImpl(text); +}; + +// Like token, but marks tokens that need to defy the lexer's +// heuristic about whether the next '/' is a division or +// starts a regex. +var preSlashToken = function (text, divisionNotRegex) { + var impl = _tokenImpl(text); + return describe(impl.description, + function (t) { + // temporarily set divisionPermitted, + // restoring it if we don't match. + var oldValue = t.lexer.divisionPermitted; + var result; + try { + t.lexer.divisionPermitted = divisionNotRegex; + result = impl(t); + return result; + } finally { + if (! result) + t.lexer.divisionPermitted = oldValue; + } + }); +}; + +// NON-CONSUMING PARSER CONSTRUCTORS + +var lookAheadTokenClass = function (type, text) { + return _tokenClassImpl(type, text, true); +}; + +var lookAheadToken = function (text) { + return _tokenImpl(text, true); +}; + +///// NON-TERMINAL PARSER CONSTRUCTORS + +// Combinators that take names must provide descriptions. +// Otherwise, it is up to the call to provide a description. + +// Polymorphic in parsers and results; an experiment. +var named = function(name, parserOrResult) { + return describe( + name, + revalue( + parserOrResult, + function (value) { + if (! value) + return null; + + var result; + if (isArray(value) && ! 
value.named) + // bare array, prepend the name + result = [name].concat(Array.prototype.slice.call(value)); + else + // token or named array; construct a new named array + result = [name, value]; + + // don't name the same thing twice + result.named = true; + + return result; + })); +}; + +var or = function (/*parsers*/) { + var args = arguments; + return function (t) { + var result; + for(var i = 0, N = args.length; i < N; i++) { + result = args[i](t); + if (result) + return result; + } + return null; + }; +}; + +// Parses a left-recursive expression with zero or more occurrences +// of a binary op. Leaves the term unwrapped if no op. For example +// (in a hypothetical use case): +// `1` => "1" +// `1+2` => ["binary", "1", "+", "2"] +// `1+2+3` => ["binary", ["binary", "1", "+", "2"], "+", "3"] +// +// opParser can also be an array of op parsers from high to low +// precedence (tightest-binding first) +var binaryLeft = function (termParser, opParser) { + if (isArray(opParser)) { + if (opParser.length === 1) { + // take single opParser out of its array + opParser = opParser[0]; + } else { + // pop off last opParser (non-destructively) and replace + // termParser with a recursive binaryLeft on the remaining + // ops. + termParser = binaryLeft(termParser, opParser.slice(0, -1)); + opParser = opParser[opParser.length - 1]; + } + } + + return describe( + termParser.description, + function (t) { + var result = termParser(t); + if (! result) + return null; + + var op; + while ((op = opParser(t))) { + result = named('binary', [result, op, termParser(t)]); + if (! result[result.length - 1]) + throw parseError(t, termParser, result[result.length - 2]); + } + return result; + }); +}; + +// Parses a list of one or more items with a separator, listing the +// items and separators. (Separator is optional.) 
For example: +// `x` => ["x"] +// `x,y` => ["x", ",", "y"] +// `x,y,z` => ["x", ",", "y", ",", "z"] +var list = function (itemParser, sepParser) { + return describe( + itemParser.description, + function (t) { + var result = [itemParser(t)]; + if (! result[0]) + return null; + + if (sepParser) { + var sep; + while ((sep = sepParser(t))) { + result.push(sep, itemParser(t)); + if (! result[result.length - 1]) + throw parseError(t, itemParser, result[result.length - 2]); + } + } else { + var item; + while ((item = itemParser(t))) + result.push(item); + } + return result; + }); +}; + +var seq = function (/*parsers*/) { + var args = arguments; + if (! args.length) + return describe("(empty)", + function (t) { return []; }); + + var description = args[0].description; + for (var i = 1; i < args.length; i++) + description += " " + args[i].description; + return describe( + description, + function (t) { + var result = []; + for (var i = 0, N = args.length; i < N; i++) { + var r = args[i](t); + if (! r) { + if (i === 0) + return null; // not committed on first item + throw parseError(t, args[i]); + } + if (r.unpack) // append array! + result.push.apply(result, r); + else + result.push(r); + } + return result; + }); +}; + +var unpack = function (arrayParser) { + return revalue(arrayParser, function (v) { + if (v && isArray(v)) + v.unpack = true; + return v; + }); +}; + +// lookAhead parser must never consume +var lookAhead = function (lookAheadParser, nextParser) { + return describe( + lookAheadParser.description, + function (t) { + if (! lookAheadParser(t)) + return null; + return nextParser(t); + }); +}; +var negLookAhead = function (lookAheadParser, nextParser) { + if (! nextParser) + return function (t) { + return lookAheadParser(t) ? 
null : []; + }; + + return describe( + nextParser.description, + function (t) { + if (lookAheadParser(t)) + return null; + return nextParser(t); + }); +}; + +// parser that looks at nothing and returns result +var constant = function (result) { + // no description + return function (t) { + return result; + }; +}; + +// afterLookAhead allows the parser to fail rather than +// succeed if would otherwise fail at a position where +// afterLookAhead doesn't match, potentially providing +// a better error message. For example, the illegal +// object literal `{true:1}` will stop at the `true` +// and say something like "expected property name" +// instead of "expected }". As another example, +// `for(;var;) {}` will lead to "Expected expression" +// instead of "Expected ;" when the optional expression +// turns out to be an illegal `var`. +var opt = function (parser, afterLookAhead) { + return describe(parser.description, + or(parser, afterLookAhead ? afterLookAhead : seq())); +}; + +// note: valueTransformFunc gets the tokenizer as a second argument +// if it's called on a parser. This func is allowed to then +// run more parsers. +var revalue = function (parserOrValue, valueTransformFunc) { + if (typeof parserOrValue === 'function') + // it's a parser + return describe(parserOrValue.description, + function (t) { + return valueTransformFunc(parserOrValue(t), t); + }); + else + return valueTransformFunc(parserOrValue); +}; + +var parse = function (tokenizer) { + var noLineTerminatorHere = describe( + 'noLineTerminator', function (t) { + return t.isLineTerminatorHere ? null : []; + }); + // Function that takes one-item arrays to their single item and names other + // arrays with `name`. Works on parsers too. + var nameIfMultipart = function (name, parser) { + return revalue( + parser, + function (parts) { + if (! parts) + return null; + return (parts.length === 1) ? + parts[0] : named(name, parts); + }); + }; + + // These "pointers" allow grammar circularity, i.e. 
accessing + // later parsers from earlier ones. + var expressionPtrFunc = function (noIn) { + return describe( + "expression", + function (t) { + return expressionFunc(noIn)(t); + }); + }; + var expressionPtr = expressionPtrFunc(false); + + var assignmentExpressionPtrFunc = function (noIn) { + return describe( + "expression", + function (t) { + return assignmentExpressionFunc(noIn)(t); + }); + }; + var assignmentExpressionPtr = assignmentExpressionPtrFunc(false); + + var functionBodyPtr = describe( + "functionBody", function (t) { + return functionBody(t); + }); + + var statementPtr = describe( + "statement", function (t) { + return statement(t); + }); + + var arrayLiteral = + named('array', + seq(token('['), + unpack(opt(list(assignmentExpressionPtr, + token(',')), lookAheadToken(']'))), + token(']'))); + + var propertyName = describe('propertyName', or( + named('identifier', tokenClass('IDENTIFIER')), + named('number', tokenClass('NUMBER')), + named('string', tokenClass('STRING')))); + var nameColonValue = describe( + 'name:value', + named('prop', seq(propertyName, token(':'), assignmentExpressionPtr))); + + var objectLiteral = + named('object', + seq(token('{'), + unpack(opt(list(nameColonValue, + token(',')), lookAheadToken('}'))), + token('}'))); + + // not memoized; only call at construction time + var functionFunc = function (nameRequired) { + return seq(token('function'), + (nameRequired ? 
tokenClass('IDENTIFIER') : + opt(tokenClass('IDENTIFIER'), + lookAheadToken('('))), + token('('), + unpack(opt(list(tokenClass('IDENTIFIER'), token(',')), + lookAheadToken(')'))), + token(')'), + token('{'), + unpack(functionBodyPtr), + token('}')); + }; + var functionExpression = named('functionExpr', + functionFunc(false)); + + var primaryOrFunctionExpression = + describe('expression', + or(named('this', token('this')), + named('identifier', tokenClass('IDENTIFIER')), + named('number', tokenClass('NUMBER')), + named('boolean', tokenClass('BOOLEAN')), + named('null', tokenClass('NULL')), + named('regex', tokenClass('REGEX')), + named('string', tokenClass('STRING')), + named('parens', + seq(token('('), expressionPtr, token(')'))), + arrayLiteral, + objectLiteral, + functionExpression)); + + var dotEnding = seq(token('.'), tokenClass('IDENTIFIER')); + var bracketEnding = seq(token('['), expressionPtr, token(']')); + var callArgs = seq(token('('), + unpack(opt(list(assignmentExpressionPtr, + token(',')), lookAheadToken(')'))), + token(')')); + + var newKeyword = token('new'); + + // This is a completely equivalent refactor of the spec's production + // for a LeftHandSideExpression. + // + // An lhsExpression is basically an expression that can serve as + // the left-hand-side of an assignment, though function calls and + // "new" invocation are included because they have the same + // precedence. Actually, the spec technically allows a function + // call to "return" a valid l-value, as in `foo(bar) = baz`, + // though no built-in or user-specifiable call has this property + // (it would have to be defined by a browser or other "host"). + var lhsExpression = describe( + 'expression', + function (t) { + // Accumulate all initial "new" keywords, not yet knowing + // if they have a corresponding argument list later. 
+ var news = []; + var n; + while ((n = newKeyword(t))) + news.push(n); + + // Read the primaryOrFunctionExpression that will be the "core" + // of this lhsExpression. It is preceded by zero or more `new` + // keywords, and followed by any sequence of (...), [...], + // and .foo add-ons. + var result = primaryOrFunctionExpression(t); + if (! result) { + if (! news.length) + return null; // not committed + else + throw parseError(t, primaryOrFunctionExpression, + news[news.length - 1]); + } + + // Our plan of attack is to apply each dot, bracket, or call + // as we come across it. Whether a call is a `new` call depends + // on whether there are `new` keywords we haven't used. If so, + // we pop one off the stack. + var done = false; + while (! done) { + var r; + if ((r = dotEnding(t))) { + result = named('dot', [result].concat(r)); + } else if ((r = bracketEnding(t))) { + result = named('bracket', [result].concat(r)); + } else if ((r = callArgs(t))) { + if (news.length) + result = named('newcall', [news.pop(), result].concat(r)); + else + result = named('call', [result].concat(r)); + } else { + done = true; + } + } + + // There may be more `new` keywords than calls, which is how + // paren-less constructions (`new Date`) are parsed. We've + // already handled `new foo().bar()`, now handle `new new foo().bar`. 
+ while (news.length) + result = named('new', [news.pop(), result]); + + // mark any LeftHandSideExpression, for the benefit of + // assignmentExpression + result.lhs = true; + + return result; + }); + + var postfixToken = token('++ --'); + var postfixLookahead = lookAheadToken('++ --'); + var postfixExpression = describe( + 'expression', + nameIfMultipart( + 'postfix', + seq(lhsExpression, + unpack(opt(lookAhead(noLineTerminatorHere, + lookAhead(postfixLookahead, + postfixToken))))))); + var unaryList = opt(list(or(token('delete void typeof'), + preSlashToken('++ -- + - ~ !', false)))); + var unaryExpression = describe( + 'expression', + function (t) { + var unaries = unaryList(t); + var result = postfixExpression(t); + if (! result) { + if (unaries.length) + // committed, have to error + throw parseError(t, postfixExpression, unaries[unaries.length - 1]); + return null; + } + + while (unaries.length) + result = named('unary', [unaries.pop(), result]); + return result; + }); + + var memoizeBooleanFunc = function (func) { + var trueResult, falseResult; + return function (flag) { + if (flag) + return trueResult || (trueResult = func(true)); + else + return falseResult || (falseResult = func(false)); + }; + }; + + // actually this is the spec's LogicalORExpression + var binaryExpressionFunc = memoizeBooleanFunc( + function (noIn) { + // high to low precedence + var binaryOps = [token('* / %'), + token('+ -'), + token('<< >> >>>'), + or(token('< > <= >='), + noIn ? 
token('instanceof') : + token('instanceof in')), + token('== != === !=='), + token('&'), + token('^'), + token('|'), + token('&&'), + token('||')]; + return describe( + 'expression', + binaryLeft(unaryExpression, binaryOps)); + }); + var binaryExpression = binaryExpressionFunc(false); + + var conditionalExpressionFunc = memoizeBooleanFunc( + function (noIn) { + return describe( + 'expression', + nameIfMultipart( + 'ternary', + seq(binaryExpressionFunc(noIn), unpack(opt(seq( + token('?'), + assignmentExpressionPtrFunc(false), token(':'), + assignmentExpressionPtrFunc(noIn))))))); + }); + var conditionalExpression = conditionalExpressionFunc(false); + + var assignOp = token('= *= /= %= += -= <<= >>= >>>= &= ^= |= '); + + var assignmentExpressionFunc = memoizeBooleanFunc( + function (noIn) { + return describe( + 'expression', + function (t) { + var r = conditionalExpressionFunc(noIn)(t); + if (! r) + return null; + + // Assignment is right-associative. + // Plan of attack: make a list of all the parts + // [expression, op, expression, op, ... expression] + // and then fold them up at the end. + var parts = [r]; + var op; + while (r.lhs && (op = assignOp(t))) { + r = conditionalExpressionFunc(noIn)(t); + if (! 
r) + throw parseError(t, conditionalExpressionFunc(noIn), r); + parts.push(op, r); + } + + var result = parts.pop(); + while (parts.length) { + op = parts.pop(); + var lhs = parts.pop(); + result = named('assignment', [lhs, op, result]); + } + return result; + }); + }); + var assignmentExpression = assignmentExpressionFunc(false); + + var expressionFunc = memoizeBooleanFunc( + function (noIn) { + return describe( + 'expression', + nameIfMultipart( + 'comma', + list(assignmentExpressionFunc(noIn), token(',')))); + }); + var expression = expressionFunc(false); + + // STATEMENTS + + var statements = list(statementPtr); + + // implements JavaScript's semicolon "insertion" rules + var maybeSemicolon = describe( + 'semicolon', + or(token(';'), + // the rest of these produce empty `[]` boxes + lookAheadToken('}'), + lookAheadTokenClass('EOF'), + function (t) { + return t.isLineTerminatorHere ? [] : null; + })); + + var expressionStatement = named( + 'expression', + negLookAhead( + or(lookAheadToken('{'), lookAheadToken('function')), + seq(expression, + describe('semicolon', + or(maybeSemicolon, + // allow presence of colon to terminate + // statement legally, for the benefit of + // expressionOrLabelStatement. Basically assume + // an implicit semicolon. This + // is safe because a colon can never legally + // follow a semicolon anyway. + lookAheadToken(':')))))); + + // it's hard to parse statement labels, as in + // `foo: x = 1`, because we can't tell from the + // first token whether we are looking at an expression + // statement or a label statement. To work around this, + // expressionOrLabelStatement parses the expression and + // then rewrites the result if it is an identifier + // followed by a colon. + var labelColonAndStatement = seq(token(':'), statementPtr); + var noColon = describe( + 'semicolon', + negLookAhead(lookAheadToken(':'))); + var expressionOrLabelStatement = function (t) { + var exprStmnt = expressionStatement(t); + if (! 
exprStmnt) + return null; + + var expr = exprStmnt[1]; + var maybeSemi = exprStmnt[2]; + if (expr[0] !== 'identifier' || maybeSemi.length) { + if (! noColon(t)) + // For better error messages, if there is a colon + // at the end of the expression, fail now and + // say "Expected semicolon" instead of failing + // later saying "Expected statement" after the + // colon. + throw parseError(t, noColon); + return exprStmnt; + } + + var rest = labelColonAndStatement(t); + if (! rest) + return exprStmnt; + + return named('label', + [expr[1]].concat(rest)); + }; + + var emptyStatement = named('empty', token(';')); // not maybeSemicolon + + var blockStatement = named('block', seq( + token('{'), unpack(opt(statements, lookAheadToken('}'))), + token('}'))); + + var varDeclFunc = memoizeBooleanFunc(function (noIn) { + return named( + 'varDecl', + seq(tokenClass('IDENTIFIER'), + unpack(opt(seq(token('='), + assignmentExpressionFunc(noIn)))))); + }); + var varDecl = varDeclFunc(false); + + var variableStatement = named( + 'variables', + seq(token('var'), unpack(list(varDecl, token(','))), + maybeSemicolon)); + + // A paren that may be followed by a statement + // beginning with a regex literal. 
+ var parenBeforeStatement = preSlashToken(')', false); + + var ifStatement = named( + 'if', + seq(token('if'), token('('), expression, + parenBeforeStatement, statementPtr, + unpack(opt(seq(token('else'), statementPtr))))); + + var secondThirdClauses = describe( + 'semicolon', + lookAhead(lookAheadToken(';'), + seq( + token(';'), + opt(expressionPtr, lookAheadToken(';')), + token(';'), + opt(expressionPtr, lookAheadToken(')'))))); + var inExpr = seq(token('in'), expression); + var inExprExpectingSemi = describe('semicolon', + seq(token('in'), expression)); + var forClauses = named( + 'forClauses', + or(seq(token('var'), + varDeclFunc(true), + describe( + 'commaOrIn', + or(unpack(inExpr), + unpack(seq( + unpack(opt( + seq(token(','), + unpack(list(varDeclFunc(true), token(',')))), + lookAheadToken(';'))), + unpack(secondThirdClauses)))))), + // get the case where the first clause is empty out of the way. + // the lookAhead's return value is the empty placeholder for the + // missing expression. + seq(lookAheadToken(';'), unpack(secondThirdClauses)), + // custom parser the non-var case because we have to + // read the first expression before we know if there's + // an "in". + function (t) { + var firstExpr = expressionFunc(true)(t); + if (! firstExpr) + return null; + var rest = secondThirdClauses(t); + if (! rest) { + // we need a left-hand-side expression for a + // `for (x in y)` loop. + if (! firstExpr.lhs) + throw parseError(t, secondThirdClauses); + // if we don't see 'in' at this point, it's probably + // a missing semicolon + rest = inExprExpectingSemi(t); + if (! 
rest) + throw parseError(t, inExprExpectingSemi); + } + + return [firstExpr].concat(rest); + })); + + var iterationStatement = or( + named('do', seq(token('do'), statementPtr, token('while'), + token('('), expression, token(')'), + maybeSemicolon)), + named('while', seq(token('while'), token('('), expression, + parenBeforeStatement, statementPtr)), + // semicolons must be real, not maybeSemicolons + named('for', seq( + token('for'), token('('), forClauses, parenBeforeStatement, + statementPtr))); + + var returnStatement = named( + 'return', + seq(token('return'), opt( + lookAhead(noLineTerminatorHere, expression)), + maybeSemicolon)); + var continueStatement = named( + 'continue', + seq(token('continue'), opt( + lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER'))), + maybeSemicolon)); + var breakStatement = named( + 'break', + seq(token('break'), opt( + lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER'))), + maybeSemicolon)); + var throwStatement = named( + 'throw', + seq(token('throw'), + lookAhead(noLineTerminatorHere, expression), + maybeSemicolon)); + + var withStatement = named( + 'with', + seq(token('with'), token('('), expression, parenBeforeStatement, + statementPtr)); + + var switchCase = named( + 'case', + seq(token('case'), expression, token(':'), + unpack(opt(statements, or(lookAheadToken('}'), + lookAheadToken('case default')))))); +var switchDefault = named( + 'default', + seq(token('default'), token(':'), + unpack(opt(statements, or(lookAheadToken('}'), + lookAheadToken('case')))))); + + var switchStatement = named( + 'switch', + seq(token('switch'), token('('), expression, token(')'), + token('{'), unpack(opt(list(switchCase), + or(lookAheadToken('}'), + lookAheadToken('default')))), + unpack(opt(seq(switchDefault, + unpack(opt(list(switchCase)))))), + token('}'))); + + var catchFinally = describe( + 'catchOrFinally', + lookAhead(lookAheadToken('catch finally'), + seq( + opt(named( + 'catch', + seq(token('catch'), token('('), 
tokenClass('IDENTIFIER'), + token(')'), blockStatement))), + opt(named( + 'finally', + seq(token('finally'), blockStatement)))))); + var tryStatement = named( + 'try', + seq(token('try'), blockStatement, unpack(catchFinally))); + var debuggerStatement = named( + 'debugger', seq(token('debugger'), maybeSemicolon)); + + var statement = describe('statement', + or(expressionOrLabelStatement, + emptyStatement, + blockStatement, + variableStatement, + ifStatement, + iterationStatement, + returnStatement, + continueStatement, + breakStatement, + withStatement, + switchStatement, + throwStatement, + tryStatement, + debuggerStatement)); + + // PROGRAM + + var functionDecl = named('functionDecl', + functionFunc(true)); + + var sourceElement = or(statement, functionDecl); + var sourceElements = list(sourceElement); + + var functionBody = describe('functionBody', + opt(sourceElements, + lookAheadToken('}'))); + + var program = named('program', + seq(unpack(opt(sourceElements)), + // we rely on the fact that opt(sourceElements) + // will never fail, and non-first arguments + // to seq are required to succeed -- meaning + // this parser will never fail without throwing + // a parse error. + describe('statement', + revalue(lookAheadTokenClass("EOF"), + function (v, t) { + if (! 
v) + return null; + // eat the last "EOF" so that + // our position is updated + t.consume(); + return unpack([]); + })))); + + return program(tokenizer); +}; From d17d38600d2e5628fba4690f56de7d6e4dc0bff7 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 5 Sep 2012 23:50:58 -0700 Subject: [PATCH 02/86] tweaks, better-looking implicit semis --- .../unfinished/jsparse-demo/jsparse-demo.css | 8 ++++++- .../unfinished/jsparse-demo/jsparse-demo.js | 2 +- packages/jsparse/parser.js | 21 ++++++++++++------- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.css b/examples/unfinished/jsparse-demo/jsparse-demo.css index 2ddd6a37fa..df70c3c0d6 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.css +++ b/examples/unfinished/jsparse-demo/jsparse-demo.css @@ -103,6 +103,7 @@ html, body { height: 100%; } .box.head:last-child { margin: 0; + border-bottom: 0; } .box.token { @@ -129,4 +130,9 @@ html, body { height: 100%; } .box.named[mousehover] > .box.head { background: #58b; border-bottom: 1px solid #448; -} \ No newline at end of file +} + +.box.named[mousehover] > .box.head:last-child { + border-bottom: 0; +} + diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index 3561937343..ef499af6ac 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -61,7 +61,7 @@ if (Meteor.is_client) { var unclosedInfos = []; var toHtml = function (obj) { if (_.isArray(obj)) { - var head = obj[0] || 'nothing'; + var head = obj[0] || 'none'; var rest = obj.slice(1); var info = { startPos: curPos }; var html = Spark.setDataContext( diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 861cdc159d..54513658c3 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -80,7 +80,7 @@ var parseError = function (t, expected, after) { "Unexpected token"); if (after) str 
+= " after " + (after.text ? "`" + after.text + "`" : after); - var pos = t.lastPos; + var pos = t.pos; str += " at position " + pos; str += ", found " + (t.peekText ? "`" + t.peekText + "`" : "EOF"); var e = new Error(str); @@ -670,12 +670,17 @@ var parse = function (tokenizer) { var maybeSemicolon = describe( 'semicolon', or(token(';'), - // the rest of these produce empty `[]` boxes - lookAheadToken('}'), - lookAheadTokenClass('EOF'), - function (t) { - return t.isLineTerminatorHere ? [] : null; - })); + revalue( + or( + lookAheadToken('}'), + lookAheadTokenClass('EOF'), + function (t) { + return t.isLineTerminatorHere ? [] : null; + }), + function (v) { + return v && named(';', []); + }))); + var expressionStatement = named( 'expression', @@ -710,7 +715,7 @@ var parse = function (tokenizer) { var expr = exprStmnt[1]; var maybeSemi = exprStmnt[2]; - if (expr[0] !== 'identifier' || maybeSemi.length) { + if (expr[0] !== 'identifier' || ! isArray(maybeSemi)) { if (! noColon(t)) // For better error messages, if there is a colon // at the end of the expression, fail now and From 6f46d17a54ba598dbcda727076d7782deb5ce344 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 00:18:55 -0700 Subject: [PATCH 03/86] change starting example --- examples/unfinished/jsparse-demo/jsparse-demo.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index ef499af6ac..2e07d09a27 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -3,7 +3,7 @@ if (Meteor.is_client) { Meteor.startup(function () { if (! 
Session.get("input")) - Session.set("input", "var x = 3;"); + Session.set("input", "var x = 3"); }); Template.page.input = function () { From cad29bd0b103317909e6c7c854dd019af0acec16 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 00:58:23 -0700 Subject: [PATCH 04/86] css tweaks --- examples/unfinished/jsparse-demo/jsparse-demo.css | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.css b/examples/unfinished/jsparse-demo/jsparse-demo.css index df70c3c0d6..9f7e79344f 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.css +++ b/examples/unfinished/jsparse-demo/jsparse-demo.css @@ -6,9 +6,9 @@ html, body { height: 100%; } position: absolute; width: 100%; top: 0; - height: 40px; - overflow: auto; + height: 39px; border-bottom: 1px solid #555; + overflow: auto; background: #cfc; } @@ -25,6 +25,8 @@ html, body { height: 100%; } } #inputarea textarea { + border: 0; + border-right: 1px solid #555; position: absolute; height: 100%; left: 0; From 194d3151f3bf3372d1b12956bdf5f802957156a0 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 01:05:21 -0700 Subject: [PATCH 05/86] implement runRequired and runMaybeRequired --- packages/jsparse/parser.js | 102 +++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 54513658c3..db3c22395b 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -10,7 +10,6 @@ // maybe conform to the spec's token input to the syntactic grammar? // XXX track line/col position, for errors and maybe token info -// XXX implement `required(parser, prev)` var isArray = function (obj) { return obj && (typeof obj === 'object') && (typeof obj.length === 'number'); @@ -156,8 +155,24 @@ var lookAheadToken = function (text) { ///// NON-TERMINAL PARSER CONSTRUCTORS -// Combinators that take names must provide descriptions. 
-// Otherwise, it is up to the call to provide a description. +// call as: runRequired(parser, tokenizer[, prevToken]) +// to run parser(tokenizer) and assert it matches +var runRequired = function (parser, tokenizer, prevToken) { + return revalue( + tokenizer ? parser(tokenizer) : parser, + function (v, t) { + if (! v) + throw parseError(t || tokenizer, parser, prevToken); + return v; + }); +}; + +var runMaybeRequired = function (require, parser, tokenizer, prevToken) { + if (require) + return runRequired(parser, tokenizer, prevToken); + else + return parser(tokenizer); +}; // Polymorphic in parsers and results; an experiment. var named = function(name, parserOrResult) { @@ -229,9 +244,9 @@ var binaryLeft = function (termParser, opParser) { var op; while ((op = opParser(t))) { - result = named('binary', [result, op, termParser(t)]); - if (! result[result.length - 1]) - throw parseError(t, termParser, result[result.length - 2]); + result = named( + 'binary', + [result, op, runRequired(termParser, t, op)]); } return result; }); @@ -252,11 +267,8 @@ var list = function (itemParser, sepParser) { if (sepParser) { var sep; - while ((sep = sepParser(t))) { - result.push(sep, itemParser(t)); - if (! result[result.length - 1]) - throw parseError(t, itemParser, result[result.length - 2]); - } + while ((sep = sepParser(t))) + result.push(sep, runRequired(itemParser, t, sep)); } else { var item; while ((item = itemParser(t))) @@ -280,12 +292,12 @@ var seq = function (/*parsers*/) { function (t) { var result = []; for (var i = 0, N = args.length; i < N; i++) { - var r = args[i](t); - if (! r) { - if (i === 0) - return null; // not committed on first item - throw parseError(t, args[i]); - } + // first item in sequence can fail, and we + // fail (without error); after that, error on failure + var r = runMaybeRequired(i > 0, args[i], t); + if (! r) + return null; + if (r.unpack) // append array! 
result.push.apply(result, r); else @@ -470,9 +482,9 @@ var parse = function (tokenizer) { var dotEnding = seq(token('.'), tokenClass('IDENTIFIER')); var bracketEnding = seq(token('['), expressionPtr, token(']')); var callArgs = seq(token('('), - unpack(opt(list(assignmentExpressionPtr, - token(',')), lookAheadToken(')'))), - token(')')); + unpack(opt(list(assignmentExpressionPtr, + token(',')), lookAheadToken(')'))), + token(')')); var newKeyword = token('new'); @@ -500,14 +512,12 @@ var parse = function (tokenizer) { // of this lhsExpression. It is preceded by zero or more `new` // keywords, and followed by any sequence of (...), [...], // and .foo add-ons. - var result = primaryOrFunctionExpression(t); - if (! result) { - if (! news.length) - return null; // not committed - else - throw parseError(t, primaryOrFunctionExpression, - news[news.length - 1]); - } + // if we have 'new' keywords, we are committed and must + // match an expression or error. + var result = runMaybeRequired(news.length, primaryOrFunctionExpression, + t, news[news.length - 1]); + if (! result) + return null; // Our plan of attack is to apply each dot, bracket, or call // as we come across it. Whether a call is a `new` call depends @@ -559,13 +569,12 @@ var parse = function (tokenizer) { 'expression', function (t) { var unaries = unaryList(t); - var result = postfixExpression(t); - if (! result) { - if (unaries.length) - // committed, have to error - throw parseError(t, postfixExpression, unaries[unaries.length - 1]); + // if we have unaries, we are committed and + // have to match an expression or error. + var result = runMaybeRequired(unaries.length, postfixExpression, + t, unaries[unaries.length - 1]); + if (! result) return null; - } while (unaries.length) result = named('unary', [unaries.pop(), result]); @@ -634,12 +643,9 @@ var parse = function (tokenizer) { // and then fold them up at the end. 
var parts = [r]; var op; - while (r.lhs && (op = assignOp(t))) { - r = conditionalExpressionFunc(noIn)(t); - if (! r) - throw parseError(t, conditionalExpressionFunc(noIn), r); - parts.push(op, r); - } + while (r.lhs && (op = assignOp(t))) + parts.push(op, + runRequired(conditionalExpressionFunc(noIn), t, op)); var result = parts.pop(); while (parts.length) { @@ -716,13 +722,11 @@ var parse = function (tokenizer) { var expr = exprStmnt[1]; var maybeSemi = exprStmnt[2]; if (expr[0] !== 'identifier' || ! isArray(maybeSemi)) { - if (! noColon(t)) - // For better error messages, if there is a colon - // at the end of the expression, fail now and - // say "Expected semicolon" instead of failing - // later saying "Expected statement" after the - // colon. - throw parseError(t, noColon); + // For better error messages, for example in `1+1:`, + // if there is a colon at the end of the expression, + // fail now and say "Expected semicolon" instead of failing + // later saying "Expected statement" at the colon. + runRequired(noColon, t); return exprStmnt; } @@ -807,9 +811,7 @@ var parse = function (tokenizer) { throw parseError(t, secondThirdClauses); // if we don't see 'in' at this point, it's probably // a missing semicolon - rest = inExprExpectingSemi(t); - if (! 
rest) - throw parseError(t, inExprExpectingSemi); + rest = runRequired(inExprExpectingSemi, t); } return [firstExpr].concat(rest); From ce65ffede8d1e72ffc40894cd4a30a486ea32173 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 01:05:31 -0700 Subject: [PATCH 06/86] whitespace fixes --- .../unfinished/jsparse-demo/jsparse-demo.js | 2 +- packages/jsparse/parser.js | 26 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index 2e07d09a27..77c36b9325 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -145,7 +145,7 @@ if (Meteor.is_client) { } }; - var selectInputText = function(start, end) { + var selectInputText = function (start, end) { var textarea = DomUtils.find(document, '#inputarea textarea'); selectTextInArea(textarea, start, end); }; diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index db3c22395b..625ca5e040 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -175,7 +175,7 @@ var runMaybeRequired = function (require, parser, tokenizer, prevToken) { }; // Polymorphic in parsers and results; an experiment. -var named = function(name, parserOrResult) { +var named = function (name, parserOrResult) { return describe( name, revalue( @@ -596,17 +596,17 @@ var parse = function (tokenizer) { function (noIn) { // high to low precedence var binaryOps = [token('* / %'), - token('+ -'), - token('<< >> >>>'), - or(token('< > <= >='), - noIn ? token('instanceof') : - token('instanceof in')), - token('== != === !=='), - token('&'), - token('^'), - token('|'), - token('&&'), - token('||')]; + token('+ -'), + token('<< >> >>>'), + or(token('< > <= >='), + noIn ? 
token('instanceof') : + token('instanceof in')), + token('== != === !=='), + token('&'), + token('^'), + token('|'), + token('&&'), + token('||')]; return describe( 'expression', binaryLeft(unaryExpression, binaryOps)); @@ -859,7 +859,7 @@ var parse = function (tokenizer) { seq(token('case'), expression, token(':'), unpack(opt(statements, or(lookAheadToken('}'), lookAheadToken('case default')))))); -var switchDefault = named( + var switchDefault = named( 'default', seq(token('default'), token(':'), unpack(opt(statements, or(lookAheadToken('}'), From cbfad7f578b41ecaf26a64a844d2551b800e6a4d Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 02:13:11 -0700 Subject: [PATCH 07/86] UI tweaks --- .../unfinished/jsparse-demo/jsparse-demo.css | 5 +++++ examples/unfinished/jsparse-demo/jsparse-demo.js | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.css b/examples/unfinished/jsparse-demo/jsparse-demo.css index 9f7e79344f..938e02c4d0 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.css +++ b/examples/unfinished/jsparse-demo/jsparse-demo.css @@ -78,6 +78,10 @@ html, body { height: 100%; } background: #fff; } +.box.statement { + display: block; +} + #output > .box { margin-top: 5px; } @@ -87,6 +91,7 @@ html, body { height: 100%; } border-radius: 5px; cursor: pointer; overflow: hidden; + position: relative; } .box.head { diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index 77c36b9325..7050c681c1 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -59,14 +59,20 @@ if (Meteor.is_client) { if (ast) { var curPos = 0; var unclosedInfos = []; + var statementHeads = makeSet( + ('functionDecl expression label block variables ' + + 'if for do while return continue break throw with switch ' + + 'try debugger').split(' ')); var toHtml = function 
(obj) { if (_.isArray(obj)) { var head = obj[0] || 'none'; var rest = obj.slice(1); var info = { startPos: curPos }; + var isStatement = statementHeads[head]; var html = Spark.setDataContext( info, - '
' + + '
' + Handlebars._escape(head) + '
' + _.map(rest, toHtml).join('') + '
'); unclosedInfos.push(info); @@ -78,10 +84,14 @@ if (Meteor.is_client) { }); curPos = obj.pos + obj.text.length; unclosedInfos.length = 0; + var text = obj.text; + // insert zero-width spaces to allow wrapping + text = text.replace(/\w{10}/g, "$&\n"); + text = Handlebars._escape(text); + text = text.replace(/\n/g, '­'); return Spark.setDataContext( obj, - '
' + - Handlebars._escape(obj.text) + '
'); + '
' + text + '
'); } else { // other? return '
' + From 1fc75baa0acdbcca7b4212dd3712221c69fc5df1 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 02:14:50 -0700 Subject: [PATCH 08/86] spark efficiency hack --- packages/spark/spark.js | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/packages/spark/spark.js b/packages/spark/spark.js index 5311d3192c..af0d696fca 100644 --- a/packages/spark/spark.js +++ b/packages/spark/spark.js @@ -704,16 +704,32 @@ Spark.attachEvents = withRenderer(function (eventMap, html, _renderer) { var selector = handler.selector; if (selector) { + // Check if event.currentTarget matches `selector`, + // scoped to `range`. + // As an efficiency hack, give the node we are looking + // for an id so the selector will match only it. + var node = event.currentTarget; + var tempId; + if (! node.id) + node.setAttribute('id', tempId = 'spark_currentTarget'); + var escapedNodeId = node.id.replace(/'/g, "\\$&"); + // XXX OLD COMMENT // This ends up doing O(n) findAllClipped calls when an // event bubbles up N level in the DOM. If this ends up // being too slow, we could memoize findAllClipped across // the processing of each event. - var results = DomUtils.findAllClipped( - range.containerNode(), selector, range.firstNode(), range.lastNode()); - // This is a linear search through what could be a large - // result set. - if (! 
_.contains(results, event.currentTarget)) - continue; + try { + var result = DomUtils.findClipped( + range.containerNode(), + selector + "[id='" + escapedNodeId + "']", + range.firstNode(), range.lastNode()); + + if (result !== node) + continue; + } finally { + if (tempId) + node.removeAttribute('id'); + } } else { // if no selector, only match the event target if (event.currentTarget !== event.target) From 48c62ea850437181a7a13f35339315cd1b339850 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 02:42:00 -0700 Subject: [PATCH 09/86] further demo tweaks --- examples/unfinished/jsparse-demo/jsparse-demo.css | 3 ++- examples/unfinished/jsparse-demo/jsparse-demo.js | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.css b/examples/unfinished/jsparse-demo/jsparse-demo.css index 938e02c4d0..448b35182b 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.css +++ b/examples/unfinished/jsparse-demo/jsparse-demo.css @@ -91,7 +91,8 @@ html, body { height: 100%; } border-radius: 5px; cursor: pointer; overflow: hidden; - position: relative; + /* position:relative breaks overflow:hidden effect of rounded corners? */ + position: static; } .box.head { diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index 7050c681c1..f36f7449b2 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -62,7 +62,7 @@ if (Meteor.is_client) { var statementHeads = makeSet( ('functionDecl expression label block variables ' + 'if for do while return continue break throw with switch ' + - 'try debugger').split(' ')); + 'try debugger empty').split(' ')); var toHtml = function (obj) { if (_.isArray(obj)) { var head = obj[0] || 'none'; @@ -73,7 +73,8 @@ if (Meteor.is_client) { info, '
' + - Handlebars._escape(head) + '
' + + Handlebars._escape(head + (isStatement ? ' statement' : '')) + + '
' + _.map(rest, toHtml).join('') + '
'); unclosedInfos.push(info); return html; @@ -86,9 +87,9 @@ if (Meteor.is_client) { unclosedInfos.length = 0; var text = obj.text; // insert zero-width spaces to allow wrapping - text = text.replace(/\w{10}/g, "$&\n"); + text = text.replace(/.{20}/g, "$&\n"); text = Handlebars._escape(text); - text = text.replace(/\n/g, '­'); + text = text.replace(/\n/g, '​'); return Spark.setDataContext( obj, '
' + text + '
'); From 42675e5c0296030699b99620d9bd3d1c7847af02 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 02:46:16 -0700 Subject: [PATCH 10/86] "variables" statement => "var" statement --- examples/unfinished/jsparse-demo/jsparse-demo.js | 2 +- packages/jsparse/parser.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index f36f7449b2..693fe6ab8e 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -60,7 +60,7 @@ if (Meteor.is_client) { var curPos = 0; var unclosedInfos = []; var statementHeads = makeSet( - ('functionDecl expression label block variables ' + + ('functionDecl expression label block var ' + 'if for do while return continue break throw with switch ' + 'try debugger empty').split(' ')); var toHtml = function (obj) { diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 625ca5e040..2fc907e162 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -754,7 +754,7 @@ var parse = function (tokenizer) { var varDecl = varDeclFunc(false); var variableStatement = named( - 'variables', + 'var', seq(token('var'), unpack(list(varDecl, token(','))), maybeSemicolon)); From b869ecbd80d588d4c4ca1c904cc2b31d08574574 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 02:57:52 -0700 Subject: [PATCH 11/86] move parser combinators into own file --- packages/jsparse/package.js | 5 +- packages/jsparse/parser.js | 379 +--------------------------------- packages/jsparse/parserlib.js | 370 +++++++++++++++++++++++++++++++++ 3 files changed, 374 insertions(+), 380 deletions(-) create mode 100644 packages/jsparse/parserlib.js diff --git a/packages/jsparse/package.js b/packages/jsparse/package.js index a3b4119909..3e3ec4d06f 100644 --- a/packages/jsparse/package.js +++ b/packages/jsparse/package.js @@ -3,7 +3,6 @@ 
Package.describe({ }); Package.on_use(function (api) { - api.add_files([ - 'parser.js', - 'lexer.js'], ['client', 'server']); + api.add_files(['lexer.js', 'parserlib.js', 'parser.js'], + ['client', 'server']); }); diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 2fc907e162..8f9281ef51 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -1,381 +1,6 @@ +///// JAVASCRIPT PARSER -// NOTES - -// push/pop lexer position -// need to support [no LineTerminator here] -// need to be able to look back at original whitespace later, -// find all the whitespace before a token -// "token" means anything but whitespace, newline, or comment -// multiline comments produce virtual newlines -// maybe conform to the spec's token input to the syntactic grammar? - -// XXX track line/col position, for errors and maybe token info - -var isArray = function (obj) { - return obj && (typeof obj === 'object') && (typeof obj.length === 'number'); -}; - -Tokenizer = function (codeOrLexer) { - // XXX rethink codeOrLexer later - this.lexer = (codeOrLexer instanceof Lexer ? codeOrLexer : - new Lexer(codeOrLexer)); - this.peekType = null; - this.peekText = null; - this.tokenType = null; - this.tokenText = null; - this.lastPos = 0; - this.pos = 0; - this.isLineTerminatorHere = false; - - // load peekType and peekText - this.consume(); -}; - -_.extend(Tokenizer.prototype, { - // consumes the token (peekType, peekText) and moves - // it into (type, text), loading the next token - // into (peekType, peekText). A token is a lexeme - // besides WHITESPACE, COMMENT, and NEWLINE. 
- consume: function () { - var self = this; - var lexer = self.lexer; - self.type = self.peekType; - self.text = self.peekText; - self.lastPos = self.pos; - self.isLineTerminatorHere = false; - do { - lexer.next(); - if (lexer.type === "ERROR") - throw new Error("Bad token at position " + lexer.lastPos + - ", text `" + lexer.text + "`"); - else if (lexer.type === "NEWLINE") - self.isLineTerminatorHere = true; - else if (lexer.type === "COMMENT" && ! /^.*$/.test(lexer.text)) - // multiline comments containing line terminators count - // as line terminators. - self.isLineTerminatorHere = true; - } while (lexer.type !== "EOF" && ! Lexer.isToken(lexer.type)); - self.peekType = lexer.type; - self.peekText = lexer.text; - self.pos = lexer.lastPos; - } -}); - -// A parser that consume()s has to succeed. -// Similarly, a parser that fails can't have consumed. - -// mutates the parser; don't describe an existing parser. -var describe = function (description, parser) { - parser.description = description; - return parser; -}; - -// Call this as `throw parseError(...)`. -// `expected` is a parser, `after` is a string. -var parseError = function (t, expected, after) { - var str = (expected.description ? "Expected " + expected.description : - // all parsers that might error should have descriptions, - // but just in case: - "Unexpected token"); - if (after) - str += " after " + (after.text ? "`" + after.text + "`" : after); - var pos = t.pos; - str += " at position " + pos; - str += ", found " + (t.peekText ? "`" + t.peekText + "`" : "EOF"); - var e = new Error(str); - return e; -}; - -///// TERMINAL PARSER CONSTRUCTORS - -var _tokenClassImpl = function (type, text, dontConsume) { - var textSet = (text ? makeSet(text.split(' ')) : null); - var description = (text ? 
text.split(' ').join(', ') : type); - return describe( - description, - function (t) { - if (t.peekType == type && (!text || textSet[t.peekText])) { - if (dontConsume) - return []; - var ret = {text: t.peekText, pos: t.pos}; - t.consume(); - return ret; - } - return null; - }); -}; - -var _tokenImpl = function (text, dontConsume) { - if (/\w/.test(text)) - return _tokenClassImpl('KEYWORD', text, dontConsume); - return _tokenClassImpl('PUNCTUATION', text, dontConsume); -}; - -var tokenClass = function (type, text) { - if (type === "ERROR" || type === "EOF") - throw new Error("Can't create EOF or ERROR tokens, can only look ahead"); - return _tokenClassImpl(type, text); -}; - -var token = function (text) { - return _tokenImpl(text); -}; - -// Like token, but marks tokens that need to defy the lexer's -// heuristic about whether the next '/' is a division or -// starts a regex. -var preSlashToken = function (text, divisionNotRegex) { - var impl = _tokenImpl(text); - return describe(impl.description, - function (t) { - // temporarily set divisionPermitted, - // restoring it if we don't match. - var oldValue = t.lexer.divisionPermitted; - var result; - try { - t.lexer.divisionPermitted = divisionNotRegex; - result = impl(t); - return result; - } finally { - if (! result) - t.lexer.divisionPermitted = oldValue; - } - }); -}; - -// NON-CONSUMING PARSER CONSTRUCTORS - -var lookAheadTokenClass = function (type, text) { - return _tokenClassImpl(type, text, true); -}; - -var lookAheadToken = function (text) { - return _tokenImpl(text, true); -}; - -///// NON-TERMINAL PARSER CONSTRUCTORS - -// call as: runRequired(parser, tokenizer[, prevToken]) -// to run parser(tokenizer) and assert it matches -var runRequired = function (parser, tokenizer, prevToken) { - return revalue( - tokenizer ? parser(tokenizer) : parser, - function (v, t) { - if (! 
v) - throw parseError(t || tokenizer, parser, prevToken); - return v; - }); -}; - -var runMaybeRequired = function (require, parser, tokenizer, prevToken) { - if (require) - return runRequired(parser, tokenizer, prevToken); - else - return parser(tokenizer); -}; - -// Polymorphic in parsers and results; an experiment. -var named = function (name, parserOrResult) { - return describe( - name, - revalue( - parserOrResult, - function (value) { - if (! value) - return null; - - var result; - if (isArray(value) && ! value.named) - // bare array, prepend the name - result = [name].concat(Array.prototype.slice.call(value)); - else - // token or named array; construct a new named array - result = [name, value]; - - // don't name the same thing twice - result.named = true; - - return result; - })); -}; - -var or = function (/*parsers*/) { - var args = arguments; - return function (t) { - var result; - for(var i = 0, N = args.length; i < N; i++) { - result = args[i](t); - if (result) - return result; - } - return null; - }; -}; - -// Parses a left-recursive expression with zero or more occurrences -// of a binary op. Leaves the term unwrapped if no op. For example -// (in a hypothetical use case): -// `1` => "1" -// `1+2` => ["binary", "1", "+", "2"] -// `1+2+3` => ["binary", ["binary", "1", "+", "2"], "+", "3"] -// -// opParser can also be an array of op parsers from high to low -// precedence (tightest-binding first) -var binaryLeft = function (termParser, opParser) { - if (isArray(opParser)) { - if (opParser.length === 1) { - // take single opParser out of its array - opParser = opParser[0]; - } else { - // pop off last opParser (non-destructively) and replace - // termParser with a recursive binaryLeft on the remaining - // ops. - termParser = binaryLeft(termParser, opParser.slice(0, -1)); - opParser = opParser[opParser.length - 1]; - } - } - - return describe( - termParser.description, - function (t) { - var result = termParser(t); - if (! 
result) - return null; - - var op; - while ((op = opParser(t))) { - result = named( - 'binary', - [result, op, runRequired(termParser, t, op)]); - } - return result; - }); -}; - -// Parses a list of one or more items with a separator, listing the -// items and separators. (Separator is optional.) For example: -// `x` => ["x"] -// `x,y` => ["x", ",", "y"] -// `x,y,z` => ["x", ",", "y", ",", "z"] -var list = function (itemParser, sepParser) { - return describe( - itemParser.description, - function (t) { - var result = [itemParser(t)]; - if (! result[0]) - return null; - - if (sepParser) { - var sep; - while ((sep = sepParser(t))) - result.push(sep, runRequired(itemParser, t, sep)); - } else { - var item; - while ((item = itemParser(t))) - result.push(item); - } - return result; - }); -}; - -var seq = function (/*parsers*/) { - var args = arguments; - if (! args.length) - return describe("(empty)", - function (t) { return []; }); - - var description = args[0].description; - for (var i = 1; i < args.length; i++) - description += " " + args[i].description; - return describe( - description, - function (t) { - var result = []; - for (var i = 0, N = args.length; i < N; i++) { - // first item in sequence can fail, and we - // fail (without error); after that, error on failure - var r = runMaybeRequired(i > 0, args[i], t); - if (! r) - return null; - - if (r.unpack) // append array! - result.push.apply(result, r); - else - result.push(r); - } - return result; - }); -}; - -var unpack = function (arrayParser) { - return revalue(arrayParser, function (v) { - if (v && isArray(v)) - v.unpack = true; - return v; - }); -}; - -// lookAhead parser must never consume -var lookAhead = function (lookAheadParser, nextParser) { - return describe( - lookAheadParser.description, - function (t) { - if (! lookAheadParser(t)) - return null; - return nextParser(t); - }); -}; -var negLookAhead = function (lookAheadParser, nextParser) { - if (! 
nextParser) - return function (t) { - return lookAheadParser(t) ? null : []; - }; - - return describe( - nextParser.description, - function (t) { - if (lookAheadParser(t)) - return null; - return nextParser(t); - }); -}; - -// parser that looks at nothing and returns result -var constant = function (result) { - // no description - return function (t) { - return result; - }; -}; - -// afterLookAhead allows the parser to fail rather than -// succeed if would otherwise fail at a position where -// afterLookAhead doesn't match, potentially providing -// a better error message. For example, the illegal -// object literal `{true:1}` will stop at the `true` -// and say something like "expected property name" -// instead of "expected }". As another example, -// `for(;var;) {}` will lead to "Expected expression" -// instead of "Expected ;" when the optional expression -// turns out to be an illegal `var`. -var opt = function (parser, afterLookAhead) { - return describe(parser.description, - or(parser, afterLookAhead ? afterLookAhead : seq())); -}; - -// note: valueTransformFunc gets the tokenizer as a second argument -// if it's called on a parser. This func is allowed to then -// run more parsers. 
-var revalue = function (parserOrValue, valueTransformFunc) { - if (typeof parserOrValue === 'function') - // it's a parser - return describe(parserOrValue.description, - function (t) { - return valueTransformFunc(parserOrValue(t), t); - }); - else - return valueTransformFunc(parserOrValue); -}; +// XXX unit tests var parse = function (tokenizer) { var noLineTerminatorHere = describe( diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js new file mode 100644 index 0000000000..6cf694e337 --- /dev/null +++ b/packages/jsparse/parserlib.js @@ -0,0 +1,370 @@ +///// TOKENIZER AND PARSER COMBINATORS + +// XXX track line/col position, for errors and maybe token info +// XXX unit tests + +var isArray = function (obj) { + return obj && (typeof obj === 'object') && (typeof obj.length === 'number'); +}; + +Tokenizer = function (codeOrLexer) { + // XXX rethink codeOrLexer later + this.lexer = (codeOrLexer instanceof Lexer ? codeOrLexer : + new Lexer(codeOrLexer)); + this.peekType = null; + this.peekText = null; + this.tokenType = null; + this.tokenText = null; + this.lastPos = 0; + this.pos = 0; + this.isLineTerminatorHere = false; + + // load peekType and peekText + this.consume(); +}; + +_.extend(Tokenizer.prototype, { + // consumes the token (peekType, peekText) and moves + // it into (type, text), loading the next token + // into (peekType, peekText). A token is a lexeme + // besides WHITESPACE, COMMENT, and NEWLINE. + consume: function () { + var self = this; + var lexer = self.lexer; + self.type = self.peekType; + self.text = self.peekText; + self.lastPos = self.pos; + self.isLineTerminatorHere = false; + do { + lexer.next(); + if (lexer.type === "ERROR") + throw new Error("Bad token at position " + lexer.lastPos + + ", text `" + lexer.text + "`"); + else if (lexer.type === "NEWLINE") + self.isLineTerminatorHere = true; + else if (lexer.type === "COMMENT" && ! 
/^.*$/.test(lexer.text)) + // multiline comments containing line terminators count + // as line terminators. + self.isLineTerminatorHere = true; + } while (lexer.type !== "EOF" && ! Lexer.isToken(lexer.type)); + self.peekType = lexer.type; + self.peekText = lexer.text; + self.pos = lexer.lastPos; + } +}); + +// A parser that consume()s has to succeed. +// Similarly, a parser that fails can't have consumed. + +// mutates the parser; don't describe an existing parser. +var describe = function (description, parser) { + parser.description = description; + return parser; +}; + +// Call this as `throw parseError(...)`. +// `expected` is a parser, `after` is a string. +var parseError = function (t, expected, after) { + var str = (expected.description ? "Expected " + expected.description : + // all parsers that might error should have descriptions, + // but just in case: + "Unexpected token"); + if (after) + str += " after " + (after.text ? "`" + after.text + "`" : after); + var pos = t.pos; + str += " at position " + pos; + str += ", found " + (t.peekText ? "`" + t.peekText + "`" : "EOF"); + var e = new Error(str); + return e; +}; + +///// TERMINAL PARSER CONSTRUCTORS + +var _tokenClassImpl = function (type, text, dontConsume) { + var textSet = (text ? makeSet(text.split(' ')) : null); + var description = (text ? 
text.split(' ').join(', ') : type); + return describe( + description, + function (t) { + if (t.peekType == type && (!text || textSet[t.peekText])) { + if (dontConsume) + return []; + var ret = {text: t.peekText, pos: t.pos}; + t.consume(); + return ret; + } + return null; + }); +}; + +var _tokenImpl = function (text, dontConsume) { + if (/\w/.test(text)) + return _tokenClassImpl('KEYWORD', text, dontConsume); + return _tokenClassImpl('PUNCTUATION', text, dontConsume); +}; + +var tokenClass = function (type, text) { + if (type === "ERROR" || type === "EOF") + throw new Error("Can't create EOF or ERROR tokens, can only look ahead"); + return _tokenClassImpl(type, text); +}; + +var token = function (text) { + return _tokenImpl(text); +}; + +// Like token, but marks tokens that need to defy the lexer's +// heuristic about whether the next '/' is a division or +// starts a regex. +var preSlashToken = function (text, divisionNotRegex) { + var impl = _tokenImpl(text); + return describe(impl.description, + function (t) { + // temporarily set divisionPermitted, + // restoring it if we don't match. + var oldValue = t.lexer.divisionPermitted; + var result; + try { + t.lexer.divisionPermitted = divisionNotRegex; + result = impl(t); + return result; + } finally { + if (! result) + t.lexer.divisionPermitted = oldValue; + } + }); +}; + +// NON-CONSUMING PARSER CONSTRUCTORS + +var lookAheadTokenClass = function (type, text) { + return _tokenClassImpl(type, text, true); +}; + +var lookAheadToken = function (text) { + return _tokenImpl(text, true); +}; + +///// NON-TERMINAL PARSER CONSTRUCTORS + +// call as: runRequired(parser, tokenizer[, prevToken]) +// to run parser(tokenizer) and assert it matches +var runRequired = function (parser, tokenizer, prevToken) { + return revalue( + tokenizer ? parser(tokenizer) : parser, + function (v, t) { + if (! 
v) + throw parseError(t || tokenizer, parser, prevToken); + return v; + }); +}; + +var runMaybeRequired = function (require, parser, tokenizer, prevToken) { + if (require) + return runRequired(parser, tokenizer, prevToken); + else + return parser(tokenizer); +}; + +// Polymorphic in parsers and results; an experiment. +var named = function (name, parserOrResult) { + return describe( + name, + revalue( + parserOrResult, + function (value) { + if (! value) + return null; + + var result; + if (isArray(value) && ! value.named) + // bare array, prepend the name + result = [name].concat(Array.prototype.slice.call(value)); + else + // token or named array; construct a new named array + result = [name, value]; + + // don't name the same thing twice + result.named = true; + + return result; + })); +}; + +var or = function (/*parsers*/) { + var args = arguments; + return function (t) { + var result; + for(var i = 0, N = args.length; i < N; i++) { + result = args[i](t); + if (result) + return result; + } + return null; + }; +}; + +// Parses a left-recursive expression with zero or more occurrences +// of a binary op. Leaves the term unwrapped if no op. For example +// (in a hypothetical use case): +// `1` => "1" +// `1+2` => ["binary", "1", "+", "2"] +// `1+2+3` => ["binary", ["binary", "1", "+", "2"], "+", "3"] +// +// opParser can also be an array of op parsers from high to low +// precedence (tightest-binding first) +var binaryLeft = function (termParser, opParser) { + if (isArray(opParser)) { + if (opParser.length === 1) { + // take single opParser out of its array + opParser = opParser[0]; + } else { + // pop off last opParser (non-destructively) and replace + // termParser with a recursive binaryLeft on the remaining + // ops. + termParser = binaryLeft(termParser, opParser.slice(0, -1)); + opParser = opParser[opParser.length - 1]; + } + } + + return describe( + termParser.description, + function (t) { + var result = termParser(t); + if (! 
result) + return null; + + var op; + while ((op = opParser(t))) { + result = named( + 'binary', + [result, op, runRequired(termParser, t, op)]); + } + return result; + }); +}; + +// Parses a list of one or more items with a separator, listing the +// items and separators. (Separator is optional.) For example: +// `x` => ["x"] +// `x,y` => ["x", ",", "y"] +// `x,y,z` => ["x", ",", "y", ",", "z"] +var list = function (itemParser, sepParser) { + return describe( + itemParser.description, + function (t) { + var result = [itemParser(t)]; + if (! result[0]) + return null; + + if (sepParser) { + var sep; + while ((sep = sepParser(t))) + result.push(sep, runRequired(itemParser, t, sep)); + } else { + var item; + while ((item = itemParser(t))) + result.push(item); + } + return result; + }); +}; + +var seq = function (/*parsers*/) { + var args = arguments; + if (! args.length) + return describe("(empty)", + function (t) { return []; }); + + var description = args[0].description; + for (var i = 1; i < args.length; i++) + description += " " + args[i].description; + return describe( + description, + function (t) { + var result = []; + for (var i = 0, N = args.length; i < N; i++) { + // first item in sequence can fail, and we + // fail (without error); after that, error on failure + var r = runMaybeRequired(i > 0, args[i], t); + if (! r) + return null; + + if (r.unpack) // append array! + result.push.apply(result, r); + else + result.push(r); + } + return result; + }); +}; + +var unpack = function (arrayParser) { + return revalue(arrayParser, function (v) { + if (v && isArray(v)) + v.unpack = true; + return v; + }); +}; + +// lookAhead parser must never consume +var lookAhead = function (lookAheadParser, nextParser) { + return describe( + lookAheadParser.description, + function (t) { + if (! lookAheadParser(t)) + return null; + return nextParser(t); + }); +}; +var negLookAhead = function (lookAheadParser, nextParser) { + if (! 
nextParser) + return function (t) { + return lookAheadParser(t) ? null : []; + }; + + return describe( + nextParser.description, + function (t) { + if (lookAheadParser(t)) + return null; + return nextParser(t); + }); +}; + +// parser that looks at nothing and returns result +var constant = function (result) { + // no description + return function (t) { + return result; + }; +}; + +// afterLookAhead allows the parser to fail rather than +// succeed if would otherwise fail at a position where +// afterLookAhead doesn't match, potentially providing +// a better error message. For example, the illegal +// object literal `{true:1}` will stop at the `true` +// and say something like "expected property name" +// instead of "expected }". As another example, +// `for(;var;) {}` will lead to "Expected expression" +// instead of "Expected ;" when the optional expression +// turns out to be an illegal `var`. +var opt = function (parser, afterLookAhead) { + return describe(parser.description, + or(parser, afterLookAhead ? afterLookAhead : seq())); +}; + +// note: valueTransformFunc gets the tokenizer as a second argument +// if it's called on a parser. This func is allowed to then +// run more parsers. 
+var revalue = function (parserOrValue, valueTransformFunc) { + if (typeof parserOrValue === 'function') + // it's a parser + return describe(parserOrValue.description, + function (t) { + return valueTransformFunc(parserOrValue(t), t); + }); + else + return valueTransformFunc(parserOrValue); +}; From 82323d61f307b26e20d3712c2390bb9f4263827c Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 03:17:57 -0700 Subject: [PATCH 12/86] support elision in array literals --- packages/jsparse/parser.js | 19 +++++++++++++++++-- packages/jsparse/parserlib.js | 22 +++++++++++++++++----- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 8f9281ef51..2872241792 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -53,8 +53,23 @@ var parse = function (tokenizer) { var arrayLiteral = named('array', seq(token('['), - unpack(opt(list(assignmentExpressionPtr, - token(',')), lookAheadToken(']'))), + unpack(opt(list(token(',')))), + unpack( + opt( + list( + describe( + 'expression', + or(assignmentExpressionPtr, + // count a peeked-at ']' as an expression + // to support elisions at end, e.g. + // `[1,2,3,,,,,,]`. Because it's unpacked, + // the look-ahead won't show up in the + // parse tree. + unpack(lookAheadToken(']')))), + // list seperator is one or more commas + // to support elision + unpack(list(token(',')))), + lookAheadToken(']'))), token(']'))); var propertyName = describe('propertyName', or( diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 6cf694e337..17b4c53b6f 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -249,22 +249,34 @@ var binaryLeft = function (termParser, opParser) { // `x` => ["x"] // `x,y` => ["x", ",", "y"] // `x,y,z` => ["x", ",", "y", ",", "z"] +// Respects `unpack`. 
var list = function (itemParser, sepParser) { + var push = function(array, newThing) { + if (newThing.unpack) + array.push.apply(array, newThing); + else + array.push(newThing); + }; return describe( itemParser.description, function (t) { - var result = [itemParser(t)]; - if (! result[0]) + var result = []; + var firstItem = itemParser(t); + if (! firstItem) return null; + push(result, firstItem); if (sepParser) { var sep; - while ((sep = sepParser(t))) - result.push(sep, runRequired(itemParser, t, sep)); + while ((sep = sepParser(t))) { + push(result, sep); + push(result, runRequired(itemParser, t, + sep.unpack ? sep[sep.length - 1] : sep)); + } } else { var item; while ((item = itemParser(t))) - result.push(item); + push(result, item); } return result; }); From 6dccc293bacd114c309ab7b64346d82e9d311683 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 03:19:53 -0700 Subject: [PATCH 13/86] add notes --- packages/jsparse/parser.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 2872241792..87fb0a8b62 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -2,6 +2,10 @@ // XXX unit tests +// What we don't have from ECMA-262 5.1: +// - object literal trailing comma +// - object literal get/set + var parse = function (tokenizer) { var noLineTerminatorHere = describe( 'noLineTerminator', function (t) { From 120b5cf0f8572e5028b60a8c1f52e4133fb15ada Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 12:49:14 -0700 Subject: [PATCH 14/86] more XXX notes --- packages/jsparse/parserlib.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 17b4c53b6f..0cc0e11bf6 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -1,5 +1,7 @@ ///// TOKENIZER AND PARSER COMBINATORS +// XXX make Parser object with parse method? +// XXX rework describe, call "expecting"? 
// XXX track line/col position, for errors and maybe token info // XXX unit tests From a0bb26e735940146ef60bfb567c7f0ebfa52cbda Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 12:49:22 -0700 Subject: [PATCH 15/86] parser_tests --- packages/jsparse/package.js | 7 +++++++ packages/jsparse/parser_tests.js | 5 +++++ 2 files changed, 12 insertions(+) create mode 100644 packages/jsparse/parser_tests.js diff --git a/packages/jsparse/package.js b/packages/jsparse/package.js index 3e3ec4d06f..cb2dd675c5 100644 --- a/packages/jsparse/package.js +++ b/packages/jsparse/package.js @@ -6,3 +6,10 @@ Package.on_use(function (api) { api.add_files(['lexer.js', 'parserlib.js', 'parser.js'], ['client', 'server']); }); + +Package.on_test(function (api) { + api.use('tinytest'); + api.use('jsparse', 'client'); + + api.add_files('parser_tests.js', ['client', 'server']); +}); diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js new file mode 100644 index 0000000000..8526cab2a5 --- /dev/null +++ b/packages/jsparse/parser_tests.js @@ -0,0 +1,5 @@ + + +Tinytest.add("jsparse - parser", function (test) { + test.isTrue(true); +}); From 996da3b01f9f3c04e7d212f965b150f2859db241 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 15:11:39 -0700 Subject: [PATCH 16/86] first parser tests --- packages/jsparse/parser_tests.js | 143 ++++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 2 deletions(-) diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 8526cab2a5..9030d02ea0 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -1,5 +1,144 @@ -Tinytest.add("jsparse - parser", function (test) { - test.isTrue(true); +var makeTester = function (test) { + var parseTestASTFormat = function (str) { + var results = []; + var ptrStack = []; + var ptr = results; + _.each(str.match(/\s?\(|\)|`.*?`|`|[^\s()`]+/g), function (txt) { + var whitespaceBeforeOpen = false; + 
if (/^\s\($/.test(txt)) { + // paren preceded by whitespace + txt = '('; + whitespaceBeforeOpen = true; + } + switch (txt.charAt(0)) { + case '(': + var newArray = + ((! whitespaceBeforeOpen) && ptr.length && + (typeof ptr[ptr.length - 1] === "string")) ? + [ptr.pop()] : []; + ptr.push(newArray); + ptrStack.push(ptr); + ptr = newArray; + break; + case ')': + ptr = ptrStack.pop(); + break; + case '`': + if (txt.length === 1) + throw new Error("Mismatched ` in " + str); + ptr.push(txt.slice(1, -1)); + break; + default: + ptr.push(txt); + break; + } + if (results.length > 1) + throw new Error("Not expecting " + txt + " in " + str); + }); + if (ptr !== results) + throw new Error("Mismatched parentheses in " + str); + return results[0]; + }; + var stringifyTestASTFormat = function (obj) { + if (typeof obj === "string") { + if (obj.charAt(0) === '(' || obj.charAt(0) === ')') + return '`' + obj + '`'; + else + return obj; + } else { + if (! obj.length) + return '()'; + else + return (stringifyTestASTFormat(obj[0]) + '(' + + _.map(obj.slice(1), stringifyTestASTFormat).join(' ') + + ')'); + } + }; + + return { + goodParse: function (code, expectedAstString) { + var expectedAst = parseTestASTFormat(expectedAstString); + + // first use lexer to collect all tokens + var lexer = new Lexer(code); + var allTokensInOrder = []; + while (lexer.next() !== 'EOF') { + if (lexer.type === 'ERROR') + test.fail("Lexer error at " + lexer.lastPos); + if (Lexer.isToken(lexer.type)) + allTokensInOrder.push({ pos: lexer.lastPos, text: lexer.text }); + } + lexer = new Lexer(code); + + var tokenizer = new Tokenizer(code); + var ast = parse(tokenizer); + + var nextTokenIndex = 0; + var informalize = function (part) { + if (_.isArray(part)) { + if (part.length === 0) { + // This is an EMPTY -- `[]`. All good, pass it through + return []; + } else { + // This is a NODE (non-terminal). Make sure it actually is. + if (! 
(part[0] && typeof part[0] === "string")) + test.fail("Not a node name: " + part[0]); + return part.slice(0, 1).concat( + _.map(part.slice(1), informalize)); + } + } else if (typeof part === 'object' && part.text && + (typeof part.pos === 'number')) { + // This is a TOKEN (terminal). + // Make sure we are visiting every token once, in order. + if (nextTokenIndex >= allTokensInOrder.length) + test.fail("Too many tokens: " + (nextTokenIndex + 1)); + var referenceToken = allTokensInOrder[nextTokenIndex++]; + test.equal(part.text, referenceToken.text); + test.equal(part.pos, referenceToken.pos); + test.equal(code.substring(part.pos, + part.pos + part.text.length), part.text); + return part.text; + } else { + test.fail("Unknown AST part: " + part); + return []; + } + }; + + var actualAst = informalize(ast); + if (nextTokenIndex !== allTokensInOrder.length) + test.fail("Too few tokens: " + nextTokenIndex); + + test.equal(stringifyTestASTFormat(actualAst), + stringifyTestASTFormat(expectedAst)); + } + // XXX write badParse + }; +}; + + +Tinytest.add("jsparse - basics", function (test) { + var tester = makeTester(test); + tester.goodParse('1', "program(expression(number(1) ;()))"); + tester.goodParse('1 + 1', "program(expression(binary(number(1) + number(1)) ;()))"); + tester.goodParse('1*2+3*4', "program(expression(binary(binary(number(1) * number(2)) + " + + "binary(number(3) * number(4))) ;()))"); + tester.goodParse('1 + 1;', "program(expression(binary(number(1) + number(1)) ;))"); + tester.goodParse('1 + 1;;', "program(expression(binary(number(1) + number(1)) ;) empty(;))"); + tester.goodParse('', "program()"); + tester.goodParse('\n', "program()"); + tester.goodParse(';;;\n\n;\n', "program(empty(;) empty(;) empty(;) empty(;))"); + tester.goodParse('foo', "program(expression(identifier(foo) ;()))"); + tester.goodParse('foo();', "program(expression(call(identifier(foo) `(` `)`) ;))"); + tester.goodParse('var x = 3', "program(var(var varDecl(x = number(3)) ;()))"); + 
tester.goodParse('++x;', "program(expression(unary(++ identifier(x)) ;))"); + tester.goodParse('x++;', "program(expression(postfix(identifier(x) ++) ;))"); + tester.goodParse( + 'throw new Error', + "program(throw(throw new(new identifier(Error)) ;()))"); + tester.goodParse( + 'var x = function () { return 123; };', + 'program(var(var varDecl(x = functionExpr(function () `(` `)` ' + + '{ return(return number(123) ;) })) ;))'); }); From ef62683280a27c0ec91d15ea63ff16e8dba12f7d Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 6 Sep 2012 15:11:46 -0700 Subject: [PATCH 17/86] none => empty --- examples/unfinished/jsparse-demo/jsparse-demo.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index 693fe6ab8e..44f25e07f4 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -65,7 +65,7 @@ if (Meteor.is_client) { 'try debugger empty').split(' ')); var toHtml = function (obj) { if (_.isArray(obj)) { - var head = obj[0] || 'none'; + var head = obj[0] || 'empty'; var rest = obj.slice(1); var info = { startPos: curPos }; var isStatement = statementHeads[head]; From d6370e762acba03d28f601f73147cfa97142c6de Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Fri, 7 Sep 2012 14:15:04 -0700 Subject: [PATCH 18/86] tweaks, node naming --- .../unfinished/jsparse-demo/jsparse-demo.js | 2 +- packages/jsparse/parser.js | 122 ++++++++++-------- packages/jsparse/parserlib.js | 7 + 3 files changed, 78 insertions(+), 53 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index 44f25e07f4..e0b6282ad4 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -65,7 +65,7 @@ if (Meteor.is_client) { 'try debugger empty').split(' ')); var toHtml = function (obj) { if 
(_.isArray(obj)) { - var head = obj[0] || 'empty'; + var head = obj[0] || ''; var rest = obj.slice(1); var info = { startPos: curPos }; var isStatement = statementHeads[head]; diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 87fb0a8b62..8951bf09f4 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -77,9 +77,9 @@ var parse = function (tokenizer) { token(']'))); var propertyName = describe('propertyName', or( - named('identifier', tokenClass('IDENTIFIER')), - named('number', tokenClass('NUMBER')), - named('string', tokenClass('STRING')))); + named('idProp', tokenClass('IDENTIFIER')), + named('numProp', tokenClass('NUMBER')), + named('strProp', tokenClass('STRING')))); var nameColonValue = describe( 'name:value', named('prop', seq(propertyName, token(':'), assignmentExpressionPtr))); @@ -95,8 +95,8 @@ var parse = function (tokenizer) { var functionFunc = function (nameRequired) { return seq(token('function'), (nameRequired ? tokenClass('IDENTIFIER') : - opt(tokenClass('IDENTIFIER'), - lookAheadToken('('))), + or(tokenClass('IDENTIFIER'), + revalue(lookAheadToken('('), named('nil', [])))), token('('), unpack(opt(list(tokenClass('IDENTIFIER'), token(',')), lookAheadToken(')'))), @@ -270,7 +270,7 @@ var parse = function (tokenizer) { }); var conditionalExpression = conditionalExpressionFunc(false); - var assignOp = token('= *= /= %= += -= <<= >>= >>>= &= ^= |= '); + var assignOp = token('= *= /= %= += -= <<= >>= >>>= &= ^= |='); var assignmentExpressionFunc = memoizeBooleanFunc( function (noIn) { @@ -326,14 +326,10 @@ var parse = function (tokenizer) { lookAheadTokenClass('EOF'), function (t) { return t.isLineTerminatorHere ? 
[] : null; - }), - function (v) { - return v && named(';', []); - }))); - + }), named(';', [])))); var expressionStatement = named( - 'expression', + 'expressionStmnt', negLookAhead( or(lookAheadToken('{'), lookAheadToken('function')), seq(expression, @@ -378,13 +374,13 @@ var parse = function (tokenizer) { if (! rest) return exprStmnt; - return named('label', + return named('labelStmnt', [expr[1]].concat(rest)); }; - var emptyStatement = named('empty', token(';')); // not maybeSemicolon + var emptyStatement = named('emptyStmnt', token(';')); // not maybeSemicolon - var blockStatement = named('block', seq( + var blockStatement = named('blockStmnt', seq( token('{'), unpack(opt(statements, lookAheadToken('}'))), token('}'))); @@ -398,18 +394,18 @@ var parse = function (tokenizer) { var varDecl = varDeclFunc(false); var variableStatement = named( - 'var', + 'varStmnt', seq(token('var'), unpack(list(varDecl, token(','))), maybeSemicolon)); // A paren that may be followed by a statement // beginning with a regex literal. 
- var parenBeforeStatement = preSlashToken(')', false); + var closeParenBeforeStatement = preSlashToken(')', false); var ifStatement = named( - 'if', + 'ifStmnt', seq(token('if'), token('('), expression, - parenBeforeStatement, statementPtr, + closeParenBeforeStatement, statementPtr, unpack(opt(seq(token('else'), statementPtr))))); var secondThirdClauses = describe( @@ -417,14 +413,14 @@ var parse = function (tokenizer) { lookAhead(lookAheadToken(';'), seq( token(';'), - opt(expressionPtr, lookAheadToken(';')), + opt(expressionPtr, revalue(lookAheadToken(';'), named('nil', []))), token(';'), - opt(expressionPtr, lookAheadToken(')'))))); + opt(expressionPtr, revalue(lookAheadToken(')'), named('nil', [])))))); var inExpr = seq(token('in'), expression); var inExprExpectingSemi = describe('semicolon', seq(token('in'), expression)); - var forClauses = named( - 'forClauses', + var forSpec = revalue(named( + 'forSpec', or(seq(token('var'), varDeclFunc(true), describe( @@ -439,7 +435,7 @@ var parse = function (tokenizer) { // get the case where the first clause is empty out of the way. // the lookAhead's return value is the empty placeholder for the // missing expression. - seq(lookAheadToken(';'), unpack(secondThirdClauses)), + seq(revalue(lookAheadToken(';'), named('nil', [])), unpack(secondThirdClauses)), // custom parser the non-var case because we have to // read the first expression before we know if there's // an "in". @@ -459,43 +455,63 @@ var parse = function (tokenizer) { } return [firstExpr].concat(rest); - })); + })), + function (clauses) { + // There are four kinds of for-loop, and we call the + // part between the parens one of forSpec, forVarSpec, + // forInSpec, and forVarInSpec. Having parsed it + // already, we rewrite the node name based on how + // many items came out. forIn and forVarIn always + // have 3 and 4 items respectively. for has 5 + // (the optional expressions are present as nils). 
+ // forVar has 6 or more, because `for(var x;;);` + // produces [`var` `x` `;` nil `;` nil]. + if (! clauses) + return null; + if (clauses.length === 4) + clauses[0] = 'forInSpec'; + else if (clauses.length === 5) + clauses[0] = 'forVarInSpec'; + else if (clauses.length >= 7) + clauses[0] = 'forVarSpec'; + return clauses; + }); var iterationStatement = or( - named('do', seq(token('do'), statementPtr, token('while'), - token('('), expression, token(')'), - maybeSemicolon)), - named('while', seq(token('while'), token('('), expression, - parenBeforeStatement, statementPtr)), + named('doStmnt', seq(token('do'), statementPtr, token('while'), + token('('), expression, token(')'), + maybeSemicolon)), + named('whileStmnt', seq(token('while'), token('('), expression, + closeParenBeforeStatement, statementPtr)), // semicolons must be real, not maybeSemicolons - named('for', seq( - token('for'), token('('), forClauses, parenBeforeStatement, + named('forStmnt', seq( + token('for'), token('('), forSpec, closeParenBeforeStatement, statementPtr))); var returnStatement = named( - 'return', - seq(token('return'), opt( - lookAhead(noLineTerminatorHere, expression)), + 'returnStmnt', + seq(token('return'), or( + lookAhead(noLineTerminatorHere, expression), constant(named('nil', []))), maybeSemicolon)); var continueStatement = named( - 'continue', - seq(token('continue'), opt( - lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER'))), + 'continueStmnt', + seq(token('continue'), or( + lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(named('nil', []))), maybeSemicolon)); var breakStatement = named( - 'break', - seq(token('break'), opt( - lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER'))), + 'breakStmnt', + seq(token('break'), or( + lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(named('nil', []))), maybeSemicolon)); var throwStatement = named( - 'throw', + 'throwStmnt', seq(token('throw'), lookAhead(noLineTerminatorHere, expression), 
maybeSemicolon)); var withStatement = named( - 'with', - seq(token('with'), token('('), expression, parenBeforeStatement, + 'withStmnt', + seq(token('with'), token('('), expression, closeParenBeforeStatement, statementPtr)); var switchCase = named( @@ -510,7 +526,7 @@ var parse = function (tokenizer) { lookAheadToken('case')))))); var switchStatement = named( - 'switch', + 'switchStmnt', seq(token('switch'), token('('), expression, token(')'), token('{'), unpack(opt(list(switchCase), or(lookAheadToken('}'), @@ -523,18 +539,20 @@ var parse = function (tokenizer) { 'catchOrFinally', lookAhead(lookAheadToken('catch finally'), seq( - opt(named( + or(named( 'catch', seq(token('catch'), token('('), tokenClass('IDENTIFIER'), - token(')'), blockStatement))), - opt(named( + token(')'), blockStatement)), + constant(named('nil', []))), + or(named( 'finally', - seq(token('finally'), blockStatement)))))); + seq(token('finally'), blockStatement)), + constant(named('nil', [])))))); var tryStatement = named( - 'try', + 'tryStmnt', seq(token('try'), blockStatement, unpack(catchFinally))); var debuggerStatement = named( - 'debugger', seq(token('debugger'), maybeSemicolon)); + 'debuggerStmnt', seq(token('debugger'), maybeSemicolon)); var statement = describe('statement', or(expressionOrLabelStatement, @@ -576,7 +594,7 @@ var parse = function (tokenizer) { function (v, t) { if (! v) return null; - // eat the last "EOF" so that + // eat the ending "EOF" so that // our position is updated t.consume(); return unpack([]); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 0cc0e11bf6..366f19ab3f 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -373,6 +373,13 @@ var opt = function (parser, afterLookAhead) { // if it's called on a parser. This func is allowed to then // run more parsers. 
var revalue = function (parserOrValue, valueTransformFunc) { + if (typeof valueTransformFunc !== 'function') { + var value = valueTransformFunc; + valueTransformFunc = function (v) { + return (v ? value : null); + }; + } + if (typeof parserOrValue === 'function') // it's a parser return describe(parserOrValue.description, From c63278332b7f65d867e1faae1db26f0f19932479 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Fri, 7 Sep 2012 14:20:45 -0700 Subject: [PATCH 19/86] AST => tree --- .../unfinished/jsparse-demo/jsparse-demo.js | 8 ++-- packages/jsparse/parser_tests.js | 39 ++++++++----------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index e0b6282ad4..be112c60b8 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -36,10 +36,10 @@ if (Meteor.is_client) { // PARSER var html; - var ast = null; + var tree = null; var lexer = new Lexer(input); try { - ast = parse(new Tokenizer(lexer)) || []; + tree = parse(new Tokenizer(lexer)) || []; } catch (parseError) { var errorPos = lexer.lastPos; var errorLen = lexer.text.length; @@ -56,7 +56,7 @@ if (Meteor.is_client) { html += '
' + Handlebars._escape(parseError.toString()) + '
'; } - if (ast) { + if (tree) { var curPos = 0; var unclosedInfos = []; var statementHeads = makeSet( @@ -99,7 +99,7 @@ if (Meteor.is_client) { Handlebars._escape(JSON.stringify(obj)) + '
'; } }; - html = toHtml(ast); + html = toHtml(tree); curPos = lexer.pos; _.each(unclosedInfos, function (info) { info.endPos = curPos; diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 9030d02ea0..3eb97cc2ec 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -1,7 +1,7 @@ var makeTester = function (test) { - var parseTestASTFormat = function (str) { + var parseTestFormat = function (str) { var results = []; var ptrStack = []; var ptr = results; @@ -41,7 +41,7 @@ var makeTester = function (test) { throw new Error("Mismatched parentheses in " + str); return results[0]; }; - var stringifyTestASTFormat = function (obj) { + var stringifyTestFormat = function (obj) { if (typeof obj === "string") { if (obj.charAt(0) === '(' || obj.charAt(0) === ')') return '`' + obj + '`'; @@ -51,15 +51,15 @@ var makeTester = function (test) { if (! obj.length) return '()'; else - return (stringifyTestASTFormat(obj[0]) + '(' + - _.map(obj.slice(1), stringifyTestASTFormat).join(' ') + + return (stringifyTestFormat(obj[0]) + '(' + + _.map(obj.slice(1), stringifyTestFormat).join(' ') + ')'); } }; return { - goodParse: function (code, expectedAstString) { - var expectedAst = parseTestASTFormat(expectedAstString); + goodParse: function (code, expectedTreeString) { + var expectedTree = parseTestFormat(expectedTreeString); // first use lexer to collect all tokens var lexer = new Lexer(code); @@ -73,21 +73,16 @@ var makeTester = function (test) { lexer = new Lexer(code); var tokenizer = new Tokenizer(code); - var ast = parse(tokenizer); + var tree = parse(tokenizer); var nextTokenIndex = 0; var informalize = function (part) { - if (_.isArray(part)) { - if (part.length === 0) { - // This is an EMPTY -- `[]`. All good, pass it through - return []; - } else { - // This is a NODE (non-terminal). Make sure it actually is. - if (! 
(part[0] && typeof part[0] === "string")) - test.fail("Not a node name: " + part[0]); - return part.slice(0, 1).concat( - _.map(part.slice(1), informalize)); - } + if (_.isArray(part) && part.length) { + // This is a NODE (non-terminal). Make sure it actually is. + if (! (part[0] && typeof part[0] === "string")) + test.fail("Not a node name: " + part[0]); + return part.slice(0, 1).concat( + _.map(part.slice(1), informalize)); } else if (typeof part === 'object' && part.text && (typeof part.pos === 'number')) { // This is a TOKEN (terminal). @@ -101,17 +96,17 @@ var makeTester = function (test) { part.pos + part.text.length), part.text); return part.text; } else { - test.fail("Unknown AST part: " + part); + test.fail("Unknown tree part: " + part); return []; } }; - var actualAst = informalize(ast); + var actualTree = informalize(tree); if (nextTokenIndex !== allTokensInOrder.length) test.fail("Too few tokens: " + nextTokenIndex); - test.equal(stringifyTestASTFormat(actualAst), - stringifyTestASTFormat(expectedAst)); + test.equal(stringifyTestFormat(actualTree), + stringifyTestFormat(expectedTree)); } // XXX write badParse }; From fb183e1e5cc19f38ccacfd91c90a3bc97ceb66b5 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Fri, 7 Sep 2012 18:44:21 -0700 Subject: [PATCH 20/86] a few name changes --- packages/jsparse/parser.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 8951bf09f4..3a68125d79 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -77,9 +77,9 @@ var parse = function (tokenizer) { token(']'))); var propertyName = describe('propertyName', or( - named('idProp', tokenClass('IDENTIFIER')), - named('numProp', tokenClass('NUMBER')), - named('strProp', tokenClass('STRING')))); + named('idPropName', tokenClass('IDENTIFIER')), + named('numPropName', tokenClass('NUMBER')), + named('strPropName', tokenClass('STRING')))); var nameColonValue = describe( 
'name:value', named('prop', seq(propertyName, token(':'), assignmentExpressionPtr))); From f1379b8ec94a06248a35cced0d67ac03e41ff85a Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Fri, 7 Sep 2012 19:31:22 -0700 Subject: [PATCH 21/86] demo tweaks --- examples/unfinished/jsparse-demo/jsparse-demo.css | 3 ++- examples/unfinished/jsparse-demo/jsparse-demo.html | 8 ++++---- examples/unfinished/jsparse-demo/jsparse-demo.js | 10 ++-------- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.css b/examples/unfinished/jsparse-demo/jsparse-demo.css index 448b35182b..bea973c397 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.css +++ b/examples/unfinished/jsparse-demo/jsparse-demo.css @@ -10,10 +10,11 @@ html, body { height: 100%; } border-bottom: 1px solid #555; overflow: auto; background: #cfc; + font-size: 12px; } #topbarinner { - padding: 10px; + padding: 7px; font-family: sans-serif; } diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.html b/examples/unfinished/jsparse-demo/jsparse-demo.html index 077952c415..d8a676b2b6 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.html +++ b/examples/unfinished/jsparse-demo/jsparse-demo.html @@ -16,10 +16,10 @@
-
-{{! whitespace is significant here; browser swallows initial - newline in textarea }} -
diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index be112c60b8..99c4138675 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -59,22 +59,16 @@ if (Meteor.is_client) { if (tree) { var curPos = 0; var unclosedInfos = []; - var statementHeads = makeSet( - ('functionDecl expression label block var ' + - 'if for do while return continue break throw with switch ' + - 'try debugger empty').split(' ')); var toHtml = function (obj) { if (_.isArray(obj)) { var head = obj[0] || ''; var rest = obj.slice(1); var info = { startPos: curPos }; - var isStatement = statementHeads[head]; + var isStatement = (head.indexOf('Stmnt') >= 0); var html = Spark.setDataContext( info, '
' + - Handlebars._escape(head + (isStatement ? ' statement' : '')) + - '
' + + '">
' + Handlebars._escape(head) + '
' + _.map(rest, toHtml).join('') + '
'); unclosedInfos.push(info); return html; From 5af09e5d9752d132136da4e08660481df6f35ec4 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Fri, 7 Sep 2012 20:34:00 -0700 Subject: [PATCH 22/86] fix tests for API changes --- packages/jsparse/parser_tests.js | 43 +++++++++++++------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 3eb97cc2ec..b7d583ee97 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -5,19 +5,12 @@ var makeTester = function (test) { var results = []; var ptrStack = []; var ptr = results; - _.each(str.match(/\s?\(|\)|`.*?`|`|[^\s()`]+/g), function (txt) { - var whitespaceBeforeOpen = false; - if (/^\s\($/.test(txt)) { - // paren preceded by whitespace - txt = '('; - whitespaceBeforeOpen = true; - } + _.each(str.match(/\(|\)|`.*?`|`|[^\s()`]+/g), function (txt) { switch (txt.charAt(0)) { case '(': - var newArray = - ((! whitespaceBeforeOpen) && ptr.length && - (typeof ptr[ptr.length - 1] === "string")) ? - [ptr.pop()] : []; + if (! 
ptr.length || (typeof ptr[ptr.length - 1] !== "string")) + throw new Error("Nameless node in " + str); + var newArray = [ptr.pop()]; ptr.push(newArray); ptrStack.push(ptr); ptr = newArray; @@ -115,25 +108,25 @@ var makeTester = function (test) { Tinytest.add("jsparse - basics", function (test) { var tester = makeTester(test); - tester.goodParse('1', "program(expression(number(1) ;()))"); - tester.goodParse('1 + 1', "program(expression(binary(number(1) + number(1)) ;()))"); - tester.goodParse('1*2+3*4', "program(expression(binary(binary(number(1) * number(2)) + " + + tester.goodParse('1', "program(expressionStmnt(number(1) ;()))"); + tester.goodParse('1 + 1', "program(expressionStmnt(binary(number(1) + number(1)) ;()))"); + tester.goodParse('1*2+3*4', "program(expressionStmnt(binary(binary(number(1) * number(2)) + " + "binary(number(3) * number(4))) ;()))"); - tester.goodParse('1 + 1;', "program(expression(binary(number(1) + number(1)) ;))"); - tester.goodParse('1 + 1;;', "program(expression(binary(number(1) + number(1)) ;) empty(;))"); + tester.goodParse('1 + 1;', "program(expressionStmnt(binary(number(1) + number(1)) ;))"); + tester.goodParse('1 + 1;;', "program(expressionStmnt(binary(number(1) + number(1)) ;) emptyStmnt(;))"); tester.goodParse('', "program()"); tester.goodParse('\n', "program()"); - tester.goodParse(';;;\n\n;\n', "program(empty(;) empty(;) empty(;) empty(;))"); - tester.goodParse('foo', "program(expression(identifier(foo) ;()))"); - tester.goodParse('foo();', "program(expression(call(identifier(foo) `(` `)`) ;))"); - tester.goodParse('var x = 3', "program(var(var varDecl(x = number(3)) ;()))"); - tester.goodParse('++x;', "program(expression(unary(++ identifier(x)) ;))"); - tester.goodParse('x++;', "program(expression(postfix(identifier(x) ++) ;))"); + tester.goodParse(';;;\n\n;\n', "program(emptyStmnt(;) emptyStmnt(;) emptyStmnt(;) emptyStmnt(;))"); + tester.goodParse('foo', "program(expressionStmnt(identifier(foo) ;()))"); + 
tester.goodParse('foo();', "program(expressionStmnt(call(identifier(foo) `(` `)`) ;))"); + tester.goodParse('var x = 3', "program(varStmnt(var varDecl(x = number(3)) ;()))"); + tester.goodParse('++x;', "program(expressionStmnt(unary(++ identifier(x)) ;))"); + tester.goodParse('x++;', "program(expressionStmnt(postfix(identifier(x) ++) ;))"); tester.goodParse( 'throw new Error', - "program(throw(throw new(new identifier(Error)) ;()))"); + "program(throwStmnt(throw new(new identifier(Error)) ;()))"); tester.goodParse( 'var x = function () { return 123; };', - 'program(var(var varDecl(x = functionExpr(function () `(` `)` ' + - '{ return(return number(123) ;) })) ;))'); + 'program(varStmnt(var varDecl(x = functionExpr(function nil() `(` `)` ' + + '{ returnStmnt(return number(123) ;) })) ;))'); }); From 907ad8990bdc35dbba91a011edf4b1beb7d79103 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Fri, 7 Sep 2012 20:48:47 -0700 Subject: [PATCH 23/86] tokenization error tests --- packages/jsparse/parser_tests.js | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index b7d583ee97..a72fa8212d 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -100,6 +100,28 @@ var makeTester = function (test) { test.equal(stringifyTestFormat(actualTree), stringifyTestFormat(expectedTree)); + }, + badToken: function (code, expectedMessage) { + var constructMessage = function (pos, text) { + return "Bad token at position " + pos + ", text `" + text + "`"; + }; + var pos = code.indexOf('`'); + var text = code.match(/`(.*?)`/)[1]; + code = code.replace(/`/g, ''); + + var parsed = false; + var error = null; + try { + var lexer = new Lexer(code); + var tokenizer = new Tokenizer(code); + var tree = parse(tokenizer); + parsed = true; + } catch (e) { + error = e; + } + test.isFalse(parsed); + test.isTrue(error); + test.equal(error.message, constructMessage(pos, text)); 
} // XXX write badParse }; @@ -130,3 +152,9 @@ Tinytest.add("jsparse - basics", function (test) { 'program(varStmnt(var varDecl(x = functionExpr(function nil() `(` `)` ' + '{ returnStmnt(return number(123) ;) })) ;))'); }); + +Tinytest.add("jsparse - tokenization errors", function (test) { + var tester = makeTester(test); + tester.badToken("123`@`"); + tester.badToken("thisIsATestOf = `'unterminated `\n strings'"); +}); \ No newline at end of file From 447fef8f0ef696cf657d79ba5da32de9552058c7 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Fri, 7 Sep 2012 21:00:55 -0700 Subject: [PATCH 24/86] remove "after" arg to parseError --- packages/jsparse/parser.js | 1 + packages/jsparse/parser_tests.js | 7 +++++-- packages/jsparse/parserlib.js | 17 ++++++++--------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 3a68125d79..7bee387345 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -1,6 +1,7 @@ ///// JAVASCRIPT PARSER // XXX unit tests +// XXX remove prevToken argument from calls to runRequired and maybeRunRequired here. 
// What we don't have from ECMA-262 5.1: // - object literal trailing comma diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index a72fa8212d..48720f1155 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -101,7 +101,7 @@ var makeTester = function (test) { test.equal(stringifyTestFormat(actualTree), stringifyTestFormat(expectedTree)); }, - badToken: function (code, expectedMessage) { + badToken: function (code) { var constructMessage = function (pos, text) { return "Bad token at position " + pos + ", text `" + text + "`"; }; @@ -122,8 +122,11 @@ var makeTester = function (test) { test.isFalse(parsed); test.isTrue(error); test.equal(error.message, constructMessage(pos, text)); + }, + badParse: function (code, expecting) { +// var constructMessage = function (expecting, pos, found + //XXX } - // XXX write badParse }; }; diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 366f19ab3f..11037019e1 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -66,13 +66,12 @@ var describe = function (description, parser) { // Call this as `throw parseError(...)`. // `expected` is a parser, `after` is a string. -var parseError = function (t, expected, after) { +var parseError = function (t, expected) { var str = (expected.description ? "Expected " + expected.description : // all parsers that might error should have descriptions, // but just in case: "Unexpected token"); - if (after) - str += " after " + (after.text ? "`" + after.text + "`" : after); + str += " after `" + t.text + "`"; var pos = t.pos; str += " at position " + pos; str += ", found " + (t.peekText ? 
"`" + t.peekText + "`" : "EOF"); @@ -149,21 +148,21 @@ var lookAheadToken = function (text) { ///// NON-TERMINAL PARSER CONSTRUCTORS -// call as: runRequired(parser, tokenizer[, prevToken]) +// call as: runRequired(parser, tokenizer) // to run parser(tokenizer) and assert it matches -var runRequired = function (parser, tokenizer, prevToken) { +var runRequired = function (parser, tokenizer) { return revalue( tokenizer ? parser(tokenizer) : parser, function (v, t) { if (! v) - throw parseError(t || tokenizer, parser, prevToken); + throw parseError(t || tokenizer, parser); return v; }); }; -var runMaybeRequired = function (require, parser, tokenizer, prevToken) { +var runMaybeRequired = function (require, parser, tokenizer) { if (require) - return runRequired(parser, tokenizer, prevToken); + return runRequired(parser, tokenizer); else return parser(tokenizer); }; @@ -324,7 +323,7 @@ var unpack = function (arrayParser) { // lookAhead parser must never consume var lookAhead = function (lookAheadParser, nextParser) { return describe( - lookAheadParser.description, + nextParser.description, function (t) { if (! lookAheadParser(t)) return null; From 23f6e753af2e8cbb5f675351334fe60653a20fbb Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Sat, 8 Sep 2012 09:53:05 -0700 Subject: [PATCH 25/86] kill prevToken argument to runRequired --- packages/jsparse/parser.js | 10 ++++------ packages/jsparse/parserlib.js | 8 ++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 7bee387345..237e34af69 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -1,7 +1,6 @@ ///// JAVASCRIPT PARSER // XXX unit tests -// XXX remove prevToken argument from calls to runRequired and maybeRunRequired here. // What we don't have from ECMA-262 5.1: // - object literal trailing comma @@ -159,8 +158,8 @@ var parse = function (tokenizer) { // and .foo add-ons. 
// if we have 'new' keywords, we are committed and must // match an expression or error. - var result = runMaybeRequired(news.length, primaryOrFunctionExpression, - t, news[news.length - 1]); + var result = runMaybeRequired( + news.length, primaryOrFunctionExpression, t); if (! result) return null; @@ -216,8 +215,7 @@ var parse = function (tokenizer) { var unaries = unaryList(t); // if we have unaries, we are committed and // have to match an expression or error. - var result = runMaybeRequired(unaries.length, postfixExpression, - t, unaries[unaries.length - 1]); + var result = runMaybeRequired(unaries.length, postfixExpression, t); if (! result) return null; @@ -290,7 +288,7 @@ var parse = function (tokenizer) { var op; while (r.lhs && (op = assignOp(t))) parts.push(op, - runRequired(conditionalExpressionFunc(noIn), t, op)); + runRequired(conditionalExpressionFunc(noIn), t)); var result = parts.pop(); while (parts.length) { diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 11037019e1..56d67e6a6a 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -148,8 +148,7 @@ var lookAheadToken = function (text) { ///// NON-TERMINAL PARSER CONSTRUCTORS -// call as: runRequired(parser, tokenizer) -// to run parser(tokenizer) and assert it matches +// run parser(tokenizer) and assert it matches var runRequired = function (parser, tokenizer) { return revalue( tokenizer ? parser(tokenizer) : parser, @@ -161,10 +160,7 @@ var runRequired = function (parser, tokenizer) { }; var runMaybeRequired = function (require, parser, tokenizer) { - if (require) - return runRequired(parser, tokenizer); - else - return parser(tokenizer); + return require ? runRequired(parser, tokenizer) : parser(tokenizer); }; // Polymorphic in parsers and results; an experiment. 
From faac13b290ad7f3784307b5028385b9d99918f07 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Sat, 8 Sep 2012 10:01:54 -0700 Subject: [PATCH 26/86] implement badParse in tester --- packages/jsparse/parser_tests.js | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 48720f1155..d069ba5a55 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -123,9 +123,31 @@ var makeTester = function (test) { test.isTrue(error); test.equal(error.message, constructMessage(pos, text)); }, - badParse: function (code, expecting) { -// var constructMessage = function (expecting, pos, found - //XXX + badParse: function (code) { + var constructMessage = function (whatExpected, pos, found, after) { + return "Expected " + whatExpected + " after `" + after + + "` at position " + pos + ", found " + + (found ? "`" + found + "`" : "EOF"); + }; + var pos = code.indexOf('`'); + var whatExpected = code.match(/`(.*?)`/)[1]; + code = code.replace(/`.*?`/g, ''); + + var parsed = false; + var error = null; + try { + var lexer = new Lexer(code); + var tokenizer = new Tokenizer(code); + var tree = parse(tokenizer); + parsed = true; + } catch (e) { + error = e; + } + test.isFalse(parsed); + test.isTrue(error); + var after = tokenizer.text; + var found = tokenizer.peekText; + test.equal(error.message, constructMessage(whatExpected, pos, found, after)); } }; }; @@ -154,6 +176,10 @@ Tinytest.add("jsparse - basics", function (test) { 'var x = function () { return 123; };', 'program(varStmnt(var varDecl(x = functionExpr(function nil() `(` `)` ' + '{ returnStmnt(return number(123) ;) })) ;))'); + + tester.badParse("var x = `expression`"); + tester.badParse("1 `semicolon`1"); + tester.badParse("1+1`semicolon`:"); }); Tinytest.add("jsparse - tokenization errors", function (test) { From b2345a5329ae293758af84c65b01ed96ff41d7d7 Mon Sep 17 00:00:00 2001 
From: David Greenspan Date: Sat, 8 Sep 2012 10:56:15 -0700 Subject: [PATCH 27/86] start of syntax forms test --- packages/jsparse/parser_tests.js | 66 +++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index d069ba5a55..099a7e1918 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -1,5 +1,68 @@ +var allNodeNames = [ + ";", + "array", + "assignment", + "binary", + "blockStmnt", + "boolean", + "bracket", + "breakStmnt", + "call", + "case", + "catch", + "comma", + "continueStmnt", + "debuggerStmnt", + "default", + "doStmnt", + "dot", + "emptyStmnt", + "expressionStmnt", + "finally", + "forInSpec", + "forSpec", + "forStmnt", + "forVarInSpec", + "forVarSpec", + "functionDecl", + "functionExpr", + "idPropName", + "identifier", + "ifStmnt", + "labelStmnt", + "new", + "newcall", + "nil", + "null", + "numPropName", + "number", + "object", + "parens", + "postfix", + "program", + "prop", + "regex", + "returnStmnt", + "strPropName", + "string", + "switchStmnt", + "ternary", + "this", + "throwStmnt", + "tryStmnt", + "unary", + "varDecl", + "varStmnt", + "whileStmnt", + "withStmnt" +]; + +var allNodeNamesSet = {}; +_.each(allNodeNames, function (n) { allNodeNamesSet[n] = true; }); + + var makeTester = function (test) { var parseTestFormat = function (str) { var results = []; @@ -72,7 +135,8 @@ var makeTester = function (test) { var informalize = function (part) { if (_.isArray(part) && part.length) { // This is a NODE (non-terminal). Make sure it actually is. - if (! (part[0] && typeof part[0] === "string")) + if (! 
(part[0] && typeof part[0] === "string" && + allNodeNamesSet[part[0]] === true)) test.fail("Not a node name: " + part[0]); return part.slice(0, 1).concat( _.map(part.slice(1), informalize)); From c859dad1a46dde70515d2b409f57cb7f7b0542df Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Sun, 9 Sep 2012 14:49:55 -0700 Subject: [PATCH 28/86] more tests --- packages/jsparse/parser_tests.js | 281 ++++++++++++++++++++++++------- 1 file changed, 222 insertions(+), 59 deletions(-) diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 099a7e1918..ba62f98292 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -62,60 +62,81 @@ var allNodeNames = [ var allNodeNamesSet = {}; _.each(allNodeNames, function (n) { allNodeNamesSet[n] = true; }); +// The "tree string" format is a simple format for representing syntax trees. +// +// For example, the parse of `x++;` is written as: +// "program(expressionStmnt(postfix(identifier(x) ++) ;))" +// +// A Node is written as "name(item1 item2 item3)", with additional whitespace +// allowed anywhere between the name, parentheses, and items. +// +// Tokens don't need to be escaped unless they contain '(', ')', whitespace, or +// backticks. If they do, they can be written enclosed in backticks. To escape +// a backtick within backticks, double it. +// +// `stringifyNode` generates "canonical" tree strings, which have no extra escaping +// or whitespace, just one space between items in a Node. + +var parseTreeString = function (str) { + var results = []; + var ptrStack = []; + var ptr = results; + _.each(str.match(/\(|\)|`([^`]||``)*`|`|[^\s()`]+/g), function (txt) { + switch (txt.charAt(0)) { + case '(': + if (! 
ptr.length || (typeof ptr[ptr.length - 1] !== "string")) + throw new Error("Nameless node in " + str); + var newArray = [ptr.pop()]; + ptr.push(newArray); + ptrStack.push(ptr); + ptr = newArray; + break; + case ')': + ptr = ptrStack.pop(); + break; + case '`': + if (txt.length === 1) + throw new Error("Mismatched ` in " + str); + ptr.push(txt.slice(1, -1).replace(/``/g, '`')); + break; + default: + ptr.push(txt); + break; + } + if (results.length > 1) + throw new Error("Not expecting " + txt + " in " + str); + }); + if (ptr !== results) + throw new Error("Mismatched parentheses in " + str); + return results[0]; +}; +var stringifyNode = function (obj) { + if (obj.text) + obj = obj.text; + if (typeof obj === "string") { + if (/[\s()`]/.test(obj)) + return '`' + obj.replace(/`/g, '``') + '`'; + else + return obj; + } else { + return (stringifyNode(obj[0]) + '(' + + _.map(obj.slice(1), stringifyNode).join(' ') + + ')'); + } +}; + +var parseToTreeString = function (code) { + var lexer = new Lexer(code); + var tokenizer = new Tokenizer(code); + var tree = parse(tokenizer); + return stringifyNode(tree); +}; var makeTester = function (test) { - var parseTestFormat = function (str) { - var results = []; - var ptrStack = []; - var ptr = results; - _.each(str.match(/\(|\)|`.*?`|`|[^\s()`]+/g), function (txt) { - switch (txt.charAt(0)) { - case '(': - if (! 
ptr.length || (typeof ptr[ptr.length - 1] !== "string")) - throw new Error("Nameless node in " + str); - var newArray = [ptr.pop()]; - ptr.push(newArray); - ptrStack.push(ptr); - ptr = newArray; - break; - case ')': - ptr = ptrStack.pop(); - break; - case '`': - if (txt.length === 1) - throw new Error("Mismatched ` in " + str); - ptr.push(txt.slice(1, -1)); - break; - default: - ptr.push(txt); - break; - } - if (results.length > 1) - throw new Error("Not expecting " + txt + " in " + str); - }); - if (ptr !== results) - throw new Error("Mismatched parentheses in " + str); - return results[0]; - }; - var stringifyTestFormat = function (obj) { - if (typeof obj === "string") { - if (obj.charAt(0) === '(' || obj.charAt(0) === ')') - return '`' + obj + '`'; - else - return obj; - } else { - if (! obj.length) - return '()'; - else - return (stringifyTestFormat(obj[0]) + '(' + - _.map(obj.slice(1), stringifyTestFormat).join(' ') + - ')'); - } - }; - return { + // Parse code and make sure it matches expectedTreeString. goodParse: function (code, expectedTreeString) { - var expectedTree = parseTestFormat(expectedTreeString); + var expectedTree = parseTreeString(expectedTreeString); // first use lexer to collect all tokens var lexer = new Lexer(code); @@ -129,17 +150,16 @@ var makeTester = function (test) { lexer = new Lexer(code); var tokenizer = new Tokenizer(code); - var tree = parse(tokenizer); + var actualTree = parse(tokenizer); var nextTokenIndex = 0; - var informalize = function (part) { + var check = function (part) { if (_.isArray(part) && part.length) { // This is a NODE (non-terminal). Make sure it actually is. if (! (part[0] && typeof part[0] === "string" && allNodeNamesSet[part[0]] === true)) test.fail("Not a node name: " + part[0]); - return part.slice(0, 1).concat( - _.map(part.slice(1), informalize)); + _.each(part.slice(1), check); } else if (typeof part === 'object' && part.text && (typeof part.pos === 'number')) { // This is a TOKEN (terminal). 
@@ -151,20 +171,26 @@ var makeTester = function (test) { test.equal(part.pos, referenceToken.pos); test.equal(code.substring(part.pos, part.pos + part.text.length), part.text); - return part.text; } else { test.fail("Unknown tree part: " + part); - return []; } }; - var actualTree = informalize(tree); + check(actualTree); if (nextTokenIndex !== allTokensInOrder.length) test.fail("Too few tokens: " + nextTokenIndex); - test.equal(stringifyTestFormat(actualTree), - stringifyTestFormat(expectedTree)); + test.equal(stringifyNode(actualTree), + stringifyNode(expectedTree), code); }, + // Takes code with part of it surrounding with backticks. + // Removes the two backtick characters, tries to parse the code, + // and then asserts that there was a tokenization-level error, + // with the part that was between the backticks called out as + // the bad token. + // + // For example, the test "123`@`" will try to parse "123@" and + // assert that a tokenization error occurred at '@'. badToken: function (code) { var constructMessage = function (pos, text) { return "Bad token at position " + pos + ", text `" + text + "`"; @@ -187,6 +213,17 @@ var makeTester = function (test) { test.isTrue(error); test.equal(error.message, constructMessage(pos, text)); }, + // Takes code with a backtick-quoted string embedded in it. + // Removes the backticks and their contents, tries to parse the code, + // and then asserts that there was a parse error at the location + // where the backtick-quoted string was embedded. The embedded + // string must match whatever the error message says was "expected". + // + // For example, the test "{`statement`" will try to parse the code + // "{" and then assert that an error occured at the end of the string + // saying "Expected statement". The test "1 `semicolon`2" will try + // to parse "1 2" and assert that the error "Expected semicolon" + // appeared after the space and before the 2. 
badParse: function (code) { var constructMessage = function (whatExpected, pos, found, after) { return "Expected " + whatExpected + " after `" + after + @@ -250,4 +287,130 @@ Tinytest.add("jsparse - tokenization errors", function (test) { var tester = makeTester(test); tester.badToken("123`@`"); tester.badToken("thisIsATestOf = `'unterminated `\n strings'"); +}); + +Tinytest.add("jsparse - syntax forms", function (test) { + var tester = makeTester(test); + var trials = [ + ['1', + 'program(expressionStmnt(number(1) ;()))'], + ['1;;;;2', + 'program(expressionStmnt(number(1) ;) emptyStmnt(;) emptyStmnt(;) emptyStmnt(;) ' + + 'expressionStmnt(number(2) ;()))'], + ['{}', + 'program(blockStmnt({ }))'], + ['{null}', + 'program(blockStmnt({ expressionStmnt(null(null) ;()) }))'], + ['{\nfoo()\nbar();\n}', + 'program(blockStmnt({ expressionStmnt(call(identifier(foo) `(` `)`) ;()) ' + + 'expressionStmnt(call(identifier(bar) `(` `)`) ;) }))'], + ['{{{}}}', + 'program(blockStmnt({ blockStmnt({ blockStmnt({ }) }) }))'], + ['var x = y, z,\n a = b = c;', + 'program(varStmnt(var varDecl(x = identifier(y)) , varDecl(z) , varDecl(a = ' + + 'assignment(identifier(b) = identifier(c))) ;))'], + ['if (x === y);', + 'program(ifStmnt(if `(` binary(identifier(x) === identifier(y)) `)` emptyStmnt(;)))'], + ['if (z) return', + 'program(ifStmnt(if `(` identifier(z) `)` returnStmnt(return nil() ;())))'], + ['if (a) b; else c', + 'program(ifStmnt(if `(` identifier(a) `)` expressionStmnt(identifier(b) ;) else ' + + 'expressionStmnt(identifier(c) ;())))'], + ['if (n === 1) { foo(); } else if (n === 2) { bar(); } else { baz(); }', + 'program(ifStmnt(if `(` binary(identifier(n) === number(1)) `)` blockStmnt(' + + '{ expressionStmnt(call(identifier(foo) `(` `)`) ;) }) else ifStmnt(' + + 'if `(` binary(identifier(n) === number(2)) `)` blockStmnt(' + + '{ expressionStmnt(call(identifier(bar) `(` `)`) ;) }) else blockStmnt(' + + '{ expressionStmnt(call(identifier(baz) `(` `)`) ;) }))))'], + ['while 
(false);', + 'program(whileStmnt(while `(` boolean(false) `)` emptyStmnt(;)))'], + ['while (/foo/.test(bar.baz)) {\n bar = bar.baz;\n}', + 'program(whileStmnt(while `(` call(dot(regex(/foo/) . test) `(` ' + + 'dot(identifier(bar) . baz) `)`) `)` blockStmnt({ expressionStmnt(' + + 'assignment(identifier(bar) = dot(identifier(bar) . baz)) ;) })))'], + ['while (false) while (false);', + 'program(whileStmnt(while `(` boolean(false) `)` ' + + 'whileStmnt(while `(` boolean (false) `)` emptyStmnt(;))))'], + ['do a; while (b);', + 'program(doStmnt(do expressionStmnt(identifier(a) ;) while `(` identifier(b) `)` ;))'], + ['do { x-- } while (x);', + 'program(doStmnt(do blockStmnt({ expressionStmnt(postfix(identifier(x) --) ;()) }) ' + + 'while `(` identifier(x) `)` ;))'], + ['do a\n while (b)\n x++', + 'program(doStmnt(do expressionStmnt(identifier(a) ;()) while `(` identifier(b) `)` ;()) ' + + 'expressionStmnt(postfix(identifier(x) ++) ;()))'], + ["for(;;);", + "program(forStmnt(for `(` forSpec(nil() ; nil() ; nil()) `)` emptyStmnt(;)))"], + ["for(x in y);", + "program(forStmnt(for `(` forInSpec(identifier(x) in identifier(y)) `)` emptyStmnt(;)))"], + ["for(var x in y);", + "program(forStmnt(for `(` forVarInSpec(var varDecl(x) in identifier(y)) `)` emptyStmnt(;)))"], + ["for(var x;;);", + "program(forStmnt(for `(` forVarSpec(var varDecl(x) ; nil() ; nil()) `)` emptyStmnt(;)))"], + ["for(var i=0;i Date: Sun, 9 Sep 2012 22:35:22 -0700 Subject: [PATCH 29/86] more tests --- packages/jsparse/package.js | 4 +- packages/jsparse/parser.js | 19 ++++-- packages/jsparse/parser_tests.js | 112 ++++++++++++++++++++++++++++--- packages/jsparse/parserlib.js | 4 +- 4 files changed, 119 insertions(+), 20 deletions(-) diff --git a/packages/jsparse/package.js b/packages/jsparse/package.js index cb2dd675c5..508709e1d3 100644 --- a/packages/jsparse/package.js +++ b/packages/jsparse/package.js @@ -11,5 +11,7 @@ Package.on_test(function (api) { api.use('tinytest'); api.use('jsparse', 'client'); - 
api.add_files('parser_tests.js', ['client', 'server']); + api.add_files('parser_tests.js', + 'client'); // for faster loading + //['client', 'server']); }); diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 237e34af69..0cf8218733 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -379,9 +379,9 @@ var parse = function (tokenizer) { var emptyStatement = named('emptyStmnt', token(';')); // not maybeSemicolon - var blockStatement = named('blockStmnt', seq( + var blockStatement = describe('block', named('blockStmnt', seq( token('{'), unpack(opt(statements, lookAheadToken('}'))), - token('}'))); + token('}')))); var varDeclFunc = memoizeBooleanFunc(function (noIn) { return named( @@ -411,9 +411,9 @@ var parse = function (tokenizer) { 'semicolon', lookAhead(lookAheadToken(';'), seq( - token(';'), + describe('semicolon', token(';')), opt(expressionPtr, revalue(lookAheadToken(';'), named('nil', []))), - token(';'), + describe('semicolon', token(';')), opt(expressionPtr, revalue(lookAheadToken(')'), named('nil', [])))))); var inExpr = seq(token('in'), expression); var inExprExpectingSemi = describe('semicolon', @@ -505,7 +505,14 @@ var parse = function (tokenizer) { var throwStatement = named( 'throwStmnt', seq(token('throw'), - lookAhead(noLineTerminatorHere, expression), + lookAhead(revalue(noLineTerminatorHere, + function (v, t) { + if (v) + return v; + if (t.peekText) + throw parseError(t, expression, 'end of line'); + return null; + }), expression), maybeSemicolon)); var withStatement = named( @@ -535,7 +542,7 @@ var parse = function (tokenizer) { token('}'))); var catchFinally = describe( - 'catchOrFinally', + 'catch', lookAhead(lookAheadToken('catch finally'), seq( or(named( diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index ba62f98292..36ac76e017 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -1,5 +1,4 @@ - var allNodeNames = [ ";", "array", 
@@ -135,7 +134,7 @@ var parseToTreeString = function (code) { var makeTester = function (test) { return { // Parse code and make sure it matches expectedTreeString. - goodParse: function (code, expectedTreeString) { + goodParse: function (code, expectedTreeString, regexTokenHints) { var expectedTree = parseTreeString(expectedTreeString); // first use lexer to collect all tokens @@ -146,6 +145,8 @@ var makeTester = function (test) { test.fail("Lexer error at " + lexer.lastPos); if (Lexer.isToken(lexer.type)) allTokensInOrder.push({ pos: lexer.lastPos, text: lexer.text }); + if (regexTokenHints && regexTokenHints[allTokensInOrder.length]) + lexer.divisionPermitted = false; } lexer = new Lexer(code); @@ -224,14 +225,19 @@ var makeTester = function (test) { // saying "Expected statement". The test "1 `semicolon`2" will try // to parse "1 2" and assert that the error "Expected semicolon" // appeared after the space and before the 2. + // + // A second backtick-quoted string is used as the "found" token + // in the error message. badParse: function (code) { var constructMessage = function (whatExpected, pos, found, after) { return "Expected " + whatExpected + " after `" + after + - "` at position " + pos + ", found " + - (found ? "`" + found + "`" : "EOF"); + "` at position " + pos + ", found " + found; }; var pos = code.indexOf('`'); - var whatExpected = code.match(/`(.*?)`/)[1]; + + var backticked = code.match(/`.*?`/g); + var whatExpected = backticked[0] && backticked[0].slice(1,-1); + var found = backticked[1] && backticked[1].slice(1, -1); code = code.replace(/`.*?`/g, ''); var parsed = false; @@ -247,8 +253,10 @@ var makeTester = function (test) { test.isFalse(parsed); test.isTrue(error); var after = tokenizer.text; - var found = tokenizer.peekText; - test.equal(error.message, constructMessage(whatExpected, pos, found, after)); + found = (found || (tokenizer.peekText ? 
'`' + tokenizer.peekText + '`' + : 'EOF')); + test.equal(error.message, + constructMessage(whatExpected, pos, found, after)); } }; }; @@ -382,8 +390,38 @@ Tinytest.add("jsparse - syntax forms", function (test) { ["break foo;", "program(breakStmnt(break foo ;))"], ["break\n foo;", - "program(breakStmnt(break nil() ;()) expressionStmnt(identifier(foo) ;))"] - // throwStmnt, ... + "program(breakStmnt(break nil() ;()) expressionStmnt(identifier(foo) ;))"], + ["throw e;", + "program(throwStmnt(throw identifier(e) ;))"], + ["throw e", + "program(throwStmnt(throw identifier(e) ;()))"], + ["throw new Error;", + "program(throwStmnt(throw new(new identifier(Error)) ;))"], + ["with(x);", + "program(withStmnt(with `(` identifier(x) `)` emptyStmnt(;)))"], + ["with(a=b) {}", + "program(withStmnt(with `(` assignment(identifier(a) = identifier(b)) `)` blockStmnt({ })))"], + ["switch(x) {}", + "program(switchStmnt(switch `(` identifier(x) `)` { }))"], + ["switch(x) {case 1:case 2:case 3:default:case 4:}", + "program(switchStmnt(switch `(` identifier(x) `)` { " + + "case(case number(1) :) case(case number(2) :) case(case number(3) :) " + + "default(default :) case(case number(4) :) }))"], + ["switch(x) {\ncase 1:\n return\ncase 2:\ncase 3:\n throw e}", + "program(switchStmnt(switch `(` identifier(x) `)` { " + + "case(case number(1) : returnStmnt(return nil() ;())) " + + "case(case number(2) :) case(case number(3) : " + + "throwStmnt(throw identifier(e) ;())) }))"], + ["switch(x) {default:;}", + "program(switchStmnt(switch `(` identifier(x) `)` { default(default : emptyStmnt(;)) }))"], + ["try {} catch (e) {} finally {}", + "program(tryStmnt(try blockStmnt({ }) catch(catch `(` e `)` blockStmnt({ })) " + + "finally(finally blockStmnt({ }))))"], + ["try {} finally {}", + "program(tryStmnt(try blockStmnt({ }) nil() finally(finally blockStmnt({ }))))"], + ["try {} catch (e) {}", + "program(tryStmnt(try blockStmnt({ }) catch(catch `(` e `)` blockStmnt({ })) nil()))"] + // label, debugger 
... ]; _.each(trials, function (tr) { tester.goodParse(tr[0], tr[1]); @@ -408,9 +446,61 @@ Tinytest.add("jsparse - bad parses", function (test) { 'for (`forSpec`);', 'for (1\n`semicolon`2\n3);', 'continue `semicolon`1+1;', - 'break `semicolon`1+1;' - ]; + 'break `semicolon`1+1;', + 'throw`expression`', + 'throw`expression`;', + 'throw\n`expression``end of line`e', + 'throw `expression`=;', + 'with(`expression`);', + 'switch(`expression`)', + 'switch(x)`{`;', + 'try`block`', + 'try {}`catch`', + 'try {} catch`(`;', + 'try {} catch(e)`block`;', + '1+1`semicolon`:', + '{a:`statement`}' + ]; _.each(trials, function (tr) { tester.badParse(tr); }); +}); + +Tinytest.add("jsparse - regex division ambiguity", function (test) { + var tester = makeTester(test); + tester.goodParse("if (e) /f/g;", + "program(ifStmnt(if `(` identifier(e) `)` expressionStmnt(regex(/f/g) ;)))", + {4: true}); + tester.goodParse("++/x/.y;", + "program(expressionStmnt(unary(++ dot(regex(/x/) . y)) ;))", + {1: true}); + tester.goodParse("x++/2/g;", + "program(expressionStmnt(binary(binary(postfix(identifier(x) ++) / " + + "number(2)) / identifier(g)) ;))"); + tester.goodParse("(1+1)/2/g;", + "program(expressionStmnt(binary(binary(parens(`(` binary(number(1) + " + + "number(1)) `)`) / " + + "number(2)) / identifier(g)) ;))"); + tester.goodParse("/x/", + "program(expressionStmnt(regex(/x/) ;()))"); +}); + +Tinytest.add("jsparse - semicolon insertion", function (test) { + var tester = makeTester(test); + // Spec section 7.9.2 + tester.badParse("{ 1 `semicolon`2 } 3"); + tester.goodParse("{ 1\n2 } 3", "program(blockStmnt({ expressionStmnt(number(1) " + + ";()) expressionStmnt(number(2) ;()) }) expressionStmnt(number(3) ;()))"); + tester.badParse("for (a; b\n`semicolon`)"); + tester.goodParse("return\na + b", + "program(returnStmnt(return nil() ;()) " + + "expressionStmnt(binary(identifier(a) + identifier(b)) ;()))"); + tester.goodParse("a = b\n++c", + "program(expressionStmnt(assignment(identifier(a) = 
identifier(b)) ;())" + + "expressionStmnt(unary(++ identifier(c)) ;()))"); + tester.badParse("if (a > b)\n`statement`else c = d"); + tester.goodParse("a = b + c\n(d + e).print()", + "program(expressionStmnt(assignment(identifier(a) = " + + "binary(identifier(b) + call(dot(call(identifier(c) `(` " + + "binary(identifier(d) + identifier(e)) `)`) . print) `(` `)`))) ;()))"); }); \ No newline at end of file diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 56d67e6a6a..3601e7762a 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -66,7 +66,7 @@ var describe = function (description, parser) { // Call this as `throw parseError(...)`. // `expected` is a parser, `after` is a string. -var parseError = function (t, expected) { +var parseError = function (t, expected, found) { var str = (expected.description ? "Expected " + expected.description : // all parsers that might error should have descriptions, // but just in case: @@ -74,7 +74,7 @@ var parseError = function (t, expected) { str += " after `" + t.text + "`"; var pos = t.pos; str += " at position " + pos; - str += ", found " + (t.peekText ? "`" + t.peekText + "`" : "EOF"); + str += ", found " + (found || (t.peekText ? 
"`" + t.peekText + "`" : "EOF")); var e = new Error(str); return e; }; From 9f2b4eb0364b33cc565facd01436001ef350f042 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Mon, 10 Sep 2012 11:40:19 -0700 Subject: [PATCH 30/86] more tests --- packages/jsparse/parser_tests.js | 74 ++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 36ac76e017..36e6f7a7a0 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -1,4 +1,6 @@ +// XXX test treatment of comments, including multilines with newlines + var allNodeNames = [ ";", "array", @@ -300,6 +302,7 @@ Tinytest.add("jsparse - tokenization errors", function (test) { Tinytest.add("jsparse - syntax forms", function (test) { var tester = makeTester(test); var trials = [ + // STATEMENTS ['1', 'program(expressionStmnt(number(1) ;()))'], ['1;;;;2', @@ -420,8 +423,64 @@ Tinytest.add("jsparse - syntax forms", function (test) { ["try {} finally {}", "program(tryStmnt(try blockStmnt({ }) nil() finally(finally blockStmnt({ }))))"], ["try {} catch (e) {}", - "program(tryStmnt(try blockStmnt({ }) catch(catch `(` e `)` blockStmnt({ })) nil()))"] - // label, debugger ... 
+ "program(tryStmnt(try blockStmnt({ }) catch(catch `(` e `)` blockStmnt({ })) nil()))"], + ["a:;", + "program(labelStmnt(a : emptyStmnt(;)))"], + ["{x:1}", + "program(blockStmnt({ labelStmnt(x : expressionStmnt(number(1) ;())) }))"], + ["{x:y:z:1}", + "program(blockStmnt({ labelStmnt(x : labelStmnt(y : " + + "labelStmnt(z : expressionStmnt(number(1) ;())))) }))"], + [";;foo:\nfor(;;);", + "program(emptyStmnt(;) emptyStmnt(;) labelStmnt(foo : " + + "forStmnt(for `(` forSpec(nil() ; nil() ; nil()) `)` emptyStmnt(;))))"], + ["debugger", + "program(debuggerStmnt(debugger ;()))"], + ["debugger;", + "program(debuggerStmnt(debugger ;))"], + ["function foo() {}", + "program(functionDecl(function foo `(` `)` { }))"], + ["function foo() {function bar() {}}", + "program(functionDecl(function foo `(` `)` { functionDecl(function bar `(` `)` { }) }))"], + [";;function f() {};;", + "program(emptyStmnt(;) emptyStmnt(;) functionDecl(function f `(` `)` { }) " + + "emptyStmnt(;) emptyStmnt(;))"], + + // EXPRESSIONS + ["null + this - 3 + true", + "program(expressionStmnt(binary(binary(binary(null(null) + this(this)) - " + + "number(3)) + boolean(true)) ;()))"], + ["a / /b/mgi / c", + "program(expressionStmnt(binary(binary(identifier(a) / " + + "regex(/b/mgi)) / identifier(c)) ;()))"], + ["'a' + \"\" + \"b\" + '\\''", + "program(expressionStmnt(binary(binary(binary(string('a') + string(\"\")) + " + + "string(\"b\")) + string('\\'')) ;()))"], + ["_ + x0123 + $", + "program(expressionStmnt(binary(binary(identifier(_) + " + + "identifier(x0123)) + identifier($)) ;()))"], + ["if ((x = 1)) return ((1+2))*((1<<2));", + "program(ifStmnt(if `(` parens(`(` assignment(identifier(x) = number(1)) `)`) " + + "`)` returnStmnt(return binary(parens(`(` parens(`(` binary(number(1) + " + + "number(2)) `)`) `)`) * parens(`(` parens(`(` binary(number(1) << number(2)) " + + "`)`) `)`)) ;)))"], + ["[];", + "program(expressionStmnt(array([ ]) ;))"], + ["[,,,];", + "program(expressionStmnt(array([ , , , ]) 
;))"], + ["[(1,2),,3];", + "program(expressionStmnt(array([ parens(`(` comma(number(1) , " + + "number(2)) `)`) , , number(3) ]) ;))"], + ["({});", + "program(expressionStmnt(parens(`(` object({ }) `)`) ;))"], + ["({1:1});", + "program(expressionStmnt(parens(`(` object({ prop(numPropName(1) : number(1)) }) `)`) ;))"], + ["({x:true});", + "program(expressionStmnt(parens(`(` object({ prop(idPropName(x) : boolean(true)) }) `)`) ;))"], + ["({'a':b, c:'d', 1:null});", + "program(expressionStmnt(parens(`(` object({ prop(strPropName('a') : " + + "identifier(b)) , prop(idPropName(c) : string('d')) , prop(numPropName(1) " + + ": null(null)) }) `)`) ;))"] ]; _.each(trials, function (tr) { tester.goodParse(tr[0], tr[1]); @@ -459,8 +518,15 @@ Tinytest.add("jsparse - bad parses", function (test) { 'try {} catch`(`;', 'try {} catch(e)`block`;', '1+1`semicolon`:', - '{a:`statement`}' - ]; + '{a:`statement`}', + 'function `IDENTIFIER`() {}', + 'foo: `statement`function foo() {}', + '[`expression`=', + '[,,`expression`=', + '({`name:value`true:3})', + '({1:2,3`:`})', + '({1:2,`name:value`' + ]; _.each(trials, function (tr) { tester.badParse(tr); }); From 940955a71d788c45f9f2ca61a11e83704dfd4254 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Mon, 10 Sep 2012 11:40:31 -0700 Subject: [PATCH 31/86] fix typo --- packages/jsparse/package.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/jsparse/package.js b/packages/jsparse/package.js index 508709e1d3..75a2da8800 100644 --- a/packages/jsparse/package.js +++ b/packages/jsparse/package.js @@ -1,5 +1,5 @@ Package.describe({ - summary: "Full-featured JaavScript parser" + summary: "Full-featured JavaScript parser" }); Package.on_use(function (api) { From 1043248092677fa02d05cceb66e1986d1ccce887 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Mon, 10 Sep 2012 15:20:34 -0700 Subject: [PATCH 32/86] finished syntax forms tests --- packages/jsparse/parser_tests.js | 114 ++++++++++++++++++++++++++++--- 1 file 
changed, 106 insertions(+), 8 deletions(-) diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 36e6f7a7a0..1bf9bfcf17 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -1,6 +1,4 @@ -// XXX test treatment of comments, including multilines with newlines - var allNodeNames = [ ";", "array", @@ -170,10 +168,14 @@ var makeTester = function (test) { if (nextTokenIndex >= allTokensInOrder.length) test.fail("Too many tokens: " + (nextTokenIndex + 1)); var referenceToken = allTokensInOrder[nextTokenIndex++]; - test.equal(part.text, referenceToken.text); - test.equal(part.pos, referenceToken.pos); - test.equal(code.substring(part.pos, - part.pos + part.text.length), part.text); + if (part.text !== referenceToken.text) + test.fail(part.text + " !== " + referenceToken.text); + if (part.pos !== referenceToken.pos) + test.fail(part.pos + " !== " + referenceToken.pos); + if (code.substring(part.pos, + part.pos + part.text.length) !== part.text) + test.fail("Didn't see " + part.text + " at " + part.pos + + " in " + code); } else { test.fail("Unknown tree part: " + part); } @@ -445,6 +447,8 @@ Tinytest.add("jsparse - syntax forms", function (test) { [";;function f() {};;", "program(emptyStmnt(;) emptyStmnt(;) functionDecl(function f `(` `)` { }) " + "emptyStmnt(;) emptyStmnt(;))"], + ["function foo(a,b,c) {}", + "program(functionDecl(function foo `(` a , b , c `)` { }))"], // EXPRESSIONS ["null + this - 3 + true", @@ -480,7 +484,91 @@ Tinytest.add("jsparse - syntax forms", function (test) { ["({'a':b, c:'d', 1:null});", "program(expressionStmnt(parens(`(` object({ prop(strPropName('a') : " + "identifier(b)) , prop(idPropName(c) : string('d')) , prop(numPropName(1) " + - ": null(null)) }) `)`) ;))"] + ": null(null)) }) `)`) ;))"], + ["(function () {});", + "program(expressionStmnt(parens(`(` functionExpr(function nil() `(` `)` { }) `)`) ;))"], + ["(function foo() {});", + "program(expressionStmnt(parens(`(` 
functionExpr(function foo `(` `)` { }) `)`) ;))"], + ["x = function () {}.y;", + "program(expressionStmnt(assignment(identifier(x) = dot(functionExpr(" + + "function nil() `(` `)` { }) . y)) ;))"], + ["(function (a) {})", + "program(expressionStmnt(parens(`(` functionExpr(function nil() " + + "`(` a `)` { }) `)`) ;()))"], + ["(function (a,b,c) {})", + "program(expressionStmnt(parens(`(` functionExpr(function nil() `(` " + + "a , b , c `)` { }) `)`) ;()))"], + ["foo.bar.baz;", + "program(expressionStmnt(dot(dot(identifier(foo) . bar) . baz) ;))"], + ["foo[bar,bar][baz].qux[1+1];", + "program(expressionStmnt(bracket(dot(bracket(bracket(identifier(foo) " + + "[ comma(identifier(bar) , identifier(bar)) ]) [ identifier(baz) ]) . qux) " + + "[ binary(number(1) + number(1)) ]) ;))"], + ["new new a.b.c[d]", + "program(expressionStmnt(new(new new(new bracket(dot(dot(identifier(a) " + + ". b) . c) [ identifier(d) ]))) ;()))"], + ["new new a.b.c[d]()", + "program(expressionStmnt(new(new newcall(new " + + "bracket(dot(dot(identifier(a) . b) . c) [ identifier(d) ]) `(` `)`)) ;()))"], + ["new new a.b.c[d]()()", + "program(expressionStmnt(newcall(new newcall(new " + + "bracket(dot(dot(identifier(a) . b) . c) [ identifier(d) ]) `(` `)`) `(` `)`) ;()))"], + ["new foo(x).bar(y)", + "program(expressionStmnt(call(dot(newcall(new identifier(foo) `(` " + + "identifier(x) `)`) . bar) `(` identifier(y) `)`) ;()))"], + ["new new foo().bar", + "program(expressionStmnt(new(new dot(newcall(new identifier(foo) `(` `)`) . bar)) ;()))"], + ["delete void typeof - + ~ ! -- ++ x;", + "program(expressionStmnt(unary(delete unary(void unary(typeof unary(- unary(+ " + + "unary(~ unary(! 
unary(-- unary(++ identifier(x)))))))))) ;))"], + ["x++ + ++y", + "program(expressionStmnt(binary(postfix(identifier(x) ++) + " + + "unary(++ identifier(y))) ;()))"], + ["1*2+3*4", + "program(expressionStmnt(binary(binary(number(1) * number(2)) " + + "+ binary(number(3) * number(4))) ;()))"], + ["a*b/c%d+e-f<>h>>>ik<=l>=m instanceof n in o==p!=q===r!==s&t^u|v&&w||x", + "program(expressionStmnt(binary(binary(binary(binary(binary(binary(binary(" + + "binary(binary(binary(binary(binary(binary(binary(binary(binary(binary(binary(" + + "binary(binary(binary(binary(binary(identifier(a) * identifier(b)) / " + + "identifier(c)) % identifier(d)) + identifier(e)) - identifier(f)) << identifier(g)) " + + ">> identifier(h)) >>> identifier(i)) < identifier(j)) > identifier(k)) <= " + + "identifier(l)) >= identifier(m)) instanceof identifier(n)) in identifier(o)) == " + + "identifier(p)) != identifier(q)) === identifier(r)) !== identifier(s)) & " + + "identifier(t)) ^ identifier(u)) | identifier(v)) && identifier(w)) || " + + "identifier(x)) ;()))"], + ["a||b&&c|d^e&f!==g===h!=i==j in k instanceof l>=m<=n>>q>>r<= identifier(m)) <= identifier(n)) < " + + "identifier(o)) < binary(binary(binary(identifier(p) >>> identifier(q)) >> " + + "identifier(r)) << binary(binary(identifier(s) - identifier(t)) + " + + "binary(binary(binary(identifier(u) % identifier(v)) / identifier(w)) * " + + "identifier(x))))))))))) ;()))"], + ["a?b:c", + "program(expressionStmnt(ternary(identifier(a) ? identifier(b) : " + + "identifier(c)) ;()))"], + ["1==2?3=4:5=6", + "program(expressionStmnt(ternary(binary(number(1) == number(2)) ? 
" + + "assignment(number(3) = number(4)) : assignment(number(5) = number(6))) ;()))"], + ["1=2,3=4", + "program(expressionStmnt(comma(assignment(number(1) = number(2)) , " + + "assignment(number(3) = number(4))) ;()))"], + ["a=b=c=d", + "program(expressionStmnt(assignment(identifier(a) = assignment(identifier(b) " + + "= assignment(identifier(c) = identifier(d)))) ;()))"], + ["x[0]=x[1]=true", + "program(expressionStmnt(assignment(bracket(identifier(x) [ number(0) ]) = " + + "assignment(bracket(identifier(x) [ number(1) ]) = boolean(true))) ;()))"], + ["a*=b/=c%=d+=e-=f<<=g>>=h>>>=i&=j^=k|=l", + "program(expressionStmnt(assignment(identifier(a) *= assignment(identifier(b) " + + "/= assignment(identifier(c) %= assignment(identifier(d) += " + + "assignment(identifier(e) -= assignment(identifier(f) <<= " + + "assignment(identifier(g) >>= assignment(identifier(h) >>>= " + + "assignment(identifier(i) &= assignment(identifier(j) ^= " + + "assignment(identifier(k) |= identifier(l)))))))))))) ;()))"] ]; _.each(trials, function (tr) { tester.goodParse(tr[0], tr[1]); @@ -525,7 +613,8 @@ Tinytest.add("jsparse - bad parses", function (test) { '[,,`expression`=', '({`name:value`true:3})', '({1:2,3`:`})', - '({1:2,`name:value`' + '({1:2,`name:value`', + 'x.`IDENTIFIER`true' ]; _.each(trials, function (tr) { tester.badParse(tr); @@ -569,4 +658,13 @@ Tinytest.add("jsparse - semicolon insertion", function (test) { "program(expressionStmnt(assignment(identifier(a) = " + "binary(identifier(b) + call(dot(call(identifier(c) `(` " + "binary(identifier(d) + identifier(e)) `)`) . 
print) `(` `)`))) ;()))"); +}); + +Tinytest.add("jsparse - comments", function (test) { + var tester = makeTester(test); + // newline in multi-line comment makes it into a line break for semicolon + // insertion purposes + tester.badParse("1/**/`semicolon`2"); + tester.goodParse("1/*\n*/2", + "program(expressionStmnt(number(1) ;()) expressionStmnt(number(2) ;()))"); }); \ No newline at end of file From 027f41e8c66493a979c025a1e02c34481ac3f628 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Mon, 10 Sep 2012 15:20:45 -0700 Subject: [PATCH 33/86] fix /= operator --- packages/jsparse/lexer.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/jsparse/lexer.js b/packages/jsparse/lexer.js index 5736050fa0..01dd143f29 100644 --- a/packages/jsparse/lexer.js +++ b/packages/jsparse/lexer.js @@ -67,7 +67,7 @@ var rPunctuator = new RegExp( // '*' for '*=', etc. .split(' ').sort(function (a,b) { return b.length - a.length; }) .join('|'), 'g'); -var rDivPunctuator = /\/|\/=/g; +var rDivPunctuator = /\/=?/g; // Section 7.8.3 var rHexLiteral = /0x[0-9a-fA-F]+$/g; var rOctLiteral = /0[0-7]+/g; // deprecated From a6bd5747b27459ca43107003f9fb3c1e2089bc20 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 18:01:50 -0700 Subject: [PATCH 34/86] ParseNode object --- .../unfinished/jsparse-demo/jsparse-demo.js | 8 +- packages/jsparse/parser.js | 56 +++++++------- packages/jsparse/parser_tests.js | 75 ++++++++++--------- packages/jsparse/parserlib.js | 22 +++--- 4 files changed, 84 insertions(+), 77 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index 99c4138675..cbcf0e75a9 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -60,16 +60,16 @@ if (Meteor.is_client) { var curPos = 0; var unclosedInfos = []; var toHtml = function (obj) { - if (_.isArray(obj)) { - var head = obj[0] || ''; - var rest = 
obj.slice(1); + if (obj instanceof ParseNode) { + var head = obj.name || ''; + var children = obj.children; var info = { startPos: curPos }; var isStatement = (head.indexOf('Stmnt') >= 0); var html = Spark.setDataContext( info, '
' + Handlebars._escape(head) + '
' + - _.map(rest, toHtml).join('') + '
'); + _.map(children, toHtml).join('') + '
'); unclosedInfos.push(info); return html; } else if (obj.text) { diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 0cf8218733..89a3c90faf 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -77,9 +77,9 @@ var parse = function (tokenizer) { token(']'))); var propertyName = describe('propertyName', or( - named('idPropName', tokenClass('IDENTIFIER')), - named('numPropName', tokenClass('NUMBER')), - named('strPropName', tokenClass('STRING')))); + named('idPropName', seq(tokenClass('IDENTIFIER'))), + named('numPropName', seq(tokenClass('NUMBER'))), + named('strPropName', seq(tokenClass('STRING'))))); var nameColonValue = describe( 'name:value', named('prop', seq(propertyName, token(':'), assignmentExpressionPtr))); @@ -110,13 +110,13 @@ var parse = function (tokenizer) { var primaryOrFunctionExpression = describe('expression', - or(named('this', token('this')), - named('identifier', tokenClass('IDENTIFIER')), - named('number', tokenClass('NUMBER')), - named('boolean', tokenClass('BOOLEAN')), - named('null', tokenClass('NULL')), - named('regex', tokenClass('REGEX')), - named('string', tokenClass('STRING')), + or(named('this', seq(token('this'))), + named('identifier', seq(tokenClass('IDENTIFIER'))), + named('number', seq(tokenClass('NUMBER'))), + named('boolean', seq(tokenClass('BOOLEAN'))), + named('null', seq(tokenClass('NULL'))), + named('regex', seq(tokenClass('REGEX'))), + named('string', seq(tokenClass('STRING'))), named('parens', seq(token('('), expressionPtr, token(')'))), arrayLiteral, @@ -340,7 +340,7 @@ var parse = function (tokenizer) { // an implicit semicolon. This // is safe because a colon can never legally // follow a semicolon anyway. - lookAheadToken(':')))))); + revalue(lookAheadToken(':'), named(';', []))))))); // it's hard to parse statement labels, as in // `foo: x = 1`, because we can't tell from the @@ -358,13 +358,16 @@ var parse = function (tokenizer) { if (! 
exprStmnt) return null; - var expr = exprStmnt[1]; - var maybeSemi = exprStmnt[2]; - if (expr[0] !== 'identifier' || ! isArray(maybeSemi)) { - // For better error messages, for example in `1+1:`, - // if there is a colon at the end of the expression, - // fail now and say "Expected semicolon" instead of failing - // later saying "Expected statement" at the colon. + var expr = exprStmnt.children[0]; + var maybeSemi = exprStmnt.children[1]; + if (expr.name !== 'identifier' || + ! (maybeSemi instanceof ParseNode)) { + // We either have a non-identifier expression or a present + // semicolon. This is not a label. + // + // Fail now if we are looking at a colon, causing an + // error message on input like `1+1:` of the same kind + // you'd get without statement label parsing. runRequired(noColon, t); return exprStmnt; } @@ -374,10 +377,10 @@ var parse = function (tokenizer) { return exprStmnt; return named('labelStmnt', - [expr[1]].concat(rest)); + [expr.children[0]].concat(rest)); }; - var emptyStatement = named('emptyStmnt', token(';')); // not maybeSemicolon + var emptyStatement = named('emptyStmnt', seq(token(';'))); // not maybeSemicolon var blockStatement = describe('block', named('blockStmnt', seq( token('{'), unpack(opt(statements, lookAheadToken('}'))), @@ -467,12 +470,13 @@ var parse = function (tokenizer) { // produces [`var` `x` `;` nil `;` nil]. if (! 
clauses) return null; - if (clauses.length === 4) - clauses[0] = 'forInSpec'; - else if (clauses.length === 5) - clauses[0] = 'forVarInSpec'; - else if (clauses.length >= 7) - clauses[0] = 'forVarSpec'; + var numChildren = clauses.children.length; + if (numChildren === 3) + return new ParseNode('forInSpec', clauses.children); + else if (numChildren === 4) + return new ParseNode('forVarInSpec', clauses.children); + else if (numChildren >= 6) + return new ParseNode('forVarSpec', clauses.children); return clauses; }); diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 1bf9bfcf17..46ac94bafc 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -73,7 +73,7 @@ _.each(allNodeNames, function (n) { allNodeNamesSet[n] = true; }); // backticks. If they do, they can be written enclosed in backticks. To escape // a backtick within backticks, double it. // -// `stringifyNode` generates "canonical" tree strings, which have no extra escaping +// `stringifyTree` generates "canonical" tree strings, which have no extra escaping // or whitespace, just one space between items in a Node. 
var parseTreeString = function (str) { @@ -92,6 +92,8 @@ var parseTreeString = function (str) { break; case ')': ptr = ptrStack.pop(); + var nodeArray = ptr.pop(); + ptr.push(new ParseNode(nodeArray[0], nodeArray.slice(1))); break; case '`': if (txt.length === 1) @@ -109,26 +111,29 @@ var parseTreeString = function (str) { throw new Error("Mismatched parentheses in " + str); return results[0]; }; -var stringifyNode = function (obj) { - if (obj.text) - obj = obj.text; - if (typeof obj === "string") { - if (/[\s()`]/.test(obj)) - return '`' + obj.replace(/`/g, '``') + '`'; - else - return obj; - } else { - return (stringifyNode(obj[0]) + '(' + - _.map(obj.slice(1), stringifyNode).join(' ') + +var escapeTokenString = function (str) { + if (/[\s()`]/.test(str)) + return '`' + str.replace(/`/g, '``') + '`'; + else + return str; +}; +var stringifyTree = function (tree) { + if (tree instanceof ParseNode) + return (escapeTokenString(tree.name) + '(' + + _.map(tree.children, stringifyTree).join(' ') + ')'); - } + + // Treat a token object or string as a token. + if (tree.text) + tree = tree.text; + return escapeTokenString(tree); }; var parseToTreeString = function (code) { var lexer = new Lexer(code); var tokenizer = new Tokenizer(code); var tree = parse(tokenizer); - return stringifyNode(tree); + return stringifyTree(tree); }; var makeTester = function (test) { @@ -154,30 +159,31 @@ var makeTester = function (test) { var actualTree = parse(tokenizer); var nextTokenIndex = 0; - var check = function (part) { - if (_.isArray(part) && part.length) { - // This is a NODE (non-terminal). Make sure it actually is. - if (! (part[0] && typeof part[0] === "string" && - allNodeNamesSet[part[0]] === true)) - test.fail("Not a node name: " + part[0]); - _.each(part.slice(1), check); - } else if (typeof part === 'object' && part.text && - (typeof part.pos === 'number')) { + var check = function (tree) { + if (tree instanceof ParseNode) { + // This is a NODE (non-terminal). 
+ var nodeName = tree.name; + if (! (nodeName && typeof nodeName === "string" && + allNodeNamesSet[nodeName] === true)) + test.fail("Not a node name: " + nodeName); + _.each(tree.children, check); + } else if (typeof tree === 'object' && tree.text && + (typeof tree.pos === 'number')) { // This is a TOKEN (terminal). // Make sure we are visiting every token once, in order. if (nextTokenIndex >= allTokensInOrder.length) test.fail("Too many tokens: " + (nextTokenIndex + 1)); var referenceToken = allTokensInOrder[nextTokenIndex++]; - if (part.text !== referenceToken.text) - test.fail(part.text + " !== " + referenceToken.text); - if (part.pos !== referenceToken.pos) - test.fail(part.pos + " !== " + referenceToken.pos); - if (code.substring(part.pos, - part.pos + part.text.length) !== part.text) - test.fail("Didn't see " + part.text + " at " + part.pos + + if (tree.text !== referenceToken.text) + test.fail(tree.text + " !== " + referenceToken.text); + if (tree.pos !== referenceToken.pos) + test.fail(tree.pos + " !== " + referenceToken.pos); + if (code.substring(tree.pos, + tree.pos + tree.text.length) !== tree.text) + test.fail("Didn't see " + tree.text + " at " + tree.pos + " in " + code); } else { - test.fail("Unknown tree part: " + part); + test.fail("Unknown tree part: " + tree); } }; @@ -185,8 +191,8 @@ var makeTester = function (test) { if (nextTokenIndex !== allTokensInOrder.length) test.fail("Too few tokens: " + nextTokenIndex); - test.equal(stringifyNode(actualTree), - stringifyNode(expectedTree), code); + test.equal(stringifyTree(actualTree), + stringifyTree(expectedTree), code); }, // Takes code with part of it surrounding with backticks. 
// Removes the two backtick characters, tries to parse the code, @@ -614,7 +620,8 @@ Tinytest.add("jsparse - bad parses", function (test) { '({`name:value`true:3})', '({1:2,3`:`})', '({1:2,`name:value`', - 'x.`IDENTIFIER`true' + 'x.`IDENTIFIER`true', + 'foo;`semicolon`:;' ]; _.each(trials, function (tr) { tester.badParse(tr); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 3601e7762a..0511b95f9e 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -9,6 +9,14 @@ var isArray = function (obj) { return obj && (typeof obj === 'object') && (typeof obj.length === 'number'); }; +var ParseNode = function (name, children) { + this.name = name; + this.children = children; + + if (! isArray(children)) + throw new Error("Expected array in new ParseNode(" + name + ", ...)"); +}; + Tokenizer = function (codeOrLexer) { // XXX rethink codeOrLexer later this.lexer = (codeOrLexer instanceof Lexer ? codeOrLexer : @@ -172,19 +180,7 @@ var named = function (name, parserOrResult) { function (value) { if (! value) return null; - - var result; - if (isArray(value) && ! 
value.named) - // bare array, prepend the name - result = [name].concat(Array.prototype.slice.call(value)); - else - // token or named array; construct a new named array - result = [name, value]; - - // don't name the same thing twice - result.named = true; - - return result; + return new ParseNode(name, Array.prototype.slice.call(value)); })); }; From 132b4897afe6bb877f1bb949b884e83cbcbf54bc Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 19:36:47 -0700 Subject: [PATCH 35/86] Parser object --- packages/jsparse/parser.js | 482 ++++++++++++++++++---------------- packages/jsparse/parserlib.js | 224 +++++++--------- 2 files changed, 360 insertions(+), 346 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 89a3c90faf..70f3a2f9b4 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -2,90 +2,119 @@ // XXX unit tests +// XXX remove unnecessary ParseNode.NILs in lookaheads +// XXX SeqParser +// XXX find all revalues, see if constant ones are necessary. +// API may be confusing if constant affects only non-null. + // What we don't have from ECMA-262 5.1: // - object literal trailing comma // - object literal get/set var parse = function (tokenizer) { - var noLineTerminatorHere = describe( + var noLineTerminatorHere = new Parser( 'noLineTerminator', function (t) { return t.isLineTerminatorHere ? null : []; }); + + // Like token, but marks tokens that need to defy the lexer's + // heuristic about whether the next '/' is a division or + // starts a regex. + var preSlashToken = function (text, divisionNotRegex) { + var inner = token(text); + return new Parser( + inner.expecting, + function (t) { + // temporarily set divisionPermitted, + // restoring it if we don't match. + var oldValue = t.lexer.divisionPermitted; + var result; + try { + t.lexer.divisionPermitted = divisionNotRegex; + result = inner.parse(t); + return result; + } finally { + if (! 
result) + t.lexer.divisionPermitted = oldValue; + } + }); + }; + // Function that takes one-item arrays to their single item and names other // arrays with `name`. Works on parsers too. - var nameIfMultipart = function (name, parser) { + var nodeIfMultipart = function (name, arrayParser) { return revalue( - parser, + arrayParser, function (parts) { if (! parts) return null; return (parts.length === 1) ? - parts[0] : named(name, parts); + parts[0] : new ParseNode(name, parts); }); }; // These "pointers" allow grammar circularity, i.e. accessing // later parsers from earlier ones. var expressionPtrFunc = function (noIn) { - return describe( + return new Parser( "expression", function (t) { - return expressionFunc(noIn)(t); + return expressionFunc(noIn).parse(t); }); }; var expressionPtr = expressionPtrFunc(false); var assignmentExpressionPtrFunc = function (noIn) { - return describe( + return new Parser( "expression", function (t) { - return assignmentExpressionFunc(noIn)(t); + return assignmentExpressionFunc(noIn).parse(t); }); }; var assignmentExpressionPtr = assignmentExpressionPtrFunc(false); - var functionBodyPtr = describe( + var functionBodyPtr = new Parser( "functionBody", function (t) { - return functionBody(t); + return functionBody.parse(t); }); - var statementPtr = describe( + var statementPtr = new Parser( "statement", function (t) { - return statement(t); + return statement.parse(t); }); var arrayLiteral = - named('array', - seq(token('['), - unpack(opt(list(token(',')))), - unpack( - opt( - list( - describe( - 'expression', - or(assignmentExpressionPtr, - // count a peeked-at ']' as an expression - // to support elisions at end, e.g. - // `[1,2,3,,,,,,]`. Because it's unpacked, - // the look-ahead won't show up in the - // parse tree. 
- unpack(lookAheadToken(']')))), - // list seperator is one or more commas - // to support elision - unpack(list(token(',')))), - lookAheadToken(']'))), - token(']'))); + node('array', + seq(token('['), + unpack(opt(list(token(',')))), + unpack( + opt( + list( + expecting( + 'expression', + or(assignmentExpressionPtr, + // count a peeked-at ']' as an expression + // to support elisions at end, e.g. + // `[1,2,3,,,,,,]`. Because it's unpacked, + // the look-ahead won't show up in the + // parse tree. + unpack(lookAheadToken(']')))), + // list seperator is one or more commas + // to support elision + unpack(list(token(',')))), + lookAheadToken(']'))), + token(']'))); - var propertyName = describe('propertyName', or( - named('idPropName', seq(tokenClass('IDENTIFIER'))), - named('numPropName', seq(tokenClass('NUMBER'))), - named('strPropName', seq(tokenClass('STRING'))))); - var nameColonValue = describe( + var propertyName = expecting('propertyName', or( + node('idPropName', seq(tokenClass('IDENTIFIER'))), + node('numPropName', seq(tokenClass('NUMBER'))), + node('strPropName', seq(tokenClass('STRING'))))); + var nameColonValue = expecting( 'name:value', - named('prop', seq(propertyName, token(':'), assignmentExpressionPtr))); + node('prop', seq(propertyName, token(':'), assignmentExpressionPtr))); var objectLiteral = - named('object', + node('object', seq(token('{'), unpack(opt(list(nameColonValue, token(',')), lookAheadToken('}'))), @@ -96,7 +125,7 @@ var parse = function (tokenizer) { return seq(token('function'), (nameRequired ? 
tokenClass('IDENTIFIER') : or(tokenClass('IDENTIFIER'), - revalue(lookAheadToken('('), named('nil', [])))), + revalue(lookAheadToken('('), ParseNode.NIL))), token('('), unpack(opt(list(tokenClass('IDENTIFIER'), token(',')), lookAheadToken(')'))), @@ -105,23 +134,23 @@ var parse = function (tokenizer) { unpack(functionBodyPtr), token('}')); }; - var functionExpression = named('functionExpr', + var functionExpression = node('functionExpr', functionFunc(false)); var primaryOrFunctionExpression = - describe('expression', - or(named('this', seq(token('this'))), - named('identifier', seq(tokenClass('IDENTIFIER'))), - named('number', seq(tokenClass('NUMBER'))), - named('boolean', seq(tokenClass('BOOLEAN'))), - named('null', seq(tokenClass('NULL'))), - named('regex', seq(tokenClass('REGEX'))), - named('string', seq(tokenClass('STRING'))), - named('parens', + expecting('expression', + or(node('this', seq(token('this'))), + node('identifier', seq(tokenClass('IDENTIFIER'))), + node('number', seq(tokenClass('NUMBER'))), + node('boolean', seq(tokenClass('BOOLEAN'))), + node('null', seq(tokenClass('NULL'))), + node('regex', seq(tokenClass('REGEX'))), + node('string', seq(tokenClass('STRING'))), + node('parens', seq(token('('), expressionPtr, token(')'))), - arrayLiteral, - objectLiteral, - functionExpression)); + arrayLiteral, + objectLiteral, + functionExpression)); var dotEnding = seq(token('.'), tokenClass('IDENTIFIER')); var bracketEnding = seq(token('['), expressionPtr, token(']')); @@ -142,14 +171,14 @@ var parse = function (tokenizer) { // call to "return" a valid l-value, as in `foo(bar) = baz`, // though no built-in or user-specifiable call has this property // (it would have to be defined by a browser or other "host"). - var lhsExpression = describe( + var lhsExpression = new Parser( 'expression', function (t) { // Accumulate all initial "new" keywords, not yet knowing // if they have a corresponding argument list later. 
var news = []; var n; - while ((n = newKeyword(t))) + while ((n = newKeyword.parse(t))) news.push(n); // Read the primaryOrFunctionExpression that will be the "core" @@ -158,8 +187,8 @@ var parse = function (tokenizer) { // and .foo add-ons. // if we have 'new' keywords, we are committed and must // match an expression or error. - var result = runMaybeRequired( - news.length, primaryOrFunctionExpression, t); + var result = primaryOrFunctionExpression.parse( + t, {required: news.length}); if (! result) return null; @@ -170,15 +199,15 @@ var parse = function (tokenizer) { var done = false; while (! done) { var r; - if ((r = dotEnding(t))) { - result = named('dot', [result].concat(r)); - } else if ((r = bracketEnding(t))) { - result = named('bracket', [result].concat(r)); - } else if ((r = callArgs(t))) { + if ((r = dotEnding.parse(t))) { + result = new ParseNode('dot', [result].concat(r)); + } else if ((r = bracketEnding.parse(t))) { + result = new ParseNode('bracket', [result].concat(r)); + } else if ((r = callArgs.parse(t))) { if (news.length) - result = named('newcall', [news.pop(), result].concat(r)); + result = new ParseNode('newcall', [news.pop(), result].concat(r)); else - result = named('call', [result].concat(r)); + result = new ParseNode('call', [result].concat(r)); } else { done = true; } @@ -188,7 +217,7 @@ var parse = function (tokenizer) { // paren-less constructions (`new Date`) are parsed. We've // already handled `new foo().bar()`, now handle `new new foo().bar`. 
while (news.length) - result = named('new', [news.pop(), result]); + result = new ParseNode('new', [news.pop(), result]); // mark any LeftHandSideExpression, for the benefit of // assignmentExpression @@ -199,9 +228,9 @@ var parse = function (tokenizer) { var postfixToken = token('++ --'); var postfixLookahead = lookAheadToken('++ --'); - var postfixExpression = describe( + var postfixExpression = expecting( 'expression', - nameIfMultipart( + nodeIfMultipart( 'postfix', seq(lhsExpression, unpack(opt(lookAhead(noLineTerminatorHere, @@ -209,18 +238,19 @@ var parse = function (tokenizer) { postfixToken))))))); var unaryList = opt(list(or(token('delete void typeof'), preSlashToken('++ -- + - ~ !', false)))); - var unaryExpression = describe( + var unaryExpression = new Parser( 'expression', function (t) { - var unaries = unaryList(t); + var unaries = unaryList.parse(t); // if we have unaries, we are committed and // have to match an expression or error. - var result = runMaybeRequired(unaries.length, postfixExpression, t); + var result = postfixExpression.parse( + t, {required: unaries.length}); if (! 
result) return null; while (unaries.length) - result = named('unary', [unaries.pop(), result]); + result = new ParseNode('unary', [unaries.pop(), result]); return result; }); @@ -250,7 +280,7 @@ var parse = function (tokenizer) { token('|'), token('&&'), token('||')]; - return describe( + return expecting( 'expression', binaryLeft(unaryExpression, binaryOps)); }); @@ -258,9 +288,9 @@ var parse = function (tokenizer) { var conditionalExpressionFunc = memoizeBooleanFunc( function (noIn) { - return describe( + return expecting( 'expression', - nameIfMultipart( + nodeIfMultipart( 'ternary', seq(binaryExpressionFunc(noIn), unpack(opt(seq( token('?'), @@ -273,10 +303,10 @@ var parse = function (tokenizer) { var assignmentExpressionFunc = memoizeBooleanFunc( function (noIn) { - return describe( + return new Parser( 'expression', function (t) { - var r = conditionalExpressionFunc(noIn)(t); + var r = conditionalExpressionFunc(noIn).parse(t); if (! r) return null; @@ -286,15 +316,16 @@ var parse = function (tokenizer) { // and then fold them up at the end. 
var parts = [r]; var op; - while (r.lhs && (op = assignOp(t))) + while (r.lhs && (op = assignOp.parse(t))) parts.push(op, - runRequired(conditionalExpressionFunc(noIn), t)); + conditionalExpressionFunc(noIn).parse( + t, {required: true})); var result = parts.pop(); while (parts.length) { op = parts.pop(); var lhs = parts.pop(); - result = named('assignment', [lhs, op, result]); + result = new ParseNode('assignment', [lhs, op, result]); } return result; }); @@ -303,9 +334,9 @@ var parse = function (tokenizer) { var expressionFunc = memoizeBooleanFunc( function (noIn) { - return describe( + return expecting( 'expression', - nameIfMultipart( + nodeIfMultipart( 'comma', list(assignmentExpressionFunc(noIn), token(',')))); }); @@ -316,31 +347,32 @@ var parse = function (tokenizer) { var statements = list(statementPtr); // implements JavaScript's semicolon "insertion" rules - var maybeSemicolon = describe( + var maybeSemicolon = expecting( 'semicolon', or(token(';'), revalue( or( lookAheadToken('}'), lookAheadTokenClass('EOF'), - function (t) { - return t.isLineTerminatorHere ? [] : null; - }), named(';', [])))); + new Parser(null, + function (t) { + return t.isLineTerminatorHere ? [] : null; + })), new ParseNode(';', [])))); - var expressionStatement = named( + var expressionStatement = node( 'expressionStmnt', negLookAhead( or(lookAheadToken('{'), lookAheadToken('function')), seq(expression, - describe('semicolon', - or(maybeSemicolon, - // allow presence of colon to terminate - // statement legally, for the benefit of - // expressionOrLabelStatement. Basically assume - // an implicit semicolon. This - // is safe because a colon can never legally - // follow a semicolon anyway. - revalue(lookAheadToken(':'), named(';', []))))))); + expecting('semicolon', + or(maybeSemicolon, + // allow presence of colon to terminate + // statement legally, for the benefit of + // expressionOrLabelStatement. Basically assume + // an implicit semicolon. 
This + // is safe because a colon can never legally + // follow a semicolon anyway. + revalue(lookAheadToken(':'), new ParseNode(';', []))))))); // it's hard to parse statement labels, as in // `foo: x = 1`, because we can't tell from the @@ -350,44 +382,46 @@ var parse = function (tokenizer) { // then rewrites the result if it is an identifier // followed by a colon. var labelColonAndStatement = seq(token(':'), statementPtr); - var noColon = describe( + var noColon = expecting( 'semicolon', negLookAhead(lookAheadToken(':'))); - var expressionOrLabelStatement = function (t) { - var exprStmnt = expressionStatement(t); - if (! exprStmnt) - return null; + var expressionOrLabelStatement = new Parser( + null, + function (t) { + var exprStmnt = expressionStatement.parse(t); + if (! exprStmnt) + return null; - var expr = exprStmnt.children[0]; - var maybeSemi = exprStmnt.children[1]; - if (expr.name !== 'identifier' || - ! (maybeSemi instanceof ParseNode)) { - // We either have a non-identifier expression or a present - // semicolon. This is not a label. - // - // Fail now if we are looking at a colon, causing an - // error message on input like `1+1:` of the same kind - // you'd get without statement label parsing. - runRequired(noColon, t); - return exprStmnt; - } + var expr = exprStmnt.children[0]; + var maybeSemi = exprStmnt.children[1]; + if (expr.name !== 'identifier' || + ! (maybeSemi instanceof ParseNode)) { + // We either have a non-identifier expression or a present + // semicolon. This is not a label. + // + // Fail now if we are looking at a colon, causing an + // error message on input like `1+1:` of the same kind + // you'd get without statement label parsing. + noColon.parse(t, {required: true}); + return exprStmnt; + } - var rest = labelColonAndStatement(t); - if (! rest) - return exprStmnt; + var rest = labelColonAndStatement.parse(t); + if (! 
rest) + return exprStmnt; - return named('labelStmnt', - [expr.children[0]].concat(rest)); - }; + return new ParseNode('labelStmnt', + [expr.children[0]].concat(rest)); + }); - var emptyStatement = named('emptyStmnt', seq(token(';'))); // not maybeSemicolon + var emptyStatement = node('emptyStmnt', seq(token(';'))); // not maybeSemicolon - var blockStatement = describe('block', named('blockStmnt', seq( + var blockStatement = expecting('block', node('blockStmnt', seq( token('{'), unpack(opt(statements, lookAheadToken('}'))), token('}')))); var varDeclFunc = memoizeBooleanFunc(function (noIn) { - return named( + return node( 'varDecl', seq(tokenClass('IDENTIFIER'), unpack(opt(seq(token('='), @@ -395,7 +429,7 @@ var parse = function (tokenizer) { }); var varDecl = varDeclFunc(false); - var variableStatement = named( + var variableStatement = node( 'varStmnt', seq(token('var'), unpack(list(varDecl, token(','))), maybeSemicolon)); @@ -404,28 +438,28 @@ var parse = function (tokenizer) { // beginning with a regex literal. 
var closeParenBeforeStatement = preSlashToken(')', false); - var ifStatement = named( + var ifStatement = node( 'ifStmnt', seq(token('if'), token('('), expression, closeParenBeforeStatement, statementPtr, unpack(opt(seq(token('else'), statementPtr))))); - var secondThirdClauses = describe( + var secondThirdClauses = expecting( 'semicolon', lookAhead(lookAheadToken(';'), seq( - describe('semicolon', token(';')), - opt(expressionPtr, revalue(lookAheadToken(';'), named('nil', []))), - describe('semicolon', token(';')), - opt(expressionPtr, revalue(lookAheadToken(')'), named('nil', [])))))); + expecting('semicolon', token(';')), + opt(expressionPtr, revalue(lookAheadToken(';'), ParseNode.NIL)), + expecting('semicolon', token(';')), + opt(expressionPtr, revalue(lookAheadToken(')'), ParseNode.NIL))))); var inExpr = seq(token('in'), expression); - var inExprExpectingSemi = describe('semicolon', - seq(token('in'), expression)); - var forSpec = revalue(named( + var inExprExpectingSemi = expecting('semicolon', + seq(token('in'), expression)); + var forSpec = revalue(node( 'forSpec', or(seq(token('var'), varDeclFunc(true), - describe( + expecting( 'commaOrIn', or(unpack(inExpr), unpack(seq( @@ -437,76 +471,78 @@ var parse = function (tokenizer) { // get the case where the first clause is empty out of the way. // the lookAhead's return value is the empty placeholder for the // missing expression. - seq(revalue(lookAheadToken(';'), named('nil', [])), unpack(secondThirdClauses)), + seq(revalue(lookAheadToken(';'), ParseNode.NIL), unpack(secondThirdClauses)), // custom parser the non-var case because we have to // read the first expression before we know if there's // an "in". - function (t) { - var firstExpr = expressionFunc(true)(t); - if (! firstExpr) - return null; - var rest = secondThirdClauses(t); - if (! rest) { - // we need a left-hand-side expression for a - // `for (x in y)` loop. - if (! 
firstExpr.lhs) - throw parseError(t, secondThirdClauses); - // if we don't see 'in' at this point, it's probably - // a missing semicolon - rest = runRequired(inExprExpectingSemi, t); - } + new Parser( + null, + function (t) { + var firstExpr = expressionFunc(true).parse(t); + if (! firstExpr) + return null; + var rest = secondThirdClauses.parse(t); + if (! rest) { + // we need a left-hand-side expression for a + // `for (x in y)` loop. + if (! firstExpr.lhs) + throw parseError(t, secondThirdClauses); + // if we don't see 'in' at this point, it's probably + // a missing semicolon + rest = inExprExpectingSemi.parse(t, {required: true}); + } - return [firstExpr].concat(rest); - })), - function (clauses) { - // There are four kinds of for-loop, and we call the - // part between the parens one of forSpec, forVarSpec, - // forInSpec, and forVarInSpec. Having parsed it - // already, we rewrite the node name based on how - // many items came out. forIn and forVarIn always - // have 3 and 4 items respectively. for has 5 - // (the optional expressions are present as nils). - // forVar has 6 or more, because `for(var x;;);` - // produces [`var` `x` `;` nil `;` nil]. - if (! clauses) - return null; - var numChildren = clauses.children.length; - if (numChildren === 3) - return new ParseNode('forInSpec', clauses.children); - else if (numChildren === 4) - return new ParseNode('forVarInSpec', clauses.children); - else if (numChildren >= 6) - return new ParseNode('forVarSpec', clauses.children); - return clauses; - }); + return [firstExpr].concat(rest); + }))), + function (clauses) { + // There are four kinds of for-loop, and we call the + // part between the parens one of forSpec, forVarSpec, + // forInSpec, and forVarInSpec. Having parsed it + // already, we rewrite the node name based on how + // many items came out. forIn and forVarIn always + // have 3 and 4 items respectively. for has 5 + // (the optional expressions are present as nils). 
+ // forVar has 6 or more, because `for(var x;;);` + // produces [`var` `x` `;` nil `;` nil]. + if (! clauses) + return null; + var numChildren = clauses.children.length; + if (numChildren === 3) + return new ParseNode('forInSpec', clauses.children); + else if (numChildren === 4) + return new ParseNode('forVarInSpec', clauses.children); + else if (numChildren >= 6) + return new ParseNode('forVarSpec', clauses.children); + return clauses; + }); var iterationStatement = or( - named('doStmnt', seq(token('do'), statementPtr, token('while'), + node('doStmnt', seq(token('do'), statementPtr, token('while'), token('('), expression, token(')'), maybeSemicolon)), - named('whileStmnt', seq(token('while'), token('('), expression, + node('whileStmnt', seq(token('while'), token('('), expression, closeParenBeforeStatement, statementPtr)), // semicolons must be real, not maybeSemicolons - named('forStmnt', seq( + node('forStmnt', seq( token('for'), token('('), forSpec, closeParenBeforeStatement, statementPtr))); - var returnStatement = named( + var returnStatement = node( 'returnStmnt', seq(token('return'), or( - lookAhead(noLineTerminatorHere, expression), constant(named('nil', []))), + lookAhead(noLineTerminatorHere, expression), constant(ParseNode.NIL)), maybeSemicolon)); - var continueStatement = named( + var continueStatement = node( 'continueStmnt', seq(token('continue'), or( - lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(named('nil', []))), + lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(ParseNode.NIL)), maybeSemicolon)); - var breakStatement = named( + var breakStatement = node( 'breakStmnt', seq(token('break'), or( - lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(named('nil', []))), + lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(ParseNode.NIL)), maybeSemicolon)); - var throwStatement = named( + var throwStatement = node( 'throwStmnt', seq(token('throw'), 
lookAhead(revalue(noLineTerminatorHere, @@ -519,23 +555,23 @@ var parse = function (tokenizer) { }), expression), maybeSemicolon)); - var withStatement = named( + var withStatement = node( 'withStmnt', seq(token('with'), token('('), expression, closeParenBeforeStatement, statementPtr)); - var switchCase = named( + var switchCase = node( 'case', seq(token('case'), expression, token(':'), unpack(opt(statements, or(lookAheadToken('}'), lookAheadToken('case default')))))); - var switchDefault = named( + var switchDefault = node( 'default', seq(token('default'), token(':'), unpack(opt(statements, or(lookAheadToken('}'), lookAheadToken('case')))))); - var switchStatement = named( + var switchStatement = node( 'switchStmnt', seq(token('switch'), token('('), expression, token(')'), token('{'), unpack(opt(list(switchCase), @@ -545,70 +581,70 @@ var parse = function (tokenizer) { unpack(opt(list(switchCase)))))), token('}'))); - var catchFinally = describe( + var catchFinally = expecting( 'catch', lookAhead(lookAheadToken('catch finally'), seq( - or(named( + or(node( 'catch', seq(token('catch'), token('('), tokenClass('IDENTIFIER'), token(')'), blockStatement)), - constant(named('nil', []))), - or(named( + constant(ParseNode.NIL)), + or(node( 'finally', seq(token('finally'), blockStatement)), - constant(named('nil', [])))))); - var tryStatement = named( + constant(ParseNode.NIL))))); + var tryStatement = node( 'tryStmnt', seq(token('try'), blockStatement, unpack(catchFinally))); - var debuggerStatement = named( + var debuggerStatement = node( 'debuggerStmnt', seq(token('debugger'), maybeSemicolon)); - var statement = describe('statement', - or(expressionOrLabelStatement, - emptyStatement, - blockStatement, - variableStatement, - ifStatement, - iterationStatement, - returnStatement, - continueStatement, - breakStatement, - withStatement, - switchStatement, - throwStatement, - tryStatement, - debuggerStatement)); + var statement = expecting('statement', + 
or(expressionOrLabelStatement, + emptyStatement, + blockStatement, + variableStatement, + ifStatement, + iterationStatement, + returnStatement, + continueStatement, + breakStatement, + withStatement, + switchStatement, + throwStatement, + tryStatement, + debuggerStatement)); // PROGRAM - var functionDecl = named('functionDecl', + var functionDecl = node('functionDecl', functionFunc(true)); var sourceElement = or(statement, functionDecl); var sourceElements = list(sourceElement); - var functionBody = describe('functionBody', - opt(sourceElements, - lookAheadToken('}'))); + var functionBody = expecting('functionBody', + opt(sourceElements, + lookAheadToken('}'))); - var program = named('program', + var program = node('program', seq(unpack(opt(sourceElements)), // we rely on the fact that opt(sourceElements) // will never fail, and non-first arguments // to seq are required to succeed -- meaning // this parser will never fail without throwing // a parse error. - describe('statement', - revalue(lookAheadTokenClass("EOF"), - function (v, t) { - if (! v) - return null; - // eat the ending "EOF" so that - // our position is updated - t.consume(); - return unpack([]); - })))); + expecting('statement', + revalue(lookAheadTokenClass("EOF"), + function (v, t) { + if (! v) + return null; + // eat the ending "EOF" so that + // our position is updated + t.consume(); + return unpack([]); + })))); - return program(tokenizer); + return program.parse(tokenizer); }; diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 0511b95f9e..9cb9a05737 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -1,7 +1,6 @@ ///// TOKENIZER AND PARSER COMBINATORS // XXX make Parser object with parse method? -// XXX rework describe, call "expecting"? 
// XXX track line/col position, for errors and maybe token info // XXX unit tests @@ -17,6 +16,26 @@ var ParseNode = function (name, children) { throw new Error("Expected array in new ParseNode(" + name + ", ...)"); }; +ParseNode.NIL = new ParseNode('nil', []); + +var Parser = function (expecting, runFunc) { + this.expecting = expecting; + this._run = runFunc; +}; + +_.extend(Parser.prototype, { + parse: function (t, options) { + var result = this._run(t); + + if (options) { + if (options.required && ! result) + throw parseError(t, this); + } + + return result; + } +}); + Tokenizer = function (codeOrLexer) { // XXX rethink codeOrLexer later this.lexer = (codeOrLexer instanceof Lexer ? codeOrLexer : @@ -66,16 +85,17 @@ _.extend(Tokenizer.prototype, { // A parser that consume()s has to succeed. // Similarly, a parser that fails can't have consumed. -// mutates the parser; don't describe an existing parser. -var describe = function (description, parser) { - parser.description = description; +// mutates the parser +var expecting = function (expecting, parser) { + parser.expecting = expecting; return parser; }; // Call this as `throw parseError(...)`. // `expected` is a parser, `after` is a string. -var parseError = function (t, expected, found) { - var str = (expected.description ? "Expected " + expected.description : +var parseError = function (t, expectedParser, found) { + var str = (expectedParser.expecting ? "Expected " + + expectedParser.expecting : // all parsers that might error should have descriptions, // but just in case: "Unexpected token"); @@ -89,14 +109,14 @@ var parseError = function (t, expected, found) { ///// TERMINAL PARSER CONSTRUCTORS -var _tokenClassImpl = function (type, text, dontConsume) { +var _tokenClassImpl = function (type, text, onlyLook) { var textSet = (text ? makeSet(text.split(' ')) : null); - var description = (text ? text.split(' ').join(', ') : type); - return describe( - description, + var expecting = (text ? 
text.split(' ').join(', ') : type); + return new Parser( + expecting, function (t) { if (t.peekType == type && (!text || textSet[t.peekText])) { - if (dontConsume) + if (onlyLook) return []; var ret = {text: t.peekText, pos: t.pos}; t.consume(); @@ -106,10 +126,10 @@ var _tokenClassImpl = function (type, text, dontConsume) { }); }; -var _tokenImpl = function (text, dontConsume) { +var _tokenImpl = function (text, onlyLook) { if (/\w/.test(text)) - return _tokenClassImpl('KEYWORD', text, dontConsume); - return _tokenClassImpl('PUNCTUATION', text, dontConsume); + return _tokenClassImpl('KEYWORD', text, onlyLook); + return _tokenClassImpl('PUNCTUATION', text, onlyLook); }; var tokenClass = function (type, text) { @@ -122,28 +142,6 @@ var token = function (text) { return _tokenImpl(text); }; -// Like token, but marks tokens that need to defy the lexer's -// heuristic about whether the next '/' is a division or -// starts a regex. -var preSlashToken = function (text, divisionNotRegex) { - var impl = _tokenImpl(text); - return describe(impl.description, - function (t) { - // temporarily set divisionPermitted, - // restoring it if we don't match. - var oldValue = t.lexer.divisionPermitted; - var result; - try { - t.lexer.divisionPermitted = divisionNotRegex; - result = impl(t); - return result; - } finally { - if (! result) - t.lexer.divisionPermitted = oldValue; - } - }); -}; - // NON-CONSUMING PARSER CONSTRUCTORS var lookAheadTokenClass = function (type, text) { @@ -156,45 +154,28 @@ var lookAheadToken = function (text) { ///// NON-TERMINAL PARSER CONSTRUCTORS -// run parser(tokenizer) and assert it matches -var runRequired = function (parser, tokenizer) { - return revalue( - tokenizer ? parser(tokenizer) : parser, - function (v, t) { - if (! v) - throw parseError(t || tokenizer, parser); - return v; - }); -}; - -var runMaybeRequired = function (require, parser, tokenizer) { - return require ? 
runRequired(parser, tokenizer) : parser(tokenizer); -}; - -// Polymorphic in parsers and results; an experiment. -var named = function (name, parserOrResult) { - return describe( - name, - revalue( - parserOrResult, - function (value) { - if (! value) - return null; - return new ParseNode(name, Array.prototype.slice.call(value)); - })); +var node = function (name, childrenParser) { + return new Parser(name, function (t) { + var children = childrenParser.parse(t); + if (! children) + return null; + return new ParseNode(name, children); + }); }; var or = function (/*parsers*/) { var args = arguments; - return function (t) { - var result; - for(var i = 0, N = args.length; i < N; i++) { - result = args[i](t); - if (result) - return result; - } - return null; - }; + return new Parser( + null, + function (t) { + var result; + for(var i = 0, N = args.length; i < N; i++) { + result = args[i].parse(t); + if (result) + return result; + } + return null; + }); }; // Parses a left-recursive expression with zero or more occurrences @@ -220,18 +201,18 @@ var binaryLeft = function (termParser, opParser) { } } - return describe( - termParser.description, + return new Parser( + termParser.expecting, function (t) { - var result = termParser(t); + var result = termParser.parse(t); if (! result) return null; var op; - while ((op = opParser(t))) { - result = named( + while ((op = opParser.parse(t))) { + result = new ParseNode( 'binary', - [result, op, runRequired(termParser, t, op)]); + [result, op, termParser.parse(t, {required: true})]); } return result; }); @@ -250,25 +231,24 @@ var list = function (itemParser, sepParser) { else array.push(newThing); }; - return describe( - itemParser.description, + return new Parser( + itemParser.expecting, function (t) { var result = []; - var firstItem = itemParser(t); + var firstItem = itemParser.parse(t); if (! 
firstItem) return null; push(result, firstItem); if (sepParser) { var sep; - while ((sep = sepParser(t))) { + while ((sep = sepParser.parse(t))) { push(result, sep); - push(result, runRequired(itemParser, t, - sep.unpack ? sep[sep.length - 1] : sep)); + push(result, itemParser.parse(t, {required: true})); } } else { var item; - while ((item = itemParser(t))) + while ((item = itemParser.parse(t))) push(result, item); } return result; @@ -278,20 +258,17 @@ var list = function (itemParser, sepParser) { var seq = function (/*parsers*/) { var args = arguments; if (! args.length) - return describe("(empty)", - function (t) { return []; }); + return new Parser("(empty)", + function (t) { return []; }); - var description = args[0].description; - for (var i = 1; i < args.length; i++) - description += " " + args[i].description; - return describe( - description, + return new Parser( + args[0].expecting, function (t) { var result = []; for (var i = 0, N = args.length; i < N; i++) { // first item in sequence can fail, and we // fail (without error); after that, error on failure - var r = runMaybeRequired(i > 0, args[i], t); + var r = args[i].parse(t, {required: i > 0}); if (! r) return null; @@ -304,8 +281,12 @@ var seq = function (/*parsers*/) { }); }; -var unpack = function (arrayParser) { - return revalue(arrayParser, function (v) { +var unpack = function (arrayOrParser) { + if (isArray(arrayOrParser)) { + arrayOrParser.unpack = true; + return arrayOrParser; + } + return revalue(arrayOrParser, function (v) { if (v && isArray(v)) v.unpack = true; return v; @@ -314,35 +295,36 @@ var unpack = function (arrayParser) { // lookAhead parser must never consume var lookAhead = function (lookAheadParser, nextParser) { - return describe( - nextParser.description, + return new Parser( + nextParser.expecting, function (t) { - if (! lookAheadParser(t)) + if (! 
lookAheadParser.parse(t)) return null; - return nextParser(t); + return nextParser.parse(t); }); }; + var negLookAhead = function (lookAheadParser, nextParser) { if (! nextParser) - return function (t) { - return lookAheadParser(t) ? null : []; - }; + return new Parser( + null, + function (t) { + return lookAheadParser.parse(t) ? null : []; + }); - return describe( - nextParser.description, + return new Parser( + nextParser.expecting, function (t) { - if (lookAheadParser(t)) + if (lookAheadParser.parse(t)) return null; - return nextParser(t); + return nextParser.parse(t); }); }; // parser that looks at nothing and returns result var constant = function (result) { - // no description - return function (t) { - return result; - }; + return new Parser(null, + function (t) { return result; }); }; // afterLookAhead allows the parser to fail rather than @@ -356,14 +338,13 @@ var constant = function (result) { // instead of "Expected ;" when the optional expression // turns out to be an illegal `var`. var opt = function (parser, afterLookAhead) { - return describe(parser.description, - or(parser, afterLookAhead ? afterLookAhead : seq())); + return expecting(parser.expecting, + or(parser, afterLookAhead ? afterLookAhead : seq())); }; -// note: valueTransformFunc gets the tokenizer as a second argument -// if it's called on a parser. This func is allowed to then -// run more parsers. -var revalue = function (parserOrValue, valueTransformFunc) { +// note: valueTransformFunc gets the tokenizer as a second argument. +// This func is allowed to then run more parsers. 
+var revalue = function (parser, valueTransformFunc) { if (typeof valueTransformFunc !== 'function') { var value = valueTransformFunc; valueTransformFunc = function (v) { @@ -371,12 +352,9 @@ var revalue = function (parserOrValue, valueTransformFunc) { }; } - if (typeof parserOrValue === 'function') - // it's a parser - return describe(parserOrValue.description, - function (t) { - return valueTransformFunc(parserOrValue(t), t); - }); - else - return valueTransformFunc(parserOrValue); + return new Parser( + parser.expecting, + function (t) { + return valueTransformFunc(parser.parse(t), t); + }); }; From 2b41592236f5e63549e714b52c6d532c3c694a50 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 19:59:52 -0700 Subject: [PATCH 36/86] no more unpack! --- packages/jsparse/parser.js | 103 +++++++++++++++++----------------- packages/jsparse/parserlib.js | 17 +----- 2 files changed, 52 insertions(+), 68 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 70f3a2f9b4..f57f3a51ce 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -86,23 +86,20 @@ var parse = function (tokenizer) { var arrayLiteral = node('array', seq(token('['), - unpack(opt(list(token(',')))), - unpack( - opt( - list( - expecting( - 'expression', - or(assignmentExpressionPtr, - // count a peeked-at ']' as an expression - // to support elisions at end, e.g. - // `[1,2,3,,,,,,]`. Because it's unpacked, - // the look-ahead won't show up in the - // parse tree. - unpack(lookAheadToken(']')))), - // list seperator is one or more commas - // to support elision - unpack(list(token(',')))), - lookAheadToken(']'))), + opt(list(token(','))), + opt( + list( + expecting( + 'expression', + or(assignmentExpressionPtr, + // count a peeked-at ']' as an expression + // to support elisions at end, e.g. + // `[1,2,3,,,,,,]`. 
+ lookAheadToken(']'))), + // list seperator is one or more commas + // to support elision + list(token(','))), + lookAheadToken(']')), token(']'))); var propertyName = expecting('propertyName', or( @@ -116,8 +113,8 @@ var parse = function (tokenizer) { var objectLiteral = node('object', seq(token('{'), - unpack(opt(list(nameColonValue, - token(',')), lookAheadToken('}'))), + opt(list(nameColonValue, + token(',')), lookAheadToken('}')), token('}'))); // not memoized; only call at construction time @@ -127,11 +124,11 @@ var parse = function (tokenizer) { or(tokenClass('IDENTIFIER'), revalue(lookAheadToken('('), ParseNode.NIL))), token('('), - unpack(opt(list(tokenClass('IDENTIFIER'), token(',')), - lookAheadToken(')'))), + opt(list(tokenClass('IDENTIFIER'), token(',')), + lookAheadToken(')')), token(')'), token('{'), - unpack(functionBodyPtr), + functionBodyPtr, token('}')); }; var functionExpression = node('functionExpr', @@ -155,8 +152,8 @@ var parse = function (tokenizer) { var dotEnding = seq(token('.'), tokenClass('IDENTIFIER')); var bracketEnding = seq(token('['), expressionPtr, token(']')); var callArgs = seq(token('('), - unpack(opt(list(assignmentExpressionPtr, - token(',')), lookAheadToken(')'))), + opt(list(assignmentExpressionPtr, + token(',')), lookAheadToken(')')), token(')')); var newKeyword = token('new'); @@ -233,9 +230,9 @@ var parse = function (tokenizer) { nodeIfMultipart( 'postfix', seq(lhsExpression, - unpack(opt(lookAhead(noLineTerminatorHere, - lookAhead(postfixLookahead, - postfixToken))))))); + opt(lookAhead(noLineTerminatorHere, + lookAhead(postfixLookahead, + postfixToken)))))); var unaryList = opt(list(or(token('delete void typeof'), preSlashToken('++ -- + - ~ !', false)))); var unaryExpression = new Parser( @@ -292,10 +289,10 @@ var parse = function (tokenizer) { 'expression', nodeIfMultipart( 'ternary', - seq(binaryExpressionFunc(noIn), unpack(opt(seq( + seq(binaryExpressionFunc(noIn), opt(seq( token('?'), 
assignmentExpressionPtrFunc(false), token(':'), - assignmentExpressionPtrFunc(noIn))))))); + assignmentExpressionPtrFunc(noIn)))))); }); var conditionalExpression = conditionalExpressionFunc(false); @@ -417,21 +414,21 @@ var parse = function (tokenizer) { var emptyStatement = node('emptyStmnt', seq(token(';'))); // not maybeSemicolon var blockStatement = expecting('block', node('blockStmnt', seq( - token('{'), unpack(opt(statements, lookAheadToken('}'))), + token('{'), opt(statements, lookAheadToken('}')), token('}')))); var varDeclFunc = memoizeBooleanFunc(function (noIn) { return node( 'varDecl', seq(tokenClass('IDENTIFIER'), - unpack(opt(seq(token('='), - assignmentExpressionFunc(noIn)))))); + opt(seq(token('='), + assignmentExpressionFunc(noIn))))); }); var varDecl = varDeclFunc(false); var variableStatement = node( 'varStmnt', - seq(token('var'), unpack(list(varDecl, token(','))), + seq(token('var'), list(varDecl, token(',')), maybeSemicolon)); // A paren that may be followed by a statement @@ -442,7 +439,7 @@ var parse = function (tokenizer) { 'ifStmnt', seq(token('if'), token('('), expression, closeParenBeforeStatement, statementPtr, - unpack(opt(seq(token('else'), statementPtr))))); + opt(seq(token('else'), statementPtr)))); var secondThirdClauses = expecting( 'semicolon', @@ -461,17 +458,17 @@ var parse = function (tokenizer) { varDeclFunc(true), expecting( 'commaOrIn', - or(unpack(inExpr), - unpack(seq( - unpack(opt( + or(inExpr, + seq( + opt( seq(token(','), - unpack(list(varDeclFunc(true), token(',')))), - lookAheadToken(';'))), - unpack(secondThirdClauses)))))), + list(varDeclFunc(true), token(','))), + lookAheadToken(';')), + secondThirdClauses)))), // get the case where the first clause is empty out of the way. // the lookAhead's return value is the empty placeholder for the // missing expression. 
- seq(revalue(lookAheadToken(';'), ParseNode.NIL), unpack(secondThirdClauses)), + seq(revalue(lookAheadToken(';'), ParseNode.NIL), secondThirdClauses), // custom parser the non-var case because we have to // read the first expression before we know if there's // an "in". @@ -563,22 +560,22 @@ var parse = function (tokenizer) { var switchCase = node( 'case', seq(token('case'), expression, token(':'), - unpack(opt(statements, or(lookAheadToken('}'), - lookAheadToken('case default')))))); + opt(statements, or(lookAheadToken('}'), + lookAheadToken('case default'))))); var switchDefault = node( 'default', seq(token('default'), token(':'), - unpack(opt(statements, or(lookAheadToken('}'), - lookAheadToken('case')))))); + opt(statements, or(lookAheadToken('}'), + lookAheadToken('case'))))); var switchStatement = node( 'switchStmnt', seq(token('switch'), token('('), expression, token(')'), - token('{'), unpack(opt(list(switchCase), - or(lookAheadToken('}'), - lookAheadToken('default')))), - unpack(opt(seq(switchDefault, - unpack(opt(list(switchCase)))))), + token('{'), opt(list(switchCase), + or(lookAheadToken('}'), + lookAheadToken('default'))), + opt(seq(switchDefault, + opt(list(switchCase)))), token('}'))); var catchFinally = expecting( @@ -596,7 +593,7 @@ var parse = function (tokenizer) { constant(ParseNode.NIL))))); var tryStatement = node( 'tryStmnt', - seq(token('try'), blockStatement, unpack(catchFinally))); + seq(token('try'), blockStatement, catchFinally)); var debuggerStatement = node( 'debuggerStmnt', seq(token('debugger'), maybeSemicolon)); @@ -629,7 +626,7 @@ var parse = function (tokenizer) { lookAheadToken('}'))); var program = node('program', - seq(unpack(opt(sourceElements)), + seq(opt(sourceElements), // we rely on the fact that opt(sourceElements) // will never fail, and non-first arguments // to seq are required to succeed -- meaning @@ -643,7 +640,7 @@ var parse = function (tokenizer) { // eat the ending "EOF" so that // our position is updated 
t.consume(); - return unpack([]); + return v; })))); return program.parse(tokenizer); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 9cb9a05737..2b100923ed 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -223,10 +223,9 @@ var binaryLeft = function (termParser, opParser) { // `x` => ["x"] // `x,y` => ["x", ",", "y"] // `x,y,z` => ["x", ",", "y", ",", "z"] -// Respects `unpack`. var list = function (itemParser, sepParser) { var push = function(array, newThing) { - if (newThing.unpack) + if (isArray(newThing)) array.push.apply(array, newThing); else array.push(newThing); @@ -272,7 +271,7 @@ var seq = function (/*parsers*/) { if (! r) return null; - if (r.unpack) // append array! + if (isArray(r)) // append array! result.push.apply(result, r); else result.push(r); @@ -281,18 +280,6 @@ var seq = function (/*parsers*/) { }); }; -var unpack = function (arrayOrParser) { - if (isArray(arrayOrParser)) { - arrayOrParser.unpack = true; - return arrayOrParser; - } - return revalue(arrayOrParser, function (v) { - if (v && isArray(v)) - v.unpack = true; - return v; - }); -}; - // lookAhead parser must never consume var lookAhead = function (lookAheadParser, nextParser) { return new Parser( From b34ed9eb6e94e7e29728927c01e346cfc684647f Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 20:26:25 -0700 Subject: [PATCH 37/86] reduce use of revalue(...) --- packages/jsparse/parser.js | 50 ++++++++++++++++---------------- packages/jsparse/parser_tests.js | 3 +- packages/jsparse/parserlib.js | 9 ++++-- 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index f57f3a51ce..35972fb4f8 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -2,10 +2,10 @@ // XXX unit tests -// XXX remove unnecessary ParseNode.NILs in lookaheads +// XXX examine uses of lookAhead(...) 
// XXX SeqParser -// XXX find all revalues, see if constant ones are necessary. -// API may be confusing if constant affects only non-null. +// XXX examine when revalue(...) takes a constant vs. func, break into two +// XXX chain revalue(...)? Chain other things? // What we don't have from ECMA-262 5.1: // - object literal trailing comma @@ -46,8 +46,6 @@ var parse = function (tokenizer) { return revalue( arrayParser, function (parts) { - if (! parts) - return null; return (parts.length === 1) ? parts[0] : new ParseNode(name, parts); }); @@ -122,7 +120,7 @@ var parse = function (tokenizer) { return seq(token('function'), (nameRequired ? tokenClass('IDENTIFIER') : or(tokenClass('IDENTIFIER'), - revalue(lookAheadToken('('), ParseNode.NIL))), + lookAhead(lookAheadToken('('), constant(ParseNode.NIL)))), token('('), opt(list(tokenClass('IDENTIFIER'), token(',')), lookAheadToken(')')), @@ -347,14 +345,15 @@ var parse = function (tokenizer) { var maybeSemicolon = expecting( 'semicolon', or(token(';'), - revalue( + lookAhead( or( lookAheadToken('}'), lookAheadTokenClass('EOF'), - new Parser(null, + new Parser("lineTerminator", function (t) { return t.isLineTerminatorHere ? [] : null; - })), new ParseNode(';', [])))); + })), + constant(new ParseNode(';', []))))); var expressionStatement = node( 'expressionStmnt', @@ -369,7 +368,7 @@ var parse = function (tokenizer) { // an implicit semicolon. This // is safe because a colon can never legally // follow a semicolon anyway. 
- revalue(lookAheadToken(':'), new ParseNode(';', []))))))); + lookAhead(lookAheadToken(':'), constant(new ParseNode(';', [])))))))); // it's hard to parse statement labels, as in // `foo: x = 1`, because we can't tell from the @@ -446,9 +445,11 @@ var parse = function (tokenizer) { lookAhead(lookAheadToken(';'), seq( expecting('semicolon', token(';')), - opt(expressionPtr, revalue(lookAheadToken(';'), ParseNode.NIL)), + opt(expressionPtr, lookAhead(lookAheadToken(';'), + constant(ParseNode.NIL))), expecting('semicolon', token(';')), - opt(expressionPtr, revalue(lookAheadToken(')'), ParseNode.NIL))))); + opt(expressionPtr, lookAhead(lookAheadToken(')'), + constant(ParseNode.NIL)))))); var inExpr = seq(token('in'), expression); var inExprExpectingSemi = expecting('semicolon', seq(token('in'), expression)); @@ -468,7 +469,8 @@ var parse = function (tokenizer) { // get the case where the first clause is empty out of the way. // the lookAhead's return value is the empty placeholder for the // missing expression. - seq(revalue(lookAheadToken(';'), ParseNode.NIL), secondThirdClauses), + seq(lookAhead(lookAheadToken(';'), + constant(ParseNode.NIL)), secondThirdClauses), // custom parser the non-var case because we have to // read the first expression before we know if there's // an "in". @@ -501,8 +503,6 @@ var parse = function (tokenizer) { // (the optional expressions are present as nils). // forVar has 6 or more, because `for(var x;;);` // produces [`var` `x` `;` nil `;` nil]. - if (! 
clauses) - return null; var numChildren = clauses.children.length; if (numChildren === 3) return new ParseNode('forInSpec', clauses.children); @@ -542,14 +542,16 @@ var parse = function (tokenizer) { var throwStatement = node( 'throwStmnt', seq(token('throw'), - lookAhead(revalue(noLineTerminatorHere, - function (v, t) { - if (v) - return v; - if (t.peekText) - throw parseError(t, expression, 'end of line'); - return null; - }), expression), + lookAhead(new Parser(null, + function (t) { + var v = noLineTerminatorHere.parse(t); + if (v) + return v; + if (t.peekText) + throw parseError(t, expression, 'end of line'); + // EOF: + return null; + }), expression), maybeSemicolon)); var withStatement = node( @@ -635,8 +637,6 @@ var parse = function (tokenizer) { expecting('statement', revalue(lookAheadTokenClass("EOF"), function (v, t) { - if (! v) - return null; // eat the ending "EOF" so that // our position is updated t.consume(); diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 46ac94bafc..7fe30fb6f1 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -621,7 +621,8 @@ Tinytest.add("jsparse - bad parses", function (test) { '({1:2,3`:`})', '({1:2,`name:value`', 'x.`IDENTIFIER`true', - 'foo;`semicolon`:;' + 'foo;`semicolon`:;', + 'throw`expression`' ]; _.each(trials, function (tr) { tester.badParse(tr); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 2b100923ed..799e3dccc6 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -329,19 +329,24 @@ var opt = function (parser, afterLookAhead) { or(parser, afterLookAhead ? afterLookAhead : seq())); }; +// Takes a parser and runs a function on its output +// when the parser matches. // note: valueTransformFunc gets the tokenizer as a second argument. // This func is allowed to then run more parsers. 
var revalue = function (parser, valueTransformFunc) { if (typeof valueTransformFunc !== 'function') { var value = valueTransformFunc; valueTransformFunc = function (v) { - return (v ? value : null); + return value; }; } return new Parser( parser.expecting, function (t) { - return valueTransformFunc(parser.parse(t), t); + var v = parser.parse(t); + if (! v) + return null; + return valueTransformFunc(v, t); }); }; From c44592d074e2ff64369109302db1336d0de04de3 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 21:00:01 -0700 Subject: [PATCH 38/86] and/not, more refinements --- packages/jsparse/parser.js | 155 +++++++++++++++++----------------- packages/jsparse/parserlib.js | 55 ++++++------ 2 files changed, 107 insertions(+), 103 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 35972fb4f8..1478d6fb22 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -2,20 +2,22 @@ // XXX unit tests -// XXX examine uses of lookAhead(...) // XXX SeqParser // XXX examine when revalue(...) takes a constant vs. func, break into two // XXX chain revalue(...)? Chain other things? +// XXX better way to declare parsers, including boolean flagged ones // What we don't have from ECMA-262 5.1: // - object literal trailing comma // - object literal get/set var parse = function (tokenizer) { - var noLineTerminatorHere = new Parser( - 'noLineTerminator', function (t) { - return t.isLineTerminatorHere ? null : []; - }); + var NIL = new ParseNode('nil', []); + + var noLineTerminatorHere = expecting( + 'noLineTerminator', assertion(function (t) { + return ! 
t.isLineTerminatorHere; + })); // Like token, but marks tokens that need to defy the lexer's // heuristic about whether the next '/' is a division or @@ -110,17 +112,17 @@ var parse = function (tokenizer) { var objectLiteral = node('object', - seq(token('{'), - opt(list(nameColonValue, - token(',')), lookAheadToken('}')), - token('}'))); + seq(token('{'), + opt(list(nameColonValue, + token(',')), lookAheadToken('}')), + token('}'))); // not memoized; only call at construction time var functionFunc = function (nameRequired) { return seq(token('function'), (nameRequired ? tokenClass('IDENTIFIER') : or(tokenClass('IDENTIFIER'), - lookAhead(lookAheadToken('('), constant(ParseNode.NIL)))), + and(lookAheadToken('('), constant(NIL)))), token('('), opt(list(tokenClass('IDENTIFIER'), token(',')), lookAheadToken(')')), @@ -130,7 +132,7 @@ var parse = function (tokenizer) { token('}')); }; var functionExpression = node('functionExpr', - functionFunc(false)); + functionFunc(false)); var primaryOrFunctionExpression = expecting('expression', @@ -228,9 +230,9 @@ var parse = function (tokenizer) { nodeIfMultipart( 'postfix', seq(lhsExpression, - opt(lookAhead(noLineTerminatorHere, - lookAhead(postfixLookahead, - postfixToken)))))); + opt(and(noLineTerminatorHere, + postfixLookahead, + postfixToken))))); var unaryList = opt(list(or(token('delete void typeof'), preSlashToken('++ -- + - ~ !', false)))); var unaryExpression = new Parser( @@ -277,7 +279,7 @@ var parse = function (tokenizer) { token('||')]; return expecting( 'expression', - binaryLeft(unaryExpression, binaryOps)); + binaryLeft('binary', unaryExpression, binaryOps)); }); var binaryExpression = binaryExpressionFunc(false); @@ -345,20 +347,19 @@ var parse = function (tokenizer) { var maybeSemicolon = expecting( 'semicolon', or(token(';'), - lookAhead( + and( or( lookAheadToken('}'), lookAheadTokenClass('EOF'), - new Parser("lineTerminator", - function (t) { - return t.isLineTerminatorHere ? 
[] : null; - })), + assertion(function (t) { + return t.isLineTerminatorHere; + })), constant(new ParseNode(';', []))))); var expressionStatement = node( 'expressionStmnt', - negLookAhead( - or(lookAheadToken('{'), lookAheadToken('function')), + and( + not(or(lookAheadToken('{'), lookAheadToken('function'))), seq(expression, expecting('semicolon', or(maybeSemicolon, @@ -368,7 +369,8 @@ var parse = function (tokenizer) { // an implicit semicolon. This // is safe because a colon can never legally // follow a semicolon anyway. - lookAhead(lookAheadToken(':'), constant(new ParseNode(';', [])))))))); + and(lookAheadToken(':'), + constant(new ParseNode(';', [])))))))); // it's hard to parse statement labels, as in // `foo: x = 1`, because we can't tell from the @@ -379,8 +381,7 @@ var parse = function (tokenizer) { // followed by a colon. var labelColonAndStatement = seq(token(':'), statementPtr); var noColon = expecting( - 'semicolon', - negLookAhead(lookAheadToken(':'))); + 'semicolon', not(lookAheadToken(':'))); var expressionOrLabelStatement = new Parser( null, function (t) { @@ -442,14 +443,14 @@ var parse = function (tokenizer) { var secondThirdClauses = expecting( 'semicolon', - lookAhead(lookAheadToken(';'), - seq( - expecting('semicolon', token(';')), - opt(expressionPtr, lookAhead(lookAheadToken(';'), - constant(ParseNode.NIL))), - expecting('semicolon', token(';')), - opt(expressionPtr, lookAhead(lookAheadToken(')'), - constant(ParseNode.NIL)))))); + and(lookAheadToken(';'), + seq( + expecting('semicolon', token(';')), + opt(expressionPtr, and(lookAheadToken(';'), + constant(NIL))), + expecting('semicolon', token(';')), + opt(expressionPtr, and(lookAheadToken(')'), + constant(NIL)))))); var inExpr = seq(token('in'), expression); var inExprExpectingSemi = expecting('semicolon', seq(token('in'), expression)); @@ -469,8 +470,8 @@ var parse = function (tokenizer) { // get the case where the first clause is empty out of the way. 
// the lookAhead's return value is the empty placeholder for the // missing expression. - seq(lookAhead(lookAheadToken(';'), - constant(ParseNode.NIL)), secondThirdClauses), + seq(and(lookAheadToken(';'), + constant(NIL)), secondThirdClauses), // custom parser the non-var case because we have to // read the first expression before we know if there's // an "in". @@ -515,10 +516,10 @@ var parse = function (tokenizer) { var iterationStatement = or( node('doStmnt', seq(token('do'), statementPtr, token('while'), - token('('), expression, token(')'), - maybeSemicolon)), + token('('), expression, token(')'), + maybeSemicolon)), node('whileStmnt', seq(token('while'), token('('), expression, - closeParenBeforeStatement, statementPtr)), + closeParenBeforeStatement, statementPtr)), // semicolons must be real, not maybeSemicolons node('forStmnt', seq( token('for'), token('('), forSpec, closeParenBeforeStatement, @@ -527,31 +528,31 @@ var parse = function (tokenizer) { var returnStatement = node( 'returnStmnt', seq(token('return'), or( - lookAhead(noLineTerminatorHere, expression), constant(ParseNode.NIL)), + and(noLineTerminatorHere, expression), constant(NIL)), maybeSemicolon)); var continueStatement = node( 'continueStmnt', seq(token('continue'), or( - lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(ParseNode.NIL)), + and(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(NIL)), maybeSemicolon)); var breakStatement = node( 'breakStmnt', seq(token('break'), or( - lookAhead(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(ParseNode.NIL)), + and(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(NIL)), maybeSemicolon)); var throwStatement = node( 'throwStmnt', seq(token('throw'), - lookAhead(new Parser(null, - function (t) { - var v = noLineTerminatorHere.parse(t); - if (v) - return v; - if (t.peekText) - throw parseError(t, expression, 'end of line'); - // EOF: - return null; - }), expression), + and(new Parser(null, + function (t) { 
+ var v = noLineTerminatorHere.parse(t); + if (v) + return v; + if (t.peekText) + throw parseError(t, expression, 'end of line'); + // EOF: + return null; + }), expression), maybeSemicolon)); var withStatement = node( @@ -582,17 +583,17 @@ var parse = function (tokenizer) { var catchFinally = expecting( 'catch', - lookAhead(lookAheadToken('catch finally'), - seq( - or(node( - 'catch', - seq(token('catch'), token('('), tokenClass('IDENTIFIER'), - token(')'), blockStatement)), - constant(ParseNode.NIL)), - or(node( - 'finally', - seq(token('finally'), blockStatement)), - constant(ParseNode.NIL))))); + and(lookAheadToken('catch finally'), + seq( + or(node( + 'catch', + seq(token('catch'), token('('), tokenClass('IDENTIFIER'), + token(')'), blockStatement)), + constant(NIL)), + or(node( + 'finally', + seq(token('finally'), blockStatement)), + constant(NIL))))); var tryStatement = node( 'tryStmnt', seq(token('try'), blockStatement, catchFinally)); @@ -618,7 +619,7 @@ var parse = function (tokenizer) { // PROGRAM var functionDecl = node('functionDecl', - functionFunc(true)); + functionFunc(true)); var sourceElement = or(statement, functionDecl); var sourceElements = list(sourceElement); @@ -628,20 +629,20 @@ var parse = function (tokenizer) { lookAheadToken('}'))); var program = node('program', - seq(opt(sourceElements), - // we rely on the fact that opt(sourceElements) - // will never fail, and non-first arguments - // to seq are required to succeed -- meaning - // this parser will never fail without throwing - // a parse error. - expecting('statement', - revalue(lookAheadTokenClass("EOF"), - function (v, t) { - // eat the ending "EOF" so that - // our position is updated - t.consume(); - return v; - })))); + seq(opt(sourceElements), + // we rely on the fact that opt(sourceElements) + // will never fail, and non-first arguments + // to seq are required to succeed -- meaning + // this parser will never fail without throwing + // a parse error. 
+ expecting('statement', + revalue(lookAheadTokenClass("EOF"), + function (v, t) { + // eat the ending "EOF" so that + // our position is updated + t.consume(); + return v; + })))); return program.parse(tokenizer); }; diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 799e3dccc6..721229f0b4 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -1,6 +1,5 @@ ///// TOKENIZER AND PARSER COMBINATORS -// XXX make Parser object with parse method? // XXX track line/col position, for errors and maybe token info // XXX unit tests @@ -16,8 +15,6 @@ var ParseNode = function (name, children) { throw new Error("Expected array in new ParseNode(" + name + ", ...)"); }; -ParseNode.NIL = new ParseNode('nil', []); - var Parser = function (expecting, runFunc) { this.expecting = expecting; this._run = runFunc; @@ -152,6 +149,13 @@ var lookAheadToken = function (text) { return _tokenImpl(text, true); }; +var assertion = function (test) { + return new Parser( + null, function (t) { + return test(t) ? [] : null; + }); +}; + ///// NON-TERMINAL PARSER CONSTRUCTORS var node = function (name, childrenParser) { @@ -187,7 +191,7 @@ var or = function (/*parsers*/) { // // opParser can also be an array of op parsers from high to low // precedence (tightest-binding first) -var binaryLeft = function (termParser, opParser) { +var binaryLeft = function (name, termParser, opParser) { if (isArray(opParser)) { if (opParser.length === 1) { // take single opParser out of its array @@ -196,7 +200,7 @@ var binaryLeft = function (termParser, opParser) { // pop off last opParser (non-destructively) and replace // termParser with a recursive binaryLeft on the remaining // ops. 
- termParser = binaryLeft(termParser, opParser.slice(0, -1)); + termParser = binaryLeft(name, termParser, opParser.slice(0, -1)); opParser = opParser[opParser.length - 1]; } } @@ -211,7 +215,7 @@ var binaryLeft = function (termParser, opParser) { var op; while ((op = opParser.parse(t))) { result = new ParseNode( - 'binary', + name, [result, op, termParser.parse(t, {required: true})]); } return result; @@ -257,8 +261,7 @@ var list = function (itemParser, sepParser) { var seq = function (/*parsers*/) { var args = arguments; if (! args.length) - return new Parser("(empty)", - function (t) { return []; }); + return constant([]); return new Parser( args[0].expecting, @@ -280,31 +283,31 @@ var seq = function (/*parsers*/) { }); }; -// lookAhead parser must never consume -var lookAhead = function (lookAheadParser, nextParser) { +// parsers except last must never consume +var and = function (/*parsers*/) { + var args = arguments; + if (! args.length) + return constant([]); + return new Parser( - nextParser.expecting, + args[args.length - 1].expecting, function (t) { - if (! lookAheadParser.parse(t)) - return null; - return nextParser.parse(t); + var result; + for(var i = 0, N = args.length; i < N; i++) { + result = args[i].parse(t); + if (! result) + return null; + } + return result; }); }; -var negLookAhead = function (lookAheadParser, nextParser) { - if (! nextParser) - return new Parser( - null, - function (t) { - return lookAheadParser.parse(t) ? null : []; - }); - +// parser must not consume +var not = function (parser) { return new Parser( - nextParser.expecting, + null, function (t) { - if (lookAheadParser.parse(t)) - return null; - return nextParser.parse(t); + return parser.parse(t) ? 
null : []; }); }; From 98859f68744d87374610f1e8ea0b2d65e284b024 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 21:20:36 -0700 Subject: [PATCH 39/86] more refinements --- packages/jsparse/parser.js | 127 +++++++++++++++++----------------- packages/jsparse/parserlib.js | 43 ++++-------- 2 files changed, 79 insertions(+), 91 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 1478d6fb22..3fbd853e5c 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -3,8 +3,6 @@ // XXX unit tests // XXX SeqParser -// XXX examine when revalue(...) takes a constant vs. func, break into two -// XXX chain revalue(...)? Chain other things? // XXX better way to declare parsers, including boolean flagged ones // What we don't have from ECMA-262 5.1: @@ -42,17 +40,6 @@ var parse = function (tokenizer) { }); }; - // Function that takes one-item arrays to their single item and names other - // arrays with `name`. Works on parsers too. - var nodeIfMultipart = function (name, arrayParser) { - return revalue( - arrayParser, - function (parts) { - return (parts.length === 1) ? - parts[0] : new ParseNode(name, parts); - }); - }; - // These "pointers" allow grammar circularity, i.e. accessing // later parsers from earlier ones. 
var expressionPtrFunc = function (noIn) { @@ -227,12 +214,15 @@ var parse = function (tokenizer) { var postfixLookahead = lookAheadToken('++ --'); var postfixExpression = expecting( 'expression', - nodeIfMultipart( - 'postfix', - seq(lhsExpression, - opt(and(noLineTerminatorHere, - postfixLookahead, - postfixToken))))); + mapResult(seq(lhsExpression, + opt(and(noLineTerminatorHere, + postfixLookahead, + postfixToken))), + function (v) { + if (v.length === 1) + return v[0]; + return new ParseNode('postfix', v); + })); var unaryList = opt(list(or(token('delete void typeof'), preSlashToken('++ -- + - ~ !', false)))); var unaryExpression = new Parser( @@ -287,12 +277,17 @@ var parse = function (tokenizer) { function (noIn) { return expecting( 'expression', - nodeIfMultipart( - 'ternary', - seq(binaryExpressionFunc(noIn), opt(seq( - token('?'), - assignmentExpressionPtrFunc(false), token(':'), - assignmentExpressionPtrFunc(noIn)))))); + mapResult( + seq(binaryExpressionFunc(noIn), + opt(seq( + token('?'), + assignmentExpressionPtrFunc(false), token(':'), + assignmentExpressionPtrFunc(noIn)))), + function (v) { + if (v.length === 1) + return v[0]; + return new ParseNode('ternary', v); + })); }); var conditionalExpression = conditionalExpressionFunc(false); @@ -333,9 +328,13 @@ var parse = function (tokenizer) { function (noIn) { return expecting( 'expression', - nodeIfMultipart( - 'comma', - list(assignmentExpressionFunc(noIn), token(',')))); + mapResult( + list(assignmentExpressionFunc(noIn), token(',')), + function (v) { + if (v.length === 1) + return v[0]; + return new ParseNode('comma', v); + })); }); var expression = expressionFunc(false); @@ -454,7 +453,7 @@ var parse = function (tokenizer) { var inExpr = seq(token('in'), expression); var inExprExpectingSemi = expecting('semicolon', seq(token('in'), expression)); - var forSpec = revalue(node( + var forSpec = mapResult(node( 'forSpec', or(seq(token('var'), varDeclFunc(true), @@ -494,25 +493,25 @@ var parse = 
function (tokenizer) { return [firstExpr].concat(rest); }))), - function (clauses) { - // There are four kinds of for-loop, and we call the - // part between the parens one of forSpec, forVarSpec, - // forInSpec, and forVarInSpec. Having parsed it - // already, we rewrite the node name based on how - // many items came out. forIn and forVarIn always - // have 3 and 4 items respectively. for has 5 - // (the optional expressions are present as nils). - // forVar has 6 or more, because `for(var x;;);` - // produces [`var` `x` `;` nil `;` nil]. - var numChildren = clauses.children.length; - if (numChildren === 3) - return new ParseNode('forInSpec', clauses.children); - else if (numChildren === 4) - return new ParseNode('forVarInSpec', clauses.children); - else if (numChildren >= 6) - return new ParseNode('forVarSpec', clauses.children); - return clauses; - }); + function (clauses) { + // There are four kinds of for-loop, and we call the + // part between the parens one of forSpec, forVarSpec, + // forInSpec, and forVarInSpec. Having parsed it + // already, we rewrite the node name based on how + // many items came out. forIn and forVarIn always + // have 3 and 4 items respectively. for has 5 + // (the optional expressions are present as nils). + // forVar has 6 or more, because `for(var x;;);` + // produces [`var` `x` `;` nil `;` nil]. 
+ var numChildren = clauses.children.length; + if (numChildren === 3) + return new ParseNode('forInSpec', clauses.children); + else if (numChildren === 4) + return new ParseNode('forVarInSpec', clauses.children); + else if (numChildren >= 6) + return new ParseNode('forVarSpec', clauses.children); + return clauses; + }); var iterationStatement = or( node('doStmnt', seq(token('do'), statementPtr, token('while'), @@ -628,21 +627,23 @@ var parse = function (tokenizer) { opt(sourceElements, lookAheadToken('}'))); - var program = node('program', - seq(opt(sourceElements), - // we rely on the fact that opt(sourceElements) - // will never fail, and non-first arguments - // to seq are required to succeed -- meaning - // this parser will never fail without throwing - // a parse error. - expecting('statement', - revalue(lookAheadTokenClass("EOF"), - function (v, t) { - // eat the ending "EOF" so that - // our position is updated - t.consume(); - return v; - })))); + var program = node( + 'program', + seq(opt(sourceElements), + // we rely on the fact that opt(sourceElements) + // will never fail, and non-first arguments + // to seq are required to succeed -- meaning + // this parser will never fail without throwing + // a parse error. 
+ expecting('statement', + mapResult( + lookAheadTokenClass("EOF"), + function (v, t) { + // eat the ending "EOF" so that + // our position is updated + t.consume(); + return v; + })))); return program.parse(tokenizer); }; diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 721229f0b4..7270fd3053 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -189,20 +189,20 @@ var or = function (/*parsers*/) { // `1+2` => ["binary", "1", "+", "2"] // `1+2+3` => ["binary", ["binary", "1", "+", "2"], "+", "3"] // -// opParser can also be an array of op parsers from high to low +// opParsers is an array of op parsers from high to low // precedence (tightest-binding first) -var binaryLeft = function (name, termParser, opParser) { - if (isArray(opParser)) { - if (opParser.length === 1) { - // take single opParser out of its array - opParser = opParser[0]; - } else { - // pop off last opParser (non-destructively) and replace - // termParser with a recursive binaryLeft on the remaining - // ops. - termParser = binaryLeft(name, termParser, opParser.slice(0, -1)); - opParser = opParser[opParser.length - 1]; - } +var binaryLeft = function (name, termParser, opParsers) { + var opParser; + + if (opParsers.length === 1) { + // take single opParser out of its array + opParser = opParsers[0]; + } else { + // pop off last opParser (non-destructively) and replace + // termParser with a recursive binaryLeft on the remaining + // ops. + termParser = binaryLeft(name, termParser, opParsers.slice(0, -1)); + opParser = opParsers[opParsers.length - 1]; } return new Parser( @@ -332,24 +332,11 @@ var opt = function (parser, afterLookAhead) { or(parser, afterLookAhead ? afterLookAhead : seq())); }; -// Takes a parser and runs a function on its output -// when the parser matches. -// note: valueTransformFunc gets the tokenizer as a second argument. -// This func is allowed to then run more parsers. 
-var revalue = function (parser, valueTransformFunc) { - if (typeof valueTransformFunc !== 'function') { - var value = valueTransformFunc; - valueTransformFunc = function (v) { - return value; - }; - } - +var mapResult = function (parser, func) { return new Parser( parser.expecting, function (t) { var v = parser.parse(t); - if (! v) - return null; - return valueTransformFunc(v, t); + return v ? func(v, t) : null; }); }; From 9d55f11b694a9b173ef2ae35e5fe3ae46c23f6ba Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 21:30:36 -0700 Subject: [PATCH 40/86] move unary parsing into lib --- packages/jsparse/parser.js | 21 +++++---------------- packages/jsparse/parserlib.js | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 3fbd853e5c..0daf758b6a 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -4,6 +4,7 @@ // XXX SeqParser // XXX better way to declare parsers, including boolean flagged ones +// XXX examine 'opt' // What we don't have from ECMA-262 5.1: // - object literal trailing comma @@ -223,23 +224,11 @@ var parse = function (tokenizer) { return v[0]; return new ParseNode('postfix', v); })); - var unaryList = opt(list(or(token('delete void typeof'), - preSlashToken('++ -- + - ~ !', false)))); - var unaryExpression = new Parser( - 'expression', - function (t) { - var unaries = unaryList.parse(t); - // if we have unaries, we are committed and - // have to match an expression or error. - var result = postfixExpression.parse( - t, {required: unaries.length}); - if (! 
result) - return null; - while (unaries.length) - result = new ParseNode('unary', [unaries.pop(), result]); - return result; - }); + var unaryExpression = unary( + 'unary', postfixExpression, + or(token('delete void typeof'), + preSlashToken('++ -- + - ~ !', false))); var memoizeBooleanFunc = function (func) { var trueResult, falseResult; diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 7270fd3053..3571e5fb05 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -222,6 +222,25 @@ var binaryLeft = function (name, termParser, opParsers) { }); }; +var unary = function (name, termParser, opParser) { + var unaryList = opt(list(opParser)); + return new Parser( + termParser.expecting, + function (t) { + var unaries = unaryList.parse(t); + // if we have unaries, we are committed and + // have to match an expression or error. + var result = termParser.parse( + t, {required: unaries.length}); + if (! result) + return null; + + while (unaries.length) + result = new ParseNode(name, [unaries.pop(), result]); + return result; + }); +}; + // Parses a list of one or more items with a separator, listing the // items and separators. (Separator is optional.) 
For example: // `x` => ["x"] From 79ce16bfe90291c7365c6eef7f5e70e8acd3130d Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 21:51:06 -0700 Subject: [PATCH 41/86] kill second arg to opt(parser) --- packages/jsparse/parser.js | 60 +++++++++++++++++++---------------- packages/jsparse/parserlib.js | 6 ++-- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 0daf758b6a..f67d644b36 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -4,7 +4,6 @@ // XXX SeqParser // XXX better way to declare parsers, including boolean flagged ones -// XXX examine 'opt' // What we don't have from ECMA-262 5.1: // - object literal trailing comma @@ -75,7 +74,8 @@ var parse = function (tokenizer) { node('array', seq(token('['), opt(list(token(','))), - opt( + or( + lookAheadToken(']'), list( expecting( 'expression', @@ -86,8 +86,7 @@ var parse = function (tokenizer) { lookAheadToken(']'))), // list seperator is one or more commas // to support elision - list(token(','))), - lookAheadToken(']')), + list(token(',')))), token(']'))); var propertyName = expecting('propertyName', or( @@ -101,8 +100,9 @@ var parse = function (tokenizer) { var objectLiteral = node('object', seq(token('{'), - opt(list(nameColonValue, - token(',')), lookAheadToken('}')), + or(lookAheadToken('}'), + list(nameColonValue, + token(','))), token('}'))); // not memoized; only call at construction time @@ -112,8 +112,8 @@ var parse = function (tokenizer) { or(tokenClass('IDENTIFIER'), and(lookAheadToken('('), constant(NIL)))), token('('), - opt(list(tokenClass('IDENTIFIER'), token(',')), - lookAheadToken(')')), + or(lookAheadToken(')'), + list(tokenClass('IDENTIFIER'), token(','))), token(')'), token('{'), functionBodyPtr, @@ -140,8 +140,9 @@ var parse = function (tokenizer) { var dotEnding = seq(token('.'), tokenClass('IDENTIFIER')); var bracketEnding = seq(token('['), expressionPtr, token(']')); var 
callArgs = seq(token('('), - opt(list(assignmentExpressionPtr, - token(',')), lookAheadToken(')')), + or(lookAheadToken(')'), + list(assignmentExpressionPtr, + token(','))), token(')')); var newKeyword = token('new'); @@ -402,7 +403,7 @@ var parse = function (tokenizer) { var emptyStatement = node('emptyStmnt', seq(token(';'))); // not maybeSemicolon var blockStatement = expecting('block', node('blockStmnt', seq( - token('{'), opt(statements, lookAheadToken('}')), + token('{'), or(lookAheadToken('}'), statements), token('}')))); var varDeclFunc = memoizeBooleanFunc(function (noIn) { @@ -434,11 +435,13 @@ var parse = function (tokenizer) { and(lookAheadToken(';'), seq( expecting('semicolon', token(';')), - opt(expressionPtr, and(lookAheadToken(';'), - constant(NIL))), + or(and(lookAheadToken(';'), + constant(NIL)), + expressionPtr), expecting('semicolon', token(';')), - opt(expressionPtr, and(lookAheadToken(')'), - constant(NIL)))))); + or(and(lookAheadToken(')'), + constant(NIL)), + expressionPtr)))); var inExpr = seq(token('in'), expression); var inExprExpectingSemi = expecting('semicolon', seq(token('in'), expression)); @@ -450,10 +453,10 @@ var parse = function (tokenizer) { 'commaOrIn', or(inExpr, seq( - opt( + or( + lookAheadToken(';'), seq(token(','), - list(varDeclFunc(true), token(','))), - lookAheadToken(';')), + list(varDeclFunc(true), token(',')))), secondThirdClauses)))), // get the case where the first clause is empty out of the way. 
// the lookAhead's return value is the empty placeholder for the @@ -551,20 +554,23 @@ var parse = function (tokenizer) { var switchCase = node( 'case', seq(token('case'), expression, token(':'), - opt(statements, or(lookAheadToken('}'), - lookAheadToken('case default'))))); + or(lookAheadToken('}'), + lookAheadToken('case default'), + statements))); var switchDefault = node( 'default', seq(token('default'), token(':'), - opt(statements, or(lookAheadToken('}'), - lookAheadToken('case'))))); + or(lookAheadToken('}'), + lookAheadToken('case'), + statements))); var switchStatement = node( 'switchStmnt', seq(token('switch'), token('('), expression, token(')'), - token('{'), opt(list(switchCase), - or(lookAheadToken('}'), - lookAheadToken('default'))), + token('{'), + or(lookAheadToken('}'), + lookAheadToken('default'), + list(switchCase)), opt(seq(switchDefault, opt(list(switchCase)))), token('}'))); @@ -613,8 +619,8 @@ var parse = function (tokenizer) { var sourceElements = list(sourceElement); var functionBody = expecting('functionBody', - opt(sourceElements, - lookAheadToken('}'))); + or(lookAheadToken('}'), + sourceElements)); var program = node( 'program', diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 3571e5fb05..2325767f9b 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -170,7 +170,7 @@ var node = function (name, childrenParser) { var or = function (/*parsers*/) { var args = arguments; return new Parser( - null, + args[args.length - 1].expecting, function (t) { var result; for(var i = 0, N = args.length; i < N; i++) { @@ -346,9 +346,9 @@ var constant = function (result) { // `for(;var;) {}` will lead to "Expected expression" // instead of "Expected ;" when the optional expression // turns out to be an illegal `var`. -var opt = function (parser, afterLookAhead) { +var opt = function (parser) { return expecting(parser.expecting, - or(parser, afterLookAhead ? 
afterLookAhead : seq())); + or(parser, seq())); }; var mapResult = function (parser, func) { From d2c99c490a404d47aa33a3160fa312dd3df547b8 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 11 Sep 2012 21:54:10 -0700 Subject: [PATCH 42/86] start of removing seq args to node(..) --- packages/jsparse/parser.js | 2 +- packages/jsparse/parserlib.js | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index f67d644b36..ebc29f741c 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -124,7 +124,7 @@ var parse = function (tokenizer) { var primaryOrFunctionExpression = expecting('expression', - or(node('this', seq(token('this'))), + or(node('this', token('this')), // XXXX remove unnecessary seqs in node(...) args node('identifier', seq(tokenClass('IDENTIFIER'))), node('number', seq(tokenClass('NUMBER'))), node('boolean', seq(tokenClass('BOOLEAN'))), diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 2325767f9b..8ccf623ca9 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -163,6 +163,8 @@ var node = function (name, childrenParser) { var children = childrenParser.parse(t); if (! children) return null; + if (! isArray(children)) + children = [children]; return new ParseNode(name, children); }); }; @@ -246,6 +248,7 @@ var unary = function (name, termParser, opParser) { // `x` => ["x"] // `x,y` => ["x", ",", "y"] // `x,y,z` => ["x", ",", "y", ",", "z"] +// Unpacks. var list = function (itemParser, sepParser) { var push = function(array, newThing) { if (isArray(newThing)) @@ -277,6 +280,7 @@ var list = function (itemParser, sepParser) { }); }; +// Unpacks arrays (nested seqs). var seq = function (/*parsers*/) { var args = arguments; if (! 
args.length) From ef3e4f41b8541aa553195c745c14dbc014f0c9a4 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 11:09:30 -0700 Subject: [PATCH 43/86] more clarity around noIn flags --- packages/jsparse/parser.js | 143 ++++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 66 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index ebc29f741c..0e8bb2ebf8 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -12,6 +12,13 @@ var parse = function (tokenizer) { var NIL = new ParseNode('nil', []); + var booleanFlaggedParser = function (parserConstructFunc) { + return { + false: parserConstructFunc(false), + true: parserConstructFunc(true) + }; + }; + var noLineTerminatorHere = expecting( 'noLineTerminator', assertion(function (t) { return ! t.isLineTerminatorHere; @@ -42,23 +49,25 @@ var parse = function (tokenizer) { // These "pointers" allow grammar circularity, i.e. accessing // later parsers from earlier ones. 
- var expressionPtrFunc = function (noIn) { - return new Parser( - "expression", - function (t) { - return expressionFunc(noIn).parse(t); - }); - }; - var expressionPtr = expressionPtrFunc(false); + var expressionMaybeNoInPtr = booleanFlaggedParser( + function (noIn) { + return new Parser( + "expression", + function (t) { + return expressionMaybeNoIn[noIn].parse(t); + }); + }); + var expressionPtr = expressionMaybeNoInPtr[false]; - var assignmentExpressionPtrFunc = function (noIn) { - return new Parser( - "expression", - function (t) { - return assignmentExpressionFunc(noIn).parse(t); - }); - }; - var assignmentExpressionPtr = assignmentExpressionPtrFunc(false); + var assignmentExpressionMaybeNoInPtr = booleanFlaggedParser( + function (noIn) { + return new Parser( + "expression", + function (t) { + return assignmentExpressionMaybeNoIn[noIn].parse(t); + }); + }); + var assignmentExpressionPtr = assignmentExpressionMaybeNoInPtr[false]; var functionBodyPtr = new Parser( "functionBody", function (t) { @@ -105,22 +114,22 @@ var parse = function (tokenizer) { token(','))), token('}'))); - // not memoized; only call at construction time - var functionFunc = function (nameRequired) { - return seq(token('function'), - (nameRequired ? tokenClass('IDENTIFIER') : - or(tokenClass('IDENTIFIER'), - and(lookAheadToken('('), constant(NIL)))), - token('('), - or(lookAheadToken(')'), - list(tokenClass('IDENTIFIER'), token(','))), - token(')'), - token('{'), - functionBodyPtr, - token('}')); - }; + var functionMaybeNameRequired = booleanFlaggedParser( + function (nameRequired) { + return seq(token('function'), + (nameRequired ? 
tokenClass('IDENTIFIER') : + or(tokenClass('IDENTIFIER'), + and(lookAheadToken('('), constant(NIL)))), + token('('), + or(lookAheadToken(')'), + list(tokenClass('IDENTIFIER'), token(','))), + token(')'), + token('{'), + functionBodyPtr, + token('}')); + }); var functionExpression = node('functionExpr', - functionFunc(false)); + functionMaybeNameRequired[false]); var primaryOrFunctionExpression = expecting('expression', @@ -231,18 +240,21 @@ var parse = function (tokenizer) { or(token('delete void typeof'), preSlashToken('++ -- + - ~ !', false))); - var memoizeBooleanFunc = function (func) { - var trueResult, falseResult; - return function (flag) { - if (flag) - return trueResult || (trueResult = func(true)); - else - return falseResult || (falseResult = func(false)); - }; - }; + // The "noIn" business is all to facilitate parsing + // of for-in constructs, though the cases that make + // this required are quite obscure. + // The `for(var x in y)` form is allowed to take + // an initializer for `x` (which is only useful for + // its side effects, or if `y` has no properties). + // So an example might be: + // `for(var x = a().b in c);` + // In this example, `var x = a().b` is parsed without + // the `in`, which would otherwise be part of the + // varDecl, using varDeclNoIn. - // actually this is the spec's LogicalORExpression - var binaryExpressionFunc = memoizeBooleanFunc( + // Our binaryExpression is the spec's LogicalORExpression, + // which includes all the higher-precendence operators. 
+ var binaryExpressionMaybeNoIn = booleanFlaggedParser( function (noIn) { // high to low precedence var binaryOps = [token('* / %'), @@ -261,34 +273,34 @@ var parse = function (tokenizer) { 'expression', binaryLeft('binary', unaryExpression, binaryOps)); }); - var binaryExpression = binaryExpressionFunc(false); + var binaryExpression = binaryExpressionMaybeNoIn[false]; - var conditionalExpressionFunc = memoizeBooleanFunc( + var conditionalExpressionMaybeNoIn = booleanFlaggedParser( function (noIn) { return expecting( 'expression', mapResult( - seq(binaryExpressionFunc(noIn), + seq(binaryExpressionMaybeNoIn[noIn], opt(seq( token('?'), - assignmentExpressionPtrFunc(false), token(':'), - assignmentExpressionPtrFunc(noIn)))), + assignmentExpressionPtr, token(':'), + assignmentExpressionMaybeNoInPtr[noIn]))), function (v) { if (v.length === 1) return v[0]; return new ParseNode('ternary', v); })); }); - var conditionalExpression = conditionalExpressionFunc(false); + var conditionalExpression = conditionalExpressionMaybeNoIn[false]; var assignOp = token('= *= /= %= += -= <<= >>= >>>= &= ^= |='); - var assignmentExpressionFunc = memoizeBooleanFunc( + var assignmentExpressionMaybeNoIn = booleanFlaggedParser( function (noIn) { return new Parser( 'expression', function (t) { - var r = conditionalExpressionFunc(noIn).parse(t); + var r = conditionalExpressionMaybeNoIn[noIn].parse(t); if (! 
r) return null; @@ -300,7 +312,7 @@ var parse = function (tokenizer) { var op; while (r.lhs && (op = assignOp.parse(t))) parts.push(op, - conditionalExpressionFunc(noIn).parse( + conditionalExpressionMaybeNoIn[noIn].parse( t, {required: true})); var result = parts.pop(); @@ -312,21 +324,21 @@ var parse = function (tokenizer) { return result; }); }); - var assignmentExpression = assignmentExpressionFunc(false); + var assignmentExpression = assignmentExpressionMaybeNoIn[false]; - var expressionFunc = memoizeBooleanFunc( + var expressionMaybeNoIn = booleanFlaggedParser( function (noIn) { return expecting( 'expression', mapResult( - list(assignmentExpressionFunc(noIn), token(',')), + list(assignmentExpressionMaybeNoIn[noIn], token(',')), function (v) { if (v.length === 1) return v[0]; return new ParseNode('comma', v); })); }); - var expression = expressionFunc(false); + var expression = expressionMaybeNoIn[false]; // STATEMENTS @@ -406,14 +418,14 @@ var parse = function (tokenizer) { token('{'), or(lookAheadToken('}'), statements), token('}')))); - var varDeclFunc = memoizeBooleanFunc(function (noIn) { + var varDeclMaybeNoIn = booleanFlaggedParser(function (noIn) { return node( 'varDecl', seq(tokenClass('IDENTIFIER'), opt(seq(token('='), - assignmentExpressionFunc(noIn))))); + assignmentExpressionMaybeNoIn[noIn])))); }); - var varDecl = varDeclFunc(false); + var varDecl = varDeclMaybeNoIn[false]; var variableStatement = node( 'varStmnt', @@ -448,7 +460,7 @@ var parse = function (tokenizer) { var forSpec = mapResult(node( 'forSpec', or(seq(token('var'), - varDeclFunc(true), + varDeclMaybeNoIn[true], expecting( 'commaOrIn', or(inExpr, @@ -456,7 +468,7 @@ var parse = function (tokenizer) { or( lookAheadToken(';'), seq(token(','), - list(varDeclFunc(true), token(',')))), + list(varDeclMaybeNoIn[true], token(',')))), secondThirdClauses)))), // get the case where the first clause is empty out of the way. 
// the lookAhead's return value is the empty placeholder for the @@ -469,7 +481,7 @@ var parse = function (tokenizer) { new Parser( null, function (t) { - var firstExpr = expressionFunc(true).parse(t); + var firstExpr = expressionMaybeNoIn[true].parse(t); if (! firstExpr) return null; var rest = secondThirdClauses.parse(t); @@ -612,15 +624,14 @@ var parse = function (tokenizer) { // PROGRAM - var functionDecl = node('functionDecl', - functionFunc(true)); + var functionDecl = node( + 'functionDecl', functionMaybeNameRequired[true]); var sourceElement = or(statement, functionDecl); var sourceElements = list(sourceElement); - var functionBody = expecting('functionBody', - or(lookAheadToken('}'), - sourceElements)); + var functionBody = expecting( + 'functionBody', or(lookAheadToken('}'), sourceElements)); var program = node( 'program', From 6e5e7497f0635179f06bafe4d8d6fc0b3d5193bc Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 13:04:19 -0700 Subject: [PATCH 44/86] Lexeme object --- .../unfinished/jsparse-demo/jsparse-demo.js | 25 +++---- packages/jsparse/lexer.js | 68 ++++++++++++++----- packages/jsparse/parser.js | 38 ++++------- packages/jsparse/parser_tests.js | 48 +++++++------ packages/jsparse/parserlib.js | 42 +++++------- 5 files changed, 117 insertions(+), 104 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index cbcf0e75a9..30d9f389d8 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -41,16 +41,14 @@ if (Meteor.is_client) { try { tree = parse(new Tokenizer(lexer)) || []; } catch (parseError) { - var errorPos = lexer.lastPos; - var errorLen = lexer.text.length; + var errorLexeme = lexer.lastLexeme; - html = Handlebars._escape(input.substring(0, errorPos)); + html = Handlebars._escape( + input.substring(0, errorLexeme.startPos())); html += Spark.setDataContext( - {errorPos: errorPos, - 
errorLen: errorLen}, + errorLexeme, '' + - Handlebars._escape(input.substring(errorPos, errorPos + errorLen) || - '') + + Handlebars._escape(errorLexeme.text() || '') + ''); html = html.replace(/(?!.)\s/g, '
'); html += '
' + @@ -77,9 +75,9 @@ if (Meteor.is_client) { _.each(unclosedInfos, function (info) { info.endPos = curPos; }); - curPos = obj.pos + obj.text.length; + curPos = obj.endPos(); unclosedInfos.length = 0; - var text = obj.text; + var text = obj.text(); // insert zero-width spaces to allow wrapping text = text.replace(/.{20}/g, "$&\n"); text = Handlebars._escape(text); @@ -117,10 +115,7 @@ if (Meteor.is_client) { event.stopImmediatePropagation(); }, 'click .box.token': function (event) { - var token = this; - var startPos = token.pos; - var endPos = startPos + token.text.length; - selectInputText(startPos, endPos); + selectInputText(this.startPos(), this.endPos()); return false; }, 'click .box.named': function (event) { @@ -128,9 +123,7 @@ if (Meteor.is_client) { return false; }, 'click .parseerror': function (event) { - var startPos = this.errorPos; - var endPos = startPos + this.errorLen; - selectInputText(startPos, endPos); + selectInputText(this.startPos(), this.endPos()); return false; } }); diff --git a/packages/jsparse/lexer.js b/packages/jsparse/lexer.js index 01dd143f29..78124daa84 100644 --- a/packages/jsparse/lexer.js +++ b/packages/jsparse/lexer.js @@ -135,7 +135,7 @@ var nonTokenTypes = makeSet('WHITESPACE COMMENT NEWLINE EOF ERROR'.split(' ')); var punctuationBeforeDivision = makeSet('] ) } ++ --'.split(' ')); var keywordsBeforeDivision = makeSet('this'.split(' ')); -var guessIsDivisionPermittedAfterToken = function (type, text) { +var guessIsDivisionPermittedAfterToken = function (tok) { // Figure out if a '/' character should be interpreted as division // rather than the start of a regular expression when it follows the // token (type,text), which must be a token lexeme per @@ -154,13 +154,13 @@ var guessIsDivisionPermittedAfterToken = function (type, text) { // - ++ /foo/.abc // (Prefix `++` or `--` before an expression starting with a regex // literal. This will run but I can't see any use for it.) 
- switch (type) { + switch (tok.type()) { case "PUNCTUATION": // few punctuators can end an expression, but e.g. `)` - return !! punctuationBeforeDivision[text]; + return !! punctuationBeforeDivision[tok.text()]; case "KEYWORD": // few keywords can end an expression, but e.g. `this` - return !! keywordsBeforeDivision[text]; + return !! keywordsBeforeDivision[tok.text()]; case "IDENTIFIER": return true; default: // literal @@ -170,11 +170,51 @@ var guessIsDivisionPermittedAfterToken = function (type, text) { ////////// PUBLIC API +var Lexeme = function (pos, type, text) { + this._pos = pos; + this._type = type; + this._text = text; +}; + +Lexeme.prototype.startPos = function () { + return this._pos; +}; + +Lexeme.prototype.endPos = function () { + return this._pos + this._text.length; +}; + +Lexeme.prototype.type = function () { + return this._type; +}; + +Lexeme.prototype.text = function () { + return this._text; +}; + +Lexeme.prototype.isToken = function () { + return ! nonTokenTypes[this._type]; +}; + +Lexeme.prototype.isError = function () { + return this._type === "ERROR"; +}; + +Lexeme.prototype.isEOF = function () { + return this._type === "EOF"; +}; + +Lexeme.prototype.toString = function () { + return this.isError() ? "ERROR" : + this.isEOF() ? "EOF" : "`" + this.text() + "`"; +}; + // Create a Lexer for the given string of JavaScript code. // // A lexer keeps a pointer `pos` into the string that is // advanced when you ask for the next lexeme with `next()`. // +// XXXXX UPDATE DOCS // Properties: // code: Original JavaScript code string. // pos: Current index into the string. You can assign to it @@ -196,16 +236,11 @@ var guessIsDivisionPermittedAfterToken = function (type, text) { var Lexer = function (code) { this.code = code; this.pos = 0; - this.lastPos = 0; - this.text = ""; - this.type = null; this.divisionPermitted = false; + this.lastLexeme = null; }; -Lexer.isToken = function (type) { - return ! 
nonTokenTypes[type]; -}; - +// XXXX UPDATE DOCS // Return the type of the next of lexeme starting at `pos`, and advance // `pos` to the end of the lexeme. The text of the lexeme is available // in `text`. The text is always the substring of `code` between the @@ -246,13 +281,12 @@ Lexer.prototype.next = function () { type = 'ERROR'; pos = origPos + 1; } - self.lastPos = origPos; self.pos = pos; - self.text = code.substring(origPos, pos); - self.type = type; - if (Lexer.isToken(type)) - self.divisionPermitted = guessIsDivisionPermittedAfterToken(type, self.text); - return type; + var lex = new Lexeme(origPos, type, code.substring(origPos, pos)); + self.lastLexeme = lex; + if (lex.isToken()) + self.divisionPermitted = guessIsDivisionPermittedAfterToken(lex); + return lex; }; if (pos === code.length) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 0e8bb2ebf8..4c94a88b4c 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -546,16 +546,16 @@ var parse = function (tokenizer) { var throwStatement = node( 'throwStmnt', seq(token('throw'), - and(new Parser(null, - function (t) { - var v = noLineTerminatorHere.parse(t); - if (v) - return v; - if (t.peekText) - throw parseError(t, expression, 'end of line'); - // EOF: - return null; - }), expression), + and(or(noLineTerminatorHere, + // If there is a line break here and more tokens after, + // we want to error appropriately. `throw \n e` should + // complain about the "end of line", not the `e`. 
+ and(not(lookAheadTokenClass("EOF")), + new Parser(null, + function (t) { + throw parseError(t, expression, 'end of line'); + }))), + expression), maybeSemicolon)); var withStatement = node( @@ -627,7 +627,7 @@ var parse = function (tokenizer) { var functionDecl = node( 'functionDecl', functionMaybeNameRequired[true]); - var sourceElement = or(statement, functionDecl); + var sourceElement = or(functionDecl, statement); var sourceElements = list(sourceElement); var functionBody = expecting( @@ -636,20 +636,8 @@ var parse = function (tokenizer) { var program = node( 'program', seq(opt(sourceElements), - // we rely on the fact that opt(sourceElements) - // will never fail, and non-first arguments - // to seq are required to succeed -- meaning - // this parser will never fail without throwing - // a parse error. - expecting('statement', - mapResult( - lookAheadTokenClass("EOF"), - function (v, t) { - // eat the ending "EOF" so that - // our position is updated - t.consume(); - return v; - })))); + // If not at EOF, complain "expecting statement" + expecting('statement', lookAheadTokenClass("EOF")))); return program.parse(tokenizer); }; diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 7fe30fb6f1..d5a7109696 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -125,7 +125,7 @@ var stringifyTree = function (tree) { // Treat a token object or string as a token. if (tree.text) - tree = tree.text; + tree = tree.text(); return escapeTokenString(tree); }; @@ -145,11 +145,12 @@ var makeTester = function (test) { // first use lexer to collect all tokens var lexer = new Lexer(code); var allTokensInOrder = []; - while (lexer.next() !== 'EOF') { - if (lexer.type === 'ERROR') - test.fail("Lexer error at " + lexer.lastPos); - if (Lexer.isToken(lexer.type)) - allTokensInOrder.push({ pos: lexer.lastPos, text: lexer.text }); + while (! 
lexer.next().isEOF()) { + var lex = lexer.lastLexeme; + if (lex.isError()) + test.fail("Lexer error at " + lex.startPos()); + if (lex.isToken()) + allTokensInOrder.push(lex); if (regexTokenHints && regexTokenHints[allTokensInOrder.length]) lexer.divisionPermitted = false; } @@ -167,20 +168,19 @@ var makeTester = function (test) { allNodeNamesSet[nodeName] === true)) test.fail("Not a node name: " + nodeName); _.each(tree.children, check); - } else if (typeof tree === 'object' && tree.text && - (typeof tree.pos === 'number')) { + } else if (typeof tree === 'object' && + typeof tree.text === 'function') { // This is a TOKEN (terminal). // Make sure we are visiting every token once, in order. if (nextTokenIndex >= allTokensInOrder.length) test.fail("Too many tokens: " + (nextTokenIndex + 1)); var referenceToken = allTokensInOrder[nextTokenIndex++]; - if (tree.text !== referenceToken.text) - test.fail(tree.text + " !== " + referenceToken.text); - if (tree.pos !== referenceToken.pos) - test.fail(tree.pos + " !== " + referenceToken.pos); - if (code.substring(tree.pos, - tree.pos + tree.text.length) !== tree.text) - test.fail("Didn't see " + tree.text + " at " + tree.pos + + if (tree.text() !== referenceToken.text()) + test.fail(tree.text() + " !== " + referenceToken.text()); + if (tree.startPos() !== referenceToken.startPos()) + test.fail(tree.startPos() + " !== " + referenceToken.startPos()); + if (code.substring(tree.startPos(), tree.endPos()) !== tree.text()) + test.fail("Didn't see " + tree.text() + " at " + tree.startPos() + " in " + code); } else { test.fail("Unknown tree part: " + tree); @@ -191,6 +191,8 @@ var makeTester = function (test) { if (nextTokenIndex !== allTokensInOrder.length) test.fail("Too few tokens: " + nextTokenIndex); + test.equal(tokenizer.pos, code.length); + test.equal(stringifyTree(actualTree), stringifyTree(expectedTree), code); }, @@ -240,8 +242,8 @@ var makeTester = function (test) { // in the error message. 
badParse: function (code) { var constructMessage = function (whatExpected, pos, found, after) { - return "Expected " + whatExpected + " after `" + after + - "` at position " + pos + ", found " + found; + return "Expected " + whatExpected + " after " + after + + " at position " + pos + ", found " + found; }; var pos = code.indexOf('`'); @@ -262,9 +264,8 @@ var makeTester = function (test) { } test.isFalse(parsed); test.isTrue(error); - var after = tokenizer.text; - found = (found || (tokenizer.peekText ? '`' + tokenizer.peekText + '`' - : 'EOF')); + var after = tokenizer.oldToken; + found = (found || tokenizer.newToken); test.equal(error.message, constructMessage(whatExpected, pos, found, after)); } @@ -574,7 +575,9 @@ Tinytest.add("jsparse - syntax forms", function (test) { "assignment(identifier(e) -= assignment(identifier(f) <<= " + "assignment(identifier(g) >>= assignment(identifier(h) >>>= " + "assignment(identifier(i) &= assignment(identifier(j) ^= " + - "assignment(identifier(k) |= identifier(l)))))))))))) ;()))"] + "assignment(identifier(k) |= identifier(l)))))))))))) ;()))"], + ["1;\n\n\n\n/* foo */\n// bar\n", // trailing whitespace and comments + "program(expressionStmnt(number(1) ;))"] ]; _.each(trials, function (tr) { tester.goodParse(tr[0], tr[1]); @@ -602,6 +605,7 @@ Tinytest.add("jsparse - bad parses", function (test) { 'break `semicolon`1+1;', 'throw`expression`', 'throw`expression`;', + 'throw\n`expression`', 'throw\n`expression``end of line`e', 'throw `expression`=;', 'with(`expression`);', @@ -622,7 +626,7 @@ Tinytest.add("jsparse - bad parses", function (test) { '({1:2,`name:value`', 'x.`IDENTIFIER`true', 'foo;`semicolon`:;', - 'throw`expression`' + '1;`statement`=' ]; _.each(trials, function (tr) { tester.badParse(tr); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 8ccf623ca9..86a43afd55 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -37,15 +37,12 @@ Tokenizer = function 
(codeOrLexer) { // XXX rethink codeOrLexer later this.lexer = (codeOrLexer instanceof Lexer ? codeOrLexer : new Lexer(codeOrLexer)); - this.peekType = null; - this.peekText = null; - this.tokenType = null; - this.tokenText = null; - this.lastPos = 0; + this.newToken = null; + this.oldToken = null; this.pos = 0; this.isLineTerminatorHere = false; - // load peekType and peekText + // load newToken this.consume(); }; @@ -57,25 +54,23 @@ _.extend(Tokenizer.prototype, { consume: function () { var self = this; var lexer = self.lexer; - self.type = self.peekType; - self.text = self.peekText; - self.lastPos = self.pos; + self.oldToken = self.newToken; self.isLineTerminatorHere = false; + var lex; do { - lexer.next(); - if (lexer.type === "ERROR") - throw new Error("Bad token at position " + lexer.lastPos + - ", text `" + lexer.text + "`"); - else if (lexer.type === "NEWLINE") + lex = lexer.next(); + if (lex.isError()) + throw new Error("Bad token at position " + lex.startPos() + + ", text `" + lex.text() + "`"); + else if (lex.type() === "NEWLINE") self.isLineTerminatorHere = true; - else if (lexer.type === "COMMENT" && ! /^.*$/.test(lexer.text)) + else if (lex.type() === "COMMENT" && ! /^.*$/.test(lex.text())) // multiline comments containing line terminators count // as line terminators. self.isLineTerminatorHere = true; - } while (lexer.type !== "EOF" && ! Lexer.isToken(lexer.type)); - self.peekType = lexer.type; - self.peekText = lexer.text; - self.pos = lexer.lastPos; + } while (! lex.isEOF() && ! lex.isToken()); + self.newToken = lex; + self.pos = lex.startPos(); } }); @@ -96,10 +91,10 @@ var parseError = function (t, expectedParser, found) { // all parsers that might error should have descriptions, // but just in case: "Unexpected token"); - str += " after `" + t.text + "`"; + str += " after " + t.oldToken; var pos = t.pos; str += " at position " + pos; - str += ", found " + (found || (t.peekText ? 
"`" + t.peekText + "`" : "EOF")); + str += ", found " + (found || t.newToken); var e = new Error(str); return e; }; @@ -112,12 +107,11 @@ var _tokenClassImpl = function (type, text, onlyLook) { return new Parser( expecting, function (t) { - if (t.peekType == type && (!text || textSet[t.peekText])) { + if (t.newToken.type() == type && (!text || textSet[t.newToken.text()])) { if (onlyLook) return []; - var ret = {text: t.peekText, pos: t.pos}; t.consume(); - return ret; + return t.oldToken; } return null; }); From 3197dd241cb6fe1795d44995348f02780eb7b501 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 13:11:54 -0700 Subject: [PATCH 45/86] prev/next pointers in Lexemes --- packages/jsparse/lexer.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/jsparse/lexer.js b/packages/jsparse/lexer.js index 78124daa84..b7e994a478 100644 --- a/packages/jsparse/lexer.js +++ b/packages/jsparse/lexer.js @@ -283,6 +283,10 @@ Lexer.prototype.next = function () { } self.pos = pos; var lex = new Lexeme(origPos, type, code.substring(origPos, pos)); + if (self.lastLexeme) { + self.lastLexeme.next = lex; + lex.prev = self.lastLexeme; + } self.lastLexeme = lex; if (lex.isToken()) self.divisionPermitted = guessIsDivisionPermittedAfterToken(lex); From ce2a41c1c30b8aaaca89916ce9f6312544a3c1ab Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 13:42:54 -0700 Subject: [PATCH 46/86] kill node.lhs, put error msg in Tokenizer --- packages/jsparse/parser.js | 19 ++++---- packages/jsparse/parser_tests.js | 20 +++++---- packages/jsparse/parserlib.js | 75 +++++++++++++++----------------- 3 files changed, 56 insertions(+), 58 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 4c94a88b4c..722bdd24a5 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -1,10 +1,5 @@ ///// JAVASCRIPT PARSER -// XXX unit tests - -// XXX SeqParser -// XXX better way to declare parsers, including boolean flagged 
ones - // What we don't have from ECMA-262 5.1: // - object literal trailing comma // - object literal get/set @@ -24,6 +19,12 @@ var parse = function (tokenizer) { return ! t.isLineTerminatorHere; })); + var nonLHSExpressionNames = makeSet( + 'unary binary postfix ternary assignment comma'.split(' ')); + var isExpressionLHS = function (exprNode) { + return ! nonLHSExpressionNames[exprNode.name]; + }; + // Like token, but marks tokens that need to defy the lexer's // heuristic about whether the next '/' is a division or // starts a regex. @@ -214,10 +215,6 @@ var parse = function (tokenizer) { while (news.length) result = new ParseNode('new', [news.pop(), result]); - // mark any LeftHandSideExpression, for the benefit of - // assignmentExpression - result.lhs = true; - return result; }); @@ -310,7 +307,7 @@ var parse = function (tokenizer) { // and then fold them up at the end. var parts = [r]; var op; - while (r.lhs && (op = assignOp.parse(t))) + while (isExpressionLHS(r) &&(op = assignOp.parse(t))) parts.push(op, conditionalExpressionMaybeNoIn[noIn].parse( t, {required: true})); @@ -488,7 +485,7 @@ var parse = function (tokenizer) { if (! rest) { // we need a left-hand-side expression for a // `for (x in y)` loop. - if (! firstExpr.lhs) + if (! isExpressionLHS(firstExpr)) throw parseError(t, secondThirdClauses); // if we don't see 'in' at this point, it's probably // a missing semicolon diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index d5a7109696..95413dbac4 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -264,10 +264,12 @@ var makeTester = function (test) { } test.isFalse(parsed); test.isTrue(error); - var after = tokenizer.oldToken; - found = (found || tokenizer.newToken); - test.equal(error.message, - constructMessage(whatExpected, pos, found, after)); + if (! 
parsed && error) { + var after = tokenizer.oldToken; + found = (found || tokenizer.newToken); + test.equal(error.message, + constructMessage(whatExpected, pos, found, after)); + } } }; }; @@ -560,9 +562,9 @@ Tinytest.add("jsparse - syntax forms", function (test) { ["1==2?3=4:5=6", "program(expressionStmnt(ternary(binary(number(1) == number(2)) ? " + "assignment(number(3) = number(4)) : assignment(number(5) = number(6))) ;()))"], - ["1=2,3=4", - "program(expressionStmnt(comma(assignment(number(1) = number(2)) , " + - "assignment(number(3) = number(4))) ;()))"], + ["a=b,c=d", + "program(expressionStmnt(comma(assignment(identifier(a) = identifier(b)) , " + + "assignment(identifier(c) = identifier(d))) ;()))"], ["a=b=c=d", "program(expressionStmnt(assignment(identifier(a) = assignment(identifier(b) " + "= assignment(identifier(c) = identifier(d)))) ;()))"], @@ -626,7 +628,9 @@ Tinytest.add("jsparse - bad parses", function (test) { '({1:2,`name:value`', 'x.`IDENTIFIER`true', 'foo;`semicolon`:;', - '1;`statement`=' + '1;`statement`=', + 'a+b`semicolon`=c;', + 'for(1+1 `semicolon`in {});' ]; _.each(trials, function (tr) { tester.badParse(tr); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 86a43afd55..69f60559a7 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -1,7 +1,6 @@ ///// TOKENIZER AND PARSER COMBINATORS // XXX track line/col position, for errors and maybe token info -// XXX unit tests var isArray = function (obj) { return obj && (typeof obj === 'object') && (typeof obj.length === 'number'); @@ -46,33 +45,41 @@ Tokenizer = function (codeOrLexer) { this.consume(); }; -_.extend(Tokenizer.prototype, { - // consumes the token (peekType, peekText) and moves - // it into (type, text), loading the next token - // into (peekType, peekText). A token is a lexeme - // besides WHITESPACE, COMMENT, and NEWLINE. 
- consume: function () { - var self = this; - var lexer = self.lexer; - self.oldToken = self.newToken; - self.isLineTerminatorHere = false; - var lex; - do { - lex = lexer.next(); - if (lex.isError()) - throw new Error("Bad token at position " + lex.startPos() + - ", text `" + lex.text() + "`"); - else if (lex.type() === "NEWLINE") - self.isLineTerminatorHere = true; - else if (lex.type() === "COMMENT" && ! /^.*$/.test(lex.text())) - // multiline comments containing line terminators count - // as line terminators. - self.isLineTerminatorHere = true; - } while (! lex.isEOF() && ! lex.isToken()); - self.newToken = lex; - self.pos = lex.startPos(); - } -}); +// UPDATE DOCS +// consumes the token (peekType, peekText) and moves +// it into (type, text), loading the next token +// into (peekType, peekText). A token is a lexeme +// besides WHITESPACE, COMMENT, and NEWLINE. +Tokenizer.prototype.consume = function () { + var self = this; + var lexer = self.lexer; + self.oldToken = self.newToken; + self.isLineTerminatorHere = false; + var lex; + do { + lex = lexer.next(); + if (lex.isError()) + throw new Error("Bad token at position " + lex.startPos() + + ", text `" + lex.text() + "`"); + else if (lex.type() === "NEWLINE") + self.isLineTerminatorHere = true; + else if (lex.type() === "COMMENT" && ! /^.*$/.test(lex.text())) + // multiline comments containing line terminators count + // as line terminators. + self.isLineTerminatorHere = true; + } while (! lex.isEOF() && ! lex.isToken()); + self.newToken = lex; + self.pos = lex.startPos(); +}; + +Tokenizer.prototype.getErrorMessage = function (expecting, found) { + var msg = (expecting ? "Expected " + expecting : "Unexpected token"); + msg += " after " + this.oldToken; + var pos = this.pos; + msg += " at position " + pos; + msg += ", found " + (found || this.newToken); + return msg; +}; // A parser that consume()s has to succeed. // Similarly, a parser that fails can't have consumed. 
@@ -86,17 +93,7 @@ var expecting = function (expecting, parser) { // Call this as `throw parseError(...)`. // `expected` is a parser, `after` is a string. var parseError = function (t, expectedParser, found) { - var str = (expectedParser.expecting ? "Expected " + - expectedParser.expecting : - // all parsers that might error should have descriptions, - // but just in case: - "Unexpected token"); - str += " after " + t.oldToken; - var pos = t.pos; - str += " at position " + pos; - str += ", found " + (found || t.newToken); - var e = new Error(str); - return e; + return new Error(t.getErrorMessage(expectedParser.expecting, found)); }; ///// TERMINAL PARSER CONSTRUCTORS From 603ad14570b1103f7835275c60638685d7e43217 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 13:55:36 -0700 Subject: [PATCH 47/86] tweaks --- packages/jsparse/parser.js | 4 ++-- packages/jsparse/parser_tests.js | 11 ++++++----- packages/jsparse/parserlib.js | 17 ++++++----------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 722bdd24a5..3568f8fdd7 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -486,7 +486,7 @@ var parse = function (tokenizer) { // we need a left-hand-side expression for a // `for (x in y)` loop. if (! 
isExpressionLHS(firstExpr)) - throw parseError(t, secondThirdClauses); + throw t.getParseError("semicolon"); // if we don't see 'in' at this point, it's probably // a missing semicolon rest = inExprExpectingSemi.parse(t, {required: true}); @@ -550,7 +550,7 @@ var parse = function (tokenizer) { and(not(lookAheadTokenClass("EOF")), new Parser(null, function (t) { - throw parseError(t, expression, 'end of line'); + throw t.getParseError('expression', 'end of line'); }))), expression), maybeSemicolon)); diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 95413dbac4..58d5647558 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -154,9 +154,9 @@ var makeTester = function (test) { if (regexTokenHints && regexTokenHints[allTokensInOrder.length]) lexer.divisionPermitted = false; } - lexer = new Lexer(code); - var tokenizer = new Tokenizer(code); + lexer = new Lexer(code); + var tokenizer = new Tokenizer(lexer); var actualTree = parse(tokenizer); var nextTokenIndex = 0; @@ -191,7 +191,7 @@ var makeTester = function (test) { if (nextTokenIndex !== allTokensInOrder.length) test.fail("Too few tokens: " + nextTokenIndex); - test.equal(tokenizer.pos, code.length); + test.equal(lexer.pos, code.length); test.equal(stringifyTree(actualTree), stringifyTree(expectedTree), code); @@ -242,7 +242,7 @@ var makeTester = function (test) { // in the error message. badParse: function (code) { var constructMessage = function (whatExpected, pos, found, after) { - return "Expected " + whatExpected + " after " + after + + return "Expected " + whatExpected + (after ? 
" after " + after : "") + " at position " + pos + ", found " + found; }; var pos = code.indexOf('`'); @@ -630,7 +630,8 @@ Tinytest.add("jsparse - bad parses", function (test) { 'foo;`semicolon`:;', '1;`statement`=', 'a+b`semicolon`=c;', - 'for(1+1 `semicolon`in {});' + 'for(1+1 `semicolon`in {});', + '`statement`=' ]; _.each(trials, function (tr) { tester.badParse(tr); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 69f60559a7..84cedc92d7 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -25,7 +25,7 @@ _.extend(Parser.prototype, { if (options) { if (options.required && ! result) - throw parseError(t, this); + throw t.getParseError(this.expecting); } return result; @@ -72,13 +72,14 @@ Tokenizer.prototype.consume = function () { self.pos = lex.startPos(); }; -Tokenizer.prototype.getErrorMessage = function (expecting, found) { +Tokenizer.prototype.getParseError = function (expecting, found) { var msg = (expecting ? "Expected " + expecting : "Unexpected token"); - msg += " after " + this.oldToken; + if (this.oldToken) + msg += " after " + this.oldToken; var pos = this.pos; msg += " at position " + pos; msg += ", found " + (found || this.newToken); - return msg; + return new Error(msg); }; // A parser that consume()s has to succeed. @@ -90,12 +91,6 @@ var expecting = function (expecting, parser) { return parser; }; -// Call this as `throw parseError(...)`. -// `expected` is a parser, `after` is a string. -var parseError = function (t, expectedParser, found) { - return new Error(t.getErrorMessage(expectedParser.expecting, found)); -}; - ///// TERMINAL PARSER CONSTRUCTORS var _tokenClassImpl = function (type, text, onlyLook) { @@ -222,7 +217,7 @@ var unary = function (name, termParser, opParser) { function (t) { var unaries = unaryList.parse(t); // if we have unaries, we are committed and - // have to match an expression or error. + // have to match a term or error. 
var result = termParser.parse( t, {required: unaries.length}); if (! result) From 34b4ebbf224ca829144e18d0a74b2053a1247ef7 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 14:11:28 -0700 Subject: [PATCH 48/86] create JSParser --- .../unfinished/jsparse-demo/jsparse-demo.js | 8 +-- packages/jsparse/parser.js | 46 +++++++++++++++- packages/jsparse/parser_tests.js | 25 ++++----- packages/jsparse/parserlib.js | 52 +------------------ 4 files changed, 58 insertions(+), 73 deletions(-) diff --git a/examples/unfinished/jsparse-demo/jsparse-demo.js b/examples/unfinished/jsparse-demo/jsparse-demo.js index 30d9f389d8..9ea3d008de 100644 --- a/examples/unfinished/jsparse-demo/jsparse-demo.js +++ b/examples/unfinished/jsparse-demo/jsparse-demo.js @@ -37,11 +37,11 @@ if (Meteor.is_client) { // PARSER var html; var tree = null; - var lexer = new Lexer(input); + var parser = new JSParser(input); try { - tree = parse(new Tokenizer(lexer)) || []; + tree = parser.getSyntaxTree(); } catch (parseError) { - var errorLexeme = lexer.lastLexeme; + var errorLexeme = parser.lexer.lastLexeme; html = Handlebars._escape( input.substring(0, errorLexeme.startPos())); @@ -92,7 +92,7 @@ if (Meteor.is_client) { } }; html = toHtml(tree); - curPos = lexer.pos; + curPos = parser.lexer.pos; _.each(unclosedInfos, function (info) { info.endPos = curPos; }); diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 3568f8fdd7..4f731c43b6 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -4,7 +4,49 @@ // - object literal trailing comma // - object literal get/set -var parse = function (tokenizer) { +var JSParser = function (code) { + this.lexer = new Lexer(code); + this.oldToken = null; + this.newToken = null; + this.pos = 0; + this.isLineTerminatorHere = false; + + this.consumeNewToken(); +}; + +JSParser.prototype.consumeNewToken = function () { + var self = this; + var lexer = self.lexer; + self.oldToken = self.newToken; + 
self.isLineTerminatorHere = false; + var lex; + do { + lex = lexer.next(); + if (lex.isError()) + throw new Error("Bad token at position " + lex.startPos() + + ", text `" + lex.text() + "`"); + else if (lex.type() === "NEWLINE") + self.isLineTerminatorHere = true; + else if (lex.type() === "COMMENT" && ! /^.*$/.test(lex.text())) + // multiline comments containing line terminators count + // as line terminators. + self.isLineTerminatorHere = true; + } while (! lex.isEOF() && ! lex.isToken()); + self.newToken = lex; + self.pos = lex.startPos(); +}; + +JSParser.prototype.getParseError = function (expecting, found) { + var msg = (expecting ? "Expected " + expecting : "Unexpected token"); + if (this.oldToken) + msg += " after " + this.oldToken; + var pos = this.pos; + msg += " at position " + pos; + msg += ", found " + (found || this.newToken); + return new Error(msg); +}; + +JSParser.prototype.getSyntaxTree = function () { var NIL = new ParseNode('nil', []); var booleanFlaggedParser = function (parserConstructFunc) { @@ -636,5 +678,5 @@ var parse = function (tokenizer) { // If not at EOF, complain "expecting statement" expecting('statement', lookAheadTokenClass("EOF")))); - return program.parse(tokenizer); + return program.parse(this); }; diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 58d5647558..d53a4563e2 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -130,10 +130,7 @@ var stringifyTree = function (tree) { }; var parseToTreeString = function (code) { - var lexer = new Lexer(code); - var tokenizer = new Tokenizer(code); - var tree = parse(tokenizer); - return stringifyTree(tree); + return stringifyTree(new JSParser(code).getSyntaxTree()); }; var makeTester = function (test) { @@ -155,9 +152,8 @@ var makeTester = function (test) { lexer.divisionPermitted = false; } - lexer = new Lexer(code); - var tokenizer = new Tokenizer(lexer); - var actualTree = parse(tokenizer); + var parser = new 
JSParser(code); + var actualTree = parser.getSyntaxTree(); var nextTokenIndex = 0; var check = function (tree) { @@ -191,7 +187,7 @@ var makeTester = function (test) { if (nextTokenIndex !== allTokensInOrder.length) test.fail("Too few tokens: " + nextTokenIndex); - test.equal(lexer.pos, code.length); + test.equal(parser.pos, code.length); test.equal(stringifyTree(actualTree), stringifyTree(expectedTree), code); @@ -215,9 +211,7 @@ var makeTester = function (test) { var parsed = false; var error = null; try { - var lexer = new Lexer(code); - var tokenizer = new Tokenizer(code); - var tree = parse(tokenizer); + var tree = new JSParser(code).getSyntaxTree(); parsed = true; } catch (e) { error = e; @@ -254,10 +248,9 @@ var makeTester = function (test) { var parsed = false; var error = null; + var parser = new JSParser(code); try { - var lexer = new Lexer(code); - var tokenizer = new Tokenizer(code); - var tree = parse(tokenizer); + var tree = parser.getSyntaxTree(); parsed = true; } catch (e) { error = e; @@ -265,8 +258,8 @@ var makeTester = function (test) { test.isFalse(parsed); test.isTrue(error); if (! parsed && error) { - var after = tokenizer.oldToken; - found = (found || tokenizer.newToken); + var after = parser.oldToken; + found = (found || parser.newToken); test.equal(error.message, constructMessage(whatExpected, pos, found, after)); } diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 84cedc92d7..23e6bd6ba9 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -32,56 +32,6 @@ _.extend(Parser.prototype, { } }); -Tokenizer = function (codeOrLexer) { - // XXX rethink codeOrLexer later - this.lexer = (codeOrLexer instanceof Lexer ? 
codeOrLexer : - new Lexer(codeOrLexer)); - this.newToken = null; - this.oldToken = null; - this.pos = 0; - this.isLineTerminatorHere = false; - - // load newToken - this.consume(); -}; - -// UPDATE DOCS -// consumes the token (peekType, peekText) and moves -// it into (type, text), loading the next token -// into (peekType, peekText). A token is a lexeme -// besides WHITESPACE, COMMENT, and NEWLINE. -Tokenizer.prototype.consume = function () { - var self = this; - var lexer = self.lexer; - self.oldToken = self.newToken; - self.isLineTerminatorHere = false; - var lex; - do { - lex = lexer.next(); - if (lex.isError()) - throw new Error("Bad token at position " + lex.startPos() + - ", text `" + lex.text() + "`"); - else if (lex.type() === "NEWLINE") - self.isLineTerminatorHere = true; - else if (lex.type() === "COMMENT" && ! /^.*$/.test(lex.text())) - // multiline comments containing line terminators count - // as line terminators. - self.isLineTerminatorHere = true; - } while (! lex.isEOF() && ! lex.isToken()); - self.newToken = lex; - self.pos = lex.startPos(); -}; - -Tokenizer.prototype.getParseError = function (expecting, found) { - var msg = (expecting ? "Expected " + expecting : "Unexpected token"); - if (this.oldToken) - msg += " after " + this.oldToken; - var pos = this.pos; - msg += " at position " + pos; - msg += ", found " + (found || this.newToken); - return new Error(msg); -}; - // A parser that consume()s has to succeed. // Similarly, a parser that fails can't have consumed. 
@@ -102,7 +52,7 @@ var _tokenClassImpl = function (type, text, onlyLook) { if (t.newToken.type() == type && (!text || textSet[t.newToken.text()])) { if (onlyLook) return []; - t.consume(); + t.consumeNewToken(); return t.oldToken; } return null; From 39056590d4f67e628f5dd79eafa8b4449565d5fb Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 14:31:47 -0700 Subject: [PATCH 49/86] move token matchers out of parser lib --- packages/jsparse/parser.js | 84 ++++++++++++++++++++++++-------- packages/jsparse/parser_tests.js | 7 +-- packages/jsparse/parserlib.js | 57 +--------------------- 3 files changed, 68 insertions(+), 80 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 4f731c43b6..979b30f662 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -56,6 +56,48 @@ JSParser.prototype.getSyntaxTree = function () { }; }; + // Takes a space-separated list of either punctuation or keyword tokens + var lookAheadToken = function (tokens) { + var type = (/\w/.test(tokens) ? 'KEYWORD' : 'PUNCTUATION'); + var textSet = makeSet(tokens.split(' ')); + return expecting( + tokens.split(' ').join(', '), + assertion(function (t) { + return (t.newToken.type() === type && textSet[t.newToken.text()]); + })); + }; + + var lookAheadTokenType = function (type) { + return expecting(type, assertion(function (t) { + return t.newToken.type() === type; + })); + }; + + // Takes a space-separated list of either punctuation or keyword tokens + var token = function (tokens) { + var type = (/\w/.test(tokens) ? 
'KEYWORD' : 'PUNCTUATION'); + var textSet = makeSet(tokens.split(' ')); + return new Parser( + tokens.split(' ').join(', '), + function (t) { + if (t.newToken.type() === type && textSet[t.newToken.text()]) { + t.consumeNewToken(); + return t.oldToken; + } + return null; + }); + }; + + var tokenType = function (type) { + return new Parser(type, function (t) { + if (t.newToken.type() === type) { + t.consumeNewToken(); + return t.oldToken; + } + return null; + }); + }; + var noLineTerminatorHere = expecting( 'noLineTerminator', assertion(function (t) { return ! t.isLineTerminatorHere; @@ -142,11 +184,11 @@ JSParser.prototype.getSyntaxTree = function () { token(']'))); var propertyName = expecting('propertyName', or( - node('idPropName', seq(tokenClass('IDENTIFIER'))), - node('numPropName', seq(tokenClass('NUMBER'))), - node('strPropName', seq(tokenClass('STRING'))))); + node('idPropName', seq(tokenType('IDENTIFIER'))), + node('numPropName', seq(tokenType('NUMBER'))), + node('strPropName', seq(tokenType('STRING'))))); var nameColonValue = expecting( - 'name:value', + 'propertyName', node('prop', seq(propertyName, token(':'), assignmentExpressionPtr))); var objectLiteral = @@ -160,12 +202,12 @@ JSParser.prototype.getSyntaxTree = function () { var functionMaybeNameRequired = booleanFlaggedParser( function (nameRequired) { return seq(token('function'), - (nameRequired ? tokenClass('IDENTIFIER') : - or(tokenClass('IDENTIFIER'), + (nameRequired ? tokenType('IDENTIFIER') : + or(tokenType('IDENTIFIER'), and(lookAheadToken('('), constant(NIL)))), token('('), or(lookAheadToken(')'), - list(tokenClass('IDENTIFIER'), token(','))), + list(tokenType('IDENTIFIER'), token(','))), token(')'), token('{'), functionBodyPtr, @@ -177,19 +219,19 @@ JSParser.prototype.getSyntaxTree = function () { var primaryOrFunctionExpression = expecting('expression', or(node('this', token('this')), // XXXX remove unnecessary seqs in node(...) 
args - node('identifier', seq(tokenClass('IDENTIFIER'))), - node('number', seq(tokenClass('NUMBER'))), - node('boolean', seq(tokenClass('BOOLEAN'))), - node('null', seq(tokenClass('NULL'))), - node('regex', seq(tokenClass('REGEX'))), - node('string', seq(tokenClass('STRING'))), + node('identifier', seq(tokenType('IDENTIFIER'))), + node('number', seq(tokenType('NUMBER'))), + node('boolean', seq(tokenType('BOOLEAN'))), + node('null', seq(tokenType('NULL'))), + node('regex', seq(tokenType('REGEX'))), + node('string', seq(tokenType('STRING'))), node('parens', seq(token('('), expressionPtr, token(')'))), arrayLiteral, objectLiteral, functionExpression)); - var dotEnding = seq(token('.'), tokenClass('IDENTIFIER')); + var dotEnding = seq(token('.'), tokenType('IDENTIFIER')); var bracketEnding = seq(token('['), expressionPtr, token(']')); var callArgs = seq(token('('), or(lookAheadToken(')'), @@ -390,7 +432,7 @@ JSParser.prototype.getSyntaxTree = function () { and( or( lookAheadToken('}'), - lookAheadTokenClass('EOF'), + lookAheadTokenType('EOF'), assertion(function (t) { return t.isLineTerminatorHere; })), @@ -460,7 +502,7 @@ JSParser.prototype.getSyntaxTree = function () { var varDeclMaybeNoIn = booleanFlaggedParser(function (noIn) { return node( 'varDecl', - seq(tokenClass('IDENTIFIER'), + seq(tokenType('IDENTIFIER'), opt(seq(token('='), assignmentExpressionMaybeNoIn[noIn])))); }); @@ -575,12 +617,12 @@ JSParser.prototype.getSyntaxTree = function () { var continueStatement = node( 'continueStmnt', seq(token('continue'), or( - and(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(NIL)), + and(noLineTerminatorHere, tokenType('IDENTIFIER')), constant(NIL)), maybeSemicolon)); var breakStatement = node( 'breakStmnt', seq(token('break'), or( - and(noLineTerminatorHere, tokenClass('IDENTIFIER')), constant(NIL)), + and(noLineTerminatorHere, tokenType('IDENTIFIER')), constant(NIL)), maybeSemicolon)); var throwStatement = node( 'throwStmnt', @@ -589,7 +631,7 @@ 
JSParser.prototype.getSyntaxTree = function () { // If there is a line break here and more tokens after, // we want to error appropriately. `throw \n e` should // complain about the "end of line", not the `e`. - and(not(lookAheadTokenClass("EOF")), + and(not(lookAheadTokenType("EOF")), new Parser(null, function (t) { throw t.getParseError('expression', 'end of line'); @@ -632,7 +674,7 @@ JSParser.prototype.getSyntaxTree = function () { seq( or(node( 'catch', - seq(token('catch'), token('('), tokenClass('IDENTIFIER'), + seq(token('catch'), token('('), tokenType('IDENTIFIER'), token(')'), blockStatement)), constant(NIL)), or(node( @@ -676,7 +718,7 @@ JSParser.prototype.getSyntaxTree = function () { 'program', seq(opt(sourceElements), // If not at EOF, complain "expecting statement" - expecting('statement', lookAheadTokenClass("EOF")))); + expecting('statement', lookAheadTokenType("EOF")))); return program.parse(this); }; diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index d53a4563e2..ef64e6a9ba 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -616,15 +616,16 @@ Tinytest.add("jsparse - bad parses", function (test) { 'foo: `statement`function foo() {}', '[`expression`=', '[,,`expression`=', - '({`name:value`true:3})', + '({`propertyName`true:3})', '({1:2,3`:`})', - '({1:2,`name:value`', + '({1:2,`propertyName`', 'x.`IDENTIFIER`true', 'foo;`semicolon`:;', '1;`statement`=', 'a+b`semicolon`=c;', 'for(1+1 `semicolon`in {});', - '`statement`=' + '`statement`=', + 'for(;`expression`var;) {}' ]; _.each(trials, function (tr) { tester.badParse(tr); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 23e6bd6ba9..4824b79a5e 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -41,50 +41,6 @@ var expecting = function (expecting, parser) { return parser; }; -///// TERMINAL PARSER CONSTRUCTORS - -var _tokenClassImpl = function (type, text, onlyLook) { - 
var textSet = (text ? makeSet(text.split(' ')) : null); - var expecting = (text ? text.split(' ').join(', ') : type); - return new Parser( - expecting, - function (t) { - if (t.newToken.type() == type && (!text || textSet[t.newToken.text()])) { - if (onlyLook) - return []; - t.consumeNewToken(); - return t.oldToken; - } - return null; - }); -}; - -var _tokenImpl = function (text, onlyLook) { - if (/\w/.test(text)) - return _tokenClassImpl('KEYWORD', text, onlyLook); - return _tokenClassImpl('PUNCTUATION', text, onlyLook); -}; - -var tokenClass = function (type, text) { - if (type === "ERROR" || type === "EOF") - throw new Error("Can't create EOF or ERROR tokens, can only look ahead"); - return _tokenClassImpl(type, text); -}; - -var token = function (text) { - return _tokenImpl(text); -}; - -// NON-CONSUMING PARSER CONSTRUCTORS - -var lookAheadTokenClass = function (type, text) { - return _tokenClassImpl(type, text, true); -}; - -var lookAheadToken = function (text) { - return _tokenImpl(text, true); -}; - var assertion = function (test) { return new Parser( null, function (t) { @@ -276,19 +232,8 @@ var constant = function (result) { function (t) { return result; }); }; -// afterLookAhead allows the parser to fail rather than -// succeed if would otherwise fail at a position where -// afterLookAhead doesn't match, potentially providing -// a better error message. For example, the illegal -// object literal `{true:1}` will stop at the `true` -// and say something like "expected property name" -// instead of "expected }". As another example, -// `for(;var;) {}` will lead to "Expected expression" -// instead of "Expected ;" when the optional expression -// turns out to be an illegal `var`. 
var opt = function (parser) { - return expecting(parser.expecting, - or(parser, seq())); + return expecting(parser.expecting, or(parser, seq())); }; var mapResult = function (parser, func) { From 76cac261008e02808af1cfce62260dc26d71d2dd Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 14:34:50 -0700 Subject: [PATCH 50/86] =?UTF-8?q?finish=20removing=20unnecessary=20seqs=20?= =?UTF-8?q?in=20node(=E2=80=A6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/jsparse/parser.js | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 979b30f662..c3d541d086 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -184,9 +184,9 @@ JSParser.prototype.getSyntaxTree = function () { token(']'))); var propertyName = expecting('propertyName', or( - node('idPropName', seq(tokenType('IDENTIFIER'))), - node('numPropName', seq(tokenType('NUMBER'))), - node('strPropName', seq(tokenType('STRING'))))); + node('idPropName', tokenType('IDENTIFIER')), + node('numPropName', tokenType('NUMBER')), + node('strPropName', tokenType('STRING')))); var nameColonValue = expecting( 'propertyName', node('prop', seq(propertyName, token(':'), assignmentExpressionPtr))); @@ -218,13 +218,13 @@ JSParser.prototype.getSyntaxTree = function () { var primaryOrFunctionExpression = expecting('expression', - or(node('this', token('this')), // XXXX remove unnecessary seqs in node(...) 
args - node('identifier', seq(tokenType('IDENTIFIER'))), - node('number', seq(tokenType('NUMBER'))), - node('boolean', seq(tokenType('BOOLEAN'))), - node('null', seq(tokenType('NULL'))), - node('regex', seq(tokenType('REGEX'))), - node('string', seq(tokenType('STRING'))), + or(node('this', token('this')), + node('identifier', tokenType('IDENTIFIER')), + node('number', tokenType('NUMBER')), + node('boolean', tokenType('BOOLEAN')), + node('null', tokenType('NULL')), + node('regex', tokenType('REGEX')), + node('string', tokenType('STRING')), node('parens', seq(token('('), expressionPtr, token(')'))), arrayLiteral, @@ -493,7 +493,7 @@ JSParser.prototype.getSyntaxTree = function () { [expr.children[0]].concat(rest)); }); - var emptyStatement = node('emptyStmnt', seq(token(';'))); // not maybeSemicolon + var emptyStatement = node('emptyStmnt', token(';')); // required semicolon var blockStatement = expecting('block', node('blockStmnt', seq( token('{'), or(lookAheadToken('}'), statements), From 25b564014a23653eae7d26da3ecb904b1e47e0ef Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 14:39:28 -0700 Subject: [PATCH 51/86] remove comment --- packages/jsparse/parserlib.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 4824b79a5e..09c82747ea 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -48,8 +48,6 @@ var assertion = function (test) { }); }; -///// NON-TERMINAL PARSER CONSTRUCTORS - var node = function (name, childrenParser) { return new Parser(name, function (t) { var children = childrenParser.parse(t); From f9cfe9701f1cfd6aecc8b00aa61fa490536c35ce Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 14:48:32 -0700 Subject: [PATCH 52/86] JSLexer, reduce namespace pollution --- packages/jsparse/lexer.js | 19 ++++++++++++------- packages/jsparse/parser.js | 16 ++++++++++++++-- packages/jsparse/parser_tests.js | 2 +- 
packages/jsparse/parserlib.js | 18 ++++++++---------- 4 files changed, 35 insertions(+), 20 deletions(-) diff --git a/packages/jsparse/lexer.js b/packages/jsparse/lexer.js index b7e994a478..57029915bd 100644 --- a/packages/jsparse/lexer.js +++ b/packages/jsparse/lexer.js @@ -1,4 +1,5 @@ -////////// HELPERS + +(function () { var regexEscape = function (str) { return str.replace(/[\][^$\\.*+?(){}|]/g, '\\$&'); @@ -138,14 +139,14 @@ var keywordsBeforeDivision = makeSet('this'.split(' ')); var guessIsDivisionPermittedAfterToken = function (tok) { // Figure out if a '/' character should be interpreted as division // rather than the start of a regular expression when it follows the - // token (type,text), which must be a token lexeme per - // Lexer.isToken. The beginning of section 7 of the spec briefly + // token, which must be a token lexeme per isToken(). + // The beginning of section 7 of the spec briefly // explains what's going on; basically the lexical grammar can't // distinguish, for example, `e/f/g` (division) from `e=/f/g` // (assignment of a regular expression), among many other variations. // // THIS IS ONLY A HEURISTIC, though it will rarely fail. - // Here are the two cases I know of: + // Here are the two cases I know of where help from the parser is needed: // - if (foo) // /ba/.test("banana") && console.log("matches"); // (Close paren of a control structure before a statement starting with @@ -233,13 +234,15 @@ Lexeme.prototype.toString = function () { // Thie flag can be read and set manually to affect the // parsing of the next token. -var Lexer = function (code) { +JSLexer = function (code) { this.code = code; this.pos = 0; this.divisionPermitted = false; this.lastLexeme = null; }; +JSLexer.Lexeme = Lexeme; + // XXXX UPDATE DOCS // Return the type of the next of lexeme starting at `pos`, and advance // `pos` to the end of the lexeme. 
The text of the lexeme is available @@ -255,7 +258,7 @@ var Lexer = function (code) { // Other Tokens: IDENTIFIER, KEYWORD, PUNCTUATION // ... and ERROR -Lexer.prototype.next = function () { +JSLexer.prototype.next = function () { var self = this; var code = self.code; var origPos = self.pos; @@ -282,7 +285,7 @@ Lexer.prototype.next = function () { pos = origPos + 1; } self.pos = pos; - var lex = new Lexeme(origPos, type, code.substring(origPos, pos)); + var lex = new JSLexer.Lexeme(origPos, type, code.substring(origPos, pos)); if (self.lastLexeme) { self.lastLexeme.next = lex; lex.prev = self.lastLexeme; @@ -382,3 +385,5 @@ Lexer.prototype.next = function () { var word = code.substring(origPos, pos); return lexeme(keywordLookup[' '+word] || 'IDENTIFIER'); }; + +})(); \ No newline at end of file diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index c3d541d086..a920afb420 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -4,8 +4,18 @@ // - object literal trailing comma // - object literal get/set -var JSParser = function (code) { - this.lexer = new Lexer(code); +(function () { + +var makeSet = function (array) { + var s = {}; + for (var i = 0, N = array.length; i < N; i++) + s[array[i]] = true; + return s; +}; + + +JSParser = function (code) { + this.lexer = new JSLexer(code); this.oldToken = null; this.newToken = null; this.pos = 0; @@ -722,3 +732,5 @@ JSParser.prototype.getSyntaxTree = function () { return program.parse(this); }; + +})(); \ No newline at end of file diff --git a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index ef64e6a9ba..9788c01e5e 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -140,7 +140,7 @@ var makeTester = function (test) { var expectedTree = parseTreeString(expectedTreeString); // first use lexer to collect all tokens - var lexer = new Lexer(code); + var lexer = new JSLexer(code); var allTokensInOrder = []; while (! 
lexer.next().isEOF()) { var lex = lexer.lastLexeme; diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 09c82747ea..d6b501b789 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -19,18 +19,16 @@ var Parser = function (expecting, runFunc) { this._run = runFunc; }; -_.extend(Parser.prototype, { - parse: function (t, options) { - var result = this._run(t); +Parser.prototype.parse = function (t, options) { + var result = this._run(t); - if (options) { - if (options.required && ! result) - throw t.getParseError(this.expecting); - } - - return result; + if (options) { + if (options.required && ! result) + throw t.getParseError(this.expecting); } -}); + + return result; +}; // A parser that consume()s has to succeed. // Similarly, a parser that fails can't have consumed. From f9dba29b2de28f55c256fe865be284d6b5c7ed40 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 14:54:31 -0700 Subject: [PATCH 53/86] namespace parsers --- packages/jsparse/parser.js | 18 ++++++++++-- packages/jsparse/parserlib.js | 55 ++++++++++++++++++++--------------- 2 files changed, 48 insertions(+), 25 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index a920afb420..57e27c5350 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -6,6 +6,20 @@ (function () { +var expecting = Parser.expecting; + +var assertion = Parsers.assertion; +var node = Parsers.node; +var or = Parsers.or; +var and = Parsers.and; +var not = Parsers.not; +var list = Parsers.list; +var seq = Parsers.seq; +var opt = Parsers.opt; +var constant = Parsers.constant; +var mapResult = Parsers.mapResult; + + var makeSet = function (array) { var s = {}; for (var i = 0, N = array.length; i < N; i++) @@ -326,7 +340,7 @@ JSParser.prototype.getSyntaxTree = function () { return new ParseNode('postfix', v); })); - var unaryExpression = unary( + var unaryExpression = Parsers.unary( 'unary', postfixExpression, 
or(token('delete void typeof'), preSlashToken('++ -- + - ~ !', false))); @@ -362,7 +376,7 @@ JSParser.prototype.getSyntaxTree = function () { token('||')]; return expecting( 'expression', - binaryLeft('binary', unaryExpression, binaryOps)); + Parsers.binaryLeft('binary', unaryExpression, binaryOps)); }); var binaryExpression = binaryExpressionMaybeNoIn[false]; diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index d6b501b789..e8bf808de1 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -1,12 +1,14 @@ ///// TOKENIZER AND PARSER COMBINATORS +(function () { + // XXX track line/col position, for errors and maybe token info var isArray = function (obj) { return obj && (typeof obj === 'object') && (typeof obj.length === 'number'); }; -var ParseNode = function (name, children) { +ParseNode = function (name, children) { this.name = name; this.children = children; @@ -14,7 +16,7 @@ var ParseNode = function (name, children) { throw new Error("Expected array in new ParseNode(" + name + ", ...)"); }; -var Parser = function (expecting, runFunc) { +Parser = function (expecting, runFunc) { this.expecting = expecting; this._run = runFunc; }; @@ -30,23 +32,26 @@ Parser.prototype.parse = function (t, options) { return result; }; -// A parser that consume()s has to succeed. -// Similarly, a parser that fails can't have consumed. - // mutates the parser -var expecting = function (expecting, parser) { +Parser.expecting = function (expecting, parser) { parser.expecting = expecting; return parser; }; -var assertion = function (test) { + +// A parser that consume()s has to succeed. +// Similarly, a parser that fails can't have consumed. + +Parsers = {}; + +Parsers.assertion = function (test) { return new Parser( null, function (t) { return test(t) ? 
[] : null; }); }; -var node = function (name, childrenParser) { +Parsers.node = function (name, childrenParser) { return new Parser(name, function (t) { var children = childrenParser.parse(t); if (! children) @@ -57,7 +62,7 @@ var node = function (name, childrenParser) { }); }; -var or = function (/*parsers*/) { +Parsers.or = function (/*parsers*/) { var args = arguments; return new Parser( args[args.length - 1].expecting, @@ -81,7 +86,7 @@ var or = function (/*parsers*/) { // // opParsers is an array of op parsers from high to low // precedence (tightest-binding first) -var binaryLeft = function (name, termParser, opParsers) { +Parsers.binaryLeft = function (name, termParser, opParsers) { var opParser; if (opParsers.length === 1) { @@ -91,7 +96,7 @@ var binaryLeft = function (name, termParser, opParsers) { // pop off last opParser (non-destructively) and replace // termParser with a recursive binaryLeft on the remaining // ops. - termParser = binaryLeft(name, termParser, opParsers.slice(0, -1)); + termParser = Parsers.binaryLeft(name, termParser, opParsers.slice(0, -1)); opParser = opParsers[opParsers.length - 1]; } @@ -112,8 +117,8 @@ var binaryLeft = function (name, termParser, opParsers) { }); }; -var unary = function (name, termParser, opParser) { - var unaryList = opt(list(opParser)); +Parsers.unary = function (name, termParser, opParser) { + var unaryList = Parsers.opt(Parsers.list(opParser)); return new Parser( termParser.expecting, function (t) { @@ -137,7 +142,7 @@ var unary = function (name, termParser, opParser) { // `x,y` => ["x", ",", "y"] // `x,y,z` => ["x", ",", "y", ",", "z"] // Unpacks. -var list = function (itemParser, sepParser) { +Parsers.list = function (itemParser, sepParser) { var push = function(array, newThing) { if (isArray(newThing)) array.push.apply(array, newThing); @@ -169,10 +174,10 @@ var list = function (itemParser, sepParser) { }; // Unpacks arrays (nested seqs). 
-var seq = function (/*parsers*/) { +Parsers.seq = function (/*parsers*/) { var args = arguments; if (! args.length) - return constant([]); + return Parsers.constant([]); return new Parser( args[0].expecting, @@ -195,10 +200,10 @@ var seq = function (/*parsers*/) { }; // parsers except last must never consume -var and = function (/*parsers*/) { +Parsers.and = function (/*parsers*/) { var args = arguments; if (! args.length) - return constant([]); + return Parsers.constant([]); return new Parser( args[args.length - 1].expecting, @@ -214,7 +219,7 @@ var and = function (/*parsers*/) { }; // parser must not consume -var not = function (parser) { +Parsers.not = function (parser) { return new Parser( null, function (t) { @@ -223,16 +228,18 @@ var not = function (parser) { }; // parser that looks at nothing and returns result -var constant = function (result) { +Parsers.constant = function (result) { return new Parser(null, function (t) { return result; }); }; -var opt = function (parser) { - return expecting(parser.expecting, or(parser, seq())); +Parsers.opt = function (parser) { + return Parser.expecting( + parser.expecting, + Parsers.or(parser, Parsers.seq())); }; -var mapResult = function (parser, func) { +Parsers.mapResult = function (parser, func) { return new Parser( parser.expecting, function (t) { @@ -240,3 +247,5 @@ var mapResult = function (parser, func) { return v ? 
func(v, t) : null; }); }; + +})(); \ No newline at end of file From 4ee4e2451cfaf5c232fb10034da68d1cc64552dc Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 15:15:05 -0700 Subject: [PATCH 54/86] ParseNode.stringify and unstringify --- packages/jsparse/parser.js | 21 +++++++-- packages/jsparse/parser_tests.js | 80 ++----------------------------- packages/jsparse/parserlib.js | 81 ++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 80 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 57e27c5350..15a1a004cd 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -28,13 +28,26 @@ var makeSet = function (array) { }; -JSParser = function (code) { +JSParser = function (code, options) { this.lexer = new JSLexer(code); this.oldToken = null; this.newToken = null; this.pos = 0; this.isLineTerminatorHere = false; + options = options || {}; + // pass {tokens:'strings'} to get strings for + // tokens instead of token objects + if (options.tokens === 'strings') { + this.tokenFunc = function (tok) { + return tok.text(); + }; + } else { + this.tokenFunc = function (tok) { + return tok; + }; + } + this.consumeNewToken(); }; @@ -71,6 +84,8 @@ JSParser.prototype.getParseError = function (expecting, found) { }; JSParser.prototype.getSyntaxTree = function () { + var self = this; + var NIL = new ParseNode('nil', []); var booleanFlaggedParser = function (parserConstructFunc) { @@ -106,7 +121,7 @@ JSParser.prototype.getSyntaxTree = function () { function (t) { if (t.newToken.type() === type && textSet[t.newToken.text()]) { t.consumeNewToken(); - return t.oldToken; + return self.tokenFunc(t.oldToken); } return null; }); @@ -116,7 +131,7 @@ JSParser.prototype.getSyntaxTree = function () { return new Parser(type, function (t) { if (t.newToken.type() === type) { t.consumeNewToken(); - return t.oldToken; + return self.tokenFunc(t.oldToken); } return null; }); diff --git 
a/packages/jsparse/parser_tests.js b/packages/jsparse/parser_tests.js index 9788c01e5e..4973395329 100644 --- a/packages/jsparse/parser_tests.js +++ b/packages/jsparse/parser_tests.js @@ -61,83 +61,12 @@ var allNodeNames = [ var allNodeNamesSet = {}; _.each(allNodeNames, function (n) { allNodeNamesSet[n] = true; }); -// The "tree string" format is a simple format for representing syntax trees. -// -// For example, the parse of `x++;` is written as: -// "program(expressionStmnt(postfix(identifier(x) ++) ;))" -// -// A Node is written as "name(item1 item2 item3)", with additional whitespace -// allowed anywhere between the name, parentheses, and items. -// -// Tokens don't need to be escaped unless they contain '(', ')', whitespace, or -// backticks. If they do, they can be written enclosed in backticks. To escape -// a backtick within backticks, double it. -// -// `stringifyTree` generates "canonical" tree strings, which have no extra escaping -// or whitespace, just one space between items in a Node. - -var parseTreeString = function (str) { - var results = []; - var ptrStack = []; - var ptr = results; - _.each(str.match(/\(|\)|`([^`]||``)*`|`|[^\s()`]+/g), function (txt) { - switch (txt.charAt(0)) { - case '(': - if (! 
ptr.length || (typeof ptr[ptr.length - 1] !== "string")) - throw new Error("Nameless node in " + str); - var newArray = [ptr.pop()]; - ptr.push(newArray); - ptrStack.push(ptr); - ptr = newArray; - break; - case ')': - ptr = ptrStack.pop(); - var nodeArray = ptr.pop(); - ptr.push(new ParseNode(nodeArray[0], nodeArray.slice(1))); - break; - case '`': - if (txt.length === 1) - throw new Error("Mismatched ` in " + str); - ptr.push(txt.slice(1, -1).replace(/``/g, '`')); - break; - default: - ptr.push(txt); - break; - } - if (results.length > 1) - throw new Error("Not expecting " + txt + " in " + str); - }); - if (ptr !== results) - throw new Error("Mismatched parentheses in " + str); - return results[0]; -}; -var escapeTokenString = function (str) { - if (/[\s()`]/.test(str)) - return '`' + str.replace(/`/g, '``') + '`'; - else - return str; -}; -var stringifyTree = function (tree) { - if (tree instanceof ParseNode) - return (escapeTokenString(tree.name) + '(' + - _.map(tree.children, stringifyTree).join(' ') + - ')'); - - // Treat a token object or string as a token. - if (tree.text) - tree = tree.text(); - return escapeTokenString(tree); -}; - -var parseToTreeString = function (code) { - return stringifyTree(new JSParser(code).getSyntaxTree()); -}; var makeTester = function (test) { return { // Parse code and make sure it matches expectedTreeString. goodParse: function (code, expectedTreeString, regexTokenHints) { - var expectedTree = parseTreeString(expectedTreeString); + var expectedTree = ParseNode.unstringify(expectedTreeString); // first use lexer to collect all tokens var lexer = new JSLexer(code); @@ -189,8 +118,8 @@ var makeTester = function (test) { test.equal(parser.pos, code.length); - test.equal(stringifyTree(actualTree), - stringifyTree(expectedTree), code); + test.equal(ParseNode.stringify(actualTree), + ParseNode.stringify(expectedTree), code); }, // Takes code with part of it surrounding with backticks. 
// Removes the two backtick characters, tries to parse the code, @@ -579,9 +508,6 @@ Tinytest.add("jsparse - syntax forms", function (test) { }); }); -// Generating a trial: -//(function (s) { return JSON.stringify([s, parseToTreeString(s)]); })('...') - Tinytest.add("jsparse - bad parses", function (test) { var tester = makeTester(test); var trials = [ diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index e8bf808de1..e7235dacf2 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -16,6 +16,87 @@ ParseNode = function (name, children) { throw new Error("Expected array in new ParseNode(" + name + ", ...)"); }; +ParseNode.prototype.stringify = function () { + return ParseNode.stringify(this); +}; + +var escapeTokenString = function (str) { + if (/[\s()`]/.test(str)) + return '`' + str.replace(/`/g, '``') + '`'; + else if (! str) + return '``'; + else + return str; +}; + +// The "tree string" format is a simple format for representing syntax trees. +// +// For example, the parse of `x++;` is written as: +// "program(expressionStmnt(postfix(identifier(x) ++) ;))" +// +// A Node is written as "name(item1 item2 item3)", with additional whitespace +// allowed anywhere between the name, parentheses, and items. +// +// Tokens don't need to be escaped unless they contain '(', ')', whitespace, or +// backticks, or are empty. If they do, they can be written enclosed in backticks. +// To escape a backtick within backticks, double it. +// +// `stringify` generates "canonical" tree strings, which have no extra escaping +// or whitespace, just one space between items in a Node. + +ParseNode.stringify = function (tree) { + if (tree instanceof ParseNode) + return (escapeTokenString(tree.name) + '(' + + _.map(tree.children, ParseNode.stringify).join(' ') + + ')'); + + // Treat a token object or string as a token. 
+ if (typeof tree.text === 'function') + tree = tree.text(); + else if (tree.text) + tree = tree.text; + return escapeTokenString(String(tree)); +}; + +ParseNode.unstringify = function (str) { + var results = []; + var ptrStack = []; + var ptr = results; + _.each(str.match(/\(|\)|`([^`]||``)*`|`|[^\s()`]+/g), function (txt) { + switch (txt.charAt(0)) { + case '(': + if (! ptr.length || (typeof ptr[ptr.length - 1] !== "string")) + throw new Error("Nameless node in " + str); + var newArray = [ptr.pop()]; + ptr.push(newArray); + ptrStack.push(ptr); + ptr = newArray; + break; + case ')': + ptr = ptrStack.pop(); + var nodeArray = ptr.pop(); + ptr.push(new ParseNode(nodeArray[0], nodeArray.slice(1))); + break; + case '`': + if (txt.length === 1) + throw new Error("Mismatched ` in " + str); + if (txt.length === 2) + ptr.push(''); + else + ptr.push(txt.slice(1, -1).replace(/``/g, '`')); + break; + default: + ptr.push(txt); + break; + } + if (results.length > 1) + throw new Error("Not expecting " + txt + " in " + str); + }); + if (ptr !== results) + throw new Error("Mismatched parentheses in " + str); + return results[0]; +}; + Parser = function (expecting, runFunc) { this.expecting = expecting; this._run = runFunc; From 7c3815fa278e059928273f1ce96727d47ad704ee Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 15:41:52 -0700 Subject: [PATCH 55/86] parseRequired --- packages/jsparse/parser.js | 10 ++++------ packages/jsparse/parserlib.js | 25 +++++++++++++++---------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 15a1a004cd..4abb70f0b7 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -306,8 +306,7 @@ JSParser.prototype.getSyntaxTree = function () { // and .foo add-ons. // if we have 'new' keywords, we are committed and must // match an expression or error. 
- var result = primaryOrFunctionExpression.parse( - t, {required: news.length}); + var result = primaryOrFunctionExpression.parseRequiredIf(t, news.length); if (! result) return null; @@ -432,8 +431,7 @@ JSParser.prototype.getSyntaxTree = function () { var op; while (isExpressionLHS(r) &&(op = assignOp.parse(t))) parts.push(op, - conditionalExpressionMaybeNoIn[noIn].parse( - t, {required: true})); + conditionalExpressionMaybeNoIn[noIn].parseRequired(t)); var result = parts.pop(); while (parts.length) { @@ -520,7 +518,7 @@ JSParser.prototype.getSyntaxTree = function () { // Fail now if we are looking at a colon, causing an // error message on input like `1+1:` of the same kind // you'd get without statement label parsing. - noColon.parse(t, {required: true}); + noColon.parseRequired(t); return exprStmnt; } @@ -612,7 +610,7 @@ JSParser.prototype.getSyntaxTree = function () { throw t.getParseError("semicolon"); // if we don't see 'in' at this point, it's probably // a missing semicolon - rest = inExprExpectingSemi.parse(t, {required: true}); + rest = inExprExpectingSemi.parseRequired(t); } return [firstExpr].concat(rest); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index e7235dacf2..4616f4b59a 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -102,13 +102,19 @@ Parser = function (expecting, runFunc) { this._run = runFunc; }; -Parser.prototype.parse = function (t, options) { +Parser.prototype.parse = function (t) { + return this._run(t); +}; + +Parser.prototype.parseRequired = function (t) { + return this.parseRequiredIf(t, true); +}; + +Parser.prototype.parseRequiredIf = function (t, required) { var result = this._run(t); - if (options) { - if (options.required && ! result) - throw t.getParseError(this.expecting); - } + if (required && ! 
result) + throw t.getParseError(this.expecting); return result; }; @@ -192,7 +198,7 @@ Parsers.binaryLeft = function (name, termParser, opParsers) { while ((op = opParser.parse(t))) { result = new ParseNode( name, - [result, op, termParser.parse(t, {required: true})]); + [result, op, termParser.parseRequired(t)]); } return result; }); @@ -206,8 +212,7 @@ Parsers.unary = function (name, termParser, opParser) { var unaries = unaryList.parse(t); // if we have unaries, we are committed and // have to match a term or error. - var result = termParser.parse( - t, {required: unaries.length}); + var result = termParser.parseRequiredIf(t, unaries.length); if (! result) return null; @@ -243,7 +248,7 @@ Parsers.list = function (itemParser, sepParser) { var sep; while ((sep = sepParser.parse(t))) { push(result, sep); - push(result, itemParser.parse(t, {required: true})); + push(result, itemParser.parseRequired(t)); } } else { var item; @@ -267,7 +272,7 @@ Parsers.seq = function (/*parsers*/) { for (var i = 0, N = args.length; i < N; i++) { // first item in sequence can fail, and we // fail (without error); after that, error on failure - var r = args[i].parse(t, {required: i > 0}); + var r = args[i].parseRequiredIf(t, i > 0); if (! 
r) return null; From 128392ccea98e35d49272a64cdcccf6c857f6368 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 16:26:10 -0700 Subject: [PATCH 56/86] real Parser for unstringify, Parsers.lazy --- packages/jsparse/package.js | 2 +- packages/jsparse/parserlib.js | 92 ++++--------------------- packages/jsparse/stringify.js | 122 ++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 81 deletions(-) create mode 100644 packages/jsparse/stringify.js diff --git a/packages/jsparse/package.js b/packages/jsparse/package.js index 75a2da8800..f45fd38100 100644 --- a/packages/jsparse/package.js +++ b/packages/jsparse/package.js @@ -3,7 +3,7 @@ Package.describe({ }); Package.on_use(function (api) { - api.add_files(['lexer.js', 'parserlib.js', 'parser.js'], + api.add_files(['lexer.js', 'parserlib.js', 'stringify.js', 'parser.js'], ['client', 'server']); }); diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 4616f4b59a..aff072f117 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -16,86 +16,6 @@ ParseNode = function (name, children) { throw new Error("Expected array in new ParseNode(" + name + ", ...)"); }; -ParseNode.prototype.stringify = function () { - return ParseNode.stringify(this); -}; - -var escapeTokenString = function (str) { - if (/[\s()`]/.test(str)) - return '`' + str.replace(/`/g, '``') + '`'; - else if (! str) - return '``'; - else - return str; -}; - -// The "tree string" format is a simple format for representing syntax trees. -// -// For example, the parse of `x++;` is written as: -// "program(expressionStmnt(postfix(identifier(x) ++) ;))" -// -// A Node is written as "name(item1 item2 item3)", with additional whitespace -// allowed anywhere between the name, parentheses, and items. -// -// Tokens don't need to be escaped unless they contain '(', ')', whitespace, or -// backticks, or are empty. If they do, they can be written enclosed in backticks. 
-// To escape a backtick within backticks, double it. -// -// `stringify` generates "canonical" tree strings, which have no extra escaping -// or whitespace, just one space between items in a Node. - -ParseNode.stringify = function (tree) { - if (tree instanceof ParseNode) - return (escapeTokenString(tree.name) + '(' + - _.map(tree.children, ParseNode.stringify).join(' ') + - ')'); - - // Treat a token object or string as a token. - if (typeof tree.text === 'function') - tree = tree.text(); - else if (tree.text) - tree = tree.text; - return escapeTokenString(String(tree)); -}; - -ParseNode.unstringify = function (str) { - var results = []; - var ptrStack = []; - var ptr = results; - _.each(str.match(/\(|\)|`([^`]||``)*`|`|[^\s()`]+/g), function (txt) { - switch (txt.charAt(0)) { - case '(': - if (! ptr.length || (typeof ptr[ptr.length - 1] !== "string")) - throw new Error("Nameless node in " + str); - var newArray = [ptr.pop()]; - ptr.push(newArray); - ptrStack.push(ptr); - ptr = newArray; - break; - case ')': - ptr = ptrStack.pop(); - var nodeArray = ptr.pop(); - ptr.push(new ParseNode(nodeArray[0], nodeArray.slice(1))); - break; - case '`': - if (txt.length === 1) - throw new Error("Mismatched ` in " + str); - if (txt.length === 2) - ptr.push(''); - else - ptr.push(txt.slice(1, -1).replace(/``/g, '`')); - break; - default: - ptr.push(txt); - break; - } - if (results.length > 1) - throw new Error("Not expecting " + txt + " in " + str); - }); - if (ptr !== results) - throw new Error("Mismatched parentheses in " + str); - return results[0]; -}; Parser = function (expecting, runFunc) { this.expecting = expecting; @@ -334,4 +254,16 @@ Parsers.mapResult = function (parser, func) { }); }; +Parsers.lazy = function (parserFunc) { + var inner = null; + var outer = new Parser(null, function (t) { + if (! 
inner) { + inner = parserFunc(); + outer.expecting = inner.expecting; + } + return inner.parse(t); + }); + return outer; +}; + })(); \ No newline at end of file diff --git a/packages/jsparse/stringify.js b/packages/jsparse/stringify.js new file mode 100644 index 0000000000..416f4c8bcb --- /dev/null +++ b/packages/jsparse/stringify.js @@ -0,0 +1,122 @@ +(function() { + +// The "tree string" format is a simple format for representing syntax trees. +// +// For example, the parse of `x++;` is written as: +// "program(expressionStmnt(postfix(identifier(x) ++) ;))" +// +// A Node is written as "name(item1 item2 item3)", with additional whitespace +// allowed anywhere between the name, parentheses, and items. +// +// Tokens don't need to be escaped unless they contain '(', ')', whitespace, or +// backticks, or are empty. If they do, they can be written enclosed in backticks. +// To escape a backtick within backticks, double it. +// +// `stringify` generates "canonical" tree strings, which have no extra escaping +// or whitespace, just one space between items in a Node. + + +ParseNode.prototype.stringify = function () { + return ParseNode.stringify(this); +}; + +var backtickEscape = function (str) { + if (/[\s()`]/.test(str)) + return '`' + str.replace(/`/g, '``') + '`'; + else if (! 
str) + return '``'; + else + return str; +}; + +var backtickUnescape = function (str) { + if (str.charAt(0) === '`') { + if (str.length === 1 || str.slice(-1) !== '`') + throw new Error("Mismatched ` in " + str); + if (str.length === 2) + str = ''; + else + str = str.slice(1, -1).replace(/``/g, '`'); + } + return str; +}; + +ParseNode.stringify = function (tree) { + if (tree instanceof ParseNode) { + var str = backtickEscape(tree.name); + str += '('; + var escapedChildren = []; + for(var i = 0, N = tree.children.length; i < N; i++) + escapedChildren.push(ParseNode.stringify(tree.children[i])); + str += escapedChildren.join(' '); + str += ')'; + return str; + } + + // Treat a token object or string as a token. + if (typeof tree.text === 'function') + tree = tree.text(); + else if (tree.text) + tree = tree.text; + return backtickEscape(String(tree)); +}; + +ParseNode.unstringify = function (str) { + var lexemes = str.match(/\(|\)|`([^`]||``)*`|`|[^\s()`]+/g) || []; + var N = lexemes.length; + var state = { + i: 0, + getParseError: function (expecting) { + throw new Error("unstringify: Expecting " + expecting +", found " + + (lexemes[this.i] || "end of string")); + }, + peek: function () { return lexemes[this.i]; }, + advance: function () { this.i++; } + }; + var paren = function (chr) { + return new Parser(chr, function (t) { + if (t.peek() !== chr) + return null; + t.advance(); + return chr; + }); + }; + var EMPTY_STRING = [""]; + var token = new Parser('token', function (t) { + var txt = t.peek(); + if (!txt || txt.charAt(0) === '(' || txt.charAt(0) === ')') + return null; + + t.advance(); + // can't return falsy value from successful parser + return backtickUnescape(txt) || EMPTY_STRING; + }); + + // Make "item" lazy so it can be recursive. + var item = Parsers.lazy(function () { return item; }); + + // Parse a single node or token. 
+ item = Parsers.mapResult( + Parsers.seq(token, + Parsers.opt(Parsers.seq( + paren('('), Parsers.opt(Parsers.list(item)), paren(')')))), + function (v) { + for(var i = 0, N = v.length; i < N; i++) + if (v[i] === EMPTY_STRING) + v[i] = ""; + + if (v.length === 1) + // token + return v[0]; + // node. exclude parens + return new ParseNode(v[0], v.slice(2, -1)); + }); + + var endOfString = new Parser("end of string", function (t) { + return t.i === N ? [] : null; + }); + + return Parsers.seq(item, endOfString).parseRequired(state)[0]; +}; + +})(); \ No newline at end of file From c3477ec91c0c2fcbc9d3f77c09a0652ef6d7894a Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 16:34:41 -0700 Subject: [PATCH 57/86] Make use of Parsers.lazy --- packages/jsparse/parser.js | 130 +++++++++++++++++----------------- packages/jsparse/parserlib.js | 9 +-- packages/jsparse/stringify.js | 2 +- 3 files changed, 68 insertions(+), 73 deletions(-) diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 4abb70f0b7..1289043974 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -171,37 +171,35 @@ JSParser.prototype.getSyntaxTree = function () { }); }; - // These "pointers" allow grammar circularity, i.e. accessing + // Mark some productions "lazy" to allow grammar circularity, i.e. accessing // later parsers from earlier ones. - var expressionMaybeNoInPtr = booleanFlaggedParser( - function (noIn) { - return new Parser( - "expression", - function (t) { - return expressionMaybeNoIn[noIn].parse(t); - }); - }); - var expressionPtr = expressionMaybeNoInPtr[false]; + // These lazy versions will be replaced with real ones, which they will + // access when run. 
+ var expressionMaybeNoIn = { + 'false': Parsers.lazy( + 'expression', + function () { return expressionMaybeNoIn[false]; }), + 'true': Parsers.lazy( + 'expression', + function () { return expressionMaybeNoIn[true]; }) + }; + var expression = expressionMaybeNoIn[false]; - var assignmentExpressionMaybeNoInPtr = booleanFlaggedParser( - function (noIn) { - return new Parser( - "expression", - function (t) { - return assignmentExpressionMaybeNoIn[noIn].parse(t); - }); - }); - var assignmentExpressionPtr = assignmentExpressionMaybeNoInPtr[false]; + var assignmentExpressionMaybeNoIn = { + 'false': Parsers.lazy( + 'expression', + function () { return assignmentExpressionMaybeNoIn[false]; }), + 'true': Parsers.lazy( + 'expression', + function () { return assignmentExpressionMaybeNoIn[true]; }) + }; + var assignmentExpression = assignmentExpressionMaybeNoIn[false]; - var functionBodyPtr = new Parser( - "functionBody", function (t) { - return functionBody.parse(t); - }); - - var statementPtr = new Parser( - "statement", function (t) { - return statement.parse(t); - }); + var functionBody = Parsers.lazy( + 'statement', function () { return functionBody; }); + var statement = Parsers.lazy( + 'statement', function () { return statement; }); + //// var arrayLiteral = node('array', @@ -212,7 +210,7 @@ JSParser.prototype.getSyntaxTree = function () { list( expecting( 'expression', - or(assignmentExpressionPtr, + or(assignmentExpression, // count a peeked-at ']' as an expression // to support elisions at end, e.g. // `[1,2,3,,,,,,]`. 
@@ -228,7 +226,7 @@ JSParser.prototype.getSyntaxTree = function () { node('strPropName', tokenType('STRING')))); var nameColonValue = expecting( 'propertyName', - node('prop', seq(propertyName, token(':'), assignmentExpressionPtr))); + node('prop', seq(propertyName, token(':'), assignmentExpression))); var objectLiteral = node('object', @@ -249,7 +247,7 @@ JSParser.prototype.getSyntaxTree = function () { list(tokenType('IDENTIFIER'), token(','))), token(')'), token('{'), - functionBodyPtr, + functionBody, token('}')); }); var functionExpression = node('functionExpr', @@ -265,16 +263,16 @@ JSParser.prototype.getSyntaxTree = function () { node('regex', tokenType('REGEX')), node('string', tokenType('STRING')), node('parens', - seq(token('('), expressionPtr, token(')'))), + seq(token('('), expression, token(')'))), arrayLiteral, objectLiteral, functionExpression)); var dotEnding = seq(token('.'), tokenType('IDENTIFIER')); - var bracketEnding = seq(token('['), expressionPtr, token(']')); + var bracketEnding = seq(token('['), expression, token(']')); var callArgs = seq(token('('), or(lookAheadToken(')'), - list(assignmentExpressionPtr, + list(assignmentExpression, token(','))), token(')')); @@ -402,8 +400,8 @@ JSParser.prototype.getSyntaxTree = function () { seq(binaryExpressionMaybeNoIn[noIn], opt(seq( token('?'), - assignmentExpressionPtr, token(':'), - assignmentExpressionMaybeNoInPtr[noIn]))), + assignmentExpression, token(':'), + assignmentExpressionMaybeNoIn[noIn]))), function (v) { if (v.length === 1) return v[0]; @@ -414,7 +412,7 @@ JSParser.prototype.getSyntaxTree = function () { var assignOp = token('= *= /= %= += -= <<= >>= >>>= &= ^= |='); - var assignmentExpressionMaybeNoIn = booleanFlaggedParser( + assignmentExpressionMaybeNoIn = booleanFlaggedParser( function (noIn) { return new Parser( 'expression', @@ -442,9 +440,9 @@ JSParser.prototype.getSyntaxTree = function () { return result; }); }); - var assignmentExpression = assignmentExpressionMaybeNoIn[false]; 
+ assignmentExpression = assignmentExpressionMaybeNoIn[false]; - var expressionMaybeNoIn = booleanFlaggedParser( + expressionMaybeNoIn = booleanFlaggedParser( function (noIn) { return expecting( 'expression', @@ -456,11 +454,11 @@ JSParser.prototype.getSyntaxTree = function () { return new ParseNode('comma', v); })); }); - var expression = expressionMaybeNoIn[false]; + expression = expressionMaybeNoIn[false]; // STATEMENTS - var statements = list(statementPtr); + var statements = list(statement); // implements JavaScript's semicolon "insertion" rules var maybeSemicolon = expecting( @@ -498,7 +496,7 @@ JSParser.prototype.getSyntaxTree = function () { // expressionOrLabelStatement parses the expression and // then rewrites the result if it is an identifier // followed by a colon. - var labelColonAndStatement = seq(token(':'), statementPtr); + var labelColonAndStatement = seq(token(':'), statement); var noColon = expecting( 'semicolon', not(lookAheadToken(':'))); var expressionOrLabelStatement = new Parser( @@ -557,8 +555,8 @@ JSParser.prototype.getSyntaxTree = function () { var ifStatement = node( 'ifStmnt', seq(token('if'), token('('), expression, - closeParenBeforeStatement, statementPtr, - opt(seq(token('else'), statementPtr)))); + closeParenBeforeStatement, statement, + opt(seq(token('else'), statement)))); var secondThirdClauses = expecting( 'semicolon', @@ -567,11 +565,11 @@ JSParser.prototype.getSyntaxTree = function () { expecting('semicolon', token(';')), or(and(lookAheadToken(';'), constant(NIL)), - expressionPtr), + expression), expecting('semicolon', token(';')), or(and(lookAheadToken(')'), constant(NIL)), - expressionPtr)))); + expression)))); var inExpr = seq(token('in'), expression); var inExprExpectingSemi = expecting('semicolon', seq(token('in'), expression)); @@ -636,15 +634,15 @@ JSParser.prototype.getSyntaxTree = function () { }); var iterationStatement = or( - node('doStmnt', seq(token('do'), statementPtr, token('while'), + node('doStmnt', 
seq(token('do'), statement, token('while'), token('('), expression, token(')'), maybeSemicolon)), node('whileStmnt', seq(token('while'), token('('), expression, - closeParenBeforeStatement, statementPtr)), + closeParenBeforeStatement, statement)), // semicolons must be real, not maybeSemicolons node('forStmnt', seq( token('for'), token('('), forSpec, closeParenBeforeStatement, - statementPtr))); + statement))); var returnStatement = node( 'returnStmnt', @@ -679,7 +677,7 @@ JSParser.prototype.getSyntaxTree = function () { var withStatement = node( 'withStmnt', seq(token('with'), token('('), expression, closeParenBeforeStatement, - statementPtr)); + statement)); var switchCase = node( 'case', @@ -724,21 +722,21 @@ JSParser.prototype.getSyntaxTree = function () { var debuggerStatement = node( 'debuggerStmnt', seq(token('debugger'), maybeSemicolon)); - var statement = expecting('statement', - or(expressionOrLabelStatement, - emptyStatement, - blockStatement, - variableStatement, - ifStatement, - iterationStatement, - returnStatement, - continueStatement, - breakStatement, - withStatement, - switchStatement, - throwStatement, - tryStatement, - debuggerStatement)); + statement = expecting('statement', + or(expressionOrLabelStatement, + emptyStatement, + blockStatement, + variableStatement, + ifStatement, + iterationStatement, + returnStatement, + continueStatement, + breakStatement, + withStatement, + switchStatement, + throwStatement, + tryStatement, + debuggerStatement)); // PROGRAM @@ -748,7 +746,7 @@ JSParser.prototype.getSyntaxTree = function () { var sourceElement = or(functionDecl, statement); var sourceElements = list(sourceElement); - var functionBody = expecting( + functionBody = expecting( 'functionBody', or(lookAheadToken('}'), sourceElements)); var program = node( diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index aff072f117..d8b3e932cb 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -254,16 
+254,13 @@ Parsers.mapResult = function (parser, func) { }); }; -Parsers.lazy = function (parserFunc) { +Parsers.lazy = function (expecting, parserFunc) { var inner = null; - var outer = new Parser(null, function (t) { - if (! inner) { + return new Parser(expecting, function (t) { + if (! inner) inner = parserFunc(); - outer.expecting = inner.expecting; - } return inner.parse(t); }); - return outer; }; })(); \ No newline at end of file diff --git a/packages/jsparse/stringify.js b/packages/jsparse/stringify.js index 416f4c8bcb..b00d7725a3 100644 --- a/packages/jsparse/stringify.js +++ b/packages/jsparse/stringify.js @@ -93,7 +93,7 @@ ParseNode.unstringify = function (str) { }); // Make "item" lazy so it can be recursive. - var item = Parsers.lazy(function () { return item; }); + var item = Parsers.lazy('token', function () { return item; }); // Parse a single node or token. item = Parsers.mapResult( From 90e516d7c9c88b70b0fd5b2978ec5fe98cc404a0 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 12 Sep 2012 16:59:24 -0700 Subject: [PATCH 58/86] make methods out of lexeme.next()/prev() --- packages/jsparse/lexer.js | 12 ++++++++++-- packages/jsparse/stringify.js | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/packages/jsparse/lexer.js b/packages/jsparse/lexer.js index 57029915bd..04620b18d1 100644 --- a/packages/jsparse/lexer.js +++ b/packages/jsparse/lexer.js @@ -205,6 +205,14 @@ Lexeme.prototype.isEOF = function () { return this._type === "EOF"; }; +Lexeme.prototype.prev = function () { + return this._prev; +}; + +Lexeme.prototype.next = function () { + return this._next; +}; + Lexeme.prototype.toString = function () { return this.isError() ? "ERROR" : this.isEOF() ? 
"EOF" : "`" + this.text() + "`"; @@ -287,8 +295,8 @@ JSLexer.prototype.next = function () { self.pos = pos; var lex = new JSLexer.Lexeme(origPos, type, code.substring(origPos, pos)); if (self.lastLexeme) { - self.lastLexeme.next = lex; - lex.prev = self.lastLexeme; + self.lastLexeme._next = lex; + lex._prev = self.lastLexeme; } self.lastLexeme = lex; if (lex.isToken()) diff --git a/packages/jsparse/stringify.js b/packages/jsparse/stringify.js index b00d7725a3..816b765a64 100644 --- a/packages/jsparse/stringify.js +++ b/packages/jsparse/stringify.js @@ -56,7 +56,7 @@ ParseNode.stringify = function (tree) { // Treat a token object or string as a token. if (typeof tree.text === 'function') tree = tree.text(); - else if (tree.text) + else if (typeof tree.text === 'string') tree = tree.text; return backtickEscape(String(tree)); }; From 036916fb31678c13762bb9bebafbfe7ee10858a6 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 15:18:59 -0700 Subject: [PATCH 59/86] First draft of Meteor 0.4.1 release notes. --- History.md | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/History.md b/History.md index 5ec2d0a1fa..2bdfcd330d 100644 --- a/History.md +++ b/History.md @@ -1,6 +1,62 @@ ## vNEXT +## v0.4.1 + +* New `email` smart package, with `Email.send` API. + +* Upgrade Node from 0.6.17 to 0.8.8, as well as many Node modules in the dev + bundle; those that are user-exposed are: + * coffee-script: 1.3.3 (from 1.3.1) + * stylus: 0.29.0 (from 0.28.1) + * nib: 0.8.2 (from 0.7.0) + +* All publicly documented APIs now use camelCase rather than under_scores. The + old spellings continue to work for now. New names are: + - Meteor.isClient/isServer + - this.isSimulation inside a method invocation + - Meteor.deps.Context.onInvalidate + - Meteor.status().retryCount/retryTime + +* Spark improvements + * Optimize selector matching for event maps. + * Spark._currentRenderer shouldn't persist into timer callbacks. 
+ * Fix bug caused by interaction between template.preserve and + {{#constant}}. #323 + * Allow {{#each}} over a collection of objects without _id. #281 + * Added a script to build a standalone spark.js that does not depend on + Meteor. + * Meteor and Spark no longer depend on jQuery unless you need IE7 + support. (All Meteor apps still include jQuery, for now.) + +* If you use Meteor.setTimer/setInterval/defer inside a method + invocation, and the callback is invoked before all writes directly + created by the invocation are committed, and the callback creates + writes, then those writes will be added to the same "write fence" + as the method's own writes, causing the client to wait for those + writes to be committed before quiescing. + +* Optimize LocalCollection.remove(id). + +* Make Meteor.Cursor.forEach fully synchronous even if the user's callback + yields. #321. + +* Avoid running full query result diffs on the client when unnecessary. + +* Better error reporting when a package in `.meteor/packages` does not exist. + +* Better error reporting for coffeescript. #331 + +* Better error handling in Handlebars.Exception. + +* Fix internal docs for bundler API add_resource. #326 + +* Change the implementation of the "meteor deploy" password prompt to not crash + Emacs M-x shell. + +* Upgrade bootstrap to version 2.1.1. #336, #337, #288, #293 + + ## v0.4.0 * Merge Spark, a new live page update engine From 7ffd598e2617976f318b45c88e5af2eaa0bc6372 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 15:20:54 -0700 Subject: [PATCH 60/86] Don't tell end users to run "meteor add". 
--- packages/domutils/domutils.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/domutils/domutils.js b/packages/domutils/domutils.js index fdc7e6efb7..53f266d601 100644 --- a/packages/domutils/domutils.js +++ b/packages/domutils/domutils.js @@ -5,11 +5,10 @@ DomUtils = {}; (function () { var qsaFindAllBySelector = function (selector, contextNode) { + // If IE7 users report the following error message, you + // can fix it with "meteor add jquery". if (! document.querySelectorAll) - // IE 7 - throw new Error( - "This browser doesn't support querySelectorAll. " + - "You need Sizzle or jQuery (`meteor add jquery`)."); + throw new Error("This browser doesn't support querySelectorAll."); // the search is constrained to descendants of `ancestor`, // but it doesn't affect the scope of the query. From b54daf2c740247567f8584d3d46d9d2823e03191 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 16:07:54 -0700 Subject: [PATCH 61/86] Second draft of 0.4.1 release notes. --- History.md | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/History.md b/History.md index 2bdfcd330d..32bdc65cd6 100644 --- a/History.md +++ b/History.md @@ -3,7 +3,8 @@ ## v0.4.1 -* New `email` smart package, with `Email.send` API. +* New `email` smart package, with [`Email.send`](http://docs.meteor.com/#email) + API. * Upgrade Node from 0.6.17 to 0.8.8, as well as many Node modules in the dev bundle; those that are user-exposed are: @@ -11,50 +12,50 @@ * stylus: 0.29.0 (from 0.28.1) * nib: 0.8.2 (from 0.7.0) -* All publicly documented APIs now use camelCase rather than under_scores. The - old spellings continue to work for now. New names are: - - Meteor.isClient/isServer - - this.isSimulation inside a method invocation - - Meteor.deps.Context.onInvalidate - - Meteor.status().retryCount/retryTime +* All publicly documented APIs now use `camelCase` rather than + `under_scores`. 
The old spellings continue to work for now. New names are: + - `Meteor.isClient`/`isServer` + - `this.isSimulation` inside a method invocation + - `Meteor.deps.Context.onInvalidate` + - `Meteor.status().retryCount`/`retryTime` * Spark improvements * Optimize selector matching for event maps. - * Spark._currentRenderer shouldn't persist into timer callbacks. - * Fix bug caused by interaction between template.preserve and - {{#constant}}. #323 - * Allow {{#each}} over a collection of objects without _id. #281 + * `Spark._currentRenderer` shouldn't persist into timer callbacks. + * Fix bug caused by interaction between `Template.foo.preserve` and + `{{#constant}}`. #323 + * Allow `{{#each}}` over a collection of objects without `_id`. #281 * Added a script to build a standalone spark.js that does not depend on Meteor. * Meteor and Spark no longer depend on jQuery unless you need IE7 support. (All Meteor apps still include jQuery, for now.) -* If you use Meteor.setTimer/setInterval/defer inside a method - invocation, and the callback is invoked before all writes directly - created by the invocation are committed, and the callback creates - writes, then those writes will be added to the same "write fence" - as the method's own writes, causing the client to wait for those - writes to be committed before quiescing. +* If you use `Meteor.setTimer`/`setInterval`/`defer` inside a method invocation, + and the callback is invoked before all writes directly created by the + invocation are committed, and the callback creates writes, then those writes + will be added to the same "write fence" as the method's own writes, causing + the client to wait for those writes to be committed before quiescing. -* Optimize LocalCollection.remove(id). - -* Make Meteor.Cursor.forEach fully synchronous even if the user's callback +* Make `Meteor.Cursor.forEach` fully synchronous even if the user's callback yields. #321. +* Upgrade bootstrap to version 2.1.1. 
#336, #337, #288, #293 + +* Change the implementation of the `meteor deploy` password prompt to not crash + Emacs M-x shell. + +* Optimize `LocalCollection.remove(id)` to be O(1) rather than O(n). + * Avoid running full query result diffs on the client when unnecessary. * Better error reporting when a package in `.meteor/packages` does not exist. * Better error reporting for coffeescript. #331 -* Better error handling in Handlebars.Exception. +* Better error handling in `Handlebars.Exception`. -* Fix internal docs for bundler API add_resource. #326 -* Change the implementation of the "meteor deploy" password prompt to not crash - Emacs M-x shell. - -* Upgrade bootstrap to version 2.1.1. #336, #337, #288, #293 +Patches contributed by GitHub users fivethirty, tmeasday, and xenolf. ## v0.4.0 From 30d20ea8db672554f5fa4aff4d948ee6d4f0a774 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 18:31:37 -0700 Subject: [PATCH 62/86] Preserve spaces in arguments to admin/node.sh. --- admin/node.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/admin/node.sh b/admin/node.sh index 3326cd3bc8..ba712deb85 100755 --- a/admin/node.sh +++ b/admin/node.sh @@ -12,4 +12,4 @@ fi cd "$ORIGDIR" export NODE_PATH="$TOPDIR/dev_bundle/lib/node_modules" -exec "$TOPDIR/dev_bundle/bin/node" $* +exec "$TOPDIR/dev_bundle/bin/node" "$@" From 18b55d05c0b70cca488e7ad5e090e32999713626 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 18:35:18 -0700 Subject: [PATCH 63/86] Revert "Work around debian build issue related to node fibers." This reverts commit 6121400983cc692eddb1acf27ed1df5257fc1842. We will solve this by deleting distracting versions of fibers.node rather than by working around them. This will fix rpmbuild as well. 
--- admin/debian/rules | 6 ------ 1 file changed, 6 deletions(-) diff --git a/admin/debian/rules b/admin/debian/rules index 7b223eeb5e..6a83b44185 100755 --- a/admin/debian/rules +++ b/admin/debian/rules @@ -17,11 +17,5 @@ override_dh_prep: tar -C debian/tmp/usr/lib -xzf $(TARBALL) echo -n 'deb' > debian/tmp/usr/lib/meteor/.package_stamp -# node fibers distributes copies of the library pre-compiled for many -# different architectures. This confuses shlibdeps. Just ignore the -# fibers library. -override_dh_shlibdeps: - dh_shlibdeps -Xfibers.node - %: dh $@ From e50d71a208d1234ec6a2fc7cdcec4f5e541c66b6 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 18:38:31 -0700 Subject: [PATCH 64/86] Only include the correct architecture's fibers.node in the dev bundle. --- admin/generate-dev-bundle.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/admin/generate-dev-bundle.sh b/admin/generate-dev-bundle.sh index efc27770e6..3761eef2a4 100755 --- a/admin/generate-dev-bundle.sh +++ b/admin/generate-dev-bundle.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e +set -u BUNDLE_VERSION=0.2.2 UNAME=$(uname) @@ -182,7 +183,6 @@ npm install mongodb@1.1.5 npm install uglify-js@1.3.3 npm install clean-css@0.6.0 npm install progress@0.0.5 -npm install fibers@0.6.9 npm install useragent@1.1.0 npm install request@2.11.0 npm install http-proxy@0.8.2 @@ -201,6 +201,19 @@ git clone http://github.com/akdubya/rbytes.git npm install sockjs@0.3.1 rm -rf rbytes +npm install fibers@0.6.9 +# Fibers ships with compiled versions of its C code for a dozen platforms. This +# bloats our dev bundle, and confuses dpkg-buildpackage and rpmbuild into +# thinking that the packages need to depend on both 32- and 64-bit versions of +# libstd++. Remove all the ones other than our architecture. (Expression based +# on build.js in fibers source.) 
+FIBERS_ARCH=$(node -p -e 'process.platform + "-" + process.arch + "-v8-" + /[0-9]+\.[0-9]+/.exec(process.versions.v8)[0]') +cd fibers/bin +mv $FIBERS_ARCH .. +rm -rf * +mv ../$FIBERS_ARCH . +cd ../.. + cd "$DIR" curl "$MONGO_URL" | tar -xz From 9285bd9cabebf184a08e27e795f6016283ba3350 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 18:51:21 -0700 Subject: [PATCH 65/86] Bump dev bundle version number to 0.2.3. --- admin/generate-dev-bundle.sh | 2 +- meteor | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/admin/generate-dev-bundle.sh b/admin/generate-dev-bundle.sh index 3761eef2a4..6f495e7c2d 100755 --- a/admin/generate-dev-bundle.sh +++ b/admin/generate-dev-bundle.sh @@ -3,7 +3,7 @@ set -e set -u -BUNDLE_VERSION=0.2.2 +BUNDLE_VERSION=0.2.3 UNAME=$(uname) ARCH=$(uname -m) diff --git a/meteor b/meteor index 34c354c120..70e583c498 100755 --- a/meteor +++ b/meteor @@ -1,6 +1,6 @@ #!/bin/bash -BUNDLE_VERSION=0.2.2 +BUNDLE_VERSION=0.2.3 # OS Check. Put here because here is where we download the precompiled # bundles that are arch specific. From 7ff08603a9b0912927df1a3f4621b3b245be3f4a Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Mon, 17 Sep 2012 22:15:05 -0700 Subject: [PATCH 66/86] Catch errors in Meteor.publish handlers Previously, it seemed that bugs in publishers would sometimes prevent future subscriptions from working, grinding the app to a halt. 
--- packages/livedata/livedata_server.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/livedata/livedata_server.js b/packages/livedata/livedata_server.js index 5454629b40..e10c9d73de 100644 --- a/packages/livedata/livedata_server.js +++ b/packages/livedata/livedata_server.js @@ -314,7 +314,13 @@ _.extend(Meteor._LivedataSession.prototype, { else self.universal_subs.push(sub); - var res = handler.apply(sub, params || []); + try { + var res = handler.apply(sub, params || []); + } catch (e) { + Meteor._debug("Internal exception while starting subscription", sub_id, + e.stack); + return; + } // if Meteor._RemoteCollectionDriver is available (defined in // mongo-livedata), automatically wire up handlers that return a From 8a724d14283ef54debc2862de819fd48bddaec1d Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 23:33:08 -0700 Subject: [PATCH 67/86] Final (hopefully) History.md update for 0.4.1. --- History.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/History.md b/History.md index 32bdc65cd6..29eb2e7987 100644 --- a/History.md +++ b/History.md @@ -39,6 +39,8 @@ * Make `Meteor.Cursor.forEach` fully synchronous even if the user's callback yields. #321. +* Recover from exceptions thrown in `Meteor.publish` handlers. + * Upgrade bootstrap to version 2.1.1. #336, #337, #288, #293 * Change the implementation of the `meteor deploy` password prompt to not crash From 8ddcc21a0e50c286dba677495b7db1d23157d264 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 23:38:51 -0700 Subject: [PATCH 68/86] Update version number for 0.4.1. This will be the commit that 0.4.1 is released from. If the initial release candidate fails QA, this commit will be reverted. 
--- admin/debian/changelog | 2 +- admin/install-s3.sh | 2 +- admin/manifest.json | 6 +++--- admin/meteor.spec | 2 +- app/lib/updater.js | 2 +- app/meteor/post-upgrade.js | 2 +- docs/client/docs.html | 2 +- docs/client/docs.js | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/admin/debian/changelog b/admin/debian/changelog index b4c3533497..8517d7b6be 100644 --- a/admin/debian/changelog +++ b/admin/debian/changelog @@ -1,4 +1,4 @@ -meteor (0.4.0-1) unstable; urgency=low +meteor (0.4.1-1) unstable; urgency=low * Automated debian build. diff --git a/admin/install-s3.sh b/admin/install-s3.sh index d302a6ec5b..75b8502e9b 100755 --- a/admin/install-s3.sh +++ b/admin/install-s3.sh @@ -5,7 +5,7 @@ ## example. URLBASE="https://d3sqy0vbqsdhku.cloudfront.net" -VERSION="0.4.0" +VERSION="0.4.1" PKGVERSION="${VERSION}-1" UNAME=`uname` diff --git a/admin/manifest.json b/admin/manifest.json index ec2de4b0dc..d60784e251 100644 --- a/admin/manifest.json +++ b/admin/manifest.json @@ -1,6 +1,6 @@ { - "version": "0.4.0", - "deb_version": "0.4.0-1", - "rpm_version": "0.4.0-1", + "version": "0.4.1", + "deb_version": "0.4.1-1", + "rpm_version": "0.4.1-1", "urlbase": "https://d3sqy0vbqsdhku.cloudfront.net" } diff --git a/admin/meteor.spec b/admin/meteor.spec index 31233ed895..43dde1499e 100644 --- a/admin/meteor.spec +++ b/admin/meteor.spec @@ -5,7 +5,7 @@ Summary: Meteor platform and JavaScript application server Vendor: Meteor Name: meteor -Version: 0.4.0 +Version: 0.4.1 Release: 1 License: MIT Group: Networking/WWW diff --git a/app/lib/updater.js b/app/lib/updater.js index f15d1c04c6..2ff86cb452 100644 --- a/app/lib/updater.js +++ b/app/lib/updater.js @@ -1,4 +1,4 @@ -exports.CURRENT_VERSION = "0.4.0"; +exports.CURRENT_VERSION = "0.4.1"; var fs = require("fs"); var http = require("http"); diff --git a/app/meteor/post-upgrade.js b/app/meteor/post-upgrade.js index 5000bf0fda..a33008e577 100644 --- a/app/meteor/post-upgrade.js +++ b/app/meteor/post-upgrade.js @@ -2,7 
+2,7 @@ try { // XXX can't get this from updater.js because in 0.3.7 and before the // updater didn't have the right NODE_PATH set. At some point we can // remove this and just use updater.CURRENT_VERSION. - var VERSION = "0.4.0"; + var VERSION = "0.4.1"; var fs = require('fs'); var path = require('path'); diff --git a/docs/client/docs.html b/docs/client/docs.html index 9306792c71..331460e720 100644 --- a/docs/client/docs.html +++ b/docs/client/docs.html @@ -11,7 +11,7 @@
-

Meteor 0.4.0

+

Meteor 0.4.1

{{> introduction }} {{> concepts }} {{> api }} diff --git a/docs/client/docs.js b/docs/client/docs.js index 165e679de5..41f2337097 100644 --- a/docs/client/docs.js +++ b/docs/client/docs.js @@ -1,4 +1,4 @@ -METEOR_VERSION = "0.4.0"; +METEOR_VERSION = "0.4.1"; Meteor.startup(function () { // XXX this is broken by the new multi-page layout. Also, it was From d1467c983d4a529bf4c5000b268504d5a589f772 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Tue, 18 Sep 2012 10:21:26 -0700 Subject: [PATCH 69/86] Revert "Update version number for 0.4.1." This reverts commit 8ddcc21a0e50c286dba677495b7db1d23157d264. More changes are necessary to History.md. --- admin/debian/changelog | 2 +- admin/install-s3.sh | 2 +- admin/manifest.json | 6 +++--- admin/meteor.spec | 2 +- app/lib/updater.js | 2 +- app/meteor/post-upgrade.js | 2 +- docs/client/docs.html | 2 +- docs/client/docs.js | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/admin/debian/changelog b/admin/debian/changelog index 8517d7b6be..b4c3533497 100644 --- a/admin/debian/changelog +++ b/admin/debian/changelog @@ -1,4 +1,4 @@ -meteor (0.4.1-1) unstable; urgency=low +meteor (0.4.0-1) unstable; urgency=low * Automated debian build. diff --git a/admin/install-s3.sh b/admin/install-s3.sh index 75b8502e9b..d302a6ec5b 100755 --- a/admin/install-s3.sh +++ b/admin/install-s3.sh @@ -5,7 +5,7 @@ ## example. 
URLBASE="https://d3sqy0vbqsdhku.cloudfront.net" -VERSION="0.4.1" +VERSION="0.4.0" PKGVERSION="${VERSION}-1" UNAME=`uname` diff --git a/admin/manifest.json b/admin/manifest.json index d60784e251..ec2de4b0dc 100644 --- a/admin/manifest.json +++ b/admin/manifest.json @@ -1,6 +1,6 @@ { - "version": "0.4.1", - "deb_version": "0.4.1-1", - "rpm_version": "0.4.1-1", + "version": "0.4.0", + "deb_version": "0.4.0-1", + "rpm_version": "0.4.0-1", "urlbase": "https://d3sqy0vbqsdhku.cloudfront.net" } diff --git a/admin/meteor.spec b/admin/meteor.spec index 43dde1499e..31233ed895 100644 --- a/admin/meteor.spec +++ b/admin/meteor.spec @@ -5,7 +5,7 @@ Summary: Meteor platform and JavaScript application server Vendor: Meteor Name: meteor -Version: 0.4.1 +Version: 0.4.0 Release: 1 License: MIT Group: Networking/WWW diff --git a/app/lib/updater.js b/app/lib/updater.js index 2ff86cb452..f15d1c04c6 100644 --- a/app/lib/updater.js +++ b/app/lib/updater.js @@ -1,4 +1,4 @@ -exports.CURRENT_VERSION = "0.4.1"; +exports.CURRENT_VERSION = "0.4.0"; var fs = require("fs"); var http = require("http"); diff --git a/app/meteor/post-upgrade.js b/app/meteor/post-upgrade.js index a33008e577..5000bf0fda 100644 --- a/app/meteor/post-upgrade.js +++ b/app/meteor/post-upgrade.js @@ -2,7 +2,7 @@ try { // XXX can't get this from updater.js because in 0.3.7 and before the // updater didn't have the right NODE_PATH set. At some point we can // remove this and just use updater.CURRENT_VERSION. - var VERSION = "0.4.1"; + var VERSION = "0.4.0"; var fs = require('fs'); var path = require('path'); diff --git a/docs/client/docs.html b/docs/client/docs.html index 331460e720..9306792c71 100644 --- a/docs/client/docs.html +++ b/docs/client/docs.html @@ -11,7 +11,7 @@
-

Meteor 0.4.1

+

Meteor 0.4.0

{{> introduction }} {{> concepts }} {{> api }} diff --git a/docs/client/docs.js b/docs/client/docs.js index 41f2337097..165e679de5 100644 --- a/docs/client/docs.js +++ b/docs/client/docs.js @@ -1,4 +1,4 @@ -METEOR_VERSION = "0.4.1"; +METEOR_VERSION = "0.4.0"; Meteor.startup(function () { // XXX this is broken by the new multi-page layout. Also, it was From 7f14281cfab40ea4cb4771688deace229ffc2665 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Tue, 18 Sep 2012 10:28:33 -0700 Subject: [PATCH 70/86] More History.md rewording for 0.4.1. --- History.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/History.md b/History.md index 29eb2e7987..b737f6525e 100644 --- a/History.md +++ b/History.md @@ -21,20 +21,17 @@ * Spark improvements * Optimize selector matching for event maps. - * `Spark._currentRenderer` shouldn't persist into timer callbacks. + * Fix `Spark._currentRenderer` behavior in timer callbacks. * Fix bug caused by interaction between `Template.foo.preserve` and `{{#constant}}`. #323 * Allow `{{#each}}` over a collection of objects without `_id`. #281 * Added a script to build a standalone spark.js that does not depend on - Meteor. - * Meteor and Spark no longer depend on jQuery unless you need IE7 - support. (All Meteor apps still include jQuery, for now.) + Meteor (it depends on jQuery or Sizzle if you need IE7 support, + and otherwise is fully standalone). -* If you use `Meteor.setTimer`/`setInterval`/`defer` inside a method invocation, - and the callback is invoked before all writes directly created by the - invocation are committed, and the callback creates writes, then those writes - will be added to the same "write fence" as the method's own writes, causing - the client to wait for those writes to be committed before quiescing. +* Database writes from within `Meteor.setTimeout`/`setInterval`/`defer` will be + batched with other writes from the current method invocation if they start + before the method completes. 
* Make `Meteor.Cursor.forEach` fully synchronous even if the user's callback yields. #321. @@ -48,7 +45,8 @@ * Optimize `LocalCollection.remove(id)` to be O(1) rather than O(n). -* Avoid running full query result diffs on the client when unnecessary. +* Optimize client-side database performance when receiving updated data from the + server outside of method calls. * Better error reporting when a package in `.meteor/packages` does not exist. From f40b7b03c000e019b4d345e8ac705c3844524425 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Mon, 17 Sep 2012 23:38:51 -0700 Subject: [PATCH 71/86] Update version number for 0.4.1. This will be the commit that 0.4.1 is released from. If the initial release candidate fails QA, this commit will be reverted. --- admin/debian/changelog | 2 +- admin/install-s3.sh | 2 +- admin/manifest.json | 6 +++--- admin/meteor.spec | 2 +- app/lib/updater.js | 2 +- app/meteor/post-upgrade.js | 2 +- docs/client/docs.html | 2 +- docs/client/docs.js | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/admin/debian/changelog b/admin/debian/changelog index b4c3533497..8517d7b6be 100644 --- a/admin/debian/changelog +++ b/admin/debian/changelog @@ -1,4 +1,4 @@ -meteor (0.4.0-1) unstable; urgency=low +meteor (0.4.1-1) unstable; urgency=low * Automated debian build. diff --git a/admin/install-s3.sh b/admin/install-s3.sh index d302a6ec5b..75b8502e9b 100755 --- a/admin/install-s3.sh +++ b/admin/install-s3.sh @@ -5,7 +5,7 @@ ## example. 
URLBASE="https://d3sqy0vbqsdhku.cloudfront.net" -VERSION="0.4.0" +VERSION="0.4.1" PKGVERSION="${VERSION}-1" UNAME=`uname` diff --git a/admin/manifest.json b/admin/manifest.json index ec2de4b0dc..d60784e251 100644 --- a/admin/manifest.json +++ b/admin/manifest.json @@ -1,6 +1,6 @@ { - "version": "0.4.0", - "deb_version": "0.4.0-1", - "rpm_version": "0.4.0-1", + "version": "0.4.1", + "deb_version": "0.4.1-1", + "rpm_version": "0.4.1-1", "urlbase": "https://d3sqy0vbqsdhku.cloudfront.net" } diff --git a/admin/meteor.spec b/admin/meteor.spec index 31233ed895..43dde1499e 100644 --- a/admin/meteor.spec +++ b/admin/meteor.spec @@ -5,7 +5,7 @@ Summary: Meteor platform and JavaScript application server Vendor: Meteor Name: meteor -Version: 0.4.0 +Version: 0.4.1 Release: 1 License: MIT Group: Networking/WWW diff --git a/app/lib/updater.js b/app/lib/updater.js index f15d1c04c6..2ff86cb452 100644 --- a/app/lib/updater.js +++ b/app/lib/updater.js @@ -1,4 +1,4 @@ -exports.CURRENT_VERSION = "0.4.0"; +exports.CURRENT_VERSION = "0.4.1"; var fs = require("fs"); var http = require("http"); diff --git a/app/meteor/post-upgrade.js b/app/meteor/post-upgrade.js index 5000bf0fda..a33008e577 100644 --- a/app/meteor/post-upgrade.js +++ b/app/meteor/post-upgrade.js @@ -2,7 +2,7 @@ try { // XXX can't get this from updater.js because in 0.3.7 and before the // updater didn't have the right NODE_PATH set. At some point we can // remove this and just use updater.CURRENT_VERSION. - var VERSION = "0.4.0"; + var VERSION = "0.4.1"; var fs = require('fs'); var path = require('path'); diff --git a/docs/client/docs.html b/docs/client/docs.html index 9306792c71..331460e720 100644 --- a/docs/client/docs.html +++ b/docs/client/docs.html @@ -11,7 +11,7 @@
-

Meteor 0.4.0

+

Meteor 0.4.1

{{> introduction }} {{> concepts }} {{> api }} diff --git a/docs/client/docs.js b/docs/client/docs.js index 165e679de5..41f2337097 100644 --- a/docs/client/docs.js +++ b/docs/client/docs.js @@ -1,4 +1,4 @@ -METEOR_VERSION = "0.4.0"; +METEOR_VERSION = "0.4.1"; Meteor.startup(function () { // XXX this is broken by the new multi-page layout. Also, it was From 1caf2da2b5b9e7ecf4c2552475869825ccd86389 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 18 Sep 2012 11:23:49 -0700 Subject: [PATCH 72/86] fix CSS preproc tests when Chrome is zoomed --- packages/less/less_tests.js | 7 +++---- packages/less/less_tests.less | 6 +++--- packages/sass/sass_tests.js | 7 +++---- packages/sass/sass_tests.sass | 6 +++--- packages/stylus/stylus_tests.js | 6 +++--- packages/stylus/stylus_tests.styl | 6 +++--- 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/packages/less/less_tests.js b/packages/less/less_tests.js index cf983cc7b2..63ae396e0d 100644 --- a/packages/less/less_tests.js +++ b/packages/less/less_tests.js @@ -2,13 +2,12 @@ Tinytest.add("less - presence", function(test) { var d = OnscreenDiv(Meteor.render(function() { - return '

'; })); + return '

'; })); d.node().style.display = 'block'; var p = d.node().firstChild; - var leftBorder = getStyleProperty(p, 'border-left-width'); - test.equal(leftBorder, "13px"); + var leftBorder = getStyleProperty(p, 'border-left-style'); + test.equal(leftBorder, "dashed"); d.kill(); }); - diff --git a/packages/less/less_tests.less b/packages/less/less_tests.less index 2e064a4516..9d2c65c2a3 100644 --- a/packages/less/less_tests.less +++ b/packages/less/less_tests.less @@ -1,8 +1,8 @@ #less-tests { zoom: 1; /* prop this rule open */ } -@unlucky: 13px; +@dashy: dashed; -.less-unlucky-left-border { - border-left: @unlucky solid white; +.less-dashy-left-border { + border-left: 1px @dashy black; } diff --git a/packages/sass/sass_tests.js b/packages/sass/sass_tests.js index 2c5622c8fa..e2e0e7b652 100644 --- a/packages/sass/sass_tests.js +++ b/packages/sass/sass_tests.js @@ -2,14 +2,13 @@ Tinytest.add("sass - presence", function(test) { var d = OnscreenDiv(Meteor.render(function() { - return '

'; })); + return '

'; })); d.node().style.display = 'block'; var p = d.node().firstChild; - var leftBorder = getStyleProperty(p, 'border-left-width'); - test.equal(leftBorder, "13px"); + var leftBorder = getStyleProperty(p, 'border-left-style'); + test.equal(leftBorder, "dashed"); d.kill(); }); - diff --git a/packages/sass/sass_tests.sass b/packages/sass/sass_tests.sass index cf109dbaf6..c555f4c59f 100644 --- a/packages/sass/sass_tests.sass +++ b/packages/sass/sass_tests.sass @@ -1,7 +1,7 @@ #sass-tests :zoom 1 -unlucky: 13px +dashy: dashed -.sass-unlucky-left-border - :border-left !unlucky solid white +.sass-dashy-left-border + :border-left 1px !dashy black diff --git a/packages/stylus/stylus_tests.js b/packages/stylus/stylus_tests.js index 304411ff74..0fddbd3420 100644 --- a/packages/stylus/stylus_tests.js +++ b/packages/stylus/stylus_tests.js @@ -2,12 +2,12 @@ Tinytest.add("stylus - presence", function(test) { var d = OnscreenDiv(Meteor.render(function() { - return '

'; })); + return '

'; })); d.node().style.display = 'block'; var p = d.node().firstChild; - var leftBorder = getStyleProperty(p, 'border-left-width'); - test.equal(leftBorder, "13px"); + var leftBorder = getStyleProperty(p, 'border-left-style'); + test.equal(leftBorder, "dashed"); d.kill(); diff --git a/packages/stylus/stylus_tests.styl b/packages/stylus/stylus_tests.styl index 2dec111077..feedadbdf2 100644 --- a/packages/stylus/stylus_tests.styl +++ b/packages/stylus/stylus_tests.styl @@ -2,8 +2,8 @@ #stylus-tests zoom: 1 -unlucky = 13px +dashy = dashed -.stylus-unlucky-left-border - border-left: unlucky solid white +.stylus-dashy-left-border + border-left: 1px dashy black From 58af66a11021068a08a02c3bb924ab93af8d2e63 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Tue, 18 Sep 2012 13:30:41 -0700 Subject: [PATCH 73/86] Make email tests pass when deployed. --- packages/email/email_tests.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/email/email_tests.js b/packages/email/email_tests.js index f635104ba1..68c78eef47 100644 --- a/packages/email/email_tests.js +++ b/packages/email/email_tests.js @@ -1,6 +1,9 @@ streamBuffers = __meteor_bootstrap__.require('stream-buffers'); Tinytest.add("email - dev mode smoke test", function (test) { + // This only tests dev mode, so don't run the test if this is deployed. 
+ if (process.env.MAIL_URL) return; + var old_stream = Email._output_stream; try { Email._output_stream = new streamBuffers.WritableStreamBuffer; From 1cf759ed7ed284dc3d7a227e4d8fe3a390669a9f Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 18 Sep 2012 15:01:12 -0700 Subject: [PATCH 74/86] Unit test driver on iPad: make tests clickable --- packages/test-in-browser/driver.css | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/test-in-browser/driver.css b/packages/test-in-browser/driver.css index f8489a5a5f..29d9da59ff 100644 --- a/packages/test-in-browser/driver.css +++ b/packages/test-in-browser/driver.css @@ -44,6 +44,7 @@ line-height: 24px; vertical-align: middle; text-decoration: underline; + cursor: pointer; } .test_table .groupname { From a0ded8c3d0cb09dc852d0772243b0705b852a878 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 18 Sep 2012 15:01:28 -0700 Subject: [PATCH 75/86] Fix Spark in Firefox 3.6-4 --- packages/spark/spark.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/spark/spark.js b/packages/spark/spark.js index 3a9d3b7cfc..55d34e03af 100644 --- a/packages/spark/spark.js +++ b/packages/spark/spark.js @@ -91,8 +91,9 @@ var notifyWatchers = function (start, end) { }; Spark._createId = function () { + // Chars can't include '-' to be safe inside HTML comments. 
var chars = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_"; var id = ""; for (var i = 0; i < 8; i++) id += chars.substr(Math.floor(Meteor.random() * 64), 1); From 7b036318e784aecf5380f0d4ff756ef6fcad8bde Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Tue, 18 Sep 2012 15:02:07 -0700 Subject: [PATCH 76/86] Fix Spark CSS updating tests in Safari 4, iPad --- packages/spark/patch_tests.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/spark/patch_tests.js b/packages/spark/patch_tests.js index 842b8c8e80..890c34e808 100644 --- a/packages/spark/patch_tests.js +++ b/packages/spark/patch_tests.js @@ -185,11 +185,11 @@ Tinytest.add("spark - patch - copyAttributes", function(test) { {id:'foo', 'class':'bar', style:'border:1px solid blue;', name:'baz'}); a.check(); - test.equal(a.node().style.borderColor, "blue"); + test.equal(a.node().style.borderLeftColor, "blue"); a.copy({id: "foo", style:'border:1px solid red'}); a.check(); - test.equal(a.node().style.borderColor, "red"); + test.equal(a.node().style.borderLeftColor, "red"); a.copy({id: "foo", 'class':'ha'}); a.check(); From f0fd0c476c39cd052627bf19f2c2bb1054b161ff Mon Sep 17 00:00:00 2001 From: David Glasser Date: Tue, 18 Sep 2012 15:54:19 -0700 Subject: [PATCH 77/86] Make sure that Mongo remove calls finish before returning from _Mongo.remove. This started causing test failures in deployed tests while testing the 0.4.1 release candidate; we think this may be related to bumping the mongodb node driver version. (Maybe the change to this.poolSize in https://github.com/mongodb/node-mongodb-native/commit/f81870f7da596eda7c24c536bc44c4a64ccbbad9 ?) 
--- packages/mongo-livedata/mongo_driver.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/mongo-livedata/mongo_driver.js b/packages/mongo-livedata/mongo_driver.js index d4ee2883ac..9f0da038f6 100644 --- a/packages/mongo-livedata/mongo_driver.js +++ b/packages/mongo-livedata/mongo_driver.js @@ -164,7 +164,7 @@ _Mongo.prototype.remove = function (collection_name, selector) { return; } - collection.remove(selector, {/* XXXsafe: true*/}, function (err) { + collection.remove(selector, {safe: true}, function (err) { if (err) { future.ret(err); return; From 125cf2153d3e86a960d4377c7b687d32ebbcb70d Mon Sep 17 00:00:00 2001 From: David Glasser Date: Tue, 18 Sep 2012 16:41:09 -0700 Subject: [PATCH 78/86] Add one more change to History.md. --- History.md | 1 + 1 file changed, 1 insertion(+) diff --git a/History.md b/History.md index b737f6525e..d9a1d171c1 100644 --- a/History.md +++ b/History.md @@ -25,6 +25,7 @@ * Fix bug caused by interaction between `Template.foo.preserve` and `{{#constant}}`. #323 * Allow `{{#each}}` over a collection of objects without `_id`. #281 + * Spark now supports Firefox 3.6. * Added a script to build a standalone spark.js that does not depend on Meteor (it depends on jQuery or Sizzle if you need IE7 support, and otherwise is fully standalone). 
From a4c4fabbbd4a07e597a9670e57688f15c1ff041e Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Wed, 19 Sep 2012 11:42:34 -0700 Subject: [PATCH 79/86] make jsparse internal --- packages/jsparse/lexer.js | 2 +- packages/jsparse/package.js | 7 +++++-- packages/jsparse/parser.js | 2 +- packages/jsparse/parserlib.js | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/packages/jsparse/lexer.js b/packages/jsparse/lexer.js index 04620b18d1..e2dcdea973 100644 --- a/packages/jsparse/lexer.js +++ b/packages/jsparse/lexer.js @@ -394,4 +394,4 @@ JSLexer.prototype.next = function () { return lexeme(keywordLookup[' '+word] || 'IDENTIFIER'); }; -})(); \ No newline at end of file +})(); diff --git a/packages/jsparse/package.js b/packages/jsparse/package.js index f45fd38100..3cb8c685a7 100644 --- a/packages/jsparse/package.js +++ b/packages/jsparse/package.js @@ -1,5 +1,6 @@ Package.describe({ - summary: "Full-featured JavaScript parser" + summary: "Full-featured JavaScript parser", + internal: true }); Package.on_use(function (api) { @@ -12,6 +13,8 @@ Package.on_test(function (api) { api.use('jsparse', 'client'); api.add_files('parser_tests.js', - 'client'); // for faster loading + // Test just on client for faster running; should run + // identically on server. 
+ 'client'); //['client', 'server']); }); diff --git a/packages/jsparse/parser.js b/packages/jsparse/parser.js index 1289043974..a2021f3298 100644 --- a/packages/jsparse/parser.js +++ b/packages/jsparse/parser.js @@ -1,6 +1,6 @@ ///// JAVASCRIPT PARSER -// What we don't have from ECMA-262 5.1: +// What we don't support from ECMA-262 5.1: // - object literal trailing comma // - object literal get/set diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index d8b3e932cb..26813f2fe7 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -263,4 +263,4 @@ Parsers.lazy = function (expecting, parserFunc) { }); }; -})(); \ No newline at end of file +})(); From e27304c999d99eba462e12081dc789a201484d10 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Wed, 19 Sep 2012 13:50:06 -0700 Subject: [PATCH 80/86] Work around Node 0.8 brokenness with using /dev/stdin in subprocesses. --- packages/spiderable/spiderable.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/spiderable/spiderable.js b/packages/spiderable/spiderable.js index 153baa4f0e..1b883c21bb 100644 --- a/packages/spiderable/spiderable.js +++ b/packages/spiderable/spiderable.js @@ -24,7 +24,12 @@ // Use '/dev/stdin' to avoid writing to a temporary file. Can't // just omit the file, as PhantomJS takes that to mean 'use a // REPL' and exits as soon as stdin closes. - var cp = spawn('phantomjs', ['--load-images=no', '/dev/stdin']); + // + // However, Node 0.8 broke the ability to open /dev/stdin in the + // subprocess; see https://gist.github.com/3751746 for the gory + // details. Work around this with a not-so-useless use of cat. 
+ var cp = spawn('bash', + ['-c', 'cat | phantomjs --load-images=no /dev/stdin']); var data = ''; cp.stdout.setEncoding('utf8'); From de413efe500174999211eff318ad65eb34794d74 Mon Sep 17 00:00:00 2001 From: Avital Oliver Date: Wed, 19 Sep 2012 16:19:54 -0700 Subject: [PATCH 81/86] Fold the absolute-url package into the meteor package --- docs/client/api.html | 1 + docs/client/api.js | 27 +++++++++++++++++++ docs/client/docs.js | 4 +-- docs/client/packages/absolute-url.html | 15 ----------- docs/client/packages/absolute-url.js | 24 ----------------- packages/absolute-url/package.js | 15 +++-------- packages/force-ssl/package.js | 5 ---- packages/meteor/package.js | 7 +++++ .../{absolute-url => meteor}/url_common.js | 0 .../{absolute-url => meteor}/url_server.js | 0 .../{absolute-url => meteor}/url_tests.js | 0 11 files changed, 40 insertions(+), 58 deletions(-) delete mode 100644 docs/client/packages/absolute-url.html delete mode 100644 docs/client/packages/absolute-url.js rename packages/{absolute-url => meteor}/url_common.js (100%) rename packages/{absolute-url => meteor}/url_server.js (100%) rename packages/{absolute-url => meteor}/url_tests.js (100%) diff --git a/docs/client/api.html b/docs/client/api.html index a6029b3685..b0279ca4dc 100644 --- a/docs/client/api.html +++ b/docs/client/api.html @@ -28,6 +28,7 @@ put on the screen. }); } +{{> api_box absoluteUrl}}

Publish and subscribe

diff --git a/docs/client/api.js b/docs/client/api.js index 1d14bdaa9c..c6ef8d2940 100644 --- a/docs/client/api.js +++ b/docs/client/api.js @@ -24,6 +24,33 @@ Template.api.startup = { ] }; +Template.api.absoluteUrl = { + id: "meteor_absoluteUrl", + name: "Meteor.absoluteUrl([path], [options])", + locus: "Anywhere", + descr: ["Generate an absolute URL pointing to the application. The server " + + "reads from the `ROOT_URL` environment variable to determine " + + "where it is running. This is taken care of automatically for " + + "apps deployed with `meteor deploy`, but must be provided when " + + "using `meteor bundle`."], + args: [ + {name: "path", + type: "String", + descr: 'A path to append to the root URL. Do not include a leading "`/`".' + } + ], + options: [ + {name: "secure", + type: "Boolean", + descr: "Create an HTTPS URL." + }, + {name: "rootUrl", + type: "String", + descr: "Override the default ROOT_URL from the server environment. For example: \"`http://foo.example.com`\"" + } + ] +}; + Template.api.publish = { id: "meteor_publish", name: "Meteor.publish(name, func)", diff --git a/docs/client/docs.js b/docs/client/docs.js index 41f2337097..871b46b975 100644 --- a/docs/client/docs.js +++ b/docs/client/docs.js @@ -82,7 +82,8 @@ var toc = [ "Core", [ "Meteor.isClient", "Meteor.isServer", - "Meteor.startup" + "Meteor.startup", + "Meteor.absoluteUrl" ], "Publish and subscribe", [ @@ -205,7 +206,6 @@ var toc = [ ], "Packages", [ [ - "absolute-url", "amplify", "backbone", "bootstrap", diff --git a/docs/client/packages/absolute-url.html b/docs/client/packages/absolute-url.html deleted file mode 100644 index 19a0401e63..0000000000 --- a/docs/client/packages/absolute-url.html +++ /dev/null @@ -1,15 +0,0 @@ - diff --git a/docs/client/packages/absolute-url.js b/docs/client/packages/absolute-url.js deleted file mode 100644 index 7a4ba1d036..0000000000 --- a/docs/client/packages/absolute-url.js +++ /dev/null @@ -1,24 +0,0 @@ -Template.pkg_absolute_url.absoluteUrl = { - 
id: "meteor_absoluteUrl", - name: "Meteor.absoluteUrl([path], [options])", - locus: "Anywhere", - descr: ["Generate an absolute URL pointing to the application."], - args: [ - {name: "path", - type: "String", - descr: 'A path to append to the root URL. Do not include a leading "`/`".' - } - ], - options: [ - {name: "secure", - type: "Boolean", - descr: "Create an HTTPS URL." - }, - {name: "rootUrl", - type: "String", - descr: "Override the default ROOT_URL from the server environment. For example: \"`http://foo.example.com`\"" - } - ] - -}; - diff --git a/packages/absolute-url/package.js b/packages/absolute-url/package.js index 796cba69b8..6c93f33226 100644 --- a/packages/absolute-url/package.js +++ b/packages/absolute-url/package.js @@ -1,17 +1,8 @@ Package.describe({ - summary: "Generate absolute URLs pointing to the application" + summary: "DEPRECATED: Generate absolute URLs pointing to the application" }); Package.on_use(function (api) { - // note server before common. usually it is the other way around, but - // in this case server must load first. - api.add_files('url_server.js', 'server'); - api.add_files('url_common.js', ['client', 'server']); -}); - -Package.on_test(function (api) { - api.use('absolute-url', ['client', 'server']); - api.use('tinytest'); - - api.add_files('url_tests.js', ['client', 'server']); + console.log('DEPRECATED. The `absolute-url` package has been folded into ' + + 'the `meteor` package and should not be used directly.'); }); diff --git a/packages/force-ssl/package.js b/packages/force-ssl/package.js index 90b5037578..bc413ad820 100644 --- a/packages/force-ssl/package.js +++ b/packages/force-ssl/package.js @@ -8,11 +8,6 @@ Package.on_use(function (api) { // server has been instantiated. api.use('livedata', 'server'); - // we don't really depend on absolute-url, but we do modify its - // behavior. If there were a way to say "if the other package is - // loaded, make sure we come after it", we should do that here. 
- api.use('absolute-url', ['client', 'server']); - api.add_files('force_ssl_common.js', ['client', 'server']); api.add_files('force_ssl_server.js', 'server'); diff --git a/packages/meteor/package.js b/packages/meteor/package.js index 9b93219566..a194a549d0 100644 --- a/packages/meteor/package.js +++ b/packages/meteor/package.js @@ -40,6 +40,11 @@ Package.on_use(function (api, where) { api.use('underscore', ['client', 'server']); api.add_files('dynamics_browser.js', 'client'); api.add_files('dynamics_nodejs.js', 'server'); + + // note server before common. usually it is the other way around, but + // in this case server must load first. + api.add_files('url_server.js', 'server'); + api.add_files('url_common.js', ['client', 'server']); }); Package.on_test(function (api) { @@ -50,4 +55,6 @@ Package.on_test(function (api) { api.add_files('helpers_test.js', ['client', 'server']); api.add_files('dynamics_test.js', ['client', 'server']); + + api.add_files('url_tests.js', ['client', 'server']); }); diff --git a/packages/absolute-url/url_common.js b/packages/meteor/url_common.js similarity index 100% rename from packages/absolute-url/url_common.js rename to packages/meteor/url_common.js diff --git a/packages/absolute-url/url_server.js b/packages/meteor/url_server.js similarity index 100% rename from packages/absolute-url/url_server.js rename to packages/meteor/url_server.js diff --git a/packages/absolute-url/url_tests.js b/packages/meteor/url_tests.js similarity index 100% rename from packages/absolute-url/url_tests.js rename to packages/meteor/url_tests.js From e1a7c2ff465aed7216651586f83aa38fd0155f10 Mon Sep 17 00:00:00 2001 From: Avital Oliver Date: Thu, 20 Sep 2012 07:33:08 -0700 Subject: [PATCH 82/86] Minor changes to deprecated absolute-url package based on code review comments --- packages/absolute-url/package.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/absolute-url/package.js b/packages/absolute-url/package.js index 
6c93f33226..b84536203d 100644 --- a/packages/absolute-url/package.js +++ b/packages/absolute-url/package.js @@ -1,8 +1,10 @@ Package.describe({ - summary: "DEPRECATED: Generate absolute URLs pointing to the application" + summary: "DEPRECATED: Generate absolute URLs pointing to the application", + internal: true }); Package.on_use(function (api) { console.log('DEPRECATED. The `absolute-url` package has been folded into ' - + 'the `meteor` package and should not be used directly.'); + + 'the `meteor` package and should not be used directly. Run ' + + '`meteor remove absolute-url` to resolve this.'); }); From 4b8a68800052ac7bc3ef2ace3c6495122602ccf9 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Thu, 20 Sep 2012 12:04:00 -0700 Subject: [PATCH 83/86] Change some test names to make it easier to search output for failures. We should eventually improve test output in more sophisticated ways (eg, failures could show up sorted at the top, or be duplicated in a second column), but this is a simple start. 
--- packages/http/httpcall_tests.js | 2 +- packages/mongo-livedata/mongo_livedata_tests.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/http/httpcall_tests.js b/packages/http/httpcall_tests.js index 2baecbaec4..edd0a29fbd 100644 --- a/packages/http/httpcall_tests.js +++ b/packages/http/httpcall_tests.js @@ -69,7 +69,7 @@ testAsyncMulti("httpcall - basic", [ "/foo?fruit=apple&dog=Spot+the+dog"); }]); -testAsyncMulti("httpcall - failure", [ +testAsyncMulti("httpcall - errors", [ function(test, expect) { // Accessing unknown server (should fail to make any connection) diff --git a/packages/mongo-livedata/mongo_livedata_tests.js b/packages/mongo-livedata/mongo_livedata_tests.js index 1277048e6f..e01243a129 100644 --- a/packages/mongo-livedata/mongo_livedata_tests.js +++ b/packages/mongo-livedata/mongo_livedata_tests.js @@ -5,7 +5,7 @@ Meteor._FailureTestCollection = new Meteor.Collection("___meteor_failure_test_collection"); -testAsyncMulti("mongo-livedata - database failure reporting", [ +testAsyncMulti("mongo-livedata - database error reporting", [ function (test, expect) { var ftc = Meteor._FailureTestCollection; From b0d181dbcc1e22ec9b425af2366fa6bf7f37cd2b Mon Sep 17 00:00:00 2001 From: Avital Oliver Date: Thu, 20 Sep 2012 15:07:52 -0700 Subject: [PATCH 84/86] Add the replaceLocalhost option to Meteor.absoluteUrl --- docs/client/api.js | 5 ++++- docs/client/packages.html | 1 - packages/meteor/url_common.js | 3 +++ packages/meteor/url_tests.js | 21 +++++++++++++++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/docs/client/api.js b/docs/client/api.js index c6ef8d2940..e638a8b62b 100644 --- a/docs/client/api.js +++ b/docs/client/api.js @@ -25,7 +25,7 @@ Template.api.startup = { }; Template.api.absoluteUrl = { - id: "meteor_absoluteUrl", + id: "meteor_absoluteurl", name: "Meteor.absoluteUrl([path], [options])", locus: "Anywhere", descr: ["Generate an absolute URL pointing to the application. 
The server " @@ -44,6 +44,9 @@ Template.api.absoluteUrl = { type: "Boolean", descr: "Create an HTTPS URL." }, + {name: "replaceLocalhost", + type: "Boolean", + descr: "Replace localhost with 127.0.0.1. Useful for services that don't recognize localhost as a domain name."}, {name: "rootUrl", type: "String", descr: "Override the default ROOT_URL from the server environment. For example: \"`http://foo.example.com`\"" diff --git a/docs/client/packages.html b/docs/client/packages.html index 51eddc0373..1c12157077 100644 --- a/docs/client/packages.html +++ b/docs/client/packages.html @@ -16,7 +16,6 @@ and removed with: $ meteor remove -{{> pkg_absolute_url}} {{> pkg_amplify}} {{> pkg_backbone}} {{> pkg_bootstrap}} diff --git a/packages/meteor/url_common.js b/packages/meteor/url_common.js index 4c65f71170..c6ac9b2770 100644 --- a/packages/meteor/url_common.js +++ b/packages/meteor/url_common.js @@ -27,6 +27,9 @@ !/http:\/\/127\.0\.0\.1[:\/]/.test(url)) // or 127.0.0.1 url = url.replace(/^http:/, 'https:'); + if (options.replaceLocalhost) + url = url.replace(/^http:\/\/localhost([:\/].*)/, 'http://127.0.0.1$1'); + return url; }; diff --git a/packages/meteor/url_tests.js b/packages/meteor/url_tests.js index 6548dfc7ab..922fe045f8 100644 --- a/packages/meteor/url_tests.js +++ b/packages/meteor/url_tests.js @@ -36,6 +36,27 @@ Tinytest.add("absolute-url - basics", function(test) { test.equal(Meteor.absoluteUrl('foo', {rootUrl: 'http://127.0.0.1:3000', secure: true}), 'http://127.0.0.1:3000/foo'); + + // test replaceLocalhost + test.equal(Meteor.absoluteUrl('foo', {rootUrl: 'http://localhost:3000', + replaceLocalhost: true}), + 'http://127.0.0.1:3000/foo'); + test.equal(Meteor.absoluteUrl('foo', {rootUrl: 'http://localhost', + replaceLocalhost: true}), + 'http://127.0.0.1/foo'); + test.equal(Meteor.absoluteUrl('foo', {rootUrl: 'http://127.0.0.1:3000', + replaceLocalhost: true}), + 'http://127.0.0.1:3000/foo'); + test.equal(Meteor.absoluteUrl('foo', {rootUrl: 'http://127.0.0.1', 
+ replaceLocalhost: true}), + 'http://127.0.0.1/foo'); + // don't replace just any localhost + test.equal(Meteor.absoluteUrl('foo', {rootUrl: 'http://foo.com/localhost', + replaceLocalhost: true}), + 'http://foo.com/localhost/foo'); + test.equal(Meteor.absoluteUrl('foo', {rootUrl: 'http://foo.localhost.com', + replaceLocalhost: true}), + 'http://foo.localhost.com/foo'); }); From 2768abd5c39b9f6f11b325206047d21cba0807fe Mon Sep 17 00:00:00 2001 From: David Glasser Date: Thu, 20 Sep 2012 17:04:50 -0700 Subject: [PATCH 85/86] Fix IE7 minimongo test failure: don't use LocalCollection._f._equal on Dates. (We should eventually make all the selector code support Dates, but until then, don't use it.) --- packages/minimongo/minimongo_tests.js | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/minimongo/minimongo_tests.js b/packages/minimongo/minimongo_tests.js index c5d64c9296..6d165501a1 100644 --- a/packages/minimongo/minimongo_tests.js +++ b/packages/minimongo/minimongo_tests.js @@ -201,7 +201,10 @@ Tinytest.add("minimongo - misc", function (test) { var a = {a: [1, 2, 3], b: "x", c: true, d: {x: 12, y: [12]}, f: null, g: new Date()}; var b = LocalCollection._deepcopy(a); - test.isTrue(LocalCollection._f._equal(a, b)); + // minimongo doesn't support Dates, so we *can't* test + // LocalCollection._f._equal here! (Currently _equal considers all dates equal + // on most browsers except IE7 where it considers all dates unequal.) 
+ test.equal(a, b); a.a.push(4); test.length(b.a, 3); a.c = false; @@ -211,10 +214,10 @@ Tinytest.add("minimongo - misc", function (test) { test.equal(b.d.z, 15); a.d.y.push(88); test.length(b.d.y, 1); - test.equal(a.g, b.g) + test.equal(a.g, b.g); b.g.setDate(b.g.getDate() + 1); - test.notEqual(a.g, b.g) - + test.notEqual(a.g, b.g); + a = {x: function () {}}; b = LocalCollection._deepcopy(a); a.x.a = 14; From b112cf8f924203b627c29c45bbba8faf839e062f Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Thu, 20 Sep 2012 21:22:34 -0700 Subject: [PATCH 86/86] small jsparse improvements --- packages/jsparse/lexer.js | 2 +- packages/jsparse/parserlib.js | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/packages/jsparse/lexer.js b/packages/jsparse/lexer.js index e2dcdea973..c62e2c6bd4 100644 --- a/packages/jsparse/lexer.js +++ b/packages/jsparse/lexer.js @@ -33,7 +33,7 @@ var unicodeClass = function (abbrev) { // We are taking advantage of the fact that we are parsing JS from JS in // regexes like this by "passing through" the spec's definition of whitespace, // which is the same in regexes and the lexical grammar. -var rWhiteSpace = /(?=.)\s+?((?!.)|(?=\S))/g; +var rWhiteSpace = /[^\S\u000A\u000D\u2028\u2029]+/g; // Section 7.3 // Match one line terminator. Same as (?!.)[\s\S] but more explicit. var rLineTerminator = /[\u000A\u000D\u2028\u2029]/g; diff --git a/packages/jsparse/parserlib.js b/packages/jsparse/parserlib.js index 26813f2fe7..a8bbcac670 100644 --- a/packages/jsparse/parserlib.js +++ b/packages/jsparse/parserlib.js @@ -39,10 +39,8 @@ Parser.prototype.parseRequiredIf = function (t, required) { return result; }; -// mutates the parser Parser.expecting = function (expecting, parser) { - parser.expecting = expecting; - return parser; + return new Parser(expecting, parser._run); };