// Parse a "fragment" of HTML: everything up to the end of the input, or up
// to the point where the caller-supplied "shouldStop" predicate asks us to
// stop (typically at a particular template tag).
//
// `input` is either a string or an existing scanner.  Recognized `options`:
//
// - `getTemplateTag`: installed on the scanner before parsing, e.g.
//
//   ```
//   { getTemplateTag: function (scanner, templateTagPosition) {
//       if (templateTagPosition === HTMLTools.TEMPLATE_TAG_POSITION.ELEMENT) {
//         ...
//   ```
//
// - `shouldStop`: function (scanner) -> boolean.
// - `textMode`: `HTML.TEXTMODE.STRING` or `HTML.TEXTMODE.RCDATA` to parse
//   the input as raw text / RCDATA instead of regular content.  Any other
//   truthy value throws.
//
// Returns whatever the selected content parser returns: null, a single
// item, or an array of items.
HTMLTools.parseFragment = function (input, options) {
  // A string gets a fresh scanner; anything else is taken to be a scanner
  // already.  In that case the "getTemplateTag" option had better match the
  // one the scanner was created with, because the value lives on the scanner
  // and nothing here resets it.
  var scanner = (typeof input === 'string') ? new Scanner(input) : input;

  if (options && options.getTemplateTag)
    scanner.getTemplateTag = options.getTemplateTag;

  var shouldStop = options && options.shouldStop;
  var textMode = options && options.textMode;

  var result;
  if (! textMode) {
    result = getContent(scanner, shouldStop);
  } else if (textMode === HTML.TEXTMODE.STRING) {
    result = getRawText(scanner, null, shouldStop);
  } else if (textMode === HTML.TEXTMODE.RCDATA) {
    result = getRCData(scanner, null, shouldStop);
  } else {
    throw new Error("Unsupported textMode: " + textMode);
  }

  if (scanner.isEOF())
    return result;

  // We stopped short of the end of the input, so we are looking at either an
  // unmatched HTML end tag or a template tag (like `{{else}}` or `{{/if}}`).
  // Peek at the next token so the end-tag case gets a helpful error.
  var savedPos = scanner.pos;
  var nextToken;
  try {
    nextToken = getHTMLToken(scanner);
  } catch (e) {
    // ignore errors from getTemplateTag
  }

  // XXX this assumes `shouldStop` never tells us to stop at an HTML end
  // tag.  `shouldStop` should be refactored into something more suitable.
  if (nextToken && nextToken.t === 'Tag' && nextToken.isEnd) {
    var voidHint = '';
    if (HTML.isVoidElement(nextToken.n))
      voidHint = '. <' + nextToken.n + '> should have no close tag.';
    scanner.fatal("Unexpected HTML close tag" + voidHint);
  }

  // Rewind; the peek above must not consume anything.
  scanner.pos = savedPos;

  // Without a "shouldStop" option the whole input should have been consumed.
  if (! shouldStop)
    scanner.fatal("Expected EOF");

  return result;
};

// Take a numeric Unicode code point, which may be larger than 16 bits,
// and encode it as a JavaScript (UTF-16) string.  Code points in the
// surrogate range 0xD800..0xDFFF, and anything outside 0..0x10FFFF, yield
// the empty string.
//
// Adapted from
// http://stackoverflow.com/questions/7126384/expressing-utf-16-unicode-characters-in-javascript/7126661.
codePointToString = HTMLTools.codePointToString = function (cp) {
  var fitsInOneUnit =
        (cp >= 0 && cp <= 0xD7FF) || (cp >= 0xE000 && cp <= 0xFFFF);
  if (fitsInOneUnit)
    return String.fromCharCode(cp);

  if (! (cp >= 0x10000 && cp <= 0x10FFFF))
    return '';

  // Subtracting 0x10000 leaves a 20-bit number (0..0xFFFFF).  Its high ten
  // bits, offset by 0xD800, form the first code unit of the surrogate pair;
  // its low ten bits, offset by 0xDC00, form the second.
  var offset = cp - 0x10000;
  var highUnit = ((0xffc00 & offset) >> 10) + 0xD800;
  var lowUnit = (0x3ff & offset) + 0xDC00;
  return String.fromCharCode(highUnit) + String.fromCharCode(lowUnit);
};

// Shared return convention of the content parsers below: no items -> null,
// exactly one item -> that item, otherwise the array itself.
var unwrapItems = function (items) {
  switch (items.length) {
  case 0:
    return null;
  case 1:
    return items[0];
  default:
    return items;
  }
};

// Build the item for an element whose (non-end) start tag `token` has just
// been read from `scanner`.  For elements that are neither void nor
// self-closing this also consumes the element's contents and its end tag.
var parseElementFromStartTag = function (scanner, token, shouldStopFunc) {
  var tagName = token.n;
  // is this an element with no close tag (a BR, HR, IMG, etc.) based
  // on its name?
  var isVoid = HTML.isVoidElement(tagName);

  if (token.isSelfClosing) {
    var maySelfClose = isVoid || HTML.isKnownSVGElement(tagName) ||
          tagName.indexOf(':') >= 0;
    if (! maySelfClose)
      scanner.fatal('Only certain elements like BR, HR, IMG, etc. (and foreign elements like SVG) are allowed to self-close');
  }

  // result of parseAttrs may be null; an array result has to be wrapped in
  // HTML.Attrs(...) before it can be handed to a tag constructor.
  var attrs = parseAttrs(token.attrs);
  if (HTML.isArray(attrs))
    attrs = HTML.Attrs.apply(null, attrs);

  var tagFunc = HTML.getTag(tagName);
  if (isVoid || token.isSelfClosing)
    return attrs ? tagFunc(attrs) : tagFunc();

  // HTML treats a final `/` in a tag as part of an attribute (as in
  // `<a href=/foo/>`), so a tag written like `<div class=x/>` does not
  // self-close.  A template author may not be thinking about that, so
  // remember whether the start tag "looks like" a self-close and mention it
  // if the end tag turns out to be missing.
  var looksLikeSelfClose = (scanner.input.substr(scanner.pos - 2, 2) === '/>');

  var content = null;
  if (token.n === 'textarea') {
    // Skip a newline that immediately follows the start tag.
    if (scanner.peek() === '\n')
      scanner.pos++;
    // The textarea's body is RCDATA and is stored as its `value` attribute
    // rather than as child content.
    var textareaValue = getRCData(scanner, token.n, shouldStopFunc);
    if (textareaValue) {
      if (attrs instanceof HTML.Attrs) {
        attrs = HTML.Attrs.apply(
          null, attrs.value.concat([{value: textareaValue}]));
      } else {
        attrs = (attrs || {});
        attrs.value = textareaValue;
      }
    }
  } else {
    content = getContent(scanner, shouldStopFunc);
  }

  var endTag = getHTMLToken(scanner);
  var isMatchingEndTag =
        endTag && endTag.t === 'Tag' && endTag.isEnd && endTag.n === tagName;
  if (! isMatchingEndTag) {
    var selfCloseHint = '';
    if (looksLikeSelfClose)
      selfCloseHint = ' -- if the "<' + token.n + ' />" tag was supposed to self-close, try adding a space before the "/"';
    scanner.fatal('Expected "' + tagName + '" end tag' + selfCloseHint);
  }

  // XXX support implied end tags in cases allowed by the spec

  // Normalize `content` to an array so it can be spread into the tag
  // constructor, as in `FOO.apply(null, children)`.
  var children;
  if (content == null)
    children = [];
  else if (content instanceof Array)
    children = content;
  else
    children = [content];

  // The constructor is looked up again here, exactly as the parser has
  // always done for this path.
  return HTML.getTag(tagName).apply(
    null, (attrs ? [attrs] : []).concat(children));
};

// Parse regular HTML content from `scanner` until EOF, until
// `shouldStopFunc(scanner)` returns true, or until an end tag shows up at
// this nesting level (the end tag is left unconsumed for the caller).
//
// Returns null, a single item, or an array of items.
getContent = HTMLTools.Parse.getContent = function (scanner, shouldStopFunc) {
  var items = [];

  while (! scanner.isEOF()) {
    if (shouldStopFunc && shouldStopFunc(scanner))
      break;

    var tokenStart = scanner.pos;
    var token = getHTMLToken(scanner);
    if (! token)
      // tokenizer reached EOF on its own, e.g. while scanning
      // template comments like `{{! foo}}`.
      continue;

    var type = token.t;
    if (type === 'Tag') {
      if (token.isEnd) {
        // Stop when we encounter an end tag at the top level.
        // Rewind; we'll re-parse the end tag later.
        scanner.pos = tokenStart;
        break;
      }
      items.push(parseElementFromStartTag(scanner, token, shouldStopFunc));
    } else if (type === 'Chars') {
      pushOrAppendString(items, token.v);
    } else if (type === 'CharRef') {
      items.push(convertCharRef(token));
    } else if (type === 'Comment') {
      items.push(HTML.Comment(token.v));
    } else if (type === 'TemplateTag') {
      items.push(token.v);
    } else if (type === 'Doctype') {
      scanner.fatal("Unexpected Doctype");
    } else {
      scanner.fatal("Unknown token type: " + type);
    }
  }

  return unwrapItems(items);
};

// Add `string` to `items`, merging it into the last entry when that entry
// is itself a string so that adjacent text ends up as one string.
var pushOrAppendString = function (items, string) {
  var lastIndex = items.length - 1;
  if (lastIndex >= 0 && typeof items[lastIndex] === 'string')
    items[lastIndex] += string;
  else
    items.push(string);
};

// get RCDATA to go in the lowercase (or camel case) tagName (e.g. "textarea")
//
// Reads 'rcdata'-mode tokens until EOF, until the scanner is looking at the
// end tag for `tagName` (when `tagName` is given), or until
// `shouldStopFunc(scanner)` returns true.  Returns null, a single item, or
// an array of items.
getRCData = HTMLTools.Parse.getRCData = function (scanner, tagName, shouldStopFunc) {
  var items = [];

  while (! scanner.isEOF()) {
    // break at appropriate end tag, or when the caller says so
    if ((tagName && isLookingAtEndTag(scanner, tagName)) ||
        (shouldStopFunc && shouldStopFunc(scanner)))
      break;

    var token = getHTMLToken(scanner, 'rcdata');
    if (! token)
      // tokenizer reached EOF on its own, e.g. while scanning
      // template comments like `{{! foo}}`.
      continue;

    switch (token.t) {
    case 'Chars':
      pushOrAppendString(items, token.v);
      break;
    case 'CharRef':
      items.push(convertCharRef(token));
      break;
    case 'TemplateTag':
      items.push(token.v);
      break;
    default:
      // (can't happen)
      scanner.fatal("Unknown or unexpected token type: " + token.t);
    }
  }

  return unwrapItems(items);
};

// Raw-text counterpart of getRCData: reads 'rawtext'-mode tokens with the
// same stopping rules, accepting only `Chars` and `TemplateTag` tokens.
// Returns null, a single item, or an array of items.
var getRawText = function (scanner, tagName, shouldStopFunc) {
  var items = [];

  while (! scanner.isEOF()) {
    // break at appropriate end tag, or when the caller says so
    if ((tagName && isLookingAtEndTag(scanner, tagName)) ||
        (shouldStopFunc && shouldStopFunc(scanner)))
      break;

    var token = getHTMLToken(scanner, 'rawtext');
    if (! token)
      // tokenizer reached EOF on its own, e.g. while scanning
      // template comments like `{{! foo}}`.
      continue;

    switch (token.t) {
    case 'Chars':
      pushOrAppendString(items, token.v);
      break;
    case 'TemplateTag':
      items.push(token.v);
      break;
    default:
      // (can't happen)
      scanner.fatal("Unknown or unexpected token type: " + token.t);
    }
  }

  return unwrapItems(items);
};

// Input: A token like `{ t: 'CharRef', v: '&amp;', cp: [38] }`.
//
// Output: A tag like `HTML.CharRef({ html: '&amp;', str: '&' })`, where
// `html` is the token's text and `str` is the string spelled by its code
// points.
var convertCharRef = function (token) {
  var codePoints = token.cp;
  var pieces = [];
  for (var i = 0; i < codePoints.length; i++)
    pieces.push(codePointToString(codePoints[i]));
  return HTML.CharRef({ html: token.v, str: pieces.join('') });
};

// Input is always a dictionary (even if zero attributes) and each
// value in the dictionary is an array of `Chars`, `CharRef`,
// and maybe `TemplateTag` tokens.  (The function also handles the array
// form: first element a dictionary of nondynamic attrs, remaining elements
// `TemplateTag` tokens.)
//
// Output is null if there are zero attributes, and otherwise a
// dictionary, or an array of dictionaries and template tags.
// Each value in the dictionary is HTMLjs (e.g. a
// string or an array of `Chars`, `CharRef`, and `TemplateTag`
// nodes).
//
// An attribute value with no input tokens is represented as "",
// not an empty array, in order to prop open empty attributes
// with no template tags.
var parseAttrs = function (attrs) {
  if (HTML.isArray(attrs)) {
    // first element is nondynamic attrs, rest are template tags
    var list = [];

    var nondynamicAttrs = parseAttrs(attrs[0]);
    if (nondynamicAttrs)
      list.push(nondynamicAttrs);

    for (var i = 1; i < attrs.length; i++) {
      var tagToken = attrs[i];
      if (tagToken.t !== 'TemplateTag')
        throw new Error("Expected TemplateTag token");
      list.push(tagToken.v);
    }

    return list.length ? list : null;
  }

  var result = null;
  for (var key in attrs) {
    result = result || {};

    var inTokens = attrs[key];
    var outParts = [];
    for (var j = 0; j < inTokens.length; j++) {
      var part = inTokens[j];
      switch (part.t) {
      case 'CharRef':
        outParts.push(convertCharRef(part));
        break;
      case 'TemplateTag':
        outParts.push(part.v);
        break;
      case 'Chars':
        pushOrAppendString(outParts, part.v);
        break;
      }
    }

    var outValue;
    if (inTokens.length === 0)
      outValue = '';
    else if (outParts.length === 1)
      outValue = outParts[0];
    else
      outValue = outParts;

    result[HTMLTools.properCaseAttributeName(key)] = outValue;
  }

  return result;
};