From aed3036f0d6c9c767bcbcd8d4a74d8cc1bc37708 Mon Sep 17 00:00:00 2001 From: David Greenspan Date: Sun, 20 Oct 2013 16:39:52 -0700 Subject: [PATCH] start of doctype parsing --- packages/html/charref.js | 2 +- packages/html/exports.js | 3 ++- packages/html/tokenize.js | 44 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/packages/html/charref.js b/packages/html/charref.js index e5ac524844..78fefd9554 100644 --- a/packages/html/charref.js +++ b/packages/html/charref.js @@ -2367,7 +2367,7 @@ getCharacterReference = function (scanner, inAttribute, allowedChar) { if (! refNumber) scanner.fatal("Invalid numerical character reference starting with &#"); var codepoint; - if (refNumber.charAt(0).toLowerCase() === 'x') { + if (refNumber.charAt(0) === 'x' || refNumber.charAt(0) === 'X') { // hex var hex = refNumber.slice(1, -1); while (hex.charAt(0) === '0') diff --git a/packages/html/exports.js b/packages/html/exports.js index eb14b13895..5acc98a268 100644 --- a/packages/html/exports.js +++ b/packages/html/exports.js @@ -3,6 +3,7 @@ HTML = { // stuff exposed for testing Scanner: Scanner, getCharacterReference: getCharacterReference, - getComment: getComment + getComment: getComment, + getDoctype: getDoctype } }; diff --git a/packages/html/tokenize.js b/packages/html/tokenize.js index f3679ae183..5f81ea4998 100644 --- a/packages/html/tokenize.js +++ b/packages/html/tokenize.js @@ -1,5 +1,11 @@ +var HTML_SPACE = /^[\u0009\u000A\u000C\u0020]/; +var asciiLowerCase = function (str) { + return str.replace(/[A-Z]/g, function (c) { + return String.fromCharCode(c.charCodeAt(0) + 32); + }); +}; getComment = function (scanner) { if (scanner.rest().slice(0, 4) !== '