start of doctype parsing

This commit is contained in:
David Greenspan
2013-10-20 16:39:52 -07:00
parent dfb9c6d55d
commit aed3036f0d
3 changed files with 47 additions and 2 deletions

View File

@@ -2367,7 +2367,7 @@ getCharacterReference = function (scanner, inAttribute, allowedChar) {
if (! refNumber)
scanner.fatal("Invalid numerical character reference starting with &#");
var codepoint;
if (refNumber.charAt(0).toLowerCase() === 'x') {
if (refNumber.charAt(0) === 'x' || refNumber.charAt(0) === 'X') {
// hex
var hex = refNumber.slice(1, -1);
while (hex.charAt(0) === '0')

View File

@@ -3,6 +3,7 @@ HTML = {
// stuff exposed for testing
Scanner: Scanner,
getCharacterReference: getCharacterReference,
getComment: getComment
getComment: getComment,
getDoctype: getDoctype
}
};

View File

@@ -1,5 +1,11 @@
// Matches a string that begins with one of four characters: TAB (U+0009),
// LF (U+000A), FF (U+000C) or SPACE (U+0020). The pattern is anchored with
// `^` and has no quantifier, so only the first character of the tested
// string is examined. CR (U+000D) is not in the set.
var HTML_SPACE = /^[\u0009\u000A\u000C\u0020]/;
// Lowercase only the ASCII letters A-Z in `str`; every other character,
// including non-ASCII uppercase letters, is returned unchanged.
var asciiLowerCase = function (str) {
  // Distance in code units between an uppercase ASCII letter and its
  // lowercase counterpart.
  var CASE_OFFSET = 32;

  var lowerOne = function (upper) {
    var code = upper.charCodeAt(0) + CASE_OFFSET;
    return String.fromCharCode(code);
  };

  return str.replace(/[A-Z]/g, lowerOne);
};
getComment = function (scanner) {
if (scanner.rest().slice(0, 4) !== '<!--')
@@ -30,3 +36,41 @@ getComment = function (scanner) {
return { t: 'Comment',
v: commentContents };
};
// Advance `scanner.pos` past every consecutive character accepted by
// HTML_SPACE, stopping at the first character that is not. Does nothing
// if the scanner is not currently looking at such a character.
var skipSpaces = function (scanner) {
  for (;;) {
    var upcoming = scanner.peek();
    if (! HTML_SPACE.test(upcoming))
      return;
    scanner.pos++;
  }
};
// Insist that the scanner is looking at a character accepted by
// HTML_SPACE, reporting "Expected whitespace" through `scanner.fatal`
// when it is not, and then skip over the whole run of such characters.
var requireSpaces = function (scanner) {
  var upcoming = scanner.peek();
  var sawSpace = HTML_SPACE.test(upcoming);
  if (sawSpace === false) {
    scanner.fatal("Expected whitespace");
  }
  skipSpaces(scanner);
};
// Parse the beginning of a DOCTYPE declaration at the scanner's current
// position.
//
// Returns `null`, without moving the scanner, when the remaining input
// does not start with the exact text `<!DOCTYPE`. Otherwise consumes
// `<!DOCTYPE`, the required whitespace after it, and the doctype name,
// and returns `{ t: 'Doctype', name: <name> }` with the name
// ASCII-lowercased. On return the scanner is positioned at the first
// character after the name (a whitespace character or `>`); that
// character is not consumed.
//
// Problems are reported through `scanner.fatal`: missing whitespace after
// `<!DOCTYPE`, an empty name, a U+0000 character in the name, or a falsy
// `peek()` result inside the name (presumably end of input).
getDoctype = function (scanner) {
  if (scanner.rest().slice(0, 9) !== '<!DOCTYPE')
    return null;
  scanner.pos += 9;

  requireSpaces(scanner);

  var ch = scanner.peek();
  if ((! ch) || (ch === '>') || (ch === '\u0000'))
    scanner.fatal('Malformed DOCTYPE');
  var name = ch;
  // `peek` only looks at the current character; step past the first
  // character of the name so the loop below starts at the second one.
  scanner.pos++;

  while (true) {
    ch = scanner.peek();
    // The name ends at whitespace or at the closing `>`.
    if (HTML_SPACE.test(ch) || ch === '>')
      break;
    if ((! ch) || (ch === '\u0000'))
      scanner.fatal('Malformed DOCTYPE');
    name += ch;
    // Consume the character just appended; without this the loop would
    // re-read the same character forever.
    scanner.pos++;
  }
  name = asciiLowerCase(name);

  if (ch !== '>') {
    // XXX Not implemented yet: whatever follows the name before the
    // closing `>` is neither parsed nor consumed.
  }

  return { t: 'Doctype',
           name: name };
};