From dfb9c6d55dadc64640742ffaa991874f6ec421d8 Mon Sep 17 00:00:00 2001
From: David Greenspan
Date: Sun, 20 Oct 2013 16:02:03 -0700
Subject: [PATCH] parse HTML comments

---
 packages/html/charref.js        |  2 +-
 packages/html/exports.js        |  3 +-
 packages/html/package.js        |  4 +--
 packages/html/tokenize.js       | 32 ++++++++++++++++++
 packages/html/tokenize_tests.js | 60 +++++++++++++++++++++++++++++++++
 5 files changed, 97 insertions(+), 4 deletions(-)
 create mode 100644 packages/html/tokenize.js
 create mode 100644 packages/html/tokenize_tests.js

diff --git a/packages/html/charref.js b/packages/html/charref.js
index d99d79a526..e5ac524844 100644
--- a/packages/html/charref.js
+++ b/packages/html/charref.js
@@ -2341,7 +2341,7 @@ var isLegalCodepoint = function (cp) {
 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
 //
 // Matches a character reference if possible, including the initial `&`.
-// Fails fatally in error cases, like a disallowed codepoint
+// Fails fatally in error cases (assuming an initial `&` is matched), like a disallowed codepoint
 // number or a bad named character reference.
 //
 // `inAttribute` is truthy if we are in an attribute value.
diff --git a/packages/html/exports.js b/packages/html/exports.js
index 6cb5d1db0d..eb14b13895 100644
--- a/packages/html/exports.js
+++ b/packages/html/exports.js
@@ -2,6 +2,7 @@ HTML = {
   _$: {
     // stuff exposed for testing
     Scanner: Scanner,
-    getCharacterReference: getCharacterReference
+    getCharacterReference: getCharacterReference,
+    getComment: getComment
   }
 };
diff --git a/packages/html/package.js b/packages/html/package.js
index d7d3f1ecf0..ee5c3785dd 100644
--- a/packages/html/package.js
+++ b/packages/html/package.js
@@ -6,12 +6,12 @@ Package.describe({
 
 Package.on_use(function (api) {
   api.export('HTML');
-  api.add_files(['scanner.js', 'charref.js', 'exports.js']);
+  api.add_files(['scanner.js', 'charref.js', 'tokenize.js', 'exports.js']);
 });
 
 Package.on_test(function (api) {
   api.use('tinytest');
   api.use('html');
   api.use('underscore');
-  api.add_files('charref_tests.js');
+  api.add_files(['charref_tests.js', 'tokenize_tests.js']);
 });
diff --git a/packages/html/tokenize.js b/packages/html/tokenize.js
new file mode 100644
index 0000000000..f3679ae183
--- /dev/null
+++ b/packages/html/tokenize.js
@@ -0,0 +1,32 @@
+
+
+
+getComment = function (scanner) {
+  if (scanner.rest().slice(0, 4) !== '<!--')
+    return null;
+  scanner.pos += 4;
+
+  // Valid comments are easy to parse; they end at the first `--`!
+  // Our main job is throwing errors.
+
+  var rest = scanner.rest();
+  if (rest.charAt(0) === '>' || rest.slice(0, 2) === '->')
+    scanner.fatal("HTML comment can't start with > or ->");
+
+  var closePos = rest.indexOf('-->');
+  if (closePos < 0)
+    scanner.fatal("Unclosed HTML comment");
+
+  var commentContents = rest.slice(0, closePos);
+  if (commentContents.slice(-1) === '-')
+    scanner.fatal("HTML comment must end at first `--`");
+  if (commentContents.indexOf("--") >= 0)
+    scanner.fatal("HTML comment cannot contain `--` anywhere");
+  if (commentContents.indexOf('\u0000') >= 0)
+    scanner.fatal("HTML comment cannot contain NULL");
+
+  scanner.pos += closePos + 3;
+
+  return { t: 'Comment',
+           v: commentContents };
+};
diff --git a/packages/html/tokenize_tests.js b/packages/html/tokenize_tests.js
new file mode 100644
index 0000000000..29553b6bc4
--- /dev/null
+++ b/packages/html/tokenize_tests.js
@@ -0,0 +1,60 @@
+var Scanner = HTML._$.Scanner;
+var getComment = HTML._$.getComment;
+
+Tinytest.add("html - comments", function (test) {
+  var succeed = function (input, content) {
+    var scanner = new Scanner(input);
+    var result = getComment(scanner);
+    test.isTrue(result);
+    test.equal(scanner.pos, content.length + 7);
+    test.equal(result, {
+      t: 'Comment',
+      v: content
+    });
+  };
+
+  var ignore = function (input) {
+    var scanner = new Scanner(input);
+    var result = getComment(scanner);;
+    test.isFalse(result);
+    test.equal(scanner.pos, 0);
+  };
+
+  var fatal = function (input, messageContains) {
+    var scanner = new Scanner(input);
+    var error;
+    try {
+      getComment(scanner);
+    } catch (e) {
+      error = e;
+    }
+    test.isTrue(error);
+    if (error)
+      test.isTrue(messageContains && error.message.indexOf(messageContains) >= 0, error.message);
+  };
+
+  ignore("<!DOCTYPE>");
+  ignore("<!-a");
+  ignore("<--");
+  ignore("<!");
+  ignore("abc");
+  ignore("<a");
+
+  fatal('<!--', 'Unclosed');
+  fatal('<!---', 'Unclosed');
+  fatal('<!----', 'Unclosed');
+  fatal('<!-- -', 'Unclosed');
+  fatal('<!-- --', 'Unclosed');
+  fatal('<!-- -- abcd', 'Unclosed');
+  fatal('<!-- ->', 'Unclosed');
+  fatal('<!-- a--b -->', 'cannot contain');
+  fatal('<!--x--->', 'must end at first');
+
+  fatal('<!-- a\u0000b -->', 'cannot contain');
+  fatal('<!--\u0000 x-->', 'cannot contain');
+
+  succeed('<!---->', '');
+  succeed('<!---x-->', '-x');
+  succeed('<!--x-->', 'x');
+  succeed('<!-- hello - - world -->', ' hello - - world ');
+});