diff --git a/packages/html/charref.js b/packages/html/charref.js
index d99d79a526..e5ac524844 100644
--- a/packages/html/charref.js
+++ b/packages/html/charref.js
@@ -2341,7 +2341,7 @@ var isLegalCodepoint = function (cp) {
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
//
// Matches a character reference if possible, including the initial `&`.
-// Fails fatally in error cases, like a disallowed codepoint
+// Fails fatally in error cases (assuming an initial `&` is matched), like a disallowed codepoint
// number or a bad named character reference.
//
// `inAttribute` is truthy if we are in an attribute value.
diff --git a/packages/html/exports.js b/packages/html/exports.js
index 6cb5d1db0d..eb14b13895 100644
--- a/packages/html/exports.js
+++ b/packages/html/exports.js
@@ -2,6 +2,7 @@ HTML = {
_$: {
// stuff exposed for testing
Scanner: Scanner,
- getCharacterReference: getCharacterReference
+ getCharacterReference: getCharacterReference,
+ getComment: getComment
}
};
diff --git a/packages/html/package.js b/packages/html/package.js
index d7d3f1ecf0..ee5c3785dd 100644
--- a/packages/html/package.js
+++ b/packages/html/package.js
@@ -6,12 +6,12 @@ Package.describe({
Package.on_use(function (api) {
api.export('HTML');
- api.add_files(['scanner.js', 'charref.js', 'exports.js']);
+ api.add_files(['scanner.js', 'charref.js', 'tokenize.js', 'exports.js']);
});
Package.on_test(function (api) {
api.use('tinytest');
api.use('html');
api.use('underscore');
- api.add_files('charref_tests.js');
+ api.add_files(['charref_tests.js', 'tokenize_tests.js']);
});
diff --git a/packages/html/tokenize.js b/packages/html/tokenize.js
new file mode 100644
index 0000000000..f3679ae183
--- /dev/null
+++ b/packages/html/tokenize.js
@@ -0,0 +1,35 @@
+// Tries to consume an HTML comment at the scanner's current position.
+//
+// Returns `null`, leaving the scanner untouched, if the remaining input
+// does not begin with `<!--`.  Otherwise the comment must be well-formed:
+// on success the scanner is advanced past the closing `-->` and a token of
+// the form `{ t: 'Comment', v: <text between the delimiters> }` is returned.
+//
+// Calls `scanner.fatal` (relied on to throw) for a malformed comment: no
+// closing `-->`, text ending in `-`, or text containing `--` or NULL (U+0000).
+getComment = function (scanner) {
+  if (scanner.rest().slice(0, 4) !== '<!--')
+    return null;
+  scanner.pos += 4;
+
+  // Everything up to the first `-->` is the comment text; the checks below
+  // only reject text that a valid comment may not contain.
+  var rest = scanner.rest();
+  var closePos = rest.indexOf('-->');
+  if (closePos < 0)
+    scanner.fatal("Unclosed HTML comment");
+
+  var commentContents = rest.slice(0, closePos);
+  if (commentContents.slice(-1) === '-')
+    scanner.fatal("HTML comment must end at first `--`");
+  if (commentContents.indexOf("--") >= 0)
+    scanner.fatal("HTML comment cannot contain `--` anywhere");
+  if (commentContents.indexOf('\u0000') >= 0)
+    scanner.fatal("HTML comment cannot contain NULL");
+
+  // Skip the comment text plus the three-character `-->`.
+  scanner.pos += closePos + 3;
+
+  return { t: 'Comment',
+           v: commentContents };
+};
diff --git a/packages/html/tokenize_tests.js b/packages/html/tokenize_tests.js
new file mode 100644
index 0000000000..29553b6bc4
--- /dev/null
+++ b/packages/html/tokenize_tests.js
@@ -0,0 +1,66 @@
+var Scanner = HTML._$.Scanner;
+var getComment = HTML._$.getComment;
+
+Tinytest.add("html - comments", function (test) {
+  // `input` must parse as exactly one comment whose text is `content`; the
+  // scanner then sits past `<!--` (4 chars), the text, and `-->` (3 chars).
+  var succeed = function (input, content) {
+    var scanner = new Scanner(input);
+    var result = getComment(scanner);
+    test.isTrue(result);
+    test.equal(scanner.pos, content.length + 7);
+    test.equal(result, {
+      t: 'Comment',
+      v: content
+    });
+  };
+
+  // `input` must not be treated as a comment: falsy result, scanner unmoved.
+  var ignore = function (input) {
+    var scanner = new Scanner(input);
+    var result = getComment(scanner);
+    test.isFalse(result);
+    test.equal(scanner.pos, 0);
+  };
+
+  // `input` must make getComment throw an error whose message includes
+  // `messageContains`.
+  var fatal = function (input, messageContains) {
+    var scanner = new Scanner(input);
+    var error;
+    try {
+      getComment(scanner);
+    } catch (e) {
+      error = e;
+    }
+    test.isTrue(error);
+    if (error)
+      test.isTrue(messageContains && error.message.indexOf(messageContains) >= 0, error.message);
+  };
+
+  ignore("");
+  ignore("<!DOCTYPE>");
+  ignore("&lt;!--");
+  ignore("<");
+
+  fatal('<!--', 'Unclosed');
+  fatal('<!---', 'Unclosed');
+  fatal('<!----', 'Unclosed');
+  fatal('<!-- -', 'Unclosed');
+  fatal('<!-- --', 'Unclosed');
+  fatal('<!-- -- abcd', 'Unclosed');
+  fatal('<!-- ->', 'Unclosed');
+  // The `-->` here overlaps the opening `<!--`, so the comment never closes.
+  fatal('<!-->', 'Unclosed');
+  fatal('<!--->', 'Unclosed');
+  fatal('<!-- a--b -->', 'cannot contain');
+  fatal('<!--x--->', 'must end at first');
+
+  fatal('<!-- a\u0000b -->', 'cannot contain');
+  fatal('<!--\u0000 x-->', 'cannot contain');
+
+  succeed('<!---->', '');
+  succeed('<!---x-->', '-x');
+  succeed('<!--x-->', 'x');
+  succeed('<!-- hello - - world -->', ' hello - - world ');
+});