diff --git a/lib/less/parser.js b/lib/less/parser.js index afe067f6..08acffac 100644 --- a/lib/less/parser.js +++ b/lib/less/parser.js @@ -1,5 +1,43 @@ var less = exports || {}; - +// +// less.js - parser +// +// A relatively straight-forward recursive-descent parser. +// There is no tokenization/lexing stage, the input is parsed +// in one sweep. +// +// To make the parser fast enough to run in the browser, several +// optimizations had to be made: +// +// - Instead of the more commonly used technique of slicing the +// input string on every match, we use global regexps (/g), +// and move the `lastIndex` pointer on match, foregoing `slice()` +// completely. This gives us a 3x speed-up. +// +// - Matching on a huge input is often a cause of slowdowns, +// especially with the /g flag. The solution to that is to +// chunkify the input: we split it by /\n\n/, just to be on +// the safe side. The chunks are stored in the `chunks` var, +// `j` holds the current chunk index, and `current` holds +// the index of the current chunk in relation to `input`. +// This gives us an almost 4x speed-up. +// +// - In many cases, we don't need to match individual tokens; +// for example, if a value doesn't hold any variables, operations +// or dynamic references, the parser can effectively 'skip' it, +// treating it as a literal. +// An example would be '1px solid #000' - which evaluates to itself, +// we don't need to know what the individual components are. +// The drawback, of course, is that you don't get the benefits of +// syntax-checking on the CSS. This gives us a 50% speed-up in the parser, +// and a smaller speed-up in the code-gen. +// +// +// Token matching is done with the `$` function, which either takes +// a terminal string or regexp, or a non-terminal function to call. +// It also takes care of moving all the indices forwards. 
+// +// var input, // LeSS input string i = 0, // current index in `input` j = 0, // current chunk @@ -18,31 +56,52 @@ function peek(regex) { } // -// Parse from a token or regexp, and move forward if match +// Parse from a token, regexp or string, and move forward if match // function $(tok, root) { var match, args, length, c, index; - + + // // Non-terminal + // if (tok instanceof Function) { return tok.call(less.parser.parsers, root); + // // Terminal + // + // Either match a single character in the input, + // or match a regexp in the current chunk (chunks[j]). + // } else if (typeof(tok) === 'string') { match = input[i] === tok ? tok : null; length = 1; + + // 1. We move to the next chunk, if necessary. + // 2. Set the `lastIndex` to be relative + // to the current chunk, and try to match in it. + // 3. Make sure we matched at `index`. Because we use + // the /g flag, the match could be anywhere in the + // chunk. We have to make sure it's at our previous + // index, which we stored in [2]. + // } else { - if (i > current + chunks[j].length) { + if (i > current + chunks[j].length) { // 1. current += chunks[j++].length; } - tok.lastIndex = index = i - current; - match = tok.exec(chunks[j]); - + tok.lastIndex = index = i - current; // 2. + match = tok.exec(chunks[j]); + if (match) { length = match[0].length; - if (tok.lastIndex - length !== index) { return } + if (tok.lastIndex - length !== index) { return } // 3. } } + // The match is confirmed, add the match length to `i`, + // and consume any extra white-space characters (' ' || '\n') + // which come after that. The reason for this is that LeSS's + // grammar is mostly white-space insensitive. + // if (match) { i += length; @@ -56,6 +115,9 @@ function $(tok, root) { } less.parser = { + // + // Parse an input string into an abstract syntax tree + // parse: function (str) { var tree; input = str;