part 2 of the comment pass

2026-04-09 03:00:20 -04:00 · 2010-03-12 15:03:05 -05:00
parent 7e2217ba86
commit e0623094fa
1 changed files with 201 additions and 51 deletions
--- a/lib/less/parser.js
+++ b/lib/less/parser.js
@@ -5,40 +5,40 @@ if (typeof(window) === 'undefined') {
 //
 // less.js - parser
 //
-//      A relatively straight-forward recursive-descent parser.
-//      There is no tokenization/lexing stage, the input is parsed
-//      in one sweep.
+//    A relatively straight-forward recursive-descent parser.
+//    There is no tokenization/lexing stage, the input is parsed
+//    in one sweep.
 //
-//      To make the parser fast enough to run in the browser, several
-//      optimization had to be made:
+//    To make the parser fast enough to run in the browser, several
+//    optimizations had to be made:
 //
-//          - Instead of the more commonly used technique of slicing the
-//          input string on every match, we use global regexps (/g),
-//          and move the `lastIndex` pointer on match, foregoing `slice()`
-//          completely. This gives us a 3x speed-up.
+//    - Instead of the more commonly used technique of slicing the
+//      input string on every match, we use global regexps (/g),
+//      and move the `lastIndex` pointer on match, foregoing `slice()`
+//      completely. This gives us a 3x speed-up.
 //
-//          - Matching on a huge input is often cause of slowdowns,
-//          especially with the /g flag. The solution to that is to
-//          chunkify the input: we split it by /\n\n/, just to be on
-//          the safe side. The chunks are stored in the `chunks` var,
-//          `j` holds the current chunk index, and `current` holds
-//          the index of the current chunk in relation to `input`.
-//          This gives us an almost 4x speed-up.
+//    - Matching on a huge input is often a cause of slowdowns,
+//      especially with the /g flag. The solution to that is to
+//      chunkify the input: we split it by /\n\n/, just to be on
+//      the safe side. The chunks are stored in the `chunks` var,
+//      `j` holds the current chunk index, and `current` holds
+//      the index of the current chunk in relation to `input`.
+//      This gives us an almost 4x speed-up.
 //
-//          - In many cases, we don't need to match individual tokens;
-//          for example, if a value doesn't hold any variables, operations
-//          or dynamic references, the parser can effectively 'skip' it,
-//          treating it as a literal.
-//          An example would be '1px solid #000' - which evaluates to itself,
-//          we don't need to know what the individual components are.
-//          The drawback, of course is that you don't get the benefits of
-//          syntax-checking on the CSS. This gives us a 50% speed-up in the parser,
-//          and a smaller speed-up in the code-gen.
+//    - In many cases, we don't need to match individual tokens;
+//      for example, if a value doesn't hold any variables, operations
+//      or dynamic references, the parser can effectively 'skip' it,
+//      treating it as a literal.
+//      An example would be '1px solid #000' - which evaluates to itself,
+//      we don't need to know what the individual components are.
+//      The drawback, of course is that you don't get the benefits of
+//      syntax-checking on the CSS. This gives us a 50% speed-up in the parser,
+//      and a smaller speed-up in the code-gen.
 //
 //
-//      Token matching is done with the `$` function, which either takes
-//      a terminal string or regexp, or a non-terminal function to call.
-//      It also takes care of moving all the indices forwards.
+//    Token matching is done with the `$` function, which either takes
+//    a terminal string or regexp, or a non-terminal function to call.
+//    It also takes care of moving all the indices forwards.
 //
 //
 var input,       // LeSS input string
@@ -48,21 +48,6 @@ var input,       // LeSS input string
    current,     // index of current chunk, in `input`
    inputLength;

-function peek(tok) {
-    var match;
-
-    if (typeof(tok) === 'string') {
-        return input[i] === tok;
-    } else {
-        tok.lastIndex = i;
-
-        if ((match = tok.exec(input)) &&
-           (tok.lastIndex - match[0].length === i)) {
-            return match;
-        }
-    }
-}
-
 //
 // Parse from a token, regexp or string, and move forward if match
 //
@@ -124,8 +109,34 @@ function $(tok) {
    }
 }

+// Same as $(), but don't change the state of the parser,
+// just return the match.
+function peek(tok) {
+    var match;
+
+    if (typeof(tok) === 'string') {
+        return input[i] === tok;
+    } else {
+        tok.lastIndex = i;
+
+        if ((match = tok.exec(input)) &&
+           (tok.lastIndex - match[0].length === i)) {
+            return match;
+        }
+    }
+}
+
+//
+// The Parser
+//
 less.parser = {
+
+    // The optimization level dictates the thoroughness of the parser,
+    // the lower the number, the fewer nodes it will create in the tree.
+    // This could matter for debugging, or if you want to access
+    // the individual nodes in the tree.
    optimization: 2,
+
    //
    // Parse an input string into an abstract syntax tree
    //
@@ -161,13 +172,21 @@ less.parser = {
            chunks = [input];
        }

-        // Start with the primary rule
+        // Start with the primary rule.
+        // The whole syntax tree is held under a Ruleset node,
+        // with the `root` property set to true, so no `{}` are
+        // output.
        root = new(tree.Ruleset)([], $(this.parsers.primary));
        root.root = true;

-        // If `i` is smaller than the input length - 1,
+        // If `i` is smaller than `input.length - 1`,
        // it means the parser wasn't able to parse the whole
        // string, so we've got a parsing error.
+        //
+        // We try to extract a \n delimited string,
+        // showing the line where the parse error occurred.
+        // We split it up into two parts (the part which parsed,
+        // and the part which didn't), so we can color them differently.
        if (i < input.length - 1) {
            start = (function () {
                for (var n = i; n > 0; n--) {
@@ -185,7 +204,52 @@ less.parser = {
        }
        return root;
    },
+
+    //
+    // Herein, the parsing rules/functions
+    //
+    // The basic structure of the syntax tree generated is as follows:
+    //
+    //   Ruleset ->  Rule -> Value -> Expression -> Entity
+    //
+    // Here's some LESS code:
+    //
+    //    .class {
+    //      color: #fff;
+    //      border: 1px solid #000;
+    //      width: @w + 4px;
+    //      > .child {...}
+    //    }
+    //
+    // And here's what the parse tree might look like:
+    //
+    //     Ruleset (Selector '.class', [
+    //         Rule ("color",  Value ([Expression [Color #fff]]))
+    //         Rule ("border", Value ([Expression [Dimension 1px][Keyword "solid"][Color #000]]))
+    //         Rule ("width",  Value ([Expression [Operation "+" [Variable "@w"][Dimension 4px]]]))
+    //         Ruleset (Selector [Element '>', '.child'], [...])
+    //     ])
+    //
+    //  In general, most rules will try to parse a token with the `$()` function, and if the return
+    //  value is truthy, will return a new node, of the relevant type. Sometimes, we need to check
+    //  first, before parsing, that's when we use `peek()`.
+    //
    parsers: {
+        //
+        // The `primary` rule is the *entry* and *exit* point of the parser.
+        // The rules here can appear at any level of the parse tree.
+        //
+        // The recursive nature of the grammar is an interplay between the `block`
+        // rule, which represents `{ ... }`, the `ruleset` rule, and this `primary` rule,
+        // as represented by this simplified grammar:
+        //
+        //     primary  →  (ruleset | rule)+
+        //     ruleset  →  selector+ block
+        //     block    →  '{' primary '}'
+        //
+        // Only at one point is the primary rule not called from the
+        // block rule: at the root level.
+        //
        primary: function () {
            var node, root = [];

@@ -196,6 +260,10 @@ less.parser = {
            }
            return root;
        },
+
+        // We create a Comment node for CSS comments `/* */`,
+        // but keep the LeSS comments `//` silent, by just skipping
+        // over them.
        comment: function () {
            var comment;

@@ -207,7 +275,16 @@ less.parser = {
                return $(/\/\/.*/g);
            }
        },
+
+        //
+        // Entities are tokens which can be found inside an Expression
+        //
        entities: {
+            //
+            // A string, which supports escaping " and '
+            //
+            //     "milky way" 'he\'s the one!'
+            //
            quoted: function () {
                var str;
                if (input[i] !== '"' && input[i] !== "'") return;
@@ -216,10 +293,27 @@ less.parser = {
                    return new(tree.Quoted)(str[0], str[1] || str[2]);
                }
            },
+
+            //
+            // A catch-all word, such as:
+            //
+            //     black border-collapse
+            //
            keyword: function () {
                var k;
                if (k = $(/[A-Za-z-]+/g)) { return new(tree.Keyword)(k) }
            },
+
+            //
+            // A function call
+            //
+            //     rgb(255, 0, 255)
+            //
+            // We also try to catch IE's `alpha()`, but let the `alpha` parser
+            // deal with the details.
+            //
+            // The arguments are parsed with the `entities.arguments` parser.
+            //
            call: function () {
                var name, args;

@@ -242,13 +336,19 @@ less.parser = {
                }
                return args;
            },
-            accessor: function () {
-            },
            literal: function () {
                return $(this.entities.dimension) ||
                       $(this.entities.color) ||
                       $(this.entities.quoted);
            },
+
+            //
+            // Parse url() tokens
+            //
+            // We use a specific rule for urls, because they don't really behave like
+            // standard function calls. The difference is that the argument doesn't have
+            // to be enclosed within a string, so it can't be parsed as an Expression.
+            //
            url: function () {
                var value;

@@ -258,6 +358,15 @@ less.parser = {

                return new(tree.URL)(value);
            },
+
+            //
+            // A Variable entity, such as `@fink`, in
+            //
+            //     width: @fink + 2px
+            //
+            // We use a different parser for variable definitions,
+            // see `parsers.variable`.
+            //
            variable: function () {
                var name;

@@ -265,6 +374,14 @@ less.parser = {
                    return new(tree.Variable)(name);
                }
            },
+
+            //
+            // A Hexadecimal color
+            //
+            //     #4F3C2F
+            //
+            // `rgb` and `hsl` colors are parsed through the `entities.call` parser.
+            //
            color: function () {
                var rgb;

@@ -272,6 +389,12 @@ less.parser = {
                    return new(tree.Color)(rgb[1]);
                }
            },
+
+            //
+            // A Dimension, that is, a number and a unit
+            //
+            //     0.5em 95%
+            //
            dimension: function () {
                var value, c = input.charCodeAt(i);
                if ((c > 57 || c < 45) || c === 47) return;
@@ -281,11 +404,25 @@ less.parser = {
                }
            }
        },
+
+        //
+        // The variable part of a variable definition. Used in the `rule` parser
+        //
+        //     @fink:
+        //
        variable: function () {
            var name;

            if (input[i] === '@' && (name = $(/(@[a-zA-Z0-9_-]+)\s*:/g))) { return name[1] }
        },
+
+        //
+        // A font size/line-height shorthand
+        //
+        //     small/12px
+        //
+        // We need to peek first, or we'll match on keywords and dimensions
+        //
        shorthand: function () {
            var a, b;

@@ -339,12 +476,21 @@ less.parser = {
                }
            }
        },
-        entity: function () {
-            var e;

-            if (e = $(this.entities.literal) || $(this.entities.variable) || $(this.entities.url) ||
-                    $(this.entities.call)    || $(this.entities.keyword)) { return e }
+        //
+        // Entities are the smallest recognized token,
+        // and can be found inside a rule's value.
+        //
+        entity: function () {
+            return $(this.entities.literal) || $(this.entities.variable) || $(this.entities.url) ||
+                   $(this.entities.call)    || $(this.entities.keyword);
        },
+
+        //
+        // A Rule terminator. Note that we use `peek()` to check for '}',
+        // because the `block` rule will be expecting it, but we still need to make sure
+        // it's there, if ';' was omitted.
+        //
        end: function () {
            return $(';') || peek('}');
        },
@@ -406,6 +552,10 @@ less.parser = {
                return content;
            }
        },
+
+        //
+        // div, .class, body > p {...}
+        //
        ruleset: function () {
            var selectors = [], s, rules, match;