From cf78cefc8b71cc095887690e84780ee440f7d2f7 Mon Sep 17 00:00:00 2001 From: David Glasser Date: Thu, 2 Jan 2014 17:34:04 -0800 Subject: [PATCH] Refactor selector-vs-modifier/projection code Now they are methods on a compiled Matcher rather than doing their own operator parsing from scratch. This means less work is happening for each oplog entry, and it also localizes knowledge about selector parsing. --- packages/minimongo/minimongo_server_tests.js | 32 +++-- packages/minimongo/selector.js | 124 +++++++++++++----- packages/minimongo/selector_modifier.js | 54 +++----- packages/minimongo/selector_projection.js | 13 +- .../mongo-livedata/oplog_observe_driver.js | 6 +- 5 files changed, 139 insertions(+), 90 deletions(-) diff --git a/packages/minimongo/minimongo_server_tests.js b/packages/minimongo/minimongo_server_tests.js index afd0487c87..90e797c0aa 100644 --- a/packages/minimongo/minimongo_server_tests.js +++ b/packages/minimongo/minimongo_server_tests.js @@ -1,6 +1,7 @@ Tinytest.add("minimongo - modifier affects selector", function (test) { function testSelectorPaths (sel, paths, desc) { - test.isTrue(_.isEqual(MinimongoTest.getSelectorPaths(sel), paths), desc); + var matcher = new Minimongo.Matcher(sel); + test.equal(matcher._getPaths(), paths, desc); } testSelectorPaths({ @@ -49,18 +50,24 @@ Tinytest.add("minimongo - modifier affects selector", function (test) { } }, ['a', 'b.c'], "literal object"); + // Note that a and b do NOT end up in the path list, but x and y both do. + testSelectorPaths({ + $or: [ + {x: {$elemMatch: {a: 5}}}, + {y: {$elemMatch: {b: 7}}} + ] + }, ['x', 'y'], "$or and elemMatch"); + function testSelectorAffectedByModifier (sel, mod, yes, desc) { - if (yes) - test.isTrue(LocalCollection._isSelectorAffectedByModifier(sel, mod, desc)); - else - test.isFalse(LocalCollection._isSelectorAffectedByModifier(sel, mod, desc)); + var matcher = new Minimongo.Matcher(sel); + test.equal(matcher.affectedByModifier(mod), yes, desc); } function affected(sel, mod, desc) { - testSelectorAffectedByModifier(sel, mod, 1, desc); + testSelectorAffectedByModifier(sel, mod, true, desc); } function notAffected(sel, mod, desc) { - testSelectorAffectedByModifier(sel, mod, 0, desc); + testSelectorAffectedByModifier(sel, mod, false, desc); } notAffected({ foo: 0 }, { $set: { bar: 1 } }, "simplest"); @@ -89,7 +96,8 @@ Tinytest.add("minimongo - modifier affects selector", function (test) { Tinytest.add("minimongo - selector and projection combination", function (test) { function testSelProjectionComb (sel, proj, expected, desc) { - test.equal(LocalCollection._combineSelectorAndProjection(sel, proj), expected, desc); + var matcher = new Minimongo.Matcher(sel); + test.equal(matcher.combineIntoProjection(proj), expected, desc); } // Test with inclusive projection @@ -339,11 +347,15 @@ Tinytest.add("minimongo - selector and projection combination", function (test) var test = null; // set this global in the beginning of every test // T - should return true // F - should return false + var oneTest = function (sel, mod, expected, desc) { + var matcher = new Minimongo.Matcher(sel); + test.equal(matcher.canBecomeTrueByModifier(mod), expected, desc); + }; function T (sel, mod, desc) { - test.isTrue(LocalCollection._canSelectorBecomeTrueByModifier(sel, mod), desc); + oneTest(sel, mod, true, desc); } function F (sel, mod, desc) { - test.isFalse(LocalCollection._canSelectorBecomeTrueByModifier(sel, mod), desc); + oneTest(sel, mod, false, desc); } Tinytest.add("minimongo - can selector become true by modifier - literals (structured tests)", function (t) { diff --git a/packages/minimongo/selector.js b/packages/minimongo/selector.js index 9004e83c7b..f9631b7b14 100644 --- a/packages/minimongo/selector.js +++ b/packages/minimongo/selector.js @@ -20,7 +20,17 @@ // if (matcher.documentMatches({a: 7})) ... Minimongo.Matcher = function (selector) { var self = this; - self._isGeoQuery = false; // can get overwritten by compilation + // A set (object mapping string -> *) of all of the document paths looked + // at by the selector. Also includes the empty string if it may look at any + // path (eg, $where). + self._paths = {}; + // Set to true if compilation finds a $near. + self._isGeoQuery = false; + // Set to false if compilation finds anything other than a simple equality on + // some fields. + self._isEquality = true; + // A clone of the original selector. Used by canBecomeTrueByModifier. + self._selector = null; self._docMatcher = self._compileSelector(selector); }; @@ -31,19 +41,28 @@ _.extend(Minimongo.Matcher.prototype, { isGeoQuery: function () { return this._isGeoQuery; }, + isEquality: function () { + return this._isEquality; + }, // Given a selector, return a function that takes one argument, a // document. It returns a result object. _compileSelector: function (selector) { var self = this; // you can pass a literal function instead of a selector - if (selector instanceof Function) + if (selector instanceof Function) { + self._isEquality = false; + self._selector = selector; + self._recordPathUsed(''); return function (doc) { return {result: !!selector.call(doc)}; }; + } // shorthand -- scalars match _id if (LocalCollection._selectorIsId(selector)) { + self._selector = {_id: selector}; + self._recordPathUsed('_id'); return function (doc) { return {result: EJSON.equals(doc._id, selector)}; }; @@ -52,15 +71,26 @@ _.extend(Minimongo.Matcher.prototype, { // protect against dangerous selectors. falsey and {_id: falsey} are both // likely programmer error, and not what you want, particularly for // destructive operations. - if (!selector || (('_id' in selector) && !selector._id)) + if (!selector || (('_id' in selector) && !selector._id)) { + self._isEquality = null; return nothingMatcher; + } // Top level can't be an array or true or binary. if (typeof(selector) === 'boolean' || isArray(selector) || EJSON.isBinary(selector)) throw new Error("Invalid selector: " + selector); - return compileDocumentSelector(selector, self); + self._selector = EJSON.clone(selector); + return compileDocumentSelector(selector, self, {isRoot: true}); + }, + _recordPathUsed: function (path) { + this._paths[path] = true; + }, + // Returns a list of key paths the given selector is looking for. It includes + // the empty string if there is a $where. + _getPaths: function () { + return _.keys(this._paths); } }); @@ -68,9 +98,12 @@ _.extend(Minimongo.Matcher.prototype, { // Takes in a selector that could match a full document (eg, the original // selector). Returns a function mapping document->result object. // +// matcher is the Matcher object we are compiling. +// // If this is the root document selector (ie, not wrapped in $and or the like), -// then matcherIfRoot is the Matcher object. (This is used by $near.) -var compileDocumentSelector = function (docSelector, matcherIfRoot) { +// then isRoot is true. (This is used by $near.) +var compileDocumentSelector = function (docSelector, matcher, options) { + options = options || {}; var docMatchers = []; _.each(docSelector, function (subSelector, key) { if (key.substr(0, 1) === '$') { @@ -78,11 +111,18 @@ var compileDocumentSelector = function (docSelector, matcherIfRoot) { // this function), or $where. if (!_.has(LOGICAL_OPERATORS, key)) throw new Error("Unrecognized logical operator: " + key); - docMatchers.push(LOGICAL_OPERATORS[key](subSelector)); + matcher._isEquality = false; + docMatchers.push(LOGICAL_OPERATORS[key](subSelector, matcher, + options.inElemMatch)); } else { + // Record this path, but only if we aren't in an elemMatcher, since in an + // elemMatch this is a path inside an object in an array, not in the doc + // root. + if (!options.inElemMatch) + matcher._recordPathUsed(key); var lookUpByIndex = makeLookupFunction(key); var valueMatcher = - compileValueSelector(subSelector, matcherIfRoot); + compileValueSelector(subSelector, matcher, options.isRoot); docMatchers.push(function (doc) { var branchValues = lookUpByIndex(doc); return valueMatcher(branchValues); @@ -97,13 +137,14 @@ var compileDocumentSelector = function (docSelector, matcherIfRoot) { // {$gt: 5, $lt: 9}, or a regular expression, or any non-expression object (to // indicate equality). Returns a branched matcher: a function mapping // [branched value]->result object. -var compileValueSelector = function (valueSelector, matcherIfRoot) { - if (valueSelector instanceof RegExp) +var compileValueSelector = function (valueSelector, matcher, isRoot) { + if (valueSelector instanceof RegExp) { + matcher._isEquality = false; return convertElementMatcherToBranchedMatcher( regexpElementMatcher(valueSelector)); - else if (isOperatorObject(valueSelector)) - return operatorBranchedMatcher(valueSelector, matcherIfRoot); - else { + } else if (isOperatorObject(valueSelector)) { + return operatorBranchedMatcher(valueSelector, matcher, isRoot); + } else { return convertElementMatcherToBranchedMatcher( equalityElementMatcher(valueSelector)); } @@ -166,23 +207,28 @@ var equalityElementMatcher = function (elementSelector) { // Takes an operator object (an object with $ keys) and returns a branched // matcher for it. -var operatorBranchedMatcher = function (valueSelector, matcherIfRoot) { +var operatorBranchedMatcher = function (valueSelector, matcher, isRoot) { // Each valueSelector works separately on the various branches. So one // operator can match one branch and another can match another branch. This // is OK. var operatorMatchers = []; _.each(valueSelector, function (operand, operator) { + // XXX we should actually implement $eq, which is new in 2.6 + if (operator !== '$eq') + matcher._isEquality = false; + if (_.has(VALUE_OPERATORS, operator)) { operatorMatchers.push( - VALUE_OPERATORS[operator](operand, valueSelector, matcherIfRoot)); + VALUE_OPERATORS[operator](operand, valueSelector, matcher, isRoot)); } else if (_.has(ELEMENT_OPERATORS, operator)) { var options = ELEMENT_OPERATORS[operator]; if (typeof options === 'function') options = {compileElementSelector: options}; operatorMatchers.push( convertElementMatcherToBranchedMatcher( - options.compileElementSelector(operand, valueSelector), + options.compileElementSelector( + operand, valueSelector, matcher), options)); } else { throw new Error("Unrecognized operator: " + operator); @@ -192,25 +238,29 @@ var operatorBranchedMatcher = function (valueSelector, matcherIfRoot) { return andBranchedMatchers(operatorMatchers); }; -var compileArrayOfDocumentSelectors = function (selectors) { +var compileArrayOfDocumentSelectors = function ( + selectors, matcher, inElemMatch) { if (!isArray(selectors) || _.isEmpty(selectors)) throw Error("$and/$or/$nor must be nonempty array"); return _.map(selectors, function (subSelector) { if (!isPlainObject(subSelector)) throw Error("$or/$and/$nor entries need to be full objects"); - return compileDocumentSelector(subSelector); + return compileDocumentSelector( + subSelector, matcher, {inElemMatch: inElemMatch}); }); }; // Operators that appear at the top level of a document selector. var LOGICAL_OPERATORS = { - $and: function (subSelector) { - var matchers = compileArrayOfDocumentSelectors(subSelector); + $and: function (subSelector, matcher, inElemMatch) { + var matchers = compileArrayOfDocumentSelectors( + subSelector, matcher, inElemMatch); return andDocumentMatchers(matchers); }, - $or: function (subSelector) { - var matchers = compileArrayOfDocumentSelectors(subSelector); + $or: function (subSelector, matcher, inElemMatch) { + var matchers = compileArrayOfDocumentSelectors( + subSelector, matcher, inElemMatch); return function (doc) { var result = _.any(matchers, function (f) { return f(doc).result; @@ -220,8 +270,9 @@ var LOGICAL_OPERATORS = { }; }, - $nor: function (subSelector) { - var matchers = compileArrayOfDocumentSelectors(subSelector); + $nor: function (subSelector, matcher, inElemMatch) { + var matchers = compileArrayOfDocumentSelectors( + subSelector, matcher, inElemMatch); return function (doc) { var result = _.all(matchers, function (f) { return !f(doc).result; @@ -232,7 +283,9 @@ var LOGICAL_OPERATORS = { }; }, - $where: function (selectorValue) { + $where: function (selectorValue, matcher) { + // Record that *any* path may be used. + matcher._recordPathUsed(''); if (!(selectorValue instanceof Function)) { // XXX MongoDB seems to have more complex logic to decide where or or not // to add "return"; not sure exactly what it is. @@ -272,8 +325,8 @@ var invertBranchedMatcher = function (branchedMatcher) { // "match each branched value independently and combine with // convertElementMatcherToBranchedMatcher". var VALUE_OPERATORS = { - $not: function (operand) { - return invertBranchedMatcher(compileValueSelector(operand)); + $not: function (operand, valueSelector, matcher) { + return invertBranchedMatcher(compileValueSelector(operand, matcher)); }, $ne: function (operand) { return invertBranchedMatcher(convertElementMatcherToBranchedMatcher( @@ -301,7 +354,7 @@ var VALUE_OPERATORS = { throw Error("$maxDistance needs a $near"); return everythingMatcher; }, - $all: function (operand) { + $all: function (operand, valueSelector, matcher) { if (!isArray(operand)) throw Error("$all requires array"); // Not sure why, but this seems to be what MongoDB does. @@ -314,17 +367,17 @@ var VALUE_OPERATORS = { if (isOperatorObject(criterion)) throw Error("no $ expressions in $all"); // This is always a regexp or equality selector. - branchedMatchers.push(compileValueSelector(criterion)); + branchedMatchers.push(compileValueSelector(criterion, matcher)); }); // andBranchedMatchers does NOT require all selectors to return true on the // SAME branch. return andBranchedMatchers(branchedMatchers); }, - $near: function (operand, valueSelector, matcherIfRoot) { - if (!matcherIfRoot) + $near: function (operand, valueSelector, matcher, isRoot) { + if (!isRoot) throw Error("$near can't be inside another $ operator"); - matcherIfRoot._isGeoQuery = true; + matcher._isGeoQuery = true; // There are two kinds of geodata in MongoDB: coordinate pairs and // GeoJSON. They use different distance metrics, too. GeoJSON queries are @@ -555,20 +608,21 @@ var ELEMENT_OPERATORS = { }, $elemMatch: { dontExpandLeafArrays: true, - compileElementSelector: function (operand) { + compileElementSelector: function (operand, valueSelector, matcher) { if (!isPlainObject(operand)) throw Error("$elemMatch need an object"); var matcher, isDocMatcher; if (isOperatorObject(operand)) { - matcher = compileValueSelector(operand); + matcher = compileValueSelector(operand, matcher); isDocMatcher = false; } else { // This is NOT the same as compileValueSelector(operand), and not just // because of the slightly different calling convention. // {$elemMatch: {x: 3}} means "an element has a field x:3", not // "consists only of a field x:3". Also, regexps and sub-$ are allowed. - matcher = compileDocumentSelector(operand); + matcher = compileDocumentSelector(operand, matcher, + {inElemMatch: true}); isDocMatcher = true; } diff --git a/packages/minimongo/selector_modifier.js b/packages/minimongo/selector_modifier.js index 6ea0b28313..100ac007ac 100644 --- a/packages/minimongo/selector_modifier.js +++ b/packages/minimongo/selector_modifier.js @@ -6,11 +6,12 @@ // - 'foo.bar': 42 // - $unset // - 'abc.d': 1 -LocalCollection._isSelectorAffectedByModifier = function (selector, modifier) { +Minimongo.Matcher.prototype.affectedByModifier = function (modifier) { + var self = this; // safe check for $set/$unset being objects modifier = _.extend({ $set: {}, $unset: {} }, modifier); var modifiedPaths = _.keys(modifier.$set).concat(_.keys(modifier.$unset)); - var meaningfulPaths = getPaths(selector); + var meaningfulPaths = self._getPaths(); return _.any(modifiedPaths, function (path) { var mod = path.split('.'); @@ -43,44 +44,33 @@ LocalCollection._isSelectorAffectedByModifier = function (selector, modifier) { }); }; -getPathsWithoutNumericKeys = function (sel) { - return _.map(getPaths(sel), function (path) { - return _.reject(path.split('.'), isNumericKey).join('.'); - }); -}; - -// @param selector - Object: MongoDB selector. Currently doesn't support -// $-operators and arrays well. // @param modifier - Object: MongoDB-styled modifier with `$set`s and `$unsets` // only. (assumed to come from oplog) // @returns - Boolean: if after applying the modifier, selector can start // accepting the modified value. -LocalCollection._canSelectorBecomeTrueByModifier = function (selector, modifier) -{ - if (!LocalCollection._isSelectorAffectedByModifier(selector, modifier)) +// Currently doesn't support $-operators and numeric indices precisely. +Minimongo.Matcher.prototype.canBecomeTrueByModifier = function (modifier) { + var self = this; + if (!this.affectedByModifier(modifier)) return false; modifier = _.extend({$set:{}, $unset:{}}, modifier); - if (_.any(_.keys(selector), pathHasNumericKeys) || - _.any(_.keys(modifier.$unset), pathHasNumericKeys) || - _.any(_.keys(modifier.$set), pathHasNumericKeys)) + if (!self.isEquality()) return true; - if (!isLiteralSelector(selector)) + if (_.any(self._getPaths(), pathHasNumericKeys) || + _.any(_.keys(modifier.$unset), pathHasNumericKeys) || + _.any(_.keys(modifier.$set), pathHasNumericKeys)) return true; // convert a selector into an object matching the selector // { 'a.b': { ans: 42 }, 'foo.bar': null, 'foo.baz': "something" } // => { a: { b: { ans: 42 } }, foo: { bar: null, baz: "something" } } - var doc = pathsToTree(_.keys(selector), - function (path) { return selector[path]; }, + var doc = pathsToTree(self._getPaths(), + function (path) { return self._selector[path]; }, _.identity /*conflict resolution is no resolution*/); - // XXX we should move this function to being a method on Matcher so we aren't - // recompiling over and over - var matcher = new Minimongo.Matcher(selector); - try { LocalCollection._modify(doc, modifier); } catch (e) { @@ -99,11 +89,11 @@ LocalCollection._canSelectorBecomeTrueByModifier = function (selector, modifier) throw e; } - return matcher.documentMatches(doc).result; + return self.documentMatches(doc).result; }; -// Returns a list of key paths the given selector is looking for -var getPaths = MinimongoTest.getSelectorPaths = function (sel) { +var getPaths = function (sel) { + return _.keys(new Minimongo.Matcher(sel)._paths); return _.chain(sel).map(function (v, k) { // we don't know how to handle $where because it can be anything if (k === "$where") @@ -120,15 +110,3 @@ function pathHasNumericKeys (path) { return _.any(path.split('.'), isNumericKey); } -function isLiteralSelector (selector) { - return _.all(selector, function (subSelector, keyPath) { - if (keyPath.substr(0, 1) === "$" || _.isRegExp(subSelector)) - return false; - if (!_.isObject(subSelector) || _.isArray(subSelector)) - return true; - return _.all(subSelector, function (value, key) { - return key.substr(0, 1) !== "$"; - }); - }); -} - diff --git a/packages/minimongo/selector_projection.js b/packages/minimongo/selector_projection.js index ece29b8470..73a2727a8e 100644 --- a/packages/minimongo/selector_projection.js +++ b/packages/minimongo/selector_projection.js @@ -1,9 +1,9 @@ // Knows how to combine a mongo selector and a fields projection to a new fields // projection taking into account active fields from the passed selector. // @returns Object - projection object (same as fields option of mongo cursor) -LocalCollection._combineSelectorAndProjection = function (selector, projection) -{ - var selectorPaths = getPathsWithoutNumericKeys(selector); +Minimongo.Matcher.prototype.combineIntoProjection = function (projection) { + var self = this; + var selectorPaths = self._getPathsElidingNumericKeys(); // Special case for $where operator in the selector - projection should depend // on all fields of the document. getSelectorPaths returns a list of paths @@ -40,6 +40,13 @@ LocalCollection._combineSelectorAndProjection = function (selector, projection) } }; +Minimongo.Matcher.prototype._getPathsElidingNumericKeys = function () { + var self = this; + return _.map(self._getPaths(), function (path) { + return _.reject(path.split('.'), isNumericKey).join('.'); + }); +}; + // Returns a set of key paths similar to // { 'foo.bar': 1, 'a.b.c': 1 } var treeToPaths = function (tree, prefix) { diff --git a/packages/mongo-livedata/oplog_observe_driver.js b/packages/mongo-livedata/oplog_observe_driver.js index 60be922a16..109a5dca37 100644 --- a/packages/mongo-livedata/oplog_observe_driver.js +++ b/packages/mongo-livedata/oplog_observe_driver.js @@ -38,8 +38,7 @@ OplogObserveDriver = function (options) { self._projectionFn = LocalCollection._compileProjection(projection); // Projection function, result of combining important fields for selector and // existing fields projection - self._sharedProjection = LocalCollection._combineSelectorAndProjection( - selector, projection); + self._sharedProjection = self._matcher.combineIntoProjection(projection); self._sharedProjectionFn = LocalCollection._compileProjection( self._sharedProjection); @@ -263,8 +262,7 @@ _.extend(OplogObserveDriver.prototype, { LocalCollection._modify(newDoc, op.o); self._handleDoc(id, self._sharedProjectionFn(newDoc)); } else if (!canDirectlyModifyDoc || - LocalCollection._canSelectorBecomeTrueByModifier( - self._cursorDescription.selector, op.o)) { + self._matcher.canBecomeTrueByModifier(op.o)) { self._needToFetch.set(id, op.ts.toString()); if (self._phase === PHASE.STEADY) self._fetchModifiedDocuments();