mirror of
https://github.com/meteor/meteor.git
synced 2026-05-02 03:01:46 -04:00
243 lines
9.4 KiB
JavaScript
243 lines
9.4 KiB
JavaScript
// Compute the diff between two query result arrays and report it to an
// observer as a sequence of added/changed/moved/removed callbacks.
//
// old_results: array of documents.
// new_results: array of documents.
// observer: object with 'added', 'changed', 'moved',
// 'removed' functions (each optional)
// deepcopy: if true, elements of new_results that are passed to callbacks are
// deepcopied first
LocalCollection._diffQuery = function (old_results, new_results, observer, deepcopy) {

  // Set of _ids present in new_results (for O(1) membership tests).
  var new_presence_of_id = {};
  _.each(new_results, function (doc) {
    if (new_presence_of_id[doc._id])
      Meteor._debug("Duplicate _id in new_results");
    new_presence_of_id[doc._id] = true;
  });

  // Map from _id to its index in old_results.
  var old_index_of_id = {};
  _.each(old_results, function (doc, i) {
    if (doc._id in old_index_of_id)
      Meteor._debug("Duplicate _id in old_results");
    old_index_of_id[doc._id] = i;
  });

  // "maybe deepcopy": wraps docs handed to callbacks only when requested.
  var mdc = (deepcopy ? LocalCollection._deepcopy : _.identity);

  // ALGORITHM:
  //
  // We walk old_idx through the old_results array and
  // new_idx through the new_results array at the same time.
  // These pointers establish a sort of correspondence between
  // old docs and new docs (identified by their _ids).
  // If they point to the same doc (i.e. old and new docs
  // with the same _id), we can increment both pointers
  // and fire no 'moved' callbacks. Otherwise, we must
  // increment one or the other and fire appropriate 'added',
  // 'removed', and 'moved' callbacks.
  //
  // The process is driven by new_results, in that we try
  // make the observer's array look like new_results by
  // establishing each new doc in order. The doc pointed
  // to by new_idx is the one we are trying to establish
  // at any given time. If it doesn't exist in old_results,
  // we fire an 'added' callback. If it does, we have a
  // choice of two ways to handle the situation. We can
  // advance old_idx forward to the corresponding old doc,
  // treating all intervening old docs as moved or removed,
  // and the current doc as unmoved. Or, we can simply
  // establish the new doc as next by moving it into place,
  // i.e. firing a single 'moved' callback to move the
  // doc from wherever it was before. Generating a sequence
  // of 'moved' callbacks that is not just correct but small
  // (or minimal) is a matter of choosing which elements
  // to consider moved and which ones merely change position
  // by virtue of the movement of other docs.
  //
  // Calling callbacks with correct indices requires understanding
  // what the observer's array looks like at each iteration.
  // The observer's array is a concatenation of:
  // - new_results up to (but not including) new_idx, with the
  //   addition of some "bumped" docs that we are later going
  //   to move into place
  // - old_results starting at old_idx, minus any docs that we
  //   have already moved ("taken" docs)
  //
  // To keep track of "bumped" items -- docs in the observer's
  // array that we have skipped over, but will be moved forward
  // later when we get to their new position -- we keep a
  // "bump list" of indices into new_results where bumped items
  // occur. [The idea is that by adding an item to the list (bumping
  // it), we can consider it dealt with, even though it is still there.]
  // The corresponding position of new_idx in the observer's array,
  // then, is new_idx + bump_list.length, and the position of
  // the nth bumped item in the observer's array is
  // bump_list[n] + n (to account for the previous bumped items
  // that are still there).
  //
  // A "taken" list is used in a sort of analogous way to track
  // the indices of the documents after old_idx in old_results
  // that we have moved, so that, conversely, even though we will
  // come across them in old_results, they are actually no longer
  // in the observer's array.
  //
  // To determine which docs should be considered "moved" (and which
  // merely change position because of other docs moving) we run
  // a "longest common subsequence" (LCS) algorithm. The LCS of the
  // old doc IDs and the new doc IDs gives the docs that should NOT be
  // considered moved.
  //
  // Overall, this diff implementation is asymptotically good, but could
  // be optimized to streamline execution and use less memory (e.g. not
  // have to build data structures with an entry for every doc).

  // Asymptotically: O(N k) where k is number of ops, or potentially
  // O(N log N) if inner loop of LCS were made to be binary search.


  //////// LCS (longest common sequence, with respect to _id)
  // (see Wikipedia article on Longest Increasing Subsequence,
  // where the LIS is taken of the sequence of old indices of the
  // docs in new_results)
  //
  // unmoved_set: the output of the algorithm; members of the LCS,
  // in the form of indices into new_results
  var unmoved_set = {};
  // max_seq_len: length of LCS found so far
  var max_seq_len = 0;
  // seq_ends[i]: the index into new_results of the last doc in a
  // common subsequence of length of i+1 <= max_seq_len
  var N = new_results.length;
  var seq_ends = new Array(N);
  // ptrs: the common subsequence ending with new_results[n] extends
  // a common subsequence ending with new_results[ptr[n]], unless
  // ptr[n] is -1.
  var ptrs = new Array(N);
  // virtual sequence of old indices of new results
  var old_idx_seq = function(i_new) {
    return old_index_of_id[new_results[i_new]._id];
  };
  // for each item in new_results, use it to extend a common subsequence
  // of length j <= max_seq_len
  for(var i=0; i<N; i++) {
    if (old_index_of_id[new_results[i]._id] !== undefined) {
      var j = max_seq_len;
      // this inner loop would traditionally be a binary search,
      // but scanning backwards we will likely find a subseq to extend
      // pretty soon, bounded for example by the total number of ops.
      // If this were to be changed to a binary search, we'd still want
      // to scan backwards a bit as an optimization.
      while (j > 0) {
        if (old_idx_seq(seq_ends[j-1]) < old_idx_seq(i))
          break;
        j--;
      }

      ptrs[i] = (j === 0 ? -1 : seq_ends[j-1]);
      seq_ends[j] = i;
      if (j+1 > max_seq_len)
        max_seq_len = j+1;
    }
  }

  // pull out the LCS/LIS into unmoved_set
  // (walk the ptrs chain backwards from the end of the longest sequence)
  var idx = (max_seq_len === 0 ? -1 : seq_ends[max_seq_len-1]);
  while (idx >= 0) {
    unmoved_set[idx] = true;
    idx = ptrs[idx];
  }

  //////// Main Diff Algorithm

  var old_idx = 0;
  var new_idx = 0;
  // bump_list[n]: value of new_idx at the time item n was bumped
  // (its position in the observer's array is bump_list[n] + n).
  var bump_list = [];
  // bump_list_old_idx[n]: index into old_results of the nth bumped item
  // (kept sorted, in lockstep with bump_list).
  var bump_list_old_idx = [];
  // taken_list: sorted indices (> old_idx) into old_results of docs
  // already moved out of their old position.
  var taken_list = [];

  // Advance old_idx to old_j, firing 'removed' for old docs absent from
  // new_results, skipping docs already taken, and bumping the rest.
  var scan_to = function(old_j) {
    // old_j <= old_results.length (may scan to end)
    while (old_idx < old_j) {
      var old_doc = old_results[old_idx];
      var is_in_new = new_presence_of_id[old_doc._id];
      if (! is_in_new) {
        observer.removed && observer.removed(old_doc, new_idx + bump_list.length);
      } else {
        if (taken_list.length >= 1 && taken_list[0] === old_idx) {
          // already moved
          taken_list.shift();
        } else {
          // bump!
          bump_list.push(new_idx);
          bump_list_old_idx.push(old_idx);
        }
      }
      old_idx++;
    }
  };


  // One extra iteration (new_idx === new_results.length) flushes the
  // remaining tail of old_results through scan_to.
  while (new_idx <= new_results.length) {
    if (new_idx < new_results.length) {
      var new_doc = new_results[new_idx];
      var old_doc_idx = old_index_of_id[new_doc._id];
      if (old_doc_idx === undefined) {
        // insert
        observer.added && observer.added(mdc(new_doc), new_idx + bump_list.length);
      } else {
        var old_doc = old_results[old_doc_idx];
        //var is_unmoved = (old_doc_idx > old_idx); // greedy; not minimal
        var is_unmoved = unmoved_set[new_idx];
        if (is_unmoved) {
          if (old_doc_idx < old_idx)
            Meteor._debug("Assertion failed while diffing: nonmonotonic lcs data");
          // no move
          scan_to(old_doc_idx);
          if (! _.isEqual(old_doc, new_doc)) {
            observer.changed && observer.changed(
              mdc(new_doc), new_idx + bump_list.length, old_doc);
          }
          old_idx++;
        } else {
          // move into place
          var to_idx = new_idx + bump_list.length;
          var from_idx;
          if (old_doc_idx >= old_idx) {
            // move backwards
            from_idx = to_idx + old_doc_idx - old_idx;
            // must take number of "taken" items into account; also use
            // results of this binary search to insert new taken_list entry
            var num_taken_before = _.sortedIndex(taken_list, old_doc_idx);
            from_idx -= num_taken_before;
            taken_list.splice(num_taken_before, 0, old_doc_idx);
          } else {
            // move forwards, from bump list
            // (binary search applies)
            var b = _.indexOf(bump_list_old_idx, old_doc_idx, true);
            if (b < 0)
              Meteor._debug("Assertion failed while diffing: no bumped item");
            from_idx = bump_list[b] + b;
            // removing the bumped item shifts the target left by one
            to_idx--;
            bump_list.splice(b, 1);
            bump_list_old_idx.splice(b, 1);
          }
          if (from_idx != to_idx)
            observer.moved && observer.moved(mdc(old_doc), from_idx, to_idx);
          if (! _.isEqual(old_doc, new_doc)) {
            observer.changed && observer.changed(mdc(new_doc), to_idx, old_doc);
          }
        }
      }
    } else {
      // end of new_results: sweep out the rest of old_results
      scan_to(old_results.length);
    }
    new_idx++;
  }
  // every bumped doc should have been moved into place by now
  if (bump_list.length > 0) {
    Meteor._debug(old_results);
    Meteor._debug(new_results);
    Meteor._debug("Assertion failed while diffing: leftover bump_list "+
                  bump_list);
  }

};
|