CommentTree: Calculate num_children

num_children is derived directly from the tree, so it makes sense
to calculate it on CommentTree retrieval rather than waiting and making
the CommentBuilder handle it. Calculating child counts in advance for the
full tree appears to be faster than the previous method of using
`get_num_children()` to calculate the child count only for visible comments.
This commit is contained in:
Brian Simpson
2016-01-31 09:28:06 -08:00
parent ff01943ca7
commit c74c75800d
5 changed files with 50 additions and 70 deletions

View File

@@ -215,6 +215,7 @@ def link_comments_and_sort(link, sort):
* depth -- a dictionary from cid to the depth that comment resides in the
tree. A top-level comment has depth 0.
* parents -- a dictionary from child cid to parent cid.
* num_children -- a dictionary from cid to total descendant count
* sorter -- a dictionary from cid to a numeric value to be used for
sorting.
"""
@@ -234,15 +235,13 @@ def link_comments_and_sort(link, sort):
cache = get_comment_tree(link, timer=timer)
cids = cache.cids
tree = cache.tree
depth = cache.depth
parents = cache.parents
scores_by_id = get_comment_scores(link, sort, cids, timer)
timer.intermediate('get_scores')
timer.stop()
return (cache.cids, cache.tree, cache.depth, cache.parents, scores_by_id)
return (cache.cids, cache.tree, cache.depth, cache.parents,
cache.num_children, scores_by_id)
def get_comment_tree(link, timer=None):

View File

@@ -20,46 +20,6 @@
# Inc. All Rights Reserved.
###############################################################################
def get_num_children(list comments, dict tree):
    """Return a dict of descendant counts for the given comments.

    `comments` is a list of comment ids to start from and `tree` maps a
    comment id to the list of its direct child ids. The result has an entry
    for every requested id and for every id reachable from one through
    `tree`; ids with no children map to 0.
    """
    cdef:
        dict counts = {}
        list pending = []
        list kids = []
        list unresolved = []
        long root
        long node
        long kid

    # Seed the work stack with the requested ids in ascending order.
    for root in sorted(comments):
        pending.append(root)

    # Iterative post-order walk: a node is only finalized once all of its
    # children already have a count.
    while pending:
        node = pending[-1]

        if node in counts:
            # Already resolved via another path.
            pending.pop()
            continue

        kids = tree.get(node, [])

        # Children without children of their own can be settled right away.
        for kid in kids:
            if kid not in counts and not tree.get(kid, None):
                counts[kid] = 0

        unresolved = [kid for kid in kids if kid not in counts]

        if unresolved:
            # Leave the node on the stack and handle its children first.
            pending.extend(unresolved)
            continue

        pending.pop()
        # Each child contributes itself plus all of its own descendants.
        counts[node] = sum(1 + counts[kid] for kid in kids)

    return counts
def get_tree_details(dict tree):
cdef:
@@ -79,3 +39,18 @@ def get_tree_details(dict tree):
depth.update({child_id: child_depth for child_id in child_ids})
return cids, depth, parents
def calc_num_children(dict tree):
    """Return a dict mapping each parent id in `tree` to its descendant count.

    `tree` maps a parent id to the list of its direct child ids. The key
    None (presumably the root that holds the top-level comments -- confirm
    against the caller) is skipped and gets no entry in the result. Only ids
    that appear as keys of `tree` get an entry, so callers that look up other
    ids need to supply their own default.

    Keys are visited in descending order and each child's count is read from
    the partially built result, falling back to 0 when the child has no entry
    yet. NOTE(review): this yields full descendant counts only if every child
    id is greater than its parent's id -- confirm that ids are allocated in
    increasing order.
    """
    cdef:
        dict num_children = {}
        list child_ids

    # Drop the None key before sorting rather than skipping it inside the
    # loop: None cannot be ordered against integers on Python 3.
    for parent_id in sorted(
            [key for key in tree if key is not None], reverse=True):
        child_ids = tree[parent_id]
        # Each child contributes itself plus its already computed descendants.
        num_children[parent_id] = sum(
            1 + num_children.get(child_id, 0) for child_id in child_ids)

    return num_children

View File

@@ -49,7 +49,6 @@ from r2.lib.wrapped import Wrapped
from r2.lib.db import operators, tdb_cassandra
from r2.lib.filters import _force_unicode
from r2.lib.utils import Storage, shuffle_slice, timesince, tup, to36
from r2.lib.utils.comment_tree_utils import get_num_children
from r2.models import (
Account,
@@ -841,7 +840,7 @@ class CommentBuilder(Builder):
timer = g.stats.get_timer("CommentBuilder.get_items")
timer.start()
r = link_comments_and_sort(self.link, self.sort.col)
cids, cid_tree, depth, parents, sorter = r
cids, cid_tree, depth, parents, num_children, sorter = r
timer.intermediate("load_storage")
if self.comment and not self.comment._id in depth:
@@ -874,10 +873,17 @@ class CommentBuilder(Builder):
dont_collapse.extend(path)
# rewrite cid_tree so the parents lead only to the requested comment
for comment in path:
parent = parents[comment]
cid_tree[parent] = [comment]
# work through the path starting with the requested comment
# (path is requested comment, its parent, its grandparent, etc.)
for comment_id in path:
# rewrite parent's tree so it leads only to the requested comment
parent_id = parents[comment_id]
cid_tree[parent_id] = [comment_id]
# rewrite parent's num_children to count only this branch
if parent_id is not None:
branch_num_children = num_children[comment_id]
num_children[parent_id] = branch_num_children + 1
# start building comment tree from earliest comment
self.update_candidates(candidates, sorter, path[-1])
@@ -961,6 +967,7 @@ class CommentBuilder(Builder):
self.timer = timer
self.cid_tree = cid_tree
self.depth = depth
self.num_children = num_children
self.more_recursions = more_recursions
self.offset_depth = offset_depth
self.dont_collapse = dont_collapse
@@ -971,6 +978,7 @@ class CommentBuilder(Builder):
cid_tree = self.cid_tree
top_level_candidates = self.top_level_candidates
depth = self.depth
num_children = self.num_children
more_recursions = self.more_recursions
offset_depth = self.offset_depth
dont_collapse = self.dont_collapse
@@ -980,11 +988,6 @@ class CommentBuilder(Builder):
timer.stop()
return []
# retrieve num_children for the visible comments
needs_num_children = [c._id for c in comments] + top_level_candidates
num_children = get_num_children(needs_num_children, cid_tree)
timer.intermediate("calc_num_children")
wrapped = self.wrap_items(comments)
timer.intermediate("wrap_comments")
wrapped_by_id = {comment._id: comment for comment in wrapped}

View File

@@ -20,6 +20,8 @@
# Inc. All Rights Reserved.
###############################################################################
from collections import defaultdict
from pycassa import batch, types
from pycassa.cassandra import ttypes
from pycassa.system_manager import ASCII_TYPE, COUNTER_COLUMN_TYPE
@@ -28,7 +30,7 @@ from pylons import app_globals as g
from r2.lib import utils
from r2.lib.db import tdb_cassandra
from r2.lib.utils import SimpleSillyStub
from r2.lib.utils.comment_tree_utils import get_tree_details
from r2.lib.utils.comment_tree_utils import get_tree_details, calc_num_children
from r2.models.link import Comment
@@ -81,7 +83,7 @@ class CommentTreeStorageBase(object):
@classmethod
def get_tree_pieces(cls, link, timer):
"""Return cids, tree, depth, and parents for link."""
"""Return cids, tree, depth, parents, and num_children for link."""
raise NotImplementedError
@classmethod
@@ -166,9 +168,11 @@ class CommentTreeStorageV1(CommentTreeStorageBase):
tree = tree or {} # assume empty tree on miss
cids, depth, parents = get_tree_details(tree)
num_children = calc_num_children(tree)
num_children = defaultdict(int, num_children)
timer.intermediate('calculate')
return cids, tree, depth, parents
return cids, tree, depth, parents, num_children
@classmethod
def write_from_comment_tree(cls, link, comment_tree):
@@ -209,12 +213,13 @@ class CommentTree:
3: None, # placeholder for abandoned CommentTreeStorageV3
}
def __init__(self, link, cids, tree, depth, parents):
def __init__(self, link, cids, tree, depth, parents, num_children):
self.link = link
self.cids = cids
self.tree = tree
self.depth = depth
self.parents = parents
self.num_children = num_children
@classmethod
def mutation_context(cls, link, timeout=None):
@@ -227,8 +232,8 @@ class CommentTree:
timer = SimpleSillyStub()
impl = cls.IMPLEMENTATIONS[link.comment_tree_version]
cids, tree, depth, parents = impl.get_tree_pieces(link, timer)
comment_tree = cls(link, cids, tree, depth, parents)
cids, tree, depth, parents, num_children = impl.get_tree_pieces(link, timer)
comment_tree = cls(link, cids, tree, depth, parents, num_children)
return comment_tree
@classmethod

View File

@@ -21,10 +21,11 @@
# Inc. All Rights Reserved.
###############################################################################
from collections import namedtuple
from collections import namedtuple, defaultdict
from mock import MagicMock
from r2.lib import comment_tree
from r2.lib.utils.comment_tree_utils import get_tree_details, calc_num_children
from r2.lib.db import operators
from r2.models import Comment
from r2.models.builder import CommentBuilder
@@ -55,17 +56,10 @@ TREE = [
def make_comment_tree(link):
cids = []
depth = {}
tree = {}
parents = {}
def _add_comment(comment, parent):
cids.append(comment.id)
depth[comment.id] = 0 if parent is None else depth[parent.id] + 1
tree[comment.id] = [child.id for child in comment.children]
parents[comment.id] = None if parent is None else parent.id
for child in comment.children:
_add_comment(child, parent=comment)
@@ -74,7 +68,11 @@ def make_comment_tree(link):
for comment in TREE:
_add_comment(comment, parent=None)
return CommentTree(link, cids, tree, depth, parents)
cids, depth, parents = get_tree_details(tree)
num_children = calc_num_children(tree)
num_children = defaultdict(int, num_children)
return CommentTree(link, cids, tree, depth, parents, num_children)
def make_comment_scores():