mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-04-27 03:00:12 -04:00
CommentTree: Calculate num_children
`num_children` is derived directly from the tree, so it makes sense to calculate it when the `CommentTree` is retrieved rather than waiting and making the `CommentBuilder` handle it. Calculating child counts in advance for the full tree appears to be faster than the previous method of using `get_num_children()` to calculate the child count only for visible comments.
This commit is contained in:
@@ -215,6 +215,7 @@ def link_comments_and_sort(link, sort):
|
||||
* depth -- a dictionary from cid to the depth that comment resides in the
|
||||
tree. A top-level comment has depth 0.
|
||||
* parents -- a dictionary from child cid to parent cid.
|
||||
* num_children -- a dictionary from cid to total descendant count
|
||||
* sorter -- a dictionary from cid to a numeric value to be used for
|
||||
sorting.
|
||||
"""
|
||||
@@ -234,15 +235,13 @@ def link_comments_and_sort(link, sort):
|
||||
|
||||
cache = get_comment_tree(link, timer=timer)
|
||||
cids = cache.cids
|
||||
tree = cache.tree
|
||||
depth = cache.depth
|
||||
parents = cache.parents
|
||||
|
||||
scores_by_id = get_comment_scores(link, sort, cids, timer)
|
||||
timer.intermediate('get_scores')
|
||||
timer.stop()
|
||||
|
||||
return (cache.cids, cache.tree, cache.depth, cache.parents, scores_by_id)
|
||||
return (cache.cids, cache.tree, cache.depth, cache.parents,
|
||||
cache.num_children, scores_by_id)
|
||||
|
||||
|
||||
def get_comment_tree(link, timer=None):
|
||||
|
||||
@@ -20,46 +20,6 @@
|
||||
# Inc. All Rights Reserved.
|
||||
###############################################################################
|
||||
|
||||
def get_num_children(list comments, dict tree):
    """Return a dict mapping comment id to its total number of descendants.

    comments -- list of comment ids to start counting from
    tree -- dict from a comment id to the list of its direct child ids

    The result holds an entry for every requested comment and for every
    descendant visited while counting. Ids with no (or an empty) entry in
    ``tree`` count as leaves and get 0.
    """

    cdef:
        dict num_children = {}
        list pending = []
        list kids = []
        list unresolved = []
        long root
        long current
        long kid

    # Seed the work stack with the requested comments in ascending id order.
    for root in sorted(comments):
        pending.append(root)

    # Iterative post-order walk: a comment stays on the stack until every one
    # of its children has a count, and only then is its own total recorded.
    while pending:
        current = pending[-1]

        if current in num_children:
            # Already counted (reached earlier through another path).
            pending.pop()
            continue

        kids = tree.get(current, [])

        unresolved = []
        for kid in kids:
            if kid in num_children:
                continue
            if tree.get(kid, None):
                # Has children of its own, so it must be counted first.
                unresolved.append(kid)
            else:
                # Leaf: nothing below it.
                num_children[kid] = 0

        if unresolved:
            pending.extend(unresolved)
            continue

        pending.pop()
        total = 0
        for kid in kids:
            # One for the child itself plus everything beneath it.
            total += 1 + num_children[kid]
        num_children[current] = total

    return num_children
|
||||
|
||||
|
||||
def get_tree_details(dict tree):
|
||||
cdef:
|
||||
@@ -79,3 +39,18 @@ def get_tree_details(dict tree):
|
||||
depth.update({child_id: child_depth for child_id in child_ids})
|
||||
|
||||
return cids, depth, parents
|
||||
|
||||
|
||||
def calc_num_children(dict tree):
    """Return a dict mapping each parent comment id to its descendant count.

    tree -- dict from a parent id to the list of its direct child ids. A
            ``None`` key is ignored (presumably it holds the top-level
            comments -- confirm against the caller).

    Only ids that appear as keys of ``tree`` get an entry; ids that are
    absent from the result have no recorded children and should be treated
    as having 0.

    Parents are processed from the highest id to the lowest, so a child's
    total is only available when its id is greater than its parent's id.
    NOTE(review): this presumably holds because ids are assigned in creation
    order -- confirm.
    """

    cdef:
        dict num_children = {}
        list parent_ids
        list child_ids

    # Drop the None key before sorting so the ordering never has to compare
    # None against integer ids.
    parent_ids = [parent_id for parent_id in tree if parent_id is not None]
    parent_ids.sort(reverse=True)

    for parent_id in parent_ids:
        child_ids = tree[parent_id]

        # Each child contributes itself plus its own (already computed)
        # descendants; children without an entry are leaves.
        total = 0
        for child_id in child_ids:
            total += 1 + num_children.get(child_id, 0)
        num_children[parent_id] = total

    return num_children
|
||||
|
||||
@@ -49,7 +49,6 @@ from r2.lib.wrapped import Wrapped
|
||||
from r2.lib.db import operators, tdb_cassandra
|
||||
from r2.lib.filters import _force_unicode
|
||||
from r2.lib.utils import Storage, shuffle_slice, timesince, tup, to36
|
||||
from r2.lib.utils.comment_tree_utils import get_num_children
|
||||
|
||||
from r2.models import (
|
||||
Account,
|
||||
@@ -841,7 +840,7 @@ class CommentBuilder(Builder):
|
||||
timer = g.stats.get_timer("CommentBuilder.get_items")
|
||||
timer.start()
|
||||
r = link_comments_and_sort(self.link, self.sort.col)
|
||||
cids, cid_tree, depth, parents, sorter = r
|
||||
cids, cid_tree, depth, parents, num_children, sorter = r
|
||||
timer.intermediate("load_storage")
|
||||
|
||||
if self.comment and not self.comment._id in depth:
|
||||
@@ -874,10 +873,17 @@ class CommentBuilder(Builder):
|
||||
|
||||
dont_collapse.extend(path)
|
||||
|
||||
# rewrite cid_tree so the parents lead only to the requested comment
|
||||
for comment in path:
|
||||
parent = parents[comment]
|
||||
cid_tree[parent] = [comment]
|
||||
# work through the path starting with the requested comment
|
||||
# (path is requested comment, its parent, its grandparent, etc.)
|
||||
for comment_id in path:
|
||||
# rewrite parent's tree so it leads only to the requested comment
|
||||
parent_id = parents[comment_id]
|
||||
cid_tree[parent_id] = [comment_id]
|
||||
|
||||
# rewrite parent's num_children to count only this branch
|
||||
if parent_id is not None:
|
||||
branch_num_children = num_children[comment_id]
|
||||
num_children[parent_id] = branch_num_children + 1
|
||||
|
||||
# start building comment tree from earliest comment
|
||||
self.update_candidates(candidates, sorter, path[-1])
|
||||
@@ -961,6 +967,7 @@ class CommentBuilder(Builder):
|
||||
self.timer = timer
|
||||
self.cid_tree = cid_tree
|
||||
self.depth = depth
|
||||
self.num_children = num_children
|
||||
self.more_recursions = more_recursions
|
||||
self.offset_depth = offset_depth
|
||||
self.dont_collapse = dont_collapse
|
||||
@@ -971,6 +978,7 @@ class CommentBuilder(Builder):
|
||||
cid_tree = self.cid_tree
|
||||
top_level_candidates = self.top_level_candidates
|
||||
depth = self.depth
|
||||
num_children = self.num_children
|
||||
more_recursions = self.more_recursions
|
||||
offset_depth = self.offset_depth
|
||||
dont_collapse = self.dont_collapse
|
||||
@@ -980,11 +988,6 @@ class CommentBuilder(Builder):
|
||||
timer.stop()
|
||||
return []
|
||||
|
||||
# retrieve num_children for the visible comments
|
||||
needs_num_children = [c._id for c in comments] + top_level_candidates
|
||||
num_children = get_num_children(needs_num_children, cid_tree)
|
||||
timer.intermediate("calc_num_children")
|
||||
|
||||
wrapped = self.wrap_items(comments)
|
||||
timer.intermediate("wrap_comments")
|
||||
wrapped_by_id = {comment._id: comment for comment in wrapped}
|
||||
|
||||
@@ -20,6 +20,8 @@
|
||||
# Inc. All Rights Reserved.
|
||||
###############################################################################
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
from pycassa import batch, types
|
||||
from pycassa.cassandra import ttypes
|
||||
from pycassa.system_manager import ASCII_TYPE, COUNTER_COLUMN_TYPE
|
||||
@@ -28,7 +30,7 @@ from pylons import app_globals as g
|
||||
from r2.lib import utils
|
||||
from r2.lib.db import tdb_cassandra
|
||||
from r2.lib.utils import SimpleSillyStub
|
||||
from r2.lib.utils.comment_tree_utils import get_tree_details
|
||||
from r2.lib.utils.comment_tree_utils import get_tree_details, calc_num_children
|
||||
from r2.models.link import Comment
|
||||
|
||||
|
||||
@@ -81,7 +83,7 @@ class CommentTreeStorageBase(object):
|
||||
|
||||
@classmethod
|
||||
def get_tree_pieces(cls, link, timer):
|
||||
"""Return cids, tree, depth, and parents for link."""
|
||||
"""Return cids, tree, depth, parents, and num_children for link."""
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
@@ -166,9 +168,11 @@ class CommentTreeStorageV1(CommentTreeStorageBase):
|
||||
|
||||
tree = tree or {} # assume empty tree on miss
|
||||
cids, depth, parents = get_tree_details(tree)
|
||||
num_children = calc_num_children(tree)
|
||||
num_children = defaultdict(int, num_children)
|
||||
timer.intermediate('calculate')
|
||||
|
||||
return cids, tree, depth, parents
|
||||
return cids, tree, depth, parents, num_children
|
||||
|
||||
@classmethod
|
||||
def write_from_comment_tree(cls, link, comment_tree):
|
||||
@@ -209,12 +213,13 @@ class CommentTree:
|
||||
3: None, # placeholder for abandoned CommentTreeStorageV3
|
||||
}
|
||||
|
||||
def __init__(self, link, cids, tree, depth, parents):
|
||||
def __init__(self, link, cids, tree, depth, parents, num_children):
|
||||
self.link = link
|
||||
self.cids = cids
|
||||
self.tree = tree
|
||||
self.depth = depth
|
||||
self.parents = parents
|
||||
self.num_children = num_children
|
||||
|
||||
@classmethod
|
||||
def mutation_context(cls, link, timeout=None):
|
||||
@@ -227,8 +232,8 @@ class CommentTree:
|
||||
timer = SimpleSillyStub()
|
||||
|
||||
impl = cls.IMPLEMENTATIONS[link.comment_tree_version]
|
||||
cids, tree, depth, parents = impl.get_tree_pieces(link, timer)
|
||||
comment_tree = cls(link, cids, tree, depth, parents)
|
||||
cids, tree, depth, parents, num_children = impl.get_tree_pieces(link, timer)
|
||||
comment_tree = cls(link, cids, tree, depth, parents, num_children)
|
||||
return comment_tree
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -21,10 +21,11 @@
|
||||
# Inc. All Rights Reserved.
|
||||
###############################################################################
|
||||
|
||||
from collections import namedtuple
|
||||
from collections import namedtuple, defaultdict
|
||||
from mock import MagicMock
|
||||
|
||||
from r2.lib import comment_tree
|
||||
from r2.lib.utils.comment_tree_utils import get_tree_details, calc_num_children
|
||||
from r2.lib.db import operators
|
||||
from r2.models import Comment
|
||||
from r2.models.builder import CommentBuilder
|
||||
@@ -55,17 +56,10 @@ TREE = [
|
||||
|
||||
|
||||
def make_comment_tree(link):
|
||||
cids = []
|
||||
depth = {}
|
||||
tree = {}
|
||||
parents = {}
|
||||
|
||||
def _add_comment(comment, parent):
|
||||
cids.append(comment.id)
|
||||
depth[comment.id] = 0 if parent is None else depth[parent.id] + 1
|
||||
tree[comment.id] = [child.id for child in comment.children]
|
||||
parents[comment.id] = None if parent is None else parent.id
|
||||
|
||||
for child in comment.children:
|
||||
_add_comment(child, parent=comment)
|
||||
|
||||
@@ -74,7 +68,11 @@ def make_comment_tree(link):
|
||||
for comment in TREE:
|
||||
_add_comment(comment, parent=None)
|
||||
|
||||
return CommentTree(link, cids, tree, depth, parents)
|
||||
cids, depth, parents = get_tree_details(tree)
|
||||
num_children = calc_num_children(tree)
|
||||
num_children = defaultdict(int, num_children)
|
||||
|
||||
return CommentTree(link, cids, tree, depth, parents, num_children)
|
||||
|
||||
|
||||
def make_comment_scores():
|
||||
|
||||
Reference in New Issue
Block a user