mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-01-15 01:48:18 -05:00
Add new Q&A-oriented sort type
None of our current sort types work terribly well for Q&A threads. So, we're adding a new one that takes into account not only the votes on top-level comments, but the votes on any OP responses, too, as well as the length of both comments.
This commit is contained in:
@@ -43,9 +43,14 @@ def sort_comments_key(link_id, sort):
|
||||
assert sort.startswith('_')
|
||||
return '%s%s' % (to36(link_id), sort)
|
||||
|
||||
def _get_sort_value(comment, sort):
|
||||
def _get_sort_value(comment, sort, link=None, children=None):
|
||||
if sort == "_date":
|
||||
return epoch_seconds(comment._date)
|
||||
if sort == '_qa':
|
||||
# Responder is usually the OP, but there could be support for adding
|
||||
# other answerers in the future.
|
||||
responder_ids = link.responder_ids
|
||||
return comment._qa(children, responder_ids)
|
||||
return getattr(comment, sort)
|
||||
|
||||
def add_comments(comments):
|
||||
@@ -91,19 +96,28 @@ def update_comment_votes(comments, write_consistency_level = None):
|
||||
link_map = {}
|
||||
for com in comments:
|
||||
link_map.setdefault(com.link_id, []).append(com)
|
||||
all_links = Link._byID(link_map.keys(), data=True)
|
||||
|
||||
comment_trees = {}
|
||||
for link in all_links.values():
|
||||
comment_trees[link._id] = get_comment_tree(link)
|
||||
|
||||
for link_id, coms in link_map.iteritems():
|
||||
for sort in ("_controversy", "_hot", "_confidence", "_score", "_date"):
|
||||
link = all_links[link_id]
|
||||
for sort in ("_controversy", "_hot", "_confidence", "_score", "_date",
|
||||
"_qa"):
|
||||
cid_tree = comment_trees[link_id].tree
|
||||
sorter = _comment_sorter_from_cids(coms, sort, link, cid_tree,
|
||||
by_36=True)
|
||||
|
||||
# Cassandra always uses the id36 instead of the integer
|
||||
# ID, so we'll map that first before sending it
|
||||
c_key = sort_comments_key(link_id, sort)
|
||||
c_r = dict((cm._id36, _get_sort_value(cm, sort))
|
||||
for cm in coms)
|
||||
CommentSortsCache._set_values(c_key, c_r,
|
||||
CommentSortsCache._set_values(c_key, sorter,
|
||||
write_consistency_level = write_consistency_level)
|
||||
|
||||
|
||||
def _comment_sorter_from_cids(cids, sort):
|
||||
def _comment_sorter_from_cids(comments, sort, link, cid_tree, by_36=False):
|
||||
"""Retrieve sort values for comments.
|
||||
|
||||
Useful to fill in any gaps in CommentSortsCache.
|
||||
@@ -119,8 +133,45 @@ def _comment_sorter_from_cids(cids, sort):
|
||||
|
||||
Returns a dictionary from cid to a numeric sort value.
|
||||
"""
|
||||
comments = Comment._byID(cids, data = False, return_dict = False)
|
||||
return dict((x._id, _get_sort_value(x, sort)) for x in comments)
|
||||
# The Q&A sort requires extra information about surrounding comments. It's
|
||||
# more efficient to gather it up here instead of in the guts of the comment
|
||||
# sort, but we don't want to do that for sort types that don't need it.
|
||||
if sort == '_qa':
|
||||
# An OP response will change the sort value for its parent, so we need
|
||||
# to process the parent, too.
|
||||
parent_cids = []
|
||||
responder_ids = link.responder_ids
|
||||
for c in comments:
|
||||
if c.author_id in responder_ids and c.parent_id:
|
||||
parent_cids.append(c.parent_id)
|
||||
parent_comments = Comment._byID(parent_cids, data=True,
|
||||
return_dict=False)
|
||||
comments.extend(parent_comments)
|
||||
|
||||
# Fetch the comments in batch to avoid a bunch of separate calls down
|
||||
# the line.
|
||||
all_child_cids = []
|
||||
for c in comments:
|
||||
child_cids = cid_tree.get(c._id, None)
|
||||
if child_cids:
|
||||
all_child_cids.extend(child_cids)
|
||||
all_child_comments = Comment._byID(all_child_cids, data=True)
|
||||
|
||||
comment_sorter = {}
|
||||
for comment in comments:
|
||||
if sort == '_qa':
|
||||
child_cids = cid_tree.get(comment._id, ())
|
||||
child_comments = (all_child_comments[cid] for cid in child_cids)
|
||||
sort_value = _get_sort_value(comment, sort, link, child_comments)
|
||||
else:
|
||||
sort_value = _get_sort_value(comment, sort)
|
||||
if by_36:
|
||||
id = comment._id36
|
||||
else:
|
||||
id = comment._id
|
||||
comment_sorter[id] = sort_value
|
||||
|
||||
return comment_sorter
|
||||
|
||||
def _get_comment_sorter(link_id, sort):
|
||||
"""Retrieve cached sort values for all comments on a post.
|
||||
@@ -213,7 +264,8 @@ def link_comments_and_sort(link, sort):
|
||||
if not g.disallow_db_writes:
|
||||
update_comment_votes(Comment._byID(sorter_needed, data=True, return_dict=False))
|
||||
|
||||
sorter.update(_comment_sorter_from_cids(sorter_needed, sort))
|
||||
comments = Comment._byID(sorter_needed, data = False, return_dict = False)
|
||||
sorter.update(_comment_sorter_from_cids(comments, sort, link, tree))
|
||||
timer.intermediate('sort')
|
||||
|
||||
if parents is None:
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
# Inc. All Rights Reserved.
|
||||
###############################################################################
|
||||
|
||||
import math
|
||||
from datetime import datetime, timedelta
|
||||
from pylons import g
|
||||
|
||||
@@ -95,3 +96,36 @@ def confidence(int ups, int downs):
|
||||
return _confidences[downs + ups * down_range]
|
||||
else:
|
||||
return _confidence(ups, downs)
|
||||
|
||||
cpdef double qa(int question_ups, int question_downs, int question_length,
|
||||
op_children):
|
||||
"""The Q&A-type sort.
|
||||
|
||||
Similar to the "best" (confidence) sort, but specially designed for
|
||||
Q&A-type threads to highlight good question/answer pairs.
|
||||
"""
|
||||
question_score = confidence(question_ups, question_downs)
|
||||
|
||||
if not op_children:
|
||||
return _qa(question_score, question_length)
|
||||
|
||||
# Only take into account the "best" answer from OP.
|
||||
best_score = None
|
||||
for answer in op_children:
|
||||
score = confidence(answer._ups, answer._downs)
|
||||
if best_score is None or score > best_score:
|
||||
best_score = score
|
||||
answer_length = len(answer.body)
|
||||
return _qa(question_score, question_length, best_score, answer_length)
|
||||
|
||||
cpdef double _qa(double question_score, int question_length,
|
||||
double answer_score=0, int answer_length=1):
|
||||
score_modifier = question_score + answer_score
|
||||
|
||||
# Give more weight to longer posts, but count longer text less and less to
|
||||
# avoid artificially high rankings for long-spam posts.
|
||||
length_modifier = log10(question_length + answer_length)
|
||||
|
||||
# Add together the weighting from the scores and lengths, but emphasize
|
||||
# score more.
|
||||
return score_modifier + (length_modifier / 5)
|
||||
|
||||
@@ -21,4 +21,4 @@
|
||||
###############################################################################
|
||||
|
||||
from r2.lib.db._sorts import epoch_seconds, score, hot, _hot
|
||||
from r2.lib.db._sorts import controversy, confidence
|
||||
from r2.lib.db._sorts import controversy, confidence, qa
|
||||
|
||||
@@ -23,11 +23,12 @@
|
||||
from pylons import c, request
|
||||
from pylons.i18n import _, N_
|
||||
|
||||
from r2.config import feature
|
||||
from r2.lib.db import operators
|
||||
from r2.lib.filters import _force_unicode
|
||||
from r2.lib.search import sorts as search_sorts
|
||||
from r2.lib.strings import StringHandler, plurals
|
||||
from r2.lib.utils import query_string, timeago
|
||||
from r2.lib.utils import class_property, query_string, timeago
|
||||
from r2.lib.wrapped import Styled
|
||||
|
||||
|
||||
@@ -56,6 +57,7 @@ menu = MenuHandler(hot = _('hot'),
|
||||
gilded = _('gilded'),
|
||||
confidence = _('best'),
|
||||
random = _('random'),
|
||||
qa = _('q & a'),
|
||||
saved = _('saved {toolbar}'),
|
||||
recommended = _('recommended'),
|
||||
rising = _('rising'),
|
||||
@@ -549,6 +551,7 @@ class SortMenu(NavMenu):
|
||||
"controversial": operators.desc('_controversy'),
|
||||
"confidence": operators.desc('_confidence'),
|
||||
"random": operators.shuffled('_confidence'),
|
||||
"qa": operators.desc('_qa'),
|
||||
}
|
||||
_reverse_mapping = {v: k for k, v in _mapping.iteritems()}
|
||||
|
||||
@@ -570,10 +573,16 @@ class CommentSortMenu(SortMenu):
|
||||
"""Sort menu for comments pages"""
|
||||
_default = 'confidence'
|
||||
_options = ('confidence', 'top', 'new', 'hot', 'controversial', 'old',
|
||||
'random')
|
||||
hidden_options = ('random',)
|
||||
'random', 'qa',)
|
||||
button_cls = PostButton
|
||||
|
||||
@class_property
|
||||
def hidden_options(cls):
|
||||
if feature.is_enabled('qa_sort'):
|
||||
return ('random',)
|
||||
else:
|
||||
return ('random', 'qa',)
|
||||
|
||||
|
||||
class SearchSortMenu(SortMenu):
|
||||
"""Sort menu for search pages."""
|
||||
|
||||
@@ -112,6 +112,17 @@ class Enum(Storage):
|
||||
return Storage.__contains__(self, item)
|
||||
|
||||
|
||||
class class_property(object):
|
||||
"""A decorator that combines @classmethod and @property.
|
||||
|
||||
http://stackoverflow.com/a/8198300/120999
|
||||
"""
|
||||
def __init__(self, function):
|
||||
self.function = function
|
||||
def __get__(self, instance, cls):
|
||||
return self.function(cls)
|
||||
|
||||
|
||||
class Results():
|
||||
def __init__(self, sa_ResultProxy, build_fn, do_batch=False):
|
||||
self.rp = sa_ResultProxy
|
||||
|
||||
@@ -42,7 +42,7 @@ from r2.lib import hooks, utils
|
||||
from r2.lib.log import log_text
|
||||
from mako.filters import url_escape
|
||||
from r2.lib.strings import strings, Score
|
||||
from r2.lib.db import tdb_cassandra
|
||||
from r2.lib.db import tdb_cassandra, sorts
|
||||
from r2.lib.db.tdb_cassandra import view_of
|
||||
from r2.lib.utils import sanitize_url
|
||||
from r2.models.gold import (
|
||||
@@ -732,6 +732,15 @@ class Link(Thing, Printable):
|
||||
# If available, that should be used instead of calling this
|
||||
return Account._byID(self.author_id, data=True, return_dict=False)
|
||||
|
||||
@property
|
||||
def responder_ids(self):
|
||||
"""Returns an iterable of the OP and other official responders in a
|
||||
thread.
|
||||
|
||||
Designed for Q&A-type threads (eg /r/iama).
|
||||
"""
|
||||
return (self.author_id,)
|
||||
|
||||
def can_flair_slow(self, user):
|
||||
"""Returns whether the specified user can flair this link"""
|
||||
site = self.subreddit_slow
|
||||
@@ -1040,6 +1049,31 @@ class Comment(Thing, Printable):
|
||||
|
||||
return pids
|
||||
|
||||
def _qa(self, children, responder_ids):
|
||||
"""Sort a comment according to the Q&A-type sort.
|
||||
|
||||
Arguments:
|
||||
|
||||
* children -- a list of the children of this comment.
|
||||
* responder_ids -- a set of ids of users categorized as "answerers" for
|
||||
this thread.
|
||||
"""
|
||||
# This sort type only makes sense for comments, unlike the other sorts
|
||||
# that can be applied to any Things, which is why it's defined here
|
||||
# instead of in Thing.
|
||||
|
||||
op_children = [c for c in children if c.author_id in responder_ids]
|
||||
score = sorts.qa(self._ups, self._downs, len(self.body), op_children)
|
||||
|
||||
# When replies to a question, we want to rank OP replies higher than
|
||||
# non-OP replies (generally). This is a rough way to do so.
|
||||
# Don't add extra scoring when we've already added it due to replies,
|
||||
# though (because an OP responds to themselves).
|
||||
if self.author_id in responder_ids and not op_children:
|
||||
score *= 2
|
||||
|
||||
return score
|
||||
|
||||
@classmethod
|
||||
def add_props(cls, user, wrapped):
|
||||
from r2.lib.template_helpers import add_attr, get_domain
|
||||
|
||||
146
r2/r2/tests/unit/models/link_test.py
Normal file
146
r2/r2/tests/unit/models/link_test.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python
|
||||
# The contents of this file are subject to the Common Public Attribution
|
||||
# License Version 1.0. (the "License"); you may not use this file except in
|
||||
# compliance with the License. You may obtain a copy of the License at
|
||||
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
|
||||
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
|
||||
# software over a computer network and provide for limited attribution for the
|
||||
# Original Developer. In addition, Exhibit A has been modified to be consistent
|
||||
# with Exhibit B.
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
|
||||
# the specific language governing rights and limitations under the License.
|
||||
#
|
||||
# The Original Code is reddit.
|
||||
#
|
||||
# The Original Developer is the Initial Developer. The Initial Developer of
|
||||
# the Original Code is reddit Inc.
|
||||
#
|
||||
# All portions of the code written by reddit are Copyright (c) 2006-2015 reddit
|
||||
# Inc. All Rights Reserved.
|
||||
###############################################################################
|
||||
|
||||
import unittest
|
||||
from collections import namedtuple
|
||||
|
||||
import mock
|
||||
|
||||
from r2.lib.comment_tree import _comment_sorter_from_cids
|
||||
from r2.models.link import Comment
|
||||
|
||||
TINY_COMMENT = 'rekt'
|
||||
SHORT_COMMENT = 'What is your favorite car from a rival brand?'
|
||||
MEDIUM_COMMENT = '''I'm humbled by how many of you were interested in talking
|
||||
to me and hearing about what we're doing with autonomous driving. When I signed
|
||||
off last night, I never thought there would be so many more questions than what
|
||||
I was able to answer after I left. I wish I had had more time last night, but
|
||||
I'm answering more questions today.'''
|
||||
LONG_COMMENT = '''That was a really good question then and still is and gets
|
||||
back to something I said earlier about the difference between autonomous drive
|
||||
vehicles, in which the driver remains in control, and self-driving cars, in
|
||||
which a driver is not even required.
|
||||
The focus of our R&D right now is on autonomous drive vehicles that enhance the
|
||||
driving experience, by giving the driver the option of letting the car handle
|
||||
some functions and by improving the car's ability to avoid accidents. That
|
||||
technology exists already and will be rolled out in phases over the next
|
||||
several years.
|
||||
We are not dismissive of self-driving vehicles, but the reality is the timeline
|
||||
for them is much further out into the future.'''
|
||||
|
||||
class CommentMock(Comment):
|
||||
"""This class exists to allow us to call the _qa() method on Comments
|
||||
without having to dick around with everything else they support."""
|
||||
_nodb = True
|
||||
|
||||
def __init__(self, ups=1, downs=0, body=TINY_COMMENT, author_id=None):
|
||||
self._ups = ups
|
||||
self._downs = downs
|
||||
self.body = body
|
||||
self.author_id = author_id
|
||||
|
||||
def __setattr__(self, attr, val):
|
||||
self.__dict__[attr] = val
|
||||
|
||||
def __getattr__(self, attr):
|
||||
return self.attr
|
||||
|
||||
|
||||
class TestCommentQaSort(unittest.TestCase):
|
||||
def test_simple_upvotes(self):
|
||||
"""All else equal, do upvoted comments score better?"""
|
||||
no_upvotes = CommentMock(ups=1)
|
||||
one_upvote = CommentMock(ups=2)
|
||||
many_upvotes = CommentMock(ups=50)
|
||||
no_upvotes_score = no_upvotes._qa((), ())
|
||||
one_upvote_score = one_upvote._qa((), ())
|
||||
many_upvotes_score = many_upvotes._qa((), ())
|
||||
|
||||
self.assertLess(no_upvotes_score, one_upvote_score)
|
||||
self.assertLess(one_upvote_score, many_upvotes_score)
|
||||
|
||||
def test_simple_downvotes(self):
|
||||
"""All else equal, do downvoted comments score worse?"""
|
||||
no_downvotes = CommentMock(downs=0)
|
||||
one_downvote = CommentMock(downs=1)
|
||||
many_downvotes = CommentMock(downs=50)
|
||||
no_downvotes_score = no_downvotes._qa((), ())
|
||||
one_downvote_score = one_downvote._qa((), ())
|
||||
many_downvotes_score = many_downvotes._qa((), ())
|
||||
|
||||
self.assertGreater(no_downvotes_score, one_downvote_score)
|
||||
self.assertGreater(one_downvote_score, many_downvotes_score)
|
||||
|
||||
def test_simple_length(self):
|
||||
"""All else equal, do longer comments score better?"""
|
||||
tiny = CommentMock(body=TINY_COMMENT)
|
||||
short = CommentMock(body=SHORT_COMMENT)
|
||||
medium = CommentMock(body=MEDIUM_COMMENT)
|
||||
long = CommentMock(body=LONG_COMMENT)
|
||||
tiny_score = tiny._qa((), ())
|
||||
short_score = short._qa((), ())
|
||||
medium_score = medium._qa((), ())
|
||||
long_score = long._qa((), ())
|
||||
|
||||
self.assertLess(tiny_score, short_score)
|
||||
self.assertLess(short_score, medium_score)
|
||||
self.assertLess(medium_score, long_score)
|
||||
|
||||
def test_simple_op_responses(self):
|
||||
"""All else equal, do OP answers bump up the score of comments?"""
|
||||
question = CommentMock()
|
||||
answer = CommentMock(author_id=1)
|
||||
no_answer_score = question._qa((), ())
|
||||
no_op_answer_score = question._qa((answer,), (2,))
|
||||
with_op_answer_score = question._qa((answer,), (1,))
|
||||
|
||||
self.assertEqual(no_answer_score, no_op_answer_score)
|
||||
self.assertLess(no_op_answer_score, with_op_answer_score)
|
||||
|
||||
def test_multiple_op_responses(self):
|
||||
"""What effect do multiple OP responses have on a comment's score?"""
|
||||
question = CommentMock()
|
||||
op_answer = CommentMock(author_id=1)
|
||||
another_op_answer = CommentMock(author_id=1)
|
||||
one_answer_score = question._qa((op_answer,), (1,))
|
||||
two_answers_score = question._qa((op_answer, another_op_answer), (1,))
|
||||
|
||||
self.assertEqual(one_answer_score, two_answers_score)
|
||||
|
||||
bad_op_answer = CommentMock(ups=0, author_id=1)
|
||||
good_op_answer = CommentMock(ups=30, author_id=1)
|
||||
good_op_answer_score = question._qa((good_op_answer,), (1,))
|
||||
op_answers = (bad_op_answer, good_op_answer, op_answer)
|
||||
three_op_answers_score = question._qa(op_answers, (1,))
|
||||
|
||||
# Are we basing the score on the highest-scoring OP answer?
|
||||
self.assertEqual(good_op_answer_score, three_op_answers_score)
|
||||
|
||||
def test_simple_op_comments(self):
|
||||
"""All else equal, do comments from OP score better?"""
|
||||
comment = CommentMock(author_id=1)
|
||||
op_score = comment._qa((), (1,))
|
||||
non_op_score = comment._qa((), (2,))
|
||||
|
||||
self.assertLess(non_op_score, op_score)
|
||||
|
||||
Reference in New Issue
Block a user