diff --git a/config/solr/schema.xml b/config/solr/schema.xml
index ecd6c0a38..7568c088a 100644
--- a/config/solr/schema.xml
+++ b/config/solr/schema.xml
@@ -385,6 +385,8 @@ CondeNet, Inc. All Rights Reserved.
+    <field name="spam" type="boolean" indexed="true" stored="true"/>
+    <field name="deleted" type="boolean" indexed="true" stored="true"/>
diff --git a/r2/r2/config/middleware.py b/r2/r2/config/middleware.py
index a7b88647d..ef9953648 100644
--- a/r2/r2/config/middleware.py
+++ b/r2/r2/config/middleware.py
@@ -34,6 +34,7 @@ from pylons.wsgiapp import PylonsApp, PylonsBaseWSGIApp
from r2.config.environment import load_environment
from r2.config.rewrites import rewrites
from r2.lib.utils import rstrips
+from r2.lib.jsontemplates import api_type
#middleware stuff
from r2.lib.html_source import HTMLValidationParser
@@ -240,7 +241,7 @@ class DomainMiddleware(object):
class SubredditMiddleware(object):
- sr_pattern = re.compile(r'^/r/([^/]+)')
+ sr_pattern = re.compile(r'^/r/([^/]{3,20})')
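
NOTE: the tightened pattern requires 3-20 non-slash characters in the
subreddit name; a quick sketch of the effect (assuming the class is
importable as-is):

    bool(SubredditMiddleware.sr_pattern.match('/r/pics'))  # True
    bool(SubredditMiddleware.sr_pattern.match('/r/ab'))    # False (too short)
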
def __init__(self, app):
self.app = app
@@ -255,18 +256,50 @@ class SubredditMiddleware(object):
environ['subreddit'] = 'r'
return self.app(environ, start_response)
+class DomainListingMiddleware(object):
+ domain_pattern = re.compile(r'^/domain/(([\w]+\.)+[\w]+)')
+
+ def __init__(self, app):
+ self.app = app
+
+ def __call__(self, environ, start_response):
+ if not environ.has_key('subreddit'):
+ path = environ['PATH_INFO']
+ domain = self.domain_pattern.match(path)
+ if domain:
+ environ['domain'] = domain.groups()[0]
+ environ['PATH_INFO'] = self.domain_pattern.sub('', path) or '/'
+ return self.app(environ, start_response)
+
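NOTE: a minimal sketch of the intended PATH_INFO rewrite (the echo app and
environ below are illustrative, not part of the patch):

    def echo_app(environ, start_response):
        return [environ['domain'], environ['PATH_INFO']]

    mw = DomainListingMiddleware(echo_app)
    environ = {'PATH_INFO': '/domain/example.com/new'}
    mw(environ, None)   # -> ['example.com', '/new']
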
class ExtensionMiddleware(object):
ext_pattern = re.compile(r'\.([^/]+)$')
+ extensions = {'rss' : ('xml', 'text/xml; charset=UTF-8'),
+ 'xml' : ('xml', 'text/xml; charset=UTF-8'),
+ 'js' : ('js', 'text/javascript; charset=UTF-8'),
+ 'wired' : ('wired', 'text/javascript; charset=UTF-8'),
+ 'embed' : ('htmllite', 'text/javascript; charset=UTF-8'),
+ 'mobile' : ('mobile', 'text/html'),
+ 'png' : ('png', 'image/png'),
+ 'css' : ('css', 'text/css'),
+ 'api' : (api_type(), 'application/json; charset=UTF-8'),
+ 'json' : (api_type(), 'application/json; charset=UTF-8'),
+ 'json-html' : (api_type('html'), 'application/json; charset=UTF-8')}
+
def __init__(self, app):
self.app = app
def __call__(self, environ, start_response):
path = environ['PATH_INFO']
- ext = self.ext_pattern.findall(path)
- if ext:
- environ['extension'] = ext[0]
- environ['PATH_INFO'] = self.ext_pattern.sub('', path) or '/'
+ domain_ext = environ.get('reddit-domain-extension')
+ for ext, val in self.extensions.iteritems():
+            if ext == domain_ext or path.endswith('.' + ext):
+ environ['extension'] = ext
+ environ['render_style'] = val[0]
+ environ['content_type'] = val[1]
+                #strip the extension, but only if it is actually in the path
+                if path.endswith('.' + ext):
+                    environ['PATH_INFO'] = path[:-(len(ext) + 1)]
+ break
return self.app(environ, start_response)
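
NOTE: a sketch of the table-driven rewrite for an API request (the path and
pass-through app are illustrative):

    app = ExtensionMiddleware(lambda environ, start_response: environ)
    environ = {'PATH_INFO': '/r/pics/comments.json'}
    app(environ, None)
    environ['extension']     # 'json'
    environ['render_style']  # api_type(), from r2.lib.jsontemplates
    environ['content_type']  # 'application/json; charset=UTF-8'
    environ['PATH_INFO']     # '/r/pics/comments'
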
class RewriteMiddleware(object):
@@ -382,11 +415,11 @@ def make_app(global_conf, full_stack=True, **app_conf):
app = ProfilingMiddleware(app)
app = SourceViewMiddleware(app)
- app = SubredditMiddleware(app)
app = DomainMiddleware(app)
+ app = DomainListingMiddleware(app)
+ app = SubredditMiddleware(app)
app = ExtensionMiddleware(app)
-
log_path = global_conf.get('log_path')
if log_path:
process_iden = global_conf.get('scgi_port', 'default')
diff --git a/r2/r2/controllers/front.py b/r2/r2/controllers/front.py
index ac8182f77..a2c680c7e 100644
--- a/r2/r2/controllers/front.py
+++ b/r2/r2/controllers/front.py
@@ -32,12 +32,14 @@ from r2.lib.template_helpers import get_domain
from r2.lib.emailer import has_opted_out, Email
from r2.lib.db.operators import desc
from r2.lib.strings import strings
+from r2.lib.solrsearch import RelatedSearchQuery, SubredditSearchQuery, LinkSearchQuery
import r2.lib.db.thing as thing
from listingcontroller import ListingController
from pylons import c, request
import random as rand
import re
+import time as time_module
from urllib import quote_plus
from admin import admin_profile_query
@@ -292,6 +294,7 @@ class FrontController(RedditController):
def GET_related(self, num, article, after, reverse, count):
"""Related page: performs a search using title of article as
the search query."""
+
title = c.site.name + ((': ' + article.title) if hasattr(article, 'title') else '')
query = self.related_replace_regex.sub(self.related_replace_with,
@@ -301,24 +304,25 @@ class FrontController(RedditController):
# longer than this are typically ascii art anyway
query = query[0:1023]
- num, t, pane = self._search(query, time = 'all',
- count = count,
- after = after, reverse = reverse, num = num,
- ignore = [article._fullname],
- types = [Link])
- res = LinkInfoPage(link = article, content = pane).render()
- return res
+ q = RelatedSearchQuery(query, ignore = [article._fullname])
+ num, t, pane = self._search(q,
+ num = num, after = after, reverse = reverse,
+ count = count)
+
+ return LinkInfoPage(link = article, content = pane).render()
@base_listing
@validate(query = nop('q'))
def GET_search_reddits(self, query, reverse, after, count, num):
"""Search reddits by title and description."""
- num, t, spane = self._search(query, num = num, types = [Subreddit],
- sort='points desc', time='all',
- after = after, reverse = reverse,
+ # note that 'downs' is a measure of activity on subreddits
+ q = SubredditSearchQuery(query, sort = 'downs desc',
+ timerange = 'all')
+
+ num, t, spane = self._search(q, num = num, reverse = reverse, after = after,
count = count)
- res = SubredditsPage(content=spane,
+ res = SubredditsPage(content=spane,
prev_search = query,
elapsed_time = t,
num_results = num,
@@ -327,7 +331,7 @@ class FrontController(RedditController):
verify_langs_regex = re.compile(r"^[a-z][a-z](,[a-z][a-z])*$")
@base_listing
- @validate(query=nop('q'),
+ @validate(query = nop('q'),
time = VMenu('action', TimeMenu, remember = False),
langs = nop('langs'))
def GET_search(self, query, num, time, reverse, after, count, langs):
@@ -340,12 +344,12 @@ class FrontController(RedditController):
if langs and self.verify_langs_regex.match(langs):
langs = langs.split(',')
else:
- langs = None
+ langs = c.content_langs
- num, t, spane = self._search(query, time=time,
- num = num, after = after,
- reverse = reverse,
- count = count, types = [Link])
+ q = LinkSearchQuery(q = query, timerange = time, langs = langs)
+
+ num, t, spane = self._search(q, num = num, after = after, reverse = reverse,
+ count = count)
if not isinstance(c.site,FakeSubreddit):
my_reddits_link = "/search%s" % query_string({'q': query})
@@ -365,26 +369,22 @@ class FrontController(RedditController):
return res
- def _search(self, query = '', time=None,
- sort = 'hot desc',
- after = None, reverse = False, num = 25,
- ignore = None, count=0, types = None,
- langs = None):
+ def _search(self, query_obj, num, after, reverse, count=0):
"""Helper function for interfacing with search. Basically a
thin wrapper for SearchBuilder."""
- builder = SearchBuilder(query, num = num,
- sort = sort,
- after = after, reverse = reverse,
- count = count, types = types,
- time = time, ignore = ignore,
- langs = langs,
+ builder = SearchBuilder(query_obj,
+ after = after, num = num, reverse = reverse,
+ count = count,
wrap = ListingController.builder_wrapper)
+
listing = LinkListing(builder, show_nums=True)
# have to do it in two steps since total_num and timing are only
# computed after fetch_more
res = listing.listing()
- return builder.total_num, builder.timing, res
+ timing = time_module.time() - builder.start_time
+
+ return builder.total_num, timing, res
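
NOTE: the call shape after this refactor, for reference (a sketch; the query
is illustrative and assumes controller/request context, since
LinkSearchQuery reads c.site):

    q = LinkSearchQuery(q = 'pycon', timerange = 'week', langs = None)
    num_results, elapsed, pane = self._search(q, num = 25, after = None,
                                              reverse = False)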
diff --git a/r2/r2/controllers/listingcontroller.py b/r2/r2/controllers/listingcontroller.py
index 6d3299ebc..ecb996624 100644
--- a/r2/r2/controllers/listingcontroller.py
+++ b/r2/r2/controllers/listingcontroller.py
@@ -33,6 +33,7 @@ from r2.lib.db.thing import Query, Merge, Relations
from r2.lib.db import queries
from r2.lib.strings import Score
from r2.lib import organic
+from r2.lib.solrsearch import SearchQuery
from r2.lib.utils import iters, check_cheating
from admin import admin_profile_query
@@ -112,6 +113,8 @@ class ListingController(RedditController):
builder_cls = self.builder_cls
elif isinstance(self.query_obj, Query):
builder_cls = QueryBuilder
+ elif isinstance(self.query_obj, SearchQuery):
+ builder_cls = SearchBuilder
elif isinstance(self.query_obj, iters):
builder_cls = IDBuilder
elif isinstance(self.query_obj, queries.CachedResults):
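
NOTE: a sketch of why the new branch matters (the domain is illustrative):

    query_obj = queries.get_domain_links('example.com', 'hot', 'all')
    isinstance(query_obj, SearchQuery)   # True -> SearchBuilder is chosen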
diff --git a/r2/r2/controllers/reddit_base.py b/r2/r2/controllers/reddit_base.py
index 126ff4f49..8415ccc5d 100644
--- a/r2/r2/controllers/reddit_base.py
+++ b/r2/r2/controllers/reddit_base.py
@@ -212,13 +212,18 @@ def over18():
return True
def set_subreddit():
- sr_name=request.environ.get("subreddit", request.params.get('r'))
+ #the r parameter gets added by javascript for POST requests so we
+ #can reference c.site in api.py
+ sr_name = request.environ.get("subreddit", request.POST.get('r'))
+ domain = request.environ.get("domain")
- if not sr_name or sr_name == Default.name:
+ if not sr_name:
+ #check for cnames
sub_domain = request.environ.get('sub_domain')
sr = Subreddit._by_domain(sub_domain) if sub_domain else None
c.site = sr or Default
elif sr_name == 'r':
+ #reddits
c.site = Sub
else:
try:
@@ -227,6 +232,10 @@ def set_subreddit():
c.site = Default
redirect_to("/reddits/create?name=%s" % sr_name)
+ #if we didn't find a subreddit, check for a domain listing
+ if not sr_name and c.site == Default and domain:
+ c.site = DomainSR(domain)
+
if isinstance(c.site, FakeSubreddit):
c.default_sr = True
@@ -235,42 +244,16 @@ def set_subreddit():
abort(404, "not found")
def set_content_type():
- c.extension = request.environ.get('extension') or \
- request.environ.get('reddit-domain-extension') or ''
- c.render_style = 'html'
- if c.extension in ('rss', 'xml'):
- c.render_style = 'xml'
- c.response_content_type = 'text/xml; charset=UTF-8'
- elif c.extension == 'js':
- c.render_style = 'js'
- c.response_content_type = 'text/javascript; charset=UTF-8'
- elif c.extension.startswith('json') or c.extension == "api":
- c.response_content_type = 'application/json; charset=UTF-8'
- c.response_access_control = 'allow <*>'
- if c.extension == 'json-html':
- c.render_style = api_type('html')
- else:
- c.render_style = api_type()
- elif c.extension == 'wired':
- c.render_style = 'wired'
- c.response_content_type = 'text/javascript; charset=UTF-8'
- c.response_wrappers.append(utils.to_js)
- elif c.extension == 'embed':
- c.render_style = 'htmllite'
- c.response_content_type = 'text/javascript; charset=UTF-8'
- c.response_wrappers.append(utils.to_js)
- elif c.extension == 'mobile':
- c.render_style = 'mobile'
- elif c.extension == 'png':
- c.response_content_type = 'image/png'
- c.render_style = 'png'
- elif c.extension == 'css':
- c.response_content_type = 'text/css'
- c.render_style = 'css'
- #Insert new extentions above this line
- elif c.extension not in ('', 'html'):
- # request.path already has the extension stripped off of it
- redirect_to(request.path + utils.query_string(request.get))
+ e = request.environ
+ if e.has_key('extension'):
+ c.render_style = e['render_style']
+ c.response_content_type = e['content_type']
+
+ ext = e['extension']
+ if ext == 'api' or ext.startswith('json'):
+ c.response_access_control = 'allow <*>'
+ if ext in ('embed', 'wired'):
+ c.response_wrappers.append(utils.to_js)
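
NOTE: with ExtensionMiddleware populating the environ, this function mostly
copies values through; a sketch of the contract for one request (values
illustrative):

    # environ after ExtensionMiddleware for GET /r/pics/.embed:
    #   'extension'    -> 'embed'
    #   'render_style' -> 'htmllite'
    #   'content_type' -> 'text/javascript; charset=UTF-8'
    # set_content_type mirrors these onto c and, because the extension is
    # 'embed', appends utils.to_js to c.response_wrappers.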
def get_browser_langs():
browser_langs = []
diff --git a/r2/r2/lib/base.py b/r2/r2/lib/base.py
index 48a1eba84..10bf73bc0 100644
--- a/r2/r2/lib/base.py
+++ b/r2/r2/lib/base.py
@@ -118,7 +118,8 @@ class BaseController(WSGIController):
u.mk_cname(**kw)
# make sure the extensions agree with the current page
- u.set_extension(c.extension)
+ if c.extension:
+ u.set_extension(c.extension)
     # unparse and encode it in utf8
return _force_unicode(u.unparse()).encode('utf8')
diff --git a/r2/r2/lib/cache.py b/r2/r2/lib/cache.py
index ba9324eaa..75b10a0cb 100644
--- a/r2/r2/lib/cache.py
+++ b/r2/r2/lib/cache.py
@@ -225,7 +225,7 @@ def test_cache(cache):
# a cache that occasionally dumps itself to be used for long-running
# processes
class SelfEmptyingCache(LocalCache):
- def __init__(self,max_size=50*1000):
+ def __init__(self,max_size=100*1000):
self.max_size = max_size
def maybe_reset(self):
diff --git a/r2/r2/lib/db/queries.py b/r2/r2/lib/db/queries.py
index 824f60f92..95c1c34ed 100644
--- a/r2/r2/lib/db/queries.py
+++ b/r2/r2/lib/db/queries.py
@@ -5,6 +5,7 @@ from r2.lib.db.operators import asc, desc, timeago
from r2.lib.db import query_queue
from r2.lib.db.sorts import epoch_seconds
from r2.lib.utils import fetch_things2, worker
+from r2.lib.solrsearch import DomainSearchQuery
from datetime import datetime
@@ -23,6 +24,12 @@ def db_sort(sort):
cls, col = db_sorts[sort]
return cls(col)
+search_sort = dict(hot = 'hot desc',
+ new = 'date desc',
+ top = 'points desc',
+ controversial = 'controversy desc',
+ old = 'date asc')
+
db_times = dict(all = None,
hour = Thing.c._date >= timeago('1 hour'),
day = Thing.c._date >= timeago('1 day'),
@@ -176,6 +183,9 @@ def get_links(sr, sort, time):
q._filter(db_times[time])
return make_results(q)
+def get_domain_links(domain, sort, time):
+ return DomainSearchQuery(domain, sort=search_sort[sort], timerange=time)
+
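NOTE: a usage sketch (the domain is illustrative; actually running it needs
a live solr instance at g.solr_url):

    q = get_domain_links('example.com', 'top', 'week')
    # -> DomainSearchQuery with sort = 'points desc', timerange = 'week'
    results = q.run(num = 25)   # pysolr.Results of Link fullnames
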
def user_query(kind, user, sort, time):
"""General profile-page query."""
q = kind._query(kind.c.author_id == user._id,
diff --git a/r2/r2/lib/solrsearch.py b/r2/r2/lib/solrsearch.py
index 2409c50b1..d7090fd97 100644
--- a/r2/r2/lib/solrsearch.py
+++ b/r2/r2/lib/solrsearch.py
@@ -32,7 +32,7 @@ from r2.models import *
from r2.lib.contrib import pysolr
from r2.lib.contrib.pysolr import SolrError
from r2.lib.utils import timeago, set_emptying_cache, IteratorChunker
-from r2.lib.utils import psave, pload, unicode_safe
+from r2.lib.utils import psave, pload, unicode_safe, tup
from r2.lib.cache import SelfEmptyingCache
from Queue import Queue
from threading import Thread
@@ -125,6 +125,8 @@ search_fields={Thing: (Field('fullname', '_fullname'),
Field('lang'),
Field('ups', '_ups', is_number=True, reverse=True),
Field('downs', '_downs', is_number=True, reverse=True),
+ Field('spam','_spam'),
+ Field('deleted','_deleted'),
Field('hot', lambda t: t._hot*1000, is_number=True, reverse=True),
Field('controversy', '_controversy', is_number=True, reverse=True),
Field('points', lambda t: (t._ups - t._downs), is_number=True, reverse=True)),
@@ -162,8 +164,8 @@ search_fields={Thing: (Field('fullname', '_fullname'),
# yes, it's a copy of 'hot'
is_number=True, reverse=True),
ThingField('author',Account,'author_id','name'),
- #ThingField('subreddit',Subreddit,'sr_id','name'),
- ThingField('reddit',Subreddit,'sr_id','name'))}
+ ThingField('subreddit',Subreddit,'sr_id','name'))}
+ #ThingField('reddit',Subreddit,'sr_id','name'))}
def tokenize_things(things,return_dict=False):
"""
@@ -276,6 +278,8 @@ def fetch_batches(t_class,size,since,until):
of `fetch_things`
"""
q=t_class._query(t_class.c._date >= since,
+ t_class.c._spam == (True,False),
+ t_class.c._deleted == (True,False),
t_class.c._date < until,
sort = desc('_date'),
limit = size,
@@ -375,8 +379,8 @@ def reindex_all(types = None, delete_all_first=False):
for batch in fetch_batches(cls,1000,
timeago("50 years"),
start_t):
- r = tokenize_things([x for x in batch
- if not x._spam and not x._deleted ])
+ r = tokenize_things([ x for x in batch
+ if not x._spam and not x._deleted ])
count += len(r)
print ("Processing %s #%d(%s): %s"
@@ -465,173 +469,241 @@ def combine_searchterms(terms):
def swap_strings(s,this,that):
"""
Just swaps substrings, like:
- s = "sort(asc)"
- swap_strings(s,'asc','desc')
- s -> "sort desc"
+ s = "hot asc"
+ s = swap_strings(s,'asc','desc')
+ s == "hot desc"
     uses 'tmp' as a replacement string, so don't use it for anything
very complicated
"""
return s.replace(this,'tmp').replace(that,this).replace('tmp',that)
-def search_things(q, sort = 'hot desc',
- after = None,
- subreddits = None,
- authors = None,
- num = 100, reverse = False,
- timerange = None, langs = None,
- types = None,
- boost = []):
- """
- Takes a given query and returns a list of Things that match
- that query. See Builder for the use of `after`, `reverse`, and
- `num`. Queries on params are OR queries, except `timerange`
- and `types`
- """
- if not q or not g.solr_url:
- return pysolr.Results([],0)
+class SearchQuery(object):
+ def __init__(self, q, sort, fields = [], subreddits = [], authors = [],
+ types = [], timerange = None, spam = False, deleted = False):
- # there are two parts to our query: what the user typed (parsed
- # with Solr's DisMax parser), and what we are adding to it. The
- # latter is called the "boost" (and is parsed using full Lucene
- # syntax), and it can be added to via the `boost` parameter (which
- # we have to copy since we append to it)
- boost = list(boost)
+ self.q = q
+ self.fields = fields
+ self.sort = sort
+ self.subreddits = subreddits
+ self.authors = authors
+ self.types = types
+ self.spam = spam
+ self.deleted = deleted
- # `score` refers to Solr's score (relevency to the search given),
- # not our score (sums of ups and downs).
- sort = "score desc, %s, date desc, fullname asc" % (sort,)
- if reverse:
- sort = swap_strings(sort,'asc','desc')
-
- if timerange:
- def time_to_searchstr(t):
- if isinstance(t, datetime):
- t = t.strftime('%Y-%m-%dT%H:%M:%S.000Z')
- elif isinstance(t, date):
- t = t.strftime('%Y-%m-%dT00:00:00.000Z')
- elif isinstance(t,str):
- t = t
- return t
-
- (fromtime, totime) = timerange
- fromtime = time_to_searchstr(fromtime)
- totime = time_to_searchstr(totime)
- boost.append("+date:[%s TO %s]"
- % (fromtime,totime))
-
- if subreddits:
- def subreddit_to_searchstr(sr):
- if isinstance(sr,Subreddit):
- return ('sr_id','%d' % sr.id)
- elif isinstance(sr,str) or isinstance(sr,unicode):
- return ('reddit',sr)
- else:
- return ('sr_id','%d' % sr)
-
- if isinstance(subreddits,list) or isinstance(subreddits,tuple):
- s_subreddits = map(subreddit_to_searchstr, subreddits)
+ if timerange in ['hour','week','day','month','year']:
+ self.timerange = (timeago("1 %s" % timerange),"NOW")
+ elif timerange == 'all' or timerange is None:
+ self.timerange = None
else:
- s_subreddits = (subreddit_to_searchstr(subreddits),)
+ self.timerange = timerange
- boost.append("+(%s)^2" % combine_searchterms(s_subreddits))
+ def run(self, after = None, num = 100, reverse = False):
+ if not self.q or not g.solr_url:
+ return pysolr.Results([],0)
- if authors:
- def author_to_searchstr(a):
- if isinstance(a,Account):
- return ('author_id','%d' % a.id)
- elif isinstance(a,str) or isinstance(a,unicode):
- return ('author',a)
+ # there are two parts to our query: what the user typed
+ # (parsed with Solr's DisMax parser), and what we are adding
+ # to it. The latter is called the "boost" (and is parsed using
+ # full Lucene syntax), and it can be added to via the `boost`
+ # parameter
+ boost = []
+
+ if not self.spam:
+ boost.append("-spam:true")
+ if not self.deleted:
+ boost.append("-deleted:true")
+
+ if self.timerange:
+ def time_to_searchstr(t):
+ if isinstance(t, datetime):
+ t = t.strftime('%Y-%m-%dT%H:%M:%S.000Z')
+ elif isinstance(t, date):
+ t = t.strftime('%Y-%m-%dT00:00:00.000Z')
+ elif isinstance(t,str):
+ t = t
+ return t
+
+ (fromtime, totime) = self.timerange
+ fromtime = time_to_searchstr(fromtime)
+ totime = time_to_searchstr(totime)
+ boost.append("+date:[%s TO %s]"
+ % (fromtime,totime))
+
+ if self.subreddits:
+ def subreddit_to_searchstr(sr):
+ if isinstance(sr,Subreddit):
+ return ('sr_id','%d' % sr.id)
+ elif isinstance(sr,str) or isinstance(sr,unicode):
+ return ('subreddit',sr)
+ else:
+ return ('sr_id','%d' % sr)
+
+ s_subreddits = map(subreddit_to_searchstr, tup(self.subreddits))
+
+ boost.append("+(%s)" % combine_searchterms(s_subreddits))
+
+ if self.authors:
+ def author_to_searchstr(a):
+ if isinstance(a,Account):
+ return ('author_id','%d' % a.id)
+ elif isinstance(a,str) or isinstance(a,unicode):
+ return ('author',a)
+ else:
+ return ('author_id','%d' % a)
+
+ s_authors = map(author_to_searchstr,tup(self.authors))
+
+ boost.append('+(%s)^2' % combine_searchterms(s_authors))
+
+
+ def type_to_searchstr(t):
+ if isinstance(t,str):
+ return ('type',t)
else:
- return ('author_id','%d' % a)
-
- if isinstance(authors,list) or isinstance(authors,tuple):
- s_authors = map(author_to_searchstr,authors)
- else:
- s_authors = map(author_to_searchstr,(authors,))
-
- boost.append('+(%s)^2' % combine_searchterms(s_authors))
-
- # the set of languages is used to determine the fields to search,
- # named ('contents_%s' % lang), but 'contents' (which is split
- # only on whitespace) is always also searched. This means that
- # all_langs and schema.xml must be kept in synch
- default_fields = ['contents^1.5','contents_ws^3',
- 'site^1','author^1', 'reddit^1', 'url^1']
- if langs == None:
- # only search 'contents'
- fields = default_fields
- else:
- if langs == 'all':
- langs = searchable_langs
- fields = set([("%s^2" % lang_to_fieldname(lang)) for lang in langs]
- + default_fields)
-
- if not types:
- types = indexed_types
-
- def type_to_searchstr(t):
- if isinstance(t,str):
- return ('type',t)
- else:
- return ('type',t.__name__.lower())
+ return ('type',t.__name__.lower())
- s_types = map(type_to_searchstr,types)
- boost.append("+%s" % combine_searchterms(s_types))
+ s_types = map(type_to_searchstr,self.types)
+ boost.append("+%s" % combine_searchterms(s_types))
- # everything else that solr needs to know
- solr_params = dict(fl = 'fullname', # the field(s) to return
- qt = 'dismax', # the query-handler (dismax supports 'bq' and 'qf')
- # qb = '3',
- bq = ' '.join(boost),
- qf = ' '.join(fields),
- mm = '75%') # minimum number of clauses that should match
+ q,solr_params = self.solr_params(self.q,boost)
- with SolrConnection() as s:
- if after:
- # size of the pre-search to run in the case that we need
- # to search more than once. A bigger one can reduce the
- # number of searches that need to be run twice, but if
- # it's bigger than the default display size, it could
- # waste some
- PRESEARCH_SIZE = num
+ try:
+ search = self.run_search(q, self.sort, solr_params,
+ reverse, after, num)
+ return search
- # run a search and get back the number of hits, so that we
- # can re-run the search with that max_count.
- pre_search = s.search(q,sort,rows=PRESEARCH_SIZE,
+ except SolrError,e:
+ g.log.error(str(e))
+ return pysolr.Results([],0)
+
+ @classmethod
+ def run_search(cls, q, sort, solr_params, reverse, after, num):
+ "returns pysolr.Results(docs=[fullname()],hits=int())"
+
+ if reverse:
+ sort = swap_strings(sort,'asc','desc')
+
+ g.log.debug("Searching q=%s" % q)
+
+ with SolrConnection() as s:
+ if after:
+ # size of the pre-search to run in the case that we
+ # need to search more than once. A bigger one can
+ # reduce the number of searches that need to be run
+ # twice, but if it's bigger than the default display
+ # size, it could waste some
+ PRESEARCH_SIZE = num
+
+ # run a search and get back the number of hits, so
+ # that we can re-run the search with that max_count.
+ pre_search = s.search(q,sort,rows=PRESEARCH_SIZE,
+ other_params = solr_params)
+
+ if (PRESEARCH_SIZE >= pre_search.hits
+ or pre_search.hits == len(pre_search.docs)):
+ # don't run a second search if our pre-search
+ # found all of the elements anyway
+ search = pre_search
+ else:
+ # we have to run a second search, but we can limit
+ # the duplicated transfer of the first few records
+ # since we already have those from the pre_search
+ second_search = s.search(q,sort,
+ start=len(pre_search.docs),
+ rows=pre_search.hits - len(pre_search.docs),
+ other_params = solr_params)
+ search = pysolr.Results(pre_search.docs + second_search.docs,
+ pre_search.hits)
+
+ search.docs = [ i['fullname'] for i in search.docs ]
+ search.docs = get_after(search.docs, after._fullname, num)
+ else:
+ search = s.search(q,sort,rows=num,
other_params = solr_params)
+ search.docs = [ i['fullname'] for i in search.docs ]
- if (PRESEARCH_SIZE >= pre_search.hits
- or pre_search.hits == len(pre_search.docs)):
- # don't run a second search if our pre-search found
- # all of the elements anyway
- search = pre_search
- else:
- # we have to run a second search, but we can limit the
- # duplicated transfer of the first few records since
- # we already have those from the pre_search
- second_search = s.search(q,sort,
- start=len(pre_search.docs),
- rows=pre_search.hits - len(pre_search.docs),
- other_params = solr_params)
- search = pysolr.Results(pre_search.docs + second_search.docs,
- pre_search.hits)
+ return search
- fullname = after._fullname
- for i, item in enumerate(search.docs):
- if item['fullname'] == fullname:
- search.docs = search.docs[i+1:i+1+num]
- break
- else:
- g.log.debug("I got an after query, but the fullname was not present in the results")
- search.docs = search.docs[0:num]
+ def solr_params(self,*k,**kw):
+ raise NotImplementedError
+
+class UserSearchQuery(SearchQuery):
+ "Base class for queries that use the dismax parser; requires self.mm"
+ def __init__(self, q, sort=None, fields=[], langs=None, **kw):
+ default_fields = ['contents^1.5','contents_ws^3'] + fields
+
+ if sort is None:
+ sort = 'score desc, hot desc, date desc'
+
+ if langs is None:
+ fields = default_fields
else:
- search = s.search(q,sort,rows=num,
- other_params = solr_params)
+ if langs == 'all':
+ langs = searchable_langs
+ fields = set([("%s^2" % lang_to_fieldname(lang)) for lang in langs]
+ + default_fields)
- hits = search.hits
- things = Thing._by_fullname([i['fullname'] for i in search.docs],
- data = True, return_dict = False)
+ # default minimum match
+ self.mm = '75%'
- return pysolr.Results(things,hits)
+ SearchQuery.__init__(self, q, sort, fields = fields, **kw)
+ def solr_params(self, q, boost):
+ return q, dict(fl = 'fullname',
+ qt = 'dismax',
+ bq = ' '.join(boost),
+ qf = ' '.join(self.fields),
+ mm = self.mm)
+
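NOTE: a sketch of the dismax parameters this produces (the query string and
boost are illustrative):

    usq = SubredditSearchQuery('pics', sort = 'downs desc', timerange = 'all')
    usq.solr_params(usq.q, ['+type:subreddit'])
    # -> ('pics', {'fl': 'fullname', 'qt': 'dismax', 'bq': '+type:subreddit',
    #              'qf': 'contents^1.5 contents_ws^3', 'mm': '75%'})
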
+class LinkSearchQuery(UserSearchQuery):
+ def __init__(self, q, **kw):
+ additional_fields = ['site^1','author^1', 'subreddit^1', 'url^1']
+
+ subreddits = None
+ authors = None
+ if c.site == subreddit.Default:
+ subreddits = Subreddit.user_subreddits(c.user)
+ elif c.site == subreddit.Friends and c.user.friends:
+ authors = c.user.friends
+ elif not isinstance(c.site,subreddit.FakeSubreddit):
+ subreddits = [c.site._id]
+
+ UserSearchQuery.__init__(self, q, fields = additional_fields,
+ subreddits = subreddits, authors = authors,
+ types=[Link], **kw)
+
+class RelatedSearchQuery(LinkSearchQuery):
+ def __init__(self, q, ignore = [], **kw):
+ self.ignore = set(ignore) if ignore else set()
+
+ LinkSearchQuery.__init__(self, q, sort = 'score desc', **kw)
+
+ self.mm = '25%'
+
+ def run(self, *k, **kw):
+ search = LinkSearchQuery.run(self, *k, **kw)
+ search.docs = [ x for x in search.docs if x not in self.ignore ]
+ return search
+
+class SubredditSearchQuery(UserSearchQuery):
+ def __init__(self, q, **kw):
+ UserSearchQuery.__init__(self, q, types=[Subreddit], **kw)
+
+class DomainSearchQuery(SearchQuery):
+ def __init__(self, domain, **kw):
+ q = '+site:%s' % domain
+
+ SearchQuery.__init__(self, q=q, fields=['site'],types=[Link], **kw)
+
+ def solr_params(self, q, boost):
+ q = q + ' ' + ' '.join(boost)
+ return q, dict(fl='fullname',
+ qt='standard')
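
NOTE: unlike the dismax queries above, the boost terms are folded into the
query string for the standard handler; a sketch (domain and boost
illustrative):

    dsq = DomainSearchQuery('example.com', sort = 'hot desc', timerange = None)
    dsq.solr_params(dsq.q, ['-spam:true', '-deleted:true'])
    # -> ('+site:example.com -spam:true -deleted:true',
    #     {'fl': 'fullname', 'qt': 'standard'})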
+
+def get_after(fullnames, fullname, num):
+ for i, item in enumerate(fullnames):
+ if item == fullname:
+ return fullnames[i+1:i+num+1]
+ else:
+ return fullnames[:num]
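
NOTE: get_after pages past a known fullname, falling back to the head of the
list when it is absent:

    get_after(['t3_a', 't3_b', 't3_c', 't3_d'], 't3_b', 2)   # ['t3_c', 't3_d']
    get_after(['t3_a', 't3_b', 't3_c', 't3_d'], 't3_x', 2)   # ['t3_a', 't3_b']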
diff --git a/r2/r2/lib/utils/utils.py b/r2/r2/lib/utils/utils.py
index 7920ee06c..25e294b99 100644
--- a/r2/r2/lib/utils/utils.py
+++ b/r2/r2/lib/utils/utils.py
@@ -999,6 +999,7 @@ def title_to_url(title, max_length = 50):
return title
def debug_print(fn):
+ from pylons import g
def new_fn(*k,**kw):
ret = fn(*k,**kw)
g.log.debug("Fn: %s; k=%s; kw=%s\nRet: %s"
diff --git a/r2/r2/models/builder.py b/r2/r2/models/builder.py
index 2368331c8..5bbed1038 100644
--- a/r2/r2/models/builder.py
+++ b/r2/r2/models/builder.py
@@ -354,42 +354,26 @@ class IDBuilder(QueryBuilder):
return done, new_items
class SearchBuilder(QueryBuilder):
- def __init__(self, query, wrap = Wrapped, sort = None, ignore = [],
- time = time, types = None, langs = None, **kw):
- QueryBuilder.__init__(self, query, wrap=wrap, **kw)
- self.sort = sort
- self.time = time
- self.types = types
- self.timing = 0
- self.total_num = 0
- self.langs = langs
-
- self.ignore = set(x for x in (ignore if ignore else []))
-
def init_query(self):
- subreddits = None
- authors = None
- if c.site == subreddit.Default:
- subreddits = Subreddit.user_subreddits(c.user)
- elif c.site == subreddit.Friends and c.user.friends:
- authors = c.user.friends
- elif not isinstance(c.site,subreddit.FakeSubreddit):
- subreddits = c.site._id
-
- self.subreddits = subreddits
- self.authors = authors
-
self.skip = True
+        self.total_num = 0
+        self.start_time = time.time()
def keep_item(self,item):
- skip_if = item._spam or item._deleted or item._fullname in self.ignore
- return not skip_if
+ # doesn't use the default keep_item because we want to keep
+ # things that were voted on, even if they've chosen to hide
+ # them in normal listings
+ if item._spam or item._deleted:
+ return False
+ else:
+ return True
+
def fetch_more(self, last_item, num_have):
from r2.lib import solrsearch
- start_t = time.time()
-
done = False
limit = None
if self.num:
@@ -401,25 +385,13 @@ class SearchBuilder(QueryBuilder):
else:
done = True
- langs = c.content_langs
- if self.langs:
- langs += self.langs
+ search = self.query.run(after = last_item or self.after,
+ reverse = self.reverse,
+ num = limit)
- if self.time in ['hour','week','day','month']:
- timerange = (timeago("1 %s" % self.time),"NOW")
- else:
- timerange = None
+ new_items = Thing._by_fullname(search.docs, data = True, return_dict=False)
- new_items = solrsearch.search_things(q = self.query or '', sort = self.sort,
- after = last_item,
- subreddits = self.subreddits,
- authors = self.authors,
- num = limit, reverse = self.reverse,
- timerange = timerange, langs = langs,
- types = self.types)
-
- self.total_num = new_items.hits
- self.timing = time.time() - start_t
+ self.total_num = search.hits
return done, new_items
diff --git a/r2/r2/models/subreddit.py b/r2/r2/models/subreddit.py
index 4c9e5cf52..6694f65b4 100644
--- a/r2/r2/models/subreddit.py
+++ b/r2/r2/models/subreddit.py
@@ -414,9 +414,12 @@ class Subreddit(Thing, Printable):
class FakeSubreddit(Subreddit):
over_18 = False
- title = ''
_nodb = True
+ def __init__(self):
+ Subreddit.__init__(self)
+ self.title = ''
+
def is_moderator(self, user):
return c.user_is_loggedin and c.user_is_admin
@@ -568,6 +571,21 @@ class SubSR(FakeSubreddit):
@property
def path(self):
return "/reddits/"
+
+class DomainSR(FakeSubreddit):
+ @property
+ def path(self):
+ return '/domain/' + self.domain
+
+ def __init__(self, domain):
+ FakeSubreddit.__init__(self)
+ self.domain = domain
+ self.name = domain
+ self.title = domain + ' ' + _('on reddit.com')
+
+ def get_links(self, sort, time):
+ from r2.lib.db import queries
+ return queries.get_domain_links(self.domain, sort, time)
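
NOTE: a sketch of the new fake subreddit (domain illustrative; the title
assumes an English locale for the _() string):

    sr = DomainSR('example.com')
    sr.path                      # '/domain/example.com'
    sr.title                     # 'example.com on reddit.com'
    sr.get_links('new', 'all')   # DomainSearchQuery with sort = 'date desc'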
Sub = SubSR()
Friends = FriendsSR()
diff --git a/r2/r2/public/static/reddit.css b/r2/r2/public/static/reddit.css
index cc4b033f5..b04442c58 100644
--- a/r2/r2/public/static/reddit.css
+++ b/r2/r2/public/static/reddit.css
@@ -222,7 +222,7 @@ input[type=checkbox], input[type=radio] { margin-top: .4em; }
padding: 2px 6px 1px 6px;
background-color: white;
border: 1px solid #5f99cf;
- border-bottom: none;
+ border-bottom: 1px solid white;
}
#search {
@@ -588,6 +588,7 @@ before enabling */
padding: 5px 10px;
margin: 5px 310px 5px 0px;
border: 1px solid orange;
+ font-size: small;
}
.menuarea {
@@ -958,8 +959,7 @@ a.star { text-decoration: none; color: #ff8b60 }
.searchpane a { color: #369 }*/
.searchpane {
- margin: 5px;
- margin-right: 310px;
+ margin: 5px 310px 5px 0px;
}
.searchpane #search input[type=text] { }
diff --git a/r2/r2/templates/base.html b/r2/r2/templates/base.html
index e9ecba136..e979dfc05 100644
--- a/r2/r2/templates/base.html
+++ b/r2/r2/templates/base.html
@@ -45,7 +45,7 @@
var sr = {};
var logged = ${c.user_is_loggedin and ("'%s'" % c.user.name) or "false"};
- var post_site = "${c.site.name}";
+ var post_site = "${c.site.name if not c.default_sr else ''}";
var cnameframe = ${'true' if c.cname else 'false'};
var modhash = ${"'%s'" % c.modhash or "false"};
var cur_domain = "${get_domain(cname = True, subreddit = False) if c.frameless_cname else g.domain}";