Tools for webmasters to monitor their content on reddit (uses Solr for pulling domain information). Also includes a refactor of solrsearch.py and its usage, which should fix bug #179 as a side effect.
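In rough terms, the refactor replaces the pile of keyword arguments that search_things() took with small query objects that know how to run themselves against Solr. A minimal sketch of the intended usage, assuming the names introduced in the diff below (the literal query strings and example arguments are illustrative, and the query classes expect the usual pylons request context):

# Sketch only: the query classes are from the solrsearch.py hunk below; the
# example query text and arguments are assumptions, not part of the commit.
from r2.lib.solrsearch import LinkSearchQuery, DomainSearchQuery

# Controllers now build a query object up front instead of threading a dozen
# keyword arguments through search_things():
q = LinkSearchQuery(q = 'some search terms', timerange = 'week', langs = None)

# SearchBuilder (see the builder.py hunk) holds the object and calls
# q.run(after = ..., num = ..., reverse = ...) whenever it needs more results.
results = q.run(num = 25)          # pysolr.Results whose docs are fullnames

# Domain pages for webmasters reuse the same machinery:
domain_q = DomainSearchQuery('example.com', sort = 'hot desc', timerange = 'all')
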
@@ -385,6 +385,8 @@ CondeNet, Inc. All Rights Reserved.
<field name="hot" type="hotness" indexed="true" stored="true" required="true" reversed="true" />
<field name="controversy" type="sfloat" indexed="true" stored="true" required="true" reversed="true" />
<field name="points" type="sint" indexed="true" stored="true" required="true" reversed="true" />
<field name="spam" type="boolean" indexed="true" stored="true" required="false" />
<field name="deleted" type="boolean" indexed="true" stored="true" required="false" />
<!-- subreddit,link,comment -->
<field name="author_id" type="integer" indexed="true" stored="false" required="false" />
<field name="author" type="string" indexed="true" stored="false" required="false" />

@@ -34,6 +34,7 @@ from pylons.wsgiapp import PylonsApp, PylonsBaseWSGIApp
from r2.config.environment import load_environment
from r2.config.rewrites import rewrites
from r2.lib.utils import rstrips
from r2.lib.jsontemplates import api_type

#middleware stuff
from r2.lib.html_source import HTMLValidationParser
@@ -240,7 +241,7 @@ class DomainMiddleware(object):


class SubredditMiddleware(object):
sr_pattern = re.compile(r'^/r/([^/]+)')
sr_pattern = re.compile(r'^/r/([^/]{3,20})')

def __init__(self, app):
self.app = app
@@ -255,18 +256,50 @@ class SubredditMiddleware(object):
environ['subreddit'] = 'r'
return self.app(environ, start_response)

class DomainListingMiddleware(object):
domain_pattern = re.compile(r'^/domain/(([\w]+\.)+[\w]+)')

def __init__(self, app):
self.app = app

def __call__(self, environ, start_response):
if not environ.has_key('subreddit'):
path = environ['PATH_INFO']
domain = self.domain_pattern.match(path)
if domain:
environ['domain'] = domain.groups()[0]
environ['PATH_INFO'] = self.domain_pattern.sub('', path) or '/'
return self.app(environ, start_response)

class ExtensionMiddleware(object):
ext_pattern = re.compile(r'\.([^/]+)$')

extensions = {'rss' : ('xml', 'text/xml; charset=UTF-8'),
'xml' : ('xml', 'text/xml; charset=UTF-8'),
'js' : ('js', 'text/javascript; charset=UTF-8'),
'wired' : ('wired', 'text/javascript; charset=UTF-8'),
'embed' : ('htmllite', 'text/javascript; charset=UTF-8'),
'mobile' : ('mobile', 'text/html'),
'png' : ('png', 'image/png'),
'css' : ('css', 'text/css'),
'api' : (api_type(), 'application/json; charset=UTF-8'),
'json' : (api_type(), 'application/json; charset=UTF-8'),
'json-html' : (api_type('html'), 'application/json; charset=UTF-8')}

def __init__(self, app):
self.app = app

def __call__(self, environ, start_response):
path = environ['PATH_INFO']
ext = self.ext_pattern.findall(path)
if ext:
environ['extension'] = ext[0]
environ['PATH_INFO'] = self.ext_pattern.sub('', path) or '/'
domain_ext = environ.get('reddit-domain-extension')
for ext, val in self.extensions.iteritems():
if ext == domain_ext or path.endswith(ext):
environ['extension'] = ext
environ['render_style'] = val[0]
environ['content_type'] = val[1]
#strip off the extension
environ['PATH_INFO'] = path[:-(len(ext) + 1)]
break
return self.app(environ, start_response)

class RewriteMiddleware(object):
@@ -382,11 +415,11 @@ def make_app(global_conf, full_stack=True, **app_conf):
app = ProfilingMiddleware(app)
app = SourceViewMiddleware(app)

app = SubredditMiddleware(app)
app = DomainMiddleware(app)
app = DomainListingMiddleware(app)
app = SubredditMiddleware(app)
app = ExtensionMiddleware(app)


log_path = global_conf.get('log_path')
if log_path:
process_iden = global_conf.get('scgi_port', 'default')

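Taken together with the set_subreddit() and DomainSR changes further down, the new middleware is what gives /domain/ URLs their own listing. A small illustration of what DomainListingMiddleware does to a request path (the path itself is hypothetical):

import re

# domain_pattern as defined in DomainListingMiddleware above
domain_pattern = re.compile(r'^/domain/(([\w]+\.)+[\w]+)')

path = '/domain/example.com/new'                          # hypothetical request path
m = domain_pattern.match(path)
assert m.groups()[0] == 'example.com'                     # -> environ['domain']
assert (domain_pattern.sub('', path) or '/') == '/new'    # -> environ['PATH_INFO']

# set_subreddit() (in the reddit_base.py hunk below) then maps that domain to
# c.site = DomainSR(domain), whose get_links() delegates to
# queries.get_domain_links(), i.e. a Solr DomainSearchQuery.
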
@@ -32,12 +32,14 @@ from r2.lib.template_helpers import get_domain
from r2.lib.emailer import has_opted_out, Email
from r2.lib.db.operators import desc
from r2.lib.strings import strings
from r2.lib.solrsearch import RelatedSearchQuery, SubredditSearchQuery, LinkSearchQuery
import r2.lib.db.thing as thing
from listingcontroller import ListingController
from pylons import c, request

import random as rand
import re
import time as time_module
from urllib import quote_plus

from admin import admin_profile_query
@@ -292,6 +294,7 @@ class FrontController(RedditController):
def GET_related(self, num, article, after, reverse, count):
"""Related page: performs a search using title of article as
the search query."""

title = c.site.name + ((': ' + article.title) if hasattr(article, 'title') else '')

query = self.related_replace_regex.sub(self.related_replace_with,
@@ -301,24 +304,25 @@ class FrontController(RedditController):
# longer than this are typically ascii art anyway
query = query[0:1023]

num, t, pane = self._search(query, time = 'all',
count = count,
after = after, reverse = reverse, num = num,
ignore = [article._fullname],
types = [Link])
res = LinkInfoPage(link = article, content = pane).render()
return res
q = RelatedSearchQuery(query, ignore = [article._fullname])
num, t, pane = self._search(q,
num = num, after = after, reverse = reverse,
count = count)

return LinkInfoPage(link = article, content = pane).render()

@base_listing
@validate(query = nop('q'))
def GET_search_reddits(self, query, reverse, after, count, num):
"""Search reddits by title and description."""
num, t, spane = self._search(query, num = num, types = [Subreddit],
sort='points desc', time='all',
after = after, reverse = reverse,
# note that 'downs' is a measure of activity on subreddits
q = SubredditSearchQuery(query, sort = 'downs desc',
timerange = 'all')

num, t, spane = self._search(q, num = num, reverse = reverse, after = after,
count = count)

res = SubredditsPage(content=spane,
res = SubredditsPage(content=spane,
prev_search = query,
elapsed_time = t,
num_results = num,
@@ -327,7 +331,7 @@ class FrontController(RedditController):

verify_langs_regex = re.compile(r"^[a-z][a-z](,[a-z][a-z])*$")
@base_listing
@validate(query=nop('q'),
@validate(query = nop('q'),
time = VMenu('action', TimeMenu, remember = False),
langs = nop('langs'))
def GET_search(self, query, num, time, reverse, after, count, langs):
@@ -340,12 +344,12 @@ class FrontController(RedditController):
if langs and self.verify_langs_regex.match(langs):
langs = langs.split(',')
else:
langs = None
langs = c.content_langs

num, t, spane = self._search(query, time=time,
num = num, after = after,
reverse = reverse,
count = count, types = [Link])
q = LinkSearchQuery(q = query, timerange = time, langs = langs)

num, t, spane = self._search(q, num = num, after = after, reverse = reverse,
count = count)

if not isinstance(c.site,FakeSubreddit):
my_reddits_link = "/search%s" % query_string({'q': query})
@@ -365,26 +369,22 @@ class FrontController(RedditController):

return res

def _search(self, query = '', time=None,
sort = 'hot desc',
after = None, reverse = False, num = 25,
ignore = None, count=0, types = None,
langs = None):
def _search(self, query_obj, num, after, reverse, count=0):
"""Helper function for interfacing with search. Basically a
thin wrapper for SearchBuilder."""
builder = SearchBuilder(query, num = num,
sort = sort,
after = after, reverse = reverse,
count = count, types = types,
time = time, ignore = ignore,
langs = langs,
builder = SearchBuilder(query_obj,
after = after, num = num, reverse = reverse,
count = count,
wrap = ListingController.builder_wrapper)

listing = LinkListing(builder, show_nums=True)

# have to do it in two steps since total_num and timing are only
# computed after fetch_more
res = listing.listing()
return builder.total_num, builder.timing, res
timing = time_module.time() - builder.start_time

return builder.total_num, timing, res

@@ -33,6 +33,7 @@ from r2.lib.db.thing import Query, Merge, Relations
from r2.lib.db import queries
from r2.lib.strings import Score
from r2.lib import organic
from r2.lib.solrsearch import SearchQuery
from r2.lib.utils import iters, check_cheating

from admin import admin_profile_query
@@ -112,6 +113,8 @@ class ListingController(RedditController):
builder_cls = self.builder_cls
elif isinstance(self.query_obj, Query):
builder_cls = QueryBuilder
elif isinstance(self.query_obj, SearchQuery):
builder_cls = SearchBuilder
elif isinstance(self.query_obj, iters):
builder_cls = IDBuilder
elif isinstance(self.query_obj, queries.CachedResults):

@@ -212,13 +212,18 @@ def over18():
return True

def set_subreddit():
sr_name=request.environ.get("subreddit", request.params.get('r'))
#the r parameter gets added by javascript for POST requests so we
#can reference c.site in api.py
sr_name = request.environ.get("subreddit", request.POST.get('r'))
domain = request.environ.get("domain")

if not sr_name or sr_name == Default.name:
if not sr_name:
#check for cnames
sub_domain = request.environ.get('sub_domain')
sr = Subreddit._by_domain(sub_domain) if sub_domain else None
c.site = sr or Default
elif sr_name == 'r':
#reddits
c.site = Sub
else:
try:
@@ -227,6 +232,10 @@ def set_subreddit():
c.site = Default
redirect_to("/reddits/create?name=%s" % sr_name)

#if we didn't find a subreddit, check for a domain listing
if not sr_name and c.site == Default and domain:
c.site = DomainSR(domain)

if isinstance(c.site, FakeSubreddit):
c.default_sr = True

@@ -235,42 +244,16 @@ def set_subreddit():
abort(404, "not found")

def set_content_type():
c.extension = request.environ.get('extension') or \
request.environ.get('reddit-domain-extension') or ''
c.render_style = 'html'
if c.extension in ('rss', 'xml'):
c.render_style = 'xml'
c.response_content_type = 'text/xml; charset=UTF-8'
elif c.extension == 'js':
c.render_style = 'js'
c.response_content_type = 'text/javascript; charset=UTF-8'
elif c.extension.startswith('json') or c.extension == "api":
c.response_content_type = 'application/json; charset=UTF-8'
c.response_access_control = 'allow <*>'
if c.extension == 'json-html':
c.render_style = api_type('html')
else:
c.render_style = api_type()
elif c.extension == 'wired':
c.render_style = 'wired'
c.response_content_type = 'text/javascript; charset=UTF-8'
c.response_wrappers.append(utils.to_js)
elif c.extension == 'embed':
c.render_style = 'htmllite'
c.response_content_type = 'text/javascript; charset=UTF-8'
c.response_wrappers.append(utils.to_js)
elif c.extension == 'mobile':
c.render_style = 'mobile'
elif c.extension == 'png':
c.response_content_type = 'image/png'
c.render_style = 'png'
elif c.extension == 'css':
c.response_content_type = 'text/css'
c.render_style = 'css'
#Insert new extentions above this line
elif c.extension not in ('', 'html'):
# request.path already has the extension stripped off of it
redirect_to(request.path + utils.query_string(request.get))
e = request.environ
if e.has_key('extension'):
c.render_style = e['render_style']
c.response_content_type = e['content_type']

ext = e['extension']
if ext == 'api' or ext.startswith('json'):
c.response_access_control = 'allow <*>'
if ext in ('embed', 'wired'):
c.response_wrappers.append(utils.to_js)

def get_browser_langs():
browser_langs = []

@@ -118,7 +118,8 @@ class BaseController(WSGIController):
u.mk_cname(**kw)

# make sure the extensions agree with the current page
u.set_extension(c.extension)
if c.extension:
u.set_extension(c.extension)

# unparse and encode it un utf8
return _force_unicode(u.unparse()).encode('utf8')

@@ -225,7 +225,7 @@ def test_cache(cache):
# a cache that occasionally dumps itself to be used for long-running
# processes
class SelfEmptyingCache(LocalCache):
def __init__(self,max_size=50*1000):
def __init__(self,max_size=100*1000):
self.max_size = max_size

def maybe_reset(self):

@@ -5,6 +5,7 @@ from r2.lib.db.operators import asc, desc, timeago
from r2.lib.db import query_queue
from r2.lib.db.sorts import epoch_seconds
from r2.lib.utils import fetch_things2, worker
from r2.lib.solrsearch import DomainSearchQuery

from datetime import datetime

@@ -23,6 +24,12 @@ def db_sort(sort):
cls, col = db_sorts[sort]
return cls(col)

search_sort = dict(hot = 'hot desc',
new = 'date desc',
top = 'points desc',
controversial = 'controversy desc',
old = 'date asc')

db_times = dict(all = None,
hour = Thing.c._date >= timeago('1 hour'),
day = Thing.c._date >= timeago('1 day'),
@@ -176,6 +183,9 @@ def get_links(sr, sort, time):
q._filter(db_times[time])
return make_results(q)

def get_domain_links(domain, sort, time):
return DomainSearchQuery(domain, sort=search_sort[sort], timerange=time)

def user_query(kind, user, sort, time):
"""General profile-page query."""
q = kind._query(kind.c.author_id == user._id,

@@ -32,7 +32,7 @@ from r2.models import *
from r2.lib.contrib import pysolr
from r2.lib.contrib.pysolr import SolrError
from r2.lib.utils import timeago, set_emptying_cache, IteratorChunker
from r2.lib.utils import psave, pload, unicode_safe
from r2.lib.utils import psave, pload, unicode_safe, tup
from r2.lib.cache import SelfEmptyingCache
from Queue import Queue
from threading import Thread
@@ -125,6 +125,8 @@ search_fields={Thing: (Field('fullname', '_fullname'),
Field('lang'),
Field('ups', '_ups', is_number=True, reverse=True),
Field('downs', '_downs', is_number=True, reverse=True),
Field('spam','_spam'),
Field('deleted','_deleted'),
Field('hot', lambda t: t._hot*1000, is_number=True, reverse=True),
Field('controversy', '_controversy', is_number=True, reverse=True),
Field('points', lambda t: (t._ups - t._downs), is_number=True, reverse=True)),
@@ -162,8 +164,8 @@ search_fields={Thing: (Field('fullname', '_fullname'),
# yes, it's a copy of 'hot'
is_number=True, reverse=True),
ThingField('author',Account,'author_id','name'),
#ThingField('subreddit',Subreddit,'sr_id','name'),
ThingField('reddit',Subreddit,'sr_id','name'))}
ThingField('subreddit',Subreddit,'sr_id','name'))}
#ThingField('reddit',Subreddit,'sr_id','name'))}

def tokenize_things(things,return_dict=False):
"""
@@ -276,6 +278,8 @@ def fetch_batches(t_class,size,since,until):
of `fetch_things`
"""
q=t_class._query(t_class.c._date >= since,
t_class.c._spam == (True,False),
t_class.c._deleted == (True,False),
t_class.c._date < until,
sort = desc('_date'),
limit = size,
@@ -375,8 +379,8 @@ def reindex_all(types = None, delete_all_first=False):
for batch in fetch_batches(cls,1000,
timeago("50 years"),
start_t):
r = tokenize_things([x for x in batch
if not x._spam and not x._deleted ])
r = tokenize_things([ x for x in batch
if not x._spam and not x._deleted ])

count += len(r)
print ("Processing %s #%d(%s): %s"
@@ -465,173 +469,241 @@ def combine_searchterms(terms):
def swap_strings(s,this,that):
"""
Just swaps substrings, like:
s = "sort(asc)"
swap_strings(s,'asc','desc')
s -> "sort desc"
s = "hot asc"
s = swap_strings(s,'asc','desc')
s == "hot desc"

uses 'tmp' as a replacment string, so don't use for anything
very complicated
"""
return s.replace(this,'tmp').replace(that,this).replace('tmp',that)

def search_things(q, sort = 'hot desc',
after = None,
subreddits = None,
authors = None,
num = 100, reverse = False,
timerange = None, langs = None,
types = None,
boost = []):
"""
Takes a given query and returns a list of Things that match
that query. See Builder for the use of `after`, `reverse`, and
`num`. Queries on params are OR queries, except `timerange`
and `types`
"""
if not q or not g.solr_url:
return pysolr.Results([],0)
class SearchQuery(object):
def __init__(self, q, sort, fields = [], subreddits = [], authors = [],
types = [], timerange = None, spam = False, deleted = False):

# there are two parts to our query: what the user typed (parsed
# with Solr's DisMax parser), and what we are adding to it. The
# latter is called the "boost" (and is parsed using full Lucene
# syntax), and it can be added to via the `boost` parameter (which
# we have to copy since we append to it)
boost = list(boost)
self.q = q
self.fields = fields
self.sort = sort
self.subreddits = subreddits
self.authors = authors
self.types = types
self.spam = spam
self.deleted = deleted

# `score` refers to Solr's score (relevency to the search given),
# not our score (sums of ups and downs).
sort = "score desc, %s, date desc, fullname asc" % (sort,)
if reverse:
sort = swap_strings(sort,'asc','desc')

if timerange:
def time_to_searchstr(t):
if isinstance(t, datetime):
t = t.strftime('%Y-%m-%dT%H:%M:%S.000Z')
elif isinstance(t, date):
t = t.strftime('%Y-%m-%dT00:00:00.000Z')
elif isinstance(t,str):
t = t
return t

(fromtime, totime) = timerange
fromtime = time_to_searchstr(fromtime)
totime = time_to_searchstr(totime)
boost.append("+date:[%s TO %s]"
% (fromtime,totime))

if subreddits:
def subreddit_to_searchstr(sr):
if isinstance(sr,Subreddit):
return ('sr_id','%d' % sr.id)
elif isinstance(sr,str) or isinstance(sr,unicode):
return ('reddit',sr)
else:
return ('sr_id','%d' % sr)

if isinstance(subreddits,list) or isinstance(subreddits,tuple):
s_subreddits = map(subreddit_to_searchstr, subreddits)
if timerange in ['hour','week','day','month','year']:
self.timerange = (timeago("1 %s" % timerange),"NOW")
elif timerange == 'all' or timerange is None:
self.timerange = None
else:
s_subreddits = (subreddit_to_searchstr(subreddits),)
self.timerange = timerange

boost.append("+(%s)^2" % combine_searchterms(s_subreddits))
def run(self, after = None, num = 100, reverse = False):
if not self.q or not g.solr_url:
return pysolr.Results([],0)

if authors:
def author_to_searchstr(a):
if isinstance(a,Account):
return ('author_id','%d' % a.id)
elif isinstance(a,str) or isinstance(a,unicode):
return ('author',a)
# there are two parts to our query: what the user typed
# (parsed with Solr's DisMax parser), and what we are adding
# to it. The latter is called the "boost" (and is parsed using
# full Lucene syntax), and it can be added to via the `boost`
# parameter
boost = []

if not self.spam:
boost.append("-spam:true")
if not self.deleted:
boost.append("-deleted:true")

if self.timerange:
def time_to_searchstr(t):
if isinstance(t, datetime):
t = t.strftime('%Y-%m-%dT%H:%M:%S.000Z')
elif isinstance(t, date):
t = t.strftime('%Y-%m-%dT00:00:00.000Z')
elif isinstance(t,str):
t = t
return t

(fromtime, totime) = self.timerange
fromtime = time_to_searchstr(fromtime)
totime = time_to_searchstr(totime)
boost.append("+date:[%s TO %s]"
% (fromtime,totime))

if self.subreddits:
def subreddit_to_searchstr(sr):
if isinstance(sr,Subreddit):
return ('sr_id','%d' % sr.id)
elif isinstance(sr,str) or isinstance(sr,unicode):
return ('subreddit',sr)
else:
return ('sr_id','%d' % sr)

s_subreddits = map(subreddit_to_searchstr, tup(self.subreddits))

boost.append("+(%s)" % combine_searchterms(s_subreddits))

if self.authors:
def author_to_searchstr(a):
if isinstance(a,Account):
return ('author_id','%d' % a.id)
elif isinstance(a,str) or isinstance(a,unicode):
return ('author',a)
else:
return ('author_id','%d' % a)

s_authors = map(author_to_searchstr,tup(self.authors))

boost.append('+(%s)^2' % combine_searchterms(s_authors))


def type_to_searchstr(t):
if isinstance(t,str):
return ('type',t)
else:
return ('author_id','%d' % a)

if isinstance(authors,list) or isinstance(authors,tuple):
s_authors = map(author_to_searchstr,authors)
else:
s_authors = map(author_to_searchstr,(authors,))

boost.append('+(%s)^2' % combine_searchterms(s_authors))

# the set of languages is used to determine the fields to search,
# named ('contents_%s' % lang), but 'contents' (which is split
# only on whitespace) is always also searched. This means that
# all_langs and schema.xml must be kept in synch
default_fields = ['contents^1.5','contents_ws^3',
'site^1','author^1', 'reddit^1', 'url^1']
if langs == None:
# only search 'contents'
fields = default_fields
else:
if langs == 'all':
langs = searchable_langs
fields = set([("%s^2" % lang_to_fieldname(lang)) for lang in langs]
+ default_fields)

if not types:
types = indexed_types

def type_to_searchstr(t):
if isinstance(t,str):
return ('type',t)
else:
return ('type',t.__name__.lower())
return ('type',t.__name__.lower())

s_types = map(type_to_searchstr,types)
boost.append("+%s" % combine_searchterms(s_types))
s_types = map(type_to_searchstr,self.types)
boost.append("+%s" % combine_searchterms(s_types))

# everything else that solr needs to know
solr_params = dict(fl = 'fullname', # the field(s) to return
qt = 'dismax', # the query-handler (dismax supports 'bq' and 'qf')
# qb = '3',
bq = ' '.join(boost),
qf = ' '.join(fields),
mm = '75%') # minimum number of clauses that should match
q,solr_params = self.solr_params(self.q,boost)

with SolrConnection() as s:
if after:
# size of the pre-search to run in the case that we need
# to search more than once. A bigger one can reduce the
# number of searches that need to be run twice, but if
# it's bigger than the default display size, it could
# waste some
PRESEARCH_SIZE = num
try:
search = self.run_search(q, self.sort, solr_params,
reverse, after, num)
return search

# run a search and get back the number of hits, so that we
# can re-run the search with that max_count.
pre_search = s.search(q,sort,rows=PRESEARCH_SIZE,
except SolrError,e:
g.log.error(str(e))
return pysolr.Results([],0)

@classmethod
def run_search(cls, q, sort, solr_params, reverse, after, num):
"returns pysolr.Results(docs=[fullname()],hits=int())"

if reverse:
sort = swap_strings(sort,'asc','desc')

g.log.debug("Searching q=%s" % q)

with SolrConnection() as s:
if after:
# size of the pre-search to run in the case that we
# need to search more than once. A bigger one can
# reduce the number of searches that need to be run
# twice, but if it's bigger than the default display
# size, it could waste some
PRESEARCH_SIZE = num

# run a search and get back the number of hits, so
# that we can re-run the search with that max_count.
pre_search = s.search(q,sort,rows=PRESEARCH_SIZE,
other_params = solr_params)

if (PRESEARCH_SIZE >= pre_search.hits
or pre_search.hits == len(pre_search.docs)):
# don't run a second search if our pre-search
# found all of the elements anyway
search = pre_search
else:
# we have to run a second search, but we can limit
# the duplicated transfer of the first few records
# since we already have those from the pre_search
second_search = s.search(q,sort,
start=len(pre_search.docs),
rows=pre_search.hits - len(pre_search.docs),
other_params = solr_params)
search = pysolr.Results(pre_search.docs + second_search.docs,
pre_search.hits)

search.docs = [ i['fullname'] for i in search.docs ]
search.docs = get_after(search.docs, after._fullname, num)
else:
search = s.search(q,sort,rows=num,
other_params = solr_params)
search.docs = [ i['fullname'] for i in search.docs ]

if (PRESEARCH_SIZE >= pre_search.hits
or pre_search.hits == len(pre_search.docs)):
# don't run a second search if our pre-search found
# all of the elements anyway
search = pre_search
else:
# we have to run a second search, but we can limit the
# duplicated transfer of the first few records since
# we already have those from the pre_search
second_search = s.search(q,sort,
start=len(pre_search.docs),
rows=pre_search.hits - len(pre_search.docs),
other_params = solr_params)
search = pysolr.Results(pre_search.docs + second_search.docs,
pre_search.hits)
return search

fullname = after._fullname
for i, item in enumerate(search.docs):
if item['fullname'] == fullname:
search.docs = search.docs[i+1:i+1+num]
break
else:
g.log.debug("I got an after query, but the fullname was not present in the results")
search.docs = search.docs[0:num]
def solr_params(self,*k,**kw):
raise NotImplementedError

class UserSearchQuery(SearchQuery):
"Base class for queries that use the dismax parser; requires self.mm"
def __init__(self, q, sort=None, fields=[], langs=None, **kw):
default_fields = ['contents^1.5','contents_ws^3'] + fields

if sort is None:
sort = 'score desc, hot desc, date desc'

if langs is None:
fields = default_fields
else:
search = s.search(q,sort,rows=num,
other_params = solr_params)
if langs == 'all':
langs = searchable_langs
fields = set([("%s^2" % lang_to_fieldname(lang)) for lang in langs]
+ default_fields)

hits = search.hits
things = Thing._by_fullname([i['fullname'] for i in search.docs],
data = True, return_dict = False)
# default minimum match
self.mm = '75%'

return pysolr.Results(things,hits)
SearchQuery.__init__(self, q, sort, fields = fields, **kw)

def solr_params(self, q, boost):
return q, dict(fl = 'fullname',
qt = 'dismax',
bq = ' '.join(boost),
qf = ' '.join(self.fields),
mm = self.mm)

class LinkSearchQuery(UserSearchQuery):
def __init__(self, q, **kw):
additional_fields = ['site^1','author^1', 'subreddit^1', 'url^1']

subreddits = None
authors = None
if c.site == subreddit.Default:
subreddits = Subreddit.user_subreddits(c.user)
elif c.site == subreddit.Friends and c.user.friends:
authors = c.user.friends
elif not isinstance(c.site,subreddit.FakeSubreddit):
subreddits = [c.site._id]

UserSearchQuery.__init__(self, q, fields = additional_fields,
subreddits = subreddits, authors = authors,
types=[Link], **kw)

class RelatedSearchQuery(LinkSearchQuery):
def __init__(self, q, ignore = [], **kw):
self.ignore = set(ignore) if ignore else set()

LinkSearchQuery.__init__(self, q, sort = 'score desc', **kw)

self.mm = '25%'

def run(self, *k, **kw):
search = LinkSearchQuery.run(self, *k, **kw)
search.docs = [ x for x in search.docs if x not in self.ignore ]
return search

class SubredditSearchQuery(UserSearchQuery):
def __init__(self, q, **kw):
UserSearchQuery.__init__(self, q, types=[Subreddit], **kw)

class DomainSearchQuery(SearchQuery):
def __init__(self, domain, **kw):
q = '+site:%s' % domain

SearchQuery.__init__(self, q=q, fields=['site'],types=[Link], **kw)

def solr_params(self, q, boost):
q = q + ' ' + ' '.join(boost)
return q, dict(fl='fullname',
qt='standard')

def get_after(fullnames, fullname, num):
for i, item in enumerate(fullnames):
if item == fullname:
return fullnames[i+1:i+num+1]
else:
return fullnames[:num]
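
get_after() is the pagination helper used by run_search() above; a tiny illustration with hypothetical fullnames:

# get_after() slices out the `num` fullnames following a given one, or the
# first `num` if it is not present (the values below are hypothetical).
names = ['t3_a', 't3_b', 't3_c', 't3_d', 't3_e']
assert get_after(names, 't3_b', 2) == ['t3_c', 't3_d']
assert get_after(names, 't3_zz', 2) == ['t3_a', 't3_b']
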
@@ -999,6 +999,7 @@ def title_to_url(title, max_length = 50):
return title

def debug_print(fn):
from pylons import g
def new_fn(*k,**kw):
ret = fn(*k,**kw)
g.log.debug("Fn: %s; k=%s; kw=%s\nRet: %s"

@@ -354,42 +354,26 @@ class IDBuilder(QueryBuilder):
return done, new_items

class SearchBuilder(QueryBuilder):
def __init__(self, query, wrap = Wrapped, sort = None, ignore = [],
time = time, types = None, langs = None, **kw):
QueryBuilder.__init__(self, query, wrap=wrap, **kw)
self.sort = sort
self.time = time
self.types = types
self.timing = 0
self.total_num = 0
self.langs = langs

self.ignore = set(x for x in (ignore if ignore else []))

def init_query(self):
subreddits = None
authors = None
if c.site == subreddit.Default:
subreddits = Subreddit.user_subreddits(c.user)
elif c.site == subreddit.Friends and c.user.friends:
authors = c.user.friends
elif not isinstance(c.site,subreddit.FakeSubreddit):
subreddits = c.site._id

self.subreddits = subreddits
self.authors = authors

self.skip = True
self.total_num = 0
self.start_time = time.time()

self.start_time = time.time()

def keep_item(self,item):
skip_if = item._spam or item._deleted or item._fullname in self.ignore
return not skip_if
# doesn't use the default keep_item because we want to keep
# things that were voted on, even if they've chosen to hide
# them in normal listings
if item._spam or item._deleted:
return False
else:
return True


def fetch_more(self, last_item, num_have):
from r2.lib import solrsearch

start_t = time.time()

done = False
limit = None
if self.num:
@@ -401,25 +385,13 @@ class SearchBuilder(QueryBuilder):
else:
done = True

langs = c.content_langs
if self.langs:
langs += self.langs
search = self.query.run(after = last_item or self.after,
reverse = self.reverse,
num = limit)

if self.time in ['hour','week','day','month']:
timerange = (timeago("1 %s" % self.time),"NOW")
else:
timerange = None
new_items = Thing._by_fullname(search.docs, data = True, return_dict=False)

new_items = solrsearch.search_things(q = self.query or '', sort = self.sort,
after = last_item,
subreddits = self.subreddits,
authors = self.authors,
num = limit, reverse = self.reverse,
timerange = timerange, langs = langs,
types = self.types)

self.total_num = new_items.hits
self.timing = time.time() - start_t
self.total_num = search.hits

return done, new_items

@@ -414,9 +414,12 @@ class Subreddit(Thing, Printable):

class FakeSubreddit(Subreddit):
over_18 = False
title = ''
_nodb = True

def __init__(self):
Subreddit.__init__(self)
self.title = ''

def is_moderator(self, user):
return c.user_is_loggedin and c.user_is_admin

@@ -568,6 +571,21 @@ class SubSR(FakeSubreddit):
@property
def path(self):
return "/reddits/"

class DomainSR(FakeSubreddit):
@property
def path(self):
return '/domain/' + self.domain

def __init__(self, domain):
FakeSubreddit.__init__(self)
self.domain = domain
self.name = domain
self.title = domain + ' ' + _('on reddit.com')

def get_links(self, sort, time):
from r2.lib.db import queries
return queries.get_domain_links(self.domain, sort, time)

Sub = SubSR()
Friends = FriendsSR()

@@ -222,7 +222,7 @@ input[type=checkbox], input[type=radio] { margin-top: .4em; }
padding: 2px 6px 1px 6px;
background-color: white;
border: 1px solid #5f99cf;
border-bottom: none;
border-bottom: 1px solid white;
}

#search {
@@ -588,6 +588,7 @@ before enabling */
padding: 5px 10px;
margin: 5px 310px 5px 0px;
border: 1px solid orange;
font-size: small;
}

.menuarea {
@@ -958,8 +959,7 @@ a.star { text-decoration: none; color: #ff8b60 }
.searchpane a { color: #369 }*/

.searchpane {
margin: 5px;
margin-right: 310px;
margin: 5px 310px 5px 0px;
}

.searchpane #search input[type=text] { }

@@ -45,7 +45,7 @@
var sr = {};

var logged = ${c.user_is_loggedin and ("'%s'" % c.user.name) or "false"};
var post_site = "${c.site.name}";
var post_site = "${c.site.name if not c.default_sr else ''}";
var cnameframe = ${'true' if c.cname else 'false'};
var modhash = ${"'%s'" % c.modhash or "false"};
var cur_domain = "${get_domain(cname = True, subreddit = False) if c.frameless_cname else g.domain}";