mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-04-05 03:00:15 -04:00
Don't show spam in search listings (also avoids adding spam to the search index at all, which could speed up solr)
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -19,6 +19,8 @@ r2/r2/public/static/frame.js
|
||||
r2/r2/public/static/reddit.js
|
||||
r2/r2/public/static/vote.js
|
||||
r2/r2/public/static/reddit_rtl.css
|
||||
r2/r2admin
|
||||
r2/reddit_i18n
|
||||
r2/data/*
|
||||
r2/count.pickle
|
||||
r2/srcount.pickle
|
||||
|
||||
@@ -29,7 +29,6 @@
|
||||
from __future__ import with_statement
|
||||
|
||||
from r2.models import *
|
||||
from r2.models import thing_changes
|
||||
from r2.lib.contrib import pysolr
|
||||
from r2.lib.contrib.pysolr import SolrError
|
||||
from r2.lib.utils import timeago, set_emptying_cache, IteratorChunker
|
||||
@@ -382,7 +381,8 @@ def reindex_all(types = None, delete_all_first=False):
|
||||
for batch in fetch_batches(cls,1000,
|
||||
timeago("50 years"),
|
||||
start_t):
|
||||
r = tokenize_things(batch)
|
||||
r = tokenize_things([x for x in batch
|
||||
if not x._spam and not x._deleted ])
|
||||
|
||||
count += len(r)
|
||||
print ("Processing %s #%d(%s): %s"
|
||||
@@ -446,6 +446,7 @@ def changed(types=None,since=None,commit=True,optimize=False):
|
||||
chunk = cls._by_fullname(chunk,
|
||||
data=True, return_dict=False)
|
||||
chunk = [x for x in chunk if not x._spam and not x._deleted]
|
||||
to_delete = [x for x in chunk if x._spam or x._deleted]
|
||||
|
||||
# note: anything marked as spam or deleted is not
|
||||
# updated in the search database. Since these are
|
||||
@@ -454,6 +455,9 @@ def changed(types=None,since=None,commit=True,optimize=False):
|
||||
chunk = tokenize_things(chunk)
|
||||
s.add(chunk)
|
||||
|
||||
for i in to_delete:
|
||||
s.delete(id=i._fullname)
|
||||
|
||||
save_last_run(start_t)
|
||||
|
||||
def combine_searchterms(terms):
|
||||
|
||||
@@ -33,7 +33,6 @@ from r2.lib.wrapped import Wrapped
|
||||
from r2.lib import utils
|
||||
from r2.lib.db import operators
|
||||
from r2.lib.cache import sgm
|
||||
from r2.lib import solrsearch
|
||||
|
||||
from copy import deepcopy, copy
|
||||
|
||||
@@ -361,7 +360,7 @@ class SearchBuilder(QueryBuilder):
|
||||
self.total_num = 0
|
||||
self.langs = langs
|
||||
|
||||
self.ignore = ignore
|
||||
self.ignore = set(x for x in (ignore if ignore else []))
|
||||
|
||||
def init_query(self):
|
||||
subreddits = None
|
||||
@@ -381,7 +380,15 @@ class SearchBuilder(QueryBuilder):
|
||||
self.subreddits = subreddits
|
||||
self.authors = authors
|
||||
|
||||
self.skip = True
|
||||
|
||||
def keep_item(self,item):
|
||||
skip_if = item._spam or item._deleted or item._fullname in self.ignore
|
||||
return not skip_if
|
||||
|
||||
def fetch_more(self, last_item, num_have):
|
||||
from r2.lib import solrsearch
|
||||
|
||||
start_t = time.time()
|
||||
|
||||
done = False
|
||||
@@ -412,15 +419,10 @@ class SearchBuilder(QueryBuilder):
|
||||
timerange = timerange, langs = langs,
|
||||
types = self.types)
|
||||
|
||||
things = [x
|
||||
for x in new_items
|
||||
if not x._fullname in
|
||||
(self.ignore if self.ignore else [])]
|
||||
|
||||
self.total_num = new_items.hits
|
||||
self.timing = time.time() - start_t
|
||||
|
||||
return done, things
|
||||
return done, new_items
|
||||
|
||||
class CommentBuilder(Builder):
|
||||
def __init__(self, link, sort, comment = None, context = None):
|
||||
|
||||
Reference in New Issue
Block a user