diff --git a/r2/example.ini b/r2/example.ini
index de09834ce..753fd3939 100644
--- a/r2/example.ini
+++ b/r2/example.ini
@@ -91,6 +91,7 @@ max_sr_images = 20
 login_cookie = reddit_session
 domain = localhost
 domain_prefix =
+media_domain = localhost
 default_sr = localhost
 admins =
 sponsors =
diff --git a/r2/r2/config/routing.py b/r2/r2/config/routing.py
index 2fe26f742..eef0fcce7 100644
--- a/r2/r2/config/routing.py
+++ b/r2/r2/config/routing.py
@@ -168,6 +168,8 @@ def make_map(global_conf={}, app_conf={}):
     mc('/captcha/:iden', controller='captcha', action='captchaimg')
 
+    mc('/mediaembed/:link', controller="mediaembed", action="mediaembed")
+
     mc('/doquery', controller='query', action='doquery')
 
     mc('/store', controller='redirect', action='redirect',
diff --git a/r2/r2/controllers/__init__.py b/r2/r2/controllers/__init__.py
index 0ce49dc57..fd024f035 100644
--- a/r2/r2/controllers/__init__.py
+++ b/r2/r2/controllers/__init__.py
@@ -45,6 +45,7 @@ from post import PostController
 from toolbar import ToolbarController
 from i18n import I18nController
 from promotecontroller import PromoteController
+from mediaembed import MediaembedController
 from querycontroller import QueryController
diff --git a/r2/r2/controllers/mediaembed.py b/r2/r2/controllers/mediaembed.py
new file mode 100644
index 000000000..83e80bf46
--- /dev/null
+++ b/r2/r2/controllers/mediaembed.py
@@ -0,0 +1,52 @@
+# The contents of this file are subject to the Common Public Attribution
+# License Version 1.0. (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
+# License Version 1.1, but Sections 14 and 15 have been added to cover use of
+# software over a computer network and provide for limited attribution for the
+# Original Developer. In addition, Exhibit A has been modified to be consistent
+# with Exhibit B.
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
+# the specific language governing rights and limitations under the License.
+#
+# The Original Code is Reddit.
+#
+# The Original Developer is the Initial Developer. The Initial Developer of the
+# Original Code is CondeNet, Inc.
+#
+# All portions of the code written by CondeNet are Copyright (c) 2006-2009
+# CondeNet, Inc. All Rights Reserved.
+################################################################################
+from validator import *
+from reddit_base import RedditController
+
+from r2.lib.scraper import scrapers
+from r2.lib.pages import MediaEmbedBody
+
+from pylons import g, request   # g is needed for the media_domain check below
+
+class MediaembedController(RedditController):
+    @validate(link = VLink('link'))
+    def GET_mediaembed(self, link):
+        if request.host != g.media_domain:
+            # don't serve up untrusted content except on our
+            # specifically untrusted domain
+            return self.abort404()
+
+        if not link or not link.media_object:
+            return self.abort404()
+
+        if isinstance(link.media_object, basestring):
+            # it's an old-style string
+            content = link.media_object
+        elif isinstance(link.media_object, dict):
+            # otherwise it's the new style, which is a dict(type=type, **args)
+            media_object_type = link.media_object['type']
+            scraper = scrapers[media_object_type]
+            media_embed = scraper.media_embed(**link.media_object)
+            content = media_embed.content
+
+        return MediaEmbedBody(body = content).render()
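
Note: the controller above is the render-time half of the new media pipeline.
A link's media_object is either a legacy HTML string or a dict whose 'type'
key names a scraper domain; every other key is forwarded verbatim to that
scraper's media_embed() classmethod. A minimal sketch of the lookup it
performs (the stored values here are illustrative, not taken from the patch):

    media_object = {'type': 'youtube.com', 'video_id': 'Yu_moia-oVI'}
    scraper = scrapers[media_object['type']]          # -> YoutubeScraper
    content = scraper.media_embed(**media_object).content
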
diff --git a/r2/r2/controllers/validator/validator.py b/r2/r2/controllers/validator/validator.py
index 58cea9fd7..93c9ee9ba 100644
--- a/r2/r2/controllers/validator/validator.py
+++ b/r2/r2/controllers/validator/validator.py
@@ -865,7 +865,8 @@ class VCnameDomain(Validator):
     def run(self, domain):
         if (domain
             and (not self.domain_re.match(domain)
-                 or domain.endswith('.reddit.com')
+                 or domain.endswith('.' + g.domain)
+                 or domain.endswith('.' + g.media_domain)
                  or len(domain) > 300)):
             self.set_error(errors.BAD_CNAME)
         elif domain:
diff --git a/r2/r2/lib/app_globals.py b/r2/r2/lib/app_globals.py
index 2c01d290a..4583bee85 100644
--- a/r2/r2/lib/app_globals.py
+++ b/r2/r2/lib/app_globals.py
@@ -174,6 +174,11 @@ class Globals(object):
         if self.debug:
             self.log.setLevel(logging.DEBUG)
 
+        if not self.media_domain:
+            self.media_domain = self.domain
+        if self.media_domain == self.domain:
+            print "Warning: g.media_domain == g.domain. This may give untrusted content access to user cookies"
+
         #read in our CSS so that it can become a default for subreddit
         #stylesheets
         stylesheet_path = os.path.join(paths.get('static_files'),
diff --git a/r2/r2/lib/pages/pages.py b/r2/r2/lib/pages/pages.py
index 5420ed4b3..8b108c890 100644
--- a/r2/r2/lib/pages/pages.py
+++ b/r2/r2/lib/pages/pages.py
@@ -42,6 +42,7 @@ from r2.lib.utils import title_to_url, query_string, UrlParser, to_js, vote_hash
 from r2.lib.utils import link_duplicates
 from r2.lib.template_helpers import add_sr, get_domain
 from r2.lib.subreddit_search import popular_searches
+from r2.lib.scraper import scrapers
 import sys, random, datetime, locale, calendar, simplejson, re
 import graph
 
@@ -1482,9 +1483,25 @@ class LinkChild(object):
         return ''
 
 class MediaChild(LinkChild):
+    """renders when the user hits the expando button to expand media
+       objects, like embedded videos"""
     css_style = "video"
     def content(self):
-        return self.link.media_object
+        if isinstance(self.link.media_object, basestring):
+            return self.link.media_object
+
+        media_object_type = self.link.media_object['type']
+        if media_object_type in scrapers:
+            scraper = scrapers[media_object_type]
+            media_embed = scraper.media_embed(**self.link.media_object)
+            return MediaEmbed(media_domain = g.media_domain,
+                              height = media_embed.height + 10,
+                              width = media_embed.width + 10,
+                              id36 = self.link._id36).render()
+
+class MediaEmbed(Templated):
+    """The actual rendered iframe for a media child"""
+    pass
 
 class SelfTextChild(LinkChild):
     css_style = "selftext"
@@ -1494,10 +1511,6 @@
                      nofollow = self.nofollow)
         return u.render()
 
-class SelfText(Templated):
-    def __init__(self, link):
-        Templated.__init__(self, link = link)
-
 class UserText(CachedTemplate):
     def __init__(self,
                  item,
@@ -1531,6 +1544,10 @@ class UserText(CachedTemplate):
                          cloneable = cloneable,
                          css_class = css_class)
 
+class MediaEmbedBody(CachedTemplate):
+    """What's rendered inside the iframe that contains media objects"""
+    pass
+
 class Traffic(Templated):
     @staticmethod
     def slice_traffic(traffic, *indices):
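
Note: MediaChild.content() no longer inlines third-party embed HTML directly
into the listing page. Instead it renders a MediaEmbed template, which is
expected to emit an iframe pointed at the new /mediaembed/:link route on
g.media_domain, roughly along these lines (attribute layout assumed; the real
markup lives in templates/mediaembed.html):

    <iframe src="http://${media_domain}/mediaembed/${id36}"
            height="${height}" width="${width}"></iframe>

The extra 10 pixels on each dimension are presumably headroom so the iframe
does not grow scrollbars around the player.
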
diff --git a/r2/r2/lib/scraper.py b/r2/r2/lib/scraper.py
index a2ddd1a23..a89b392fa 100644
--- a/r2/r2/lib/scraper.py
+++ b/r2/r2/lib/scraper.py
@@ -151,6 +151,16 @@ def fetch_url(url, referer = None, retries = 1, dimension = False):
 def fetch_size(url, referer = None, retries = 1):
     return fetch_url(url, referer, retries, dimension = True)
 
+class MediaEmbed(object):
+    width = None
+    height = None
+    content = None
+
+    def __init__(self, height, width, content):
+        self.height = height
+        self.width = width
+        self.content = content
+
 class Scraper:
     def __init__(self, url):
         self.url = url
@@ -158,6 +168,9 @@ class Scraper:
         self.content_type = None
         self.soup = None
 
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.url)
+
     def download(self):
         self.content_type, self.content = fetch_url(self.url)
         if self.content_type and 'html' in self.content_type and self.content:
@@ -184,6 +197,12 @@ class Scraper:
         max_area = 0
         max_url = None
 
+        if self.soup:
+            thumbnail_spec = self.soup.find('link', rel = 'image_src')
+            if thumbnail_spec and thumbnail_spec['href']:
+                log.debug("Using image_src")
+                return thumbnail_spec['href']
+
         for image_url in self.image_urls():
             size = fetch_size(image_url, referer = self.url)
             if not size:
@@ -228,28 +247,60 @@ class Scraper:
         return image
 
     def media_object(self):
-        return None
+        for deepscraper in deepscrapers:
+            ds = deepscraper()
+            found = ds.find_media_object(self)
+            if found:
+                return found
+
+    @classmethod
+    def media_embed(cls):
+        raise NotImplementedError
 
 class MediaScraper(Scraper):
     media_template = ""
     thumbnail_template = ""
+    video_id = None
     video_id_rx = None
 
     def __init__(self, url):
-        m = self.video_id_rx.match(url)
-        if m:
-            self.video_id = m.groups()[0]
-        else:
-            #if we can't find the id just treat it like a normal page
-            log.debug('reverting to regular scraper: %s' % url)
-            self.__class__ = Scraper
         Scraper.__init__(self, url)
 
+        # first try the simple regex against the URL. If that fails,
+        # see if the MediaScraper subclass has its own extraction
+        # function
+        if self.video_id_rx:
+            m = self.video_id_rx.match(url)
+            if m:
+                self.video_id = m.groups()[0]
+        if not self.video_id:
+            video_id = self.video_id_extract()
+            if video_id:
+                self.video_id = video_id
+        if not self.video_id:
+            #if we still can't find the id just treat it like a normal page
+            log.debug('reverting to regular scraper: %s' % url)
+            self.__class__ = Scraper
+
+    def video_id_extract(self):
+        return None
+
     def largest_image_url(self):
-        return self.thumbnail_template.replace('$video_id', self.video_id)
+        if self.thumbnail_template:
+            return self.thumbnail_template.replace('$video_id', self.video_id)
+        else:
+            return Scraper.largest_image_url(self)
 
     def media_object(self):
-        return self.media_template.replace('$video_id', self.video_id)
+        return dict(video_id = self.video_id,
+                    type = self.domains[0])
+
+    @classmethod
+    def media_embed(cls, video_id = None, height = None, width = None, **kw):
+        content = cls.media_template.replace('$video_id', video_id)
+        return MediaEmbed(height = height or cls.height,
+                          width = width or cls.width,
+                          content = content)
 
 def youtube_in_google(google_url):
     h = Scraper(google_url)
@@ -276,17 +327,20 @@ def make_scraper(url):
             return make_scraper(youtube_url)
     return scraper(url)
 
 ########## site-specific video scrapers ##########
-#Youtube
 class YoutubeScraper(MediaScraper):
-    media_template = ''
+    domains = ['youtube.com']
+    height = 295
+    width = 480
+    media_template = ''
     thumbnail_template = 'http://img.youtube.com/vi/$video_id/default.jpg'
     video_id_rx = re.compile('.*v=([A-Za-z0-9-_]+).*')
 
-#Metacafe
 class MetacafeScraper(MediaScraper):
+    domains = ['metacafe.com']
+    height = 345
+    width = 400
     media_template = ' '
     video_id_rx = re.compile('.*/watch/([^/]+)/.*')
 
@@ -296,20 +350,16 @@ class MetacafeScraper(MediaScraper):
     def media_object(self):
         if not self.soup:
             self.download()
 
         if self.soup:
             video_url = self.soup.find('link', rel = 'video_src')['href']
-            return self.media_template.replace('$video_id', video_url)
+            return dict(video_id = video_url,
+                        type = self.domains[0])
 
-    def largest_image_url(self):
-        if not self.soup:
-            self.download()
-
-        if self.soup:
-            return self.soup.find('link', rel = 'image_src')['href']
-
-#Google Video
-gootube_thumb_rx = re.compile(".*thumbnail:\s*\'(http://[^/]+/ThumbnailServer2[^\']+)\'.*",
-                              re.IGNORECASE | re.S)
 class GootubeScraper(MediaScraper):
+    domains = ['video.google.com']
+    height = 326
+    width = 400
     media_template = ' '
     video_id_rx = re.compile('.*videoplay\?docid=([A-Za-z0-9-_]+).*')
+    gootube_thumb_rx = re.compile(".*thumbnail:\s*\'(http://[^/]+/ThumbnailServer2[^\']+)\'.*",
+                                  re.IGNORECASE | re.S)
 
     def largest_image_url(self):
         if not self.content:
             self.download()
         if not self.content:
             return None
 
-        m = gootube_thumb_rx.match(self.content)
+        m = self.gootube_thumb_rx.match(self.content)
         if m:
             image_url = m.groups()[0]
             image_url = utils.safe_eval_str(image_url)
             return image_url
 
-scrapers = {'youtube.com': YoutubeScraper,
-            'video.google.com': GootubeScraper,
-            'metacafe.com': MetacafeScraper}
+class VimeoScraper(MediaScraper):
+    domains = ['vimeo.com']
+    height = 448
+    width = 520
+    media_template = ' '
+    video_id_rx = re.compile('.*/(.*)')
+
+    def media_object(self):
+        if not self.soup:
+            self.download()
+
+        if self.soup:
+            video_url = self.soup.find('link', rel = 'video_src')['href']
+            return dict(video_id = video_url,
+                        type = self.domains[0])
+
+class BreakScraper(MediaScraper):
+    domains = ['break.com']
+    height = 421
+    width = 520
+    media_template = ''
+    video_id_rx = re.compile('.*/index/([^/]+).*')
+
+    def video_id_extract(self):
+        if not self.soup:
+            self.download()
+
+        if self.soup:
+            video_src = self.soup.find('link', rel = 'video_src')
+            if video_src and video_src['href']:
+                return video_src['href']
+
+class TheOnionScraper(MediaScraper):
+    domains = ['theonion.com']
+    height = 430
+    width = 480
+    media_template = """
+        """
+    video_id_rx = re.compile('.*/video/([^/?#]+).*')
+
+    def media_object(self):
+        if not self.soup:
+            self.download()
+
+        if self.soup:
+            video_url = self.soup.find('meta', attrs={'name': 'nid'})['content']
+            return dict(video_id = video_url,
+                        type = self.domains[0])
+
+class CollegeHumorScraper(MediaScraper):
+    domains = ['collegehumor.com']
+    height = 390
+    width = 520
+    media_template = ''
+    video_id_rx = re.compile('.*video:(\d+).*')
+
+class FunnyOrDieScraper(MediaScraper):
+    domains = ['funnyordie.com']
+    height = 438
+    width = 464
+    media_template = ''
+    thumbnail_template = 'http://assets1.ordienetworks.com/tmbs/$video_id/medium_2.jpg?c79e63ac'
+    video_id_rx = re.compile('.*/videos/([^/]+)/.*')
+
+class ComedyCentralScraper(MediaScraper):
+    domains = ['comedycentral.com', 'thedailyshow.com']
+    height = 316
+    width = 332
+    media_template = ''
+    video_id_rx = re.compile('.*videoId=(\d+).*')
+
+class ColbertNationScraper(ComedyCentralScraper):
+    domains = ['colbertnation.com']
+    video_id_rx = re.compile('.*videos/(\d+)/.*')
+
+class LiveLeakScraper(MediaScraper):
+    domains = ['liveleak.com']
+    height = 370
+    width = 450
+    media_template = ''
+    video_id_rx = re.compile('.*i=([a-zA-Z0-9_]+).*')
+
+    def largest_image_url(self):
+        if not self.soup:
+            self.download()
+
+        if self.soup:
+            return self.soup.find('link', rel = 'videothumbnail')['href']
+
+class DailyMotionScraper(MediaScraper):
+    domains = ['dailymotion.com']
+    height = 381
+    width = 480
+    media_template = ''
+    video_id_rx = re.compile('.*/video/([a-zA-Z0-9]+)_.*')
+
+    def media_object(self):
+        if not self.soup:
+            self.download()
+
+        if self.soup:
+            video_url = self.soup.find('link', rel = 'video_src')['href']
+            return dict(video_id = video_url,
+                        type = self.domains[0])
+
+class RevverScraper(MediaScraper):
+    domains = ['revver.com']
+    height = 392
+    width = 480
+    media_template = ''
+    video_id_rx = re.compile('.*/video/([a-zA-Z0-9]+)/.*')
+
+class EscapistScraper(MediaScraper):
+    domains = ['escapistmagazine.com']
+    height = 294
+    width = 480
+    media_template = """
+        """
+    video_id_rx = re.compile('.*/videos/view/[A-Za-z-9-]+/([0-9]+).*')
+
+class JustintvScraper(MediaScraper):
+    """Can grab streams from justin.tv, but not clips"""
+    domains = ['justin.tv']
+    height = 295
+    width = 353
+    stream_media_template = """
+        """
+    video_id_rx = re.compile('^http://www.justin.tv/([a-zA-Z0-9_]+)[^/]*$')
+
+    @classmethod
+    def media_embed(cls, video_id, **kw):
+        content = cls.stream_media_template.replace('$video_id', video_id)
+        return MediaEmbed(height = cls.height,
+                          width = cls.width,
+                          content = content)
+
+class SoundcloudScraper(MediaScraper):
+    """soundcloud.com"""
+    domains = ['soundcloud.com']
+    height = 81
+    width = 400
+    media_template = """
+        """
+    video_id_rx = re.compile('^http://soundcloud.com/[a-zA-Z0-9_-]+/([a-zA-Z0-9_-]+)')
+
+class DeepScraper(object):
+    """Subclasses of DeepScraper attempt to dive into generic pages
+       for embeds of other types (like YouTube videos on blog
+       sites)."""
+
+    def find_media_object(self, scraper):
+        return None
+
+class YoutubeEmbedDeepScraper(DeepScraper):
+    youtube_url_re = re.compile('^(http://www.youtube.com/v/([_a-zA-Z0-9-]+)).*')
+
+    def find_media_object(self, scraper):
+        # try to find very simple youtube embeds
+        if not scraper.soup:
+            scraper.download()
+
+        if scraper.soup:
+            movie_embed = scraper.soup.find('embed',
+                attrs={'src': lambda x: x and self.youtube_url_re.match(x)})
+            if movie_embed:
+                youtube_id = self.youtube_url_re.match(movie_embed['src']).group(2)
+                youtube_url = 'http://www.youtube.com/watch?v=%s' % youtube_id
+                log.debug('found youtube embed %s' % youtube_url)
+                mo = YoutubeScraper(youtube_url).media_object()
+                mo['deep'] = scraper.url
+                return mo
+
+# scrapers: dict(domain -> ScraperClass)
+scrapers = {}
+for scraper in [ YoutubeScraper,
+                 MetacafeScraper,
+                 GootubeScraper,
+                 VimeoScraper,
+                 BreakScraper,
+                 TheOnionScraper,
+                 CollegeHumorScraper,
+                 FunnyOrDieScraper,
+                 ComedyCentralScraper,
+                 ColbertNationScraper,
+                 LiveLeakScraper,
+                 DailyMotionScraper,
+                 RevverScraper,
+                 EscapistScraper,
+                 JustintvScraper,
+                 SoundcloudScraper,
+                 ]:
+    for domain in scraper.domains:
+        scrapers[domain] = scraper
+
+deepscrapers = [YoutubeEmbedDeepScraper]
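+
+# A quick sketch of how the two halves of the scraper protocol compose
+# (illustrative only; nothing in this patch runs this directly):
+#
+#     s = make_scraper('http://www.youtube.com/watch?v=Yu_moia-oVI')
+#     mo = s.media_object()
+#     # mo == {'type': 'youtube.com', 'video_id': 'Yu_moia-oVI'} -- small and
+#     # picklable, so it is stored on the Link at submit time
+#     embed = scrapers[mo['type']].media_embed(**mo)
+#     embed.width, embed.height    # 480, 295, from the YoutubeScraper class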
+
+def convert_old_media_objects():
+    q = Link._query(Link.c.media_object is not None,
+                    Link.c._date > whenever,
+                    data = True)
+    for link in utils.fetch_things2(q):
+        if not getattr(link, 'media_object', None):
+            continue
+
+        if 'youtube' in link.media_object:
+            # we can rewrite this one without scraping
+            video_id = YoutubeScraper.video_id_rx.match(link.url)
+            link.media_object = dict(type = 'youtube.com',
+                                     video_id = video_id.group(1))
+        elif ('video.google.com' in link.media_object
+              or 'metacafe' in link.media_object):
+            scraper = make_scraper(link.url)
+            if not scraper:
+                continue
+            mo = scraper.media_object()
+            if not mo:
+                continue
+
+            link.media_object = mo
+        else:
+            print "skipping %s because it confuses me" % link._fullname
+            continue
+
+        link._commit()
+
+test_urls = [
+    'http://www.facebook.com/pages/Rick-Astley/5807213510?sid=c99aaf3888171e73668a38e0749ae12d', # regular thumbnail finder
+    'http://www.flickr.com/photos/septuagesima/317819584/', # thumbnail with image_src
+
+    'http://www.youtube.com/watch?v=Yu_moia-oVI',
+    'http://www.metacafe.com/watch/sy-1473689248/rick_astley_never_gonna_give_you_up_official_music_video/',
+    'http://video.google.com/videoplay?docid=5908758151704698048',
+    'http://vimeo.com/4495451',
+    'http://www.break.com/usercontent/2008/11/Macy-s-Thankgiving-Day-Parade-Rick-Roll-611965.html',
+    'http://www.theonion.com/content/video/sony_releases_new_stupid_piece_of',
+    'http://www.collegehumor.com/video:1823712',
+    'http://www.funnyordie.com/videos/7f2a184755/macys-thanksgiving-day-parade-gets-rick-rolled-from-that-happened',
+    'http://www.comedycentral.com/videos/index.jhtml?videoId=178342&title=ultimate-fighting-vs.-bloggers',
+    'http://www.thedailyshow.com/video/index.jhtml?videoId=175244&title=Photoshop-of-Horrors',
+    'http://www.colbertnation.com/the-colbert-report-videos/63549/may-01-2006/sign-off---spam',
+    'http://www.liveleak.com/view?i=e09_1207983531',
+    'http://www.dailymotion.com/relevance/search/rick+roll/video/x5l8e6_rickroll_fun',
+    'http://revver.com/video/1199591/rick-rolld-at-work/',
+    'http://www.escapistmagazine.com/videos/view/zero-punctuation/10-The-Orange-Box',
+    'http://www.escapistmagazine.com/videos/view/unskippable/736-Lost-Odyssey',
+
+    # justin.tv has two media types that we care about, streams, which
+    # we can scrape, and clips, which we can't
+    'http://www.justin.tv/help', # stream
+    'http://www.justin.tv/clip/c07a333f94e5716b', # clip, which we can't currently scrape, and shouldn't try
+
+    'http://soundcloud.com/kalhonaaho01/never-gonna-stand-you-up-rick-astley-vs-ludacris-album-version',
+    'http://listen.grooveshark.com/#/song/Never_Gonna_Give_You_Up/12616328',
+    'http://tinysong.com/2WOJ', # also Grooveshark
+
+    'http://www.rickrolled.com/videos/video/rickrolld' # test the DeepScraper
+    ]
+
+# NB: submit_all() and test() below are developer utilities meant to be run
+# from an interactive (e.g. paster) shell; submit_all() assumes a 'testmedia'
+# subreddit and a 'testmedia' account already exist.
+def submit_all():
+    from r2.models import Subreddit, Account, Link, NotFound
+    from r2.lib.media import set_media
+    from r2.lib.db import queries
+    sr = Subreddit._by_name('testmedia')
+    author = Account._by_name('testmedia')
+    links = []
+    for url in test_urls:
+        try:
+            # delete any existing version of the link
+            l = Link._by_url(url, sr)
+            print "Deleting %s" % l
+            l._deleted = True
+            l._commit()
+        except NotFound:
+            pass
+
+        l = Link._submit(url, url, author, sr, '0.0.0.0')
+
+        try:
+            set_media(l)
+        except Exception, e:
+            print e
+
+        if g.write_query_queue:
+            queries.new_link(l)
+
+        links.append(l)
+
+    return links
 
 def test():
-    #from r2.lib.pool2 import WorkQueue
-    jobs = []
-    f = open('/tmp/testurls.txt')
-    for url in f:
-        if url.startswith('#'):
-            continue
-        if url.startswith('/info'):
-            continue
-
-        def make_job(url):
-            def fetch(url):
-                print 'START', url
-                url = url.strip()
-                h = make_scraper(url)
-                image_url = h.largest_image_url()
-                print 'DONE', image_url
-            return lambda: fetch(url)
-
-        jobs.append(make_job(url))
-
-    print jobs[0]()
-    #wq = WorkQueue(jobs)
-    #wq.start()
-
-if __name__ == '__main__':
-    test()
+    """Take some example URLs and print out a nice pretty HTML table
+       of their extracted thumbnails and media objects"""
+    import sys
+    from r2.lib.filters import websafe
+
+    print "<table>"
+    for url in test_urls:
+        sys.stderr.write("%s\n" % url)
+        print "<tr>"
+        print "<td>", websafe(url), "</td>"
+        print "<td>"
+        h = make_scraper(url)
+        print websafe(repr(h))
+        img = h.largest_image_url()
+        if img:
+            print "<img src=\"%s\" />" % img
+        else:
+            print "(no image)"
+        mo = h.media_object()
+        if mo:
+            s = scrapers[mo['type']]
+            print websafe(repr(mo))
+            print s.media_embed(**mo).content
+        else:
+            print "None"
+        print "</td>"
+        print "</tr>"
+    print "</table>"
diff --git a/r2/r2/models/link.py b/r2/r2/models/link.py
index 2e21fe5b0..12c290134 100644
--- a/r2/r2/models/link.py
+++ b/r2/r2/models/link.py
@@ -211,6 +211,7 @@ class Link(Thing, Printable):
             s.append(request.get.has_key('twocolumn'))
         elif style == "xml":
             s.append(request.GET.has_key("nothumbs"))
+        s.append(getattr(wrapped, 'media_object', {}))
         return s
 
     def make_permalink(self, sr, force_domain = False):
diff --git a/r2/r2/templates/mediaembed.html b/r2/r2/templates/mediaembed.html
new file mode 100644
index 000000000..5c128e91e
--- /dev/null
+++ b/r2/r2/templates/mediaembed.html
@@ -0,0 +1,24 @@
+## The contents of this file are subject to the Common Public Attribution
+## License Version 1.0. (the "License"); you may not use this file except in
+## compliance with the License. You may obtain a copy of the License at
+## http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
+## License Version 1.1, but Sections 14 and 15 have been added to cover use of
+## software over a computer network and provide for limited attribution for the
+## Original Developer. In addition, Exhibit A has been modified to be consistent
+## with Exhibit B.
+##
+## Software distributed under the License is distributed on an "AS IS" basis,
+## WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
+## the specific language governing rights and limitations under the License.
+##
+## The Original Code is Reddit.
+##
+## The Original Developer is the Initial Developer. The Initial Developer of
+## the Original Code is CondeNet, Inc.
+##
+## All portions of the code written by CondeNet are Copyright (c) 2006-2009
+## CondeNet, Inc. All Rights Reserved.
+################################################################################
+
diff --git a/r2/r2/templates/mediaembedbody.html b/r2/r2/templates/mediaembedbody.html
new file mode 100644
index 000000000..5c2c73e77
--- /dev/null
+++ b/r2/r2/templates/mediaembedbody.html
@@ -0,0 +1,33 @@
+## The contents of this file are subject to the Common Public Attribution
+## License Version 1.0. (the "License"); you may not use this file except in
+## compliance with the License. You may obtain a copy of the License at
+## http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
+## License Version 1.1, but Sections 14 and 15 have been added to cover use of
+## software over a computer network and provide for limited attribution for the
+## Original Developer. In addition, Exhibit A has been modified to be consistent
+## with Exhibit B.
+##
+## Software distributed under the License is distributed on an "AS IS" basis,
+## WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
+## the specific language governing rights and limitations under the License.
+##
+## The Original Code is Reddit.
+##
+## The Original Developer is the Initial Developer. The Initial Developer of
+## the Original Code is CondeNet, Inc.
+##
+## All portions of the code written by CondeNet are Copyright (c) 2006-2009
+## CondeNet, Inc. All Rights Reserved.
+################################################################################
+
+
+    ${unsafe(thing.body)}
+
diff --git a/r2/setup.py b/r2/setup.py
index 165ea7aef..82ec2b1df 100644
--- a/r2/setup.py
+++ b/r2/setup.py
@@ -83,7 +83,7 @@ setup(
         "flup",
         "simplejson",
         "SQLAlchemy==0.5.3",
-        "BeautifulSoup >= 3",
+        "BeautifulSoup == 3.0.7a", # last version to use the good parser
         "cssutils==0.9.5.1",
         "chardet",
         "psycopg2",
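
Note on the setup.py pin: BeautifulSoup 3.0.x parses with SGMLParser, which
is forgiving of the tag soup these scrapers routinely fetch; the 3.1 series
switched to HTMLParser and chokes on a lot of real-world markup, which is
presumably what "the good parser" refers to. If the pin is ever revisited, a
cheap guard might look like this (a sketch, not part of the patch):

    import BeautifulSoup
    assert BeautifulSoup.__version__.startswith("3.0"), \
        "r2.lib.scraper relies on the SGMLParser-based BeautifulSoup 3.0.x"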