From c0d63cf803a5b7aa020340ddb9f6bff85bb07de8 Mon Sep 17 00:00:00 2001
From: Neil Williams
Date: Tue, 17 Sep 2013 10:59:55 -0700
Subject: [PATCH] Start writing HTTPS-friendly subreddit stylesheets.

This does several things to subreddit stylesheets:

- stores them on the thumbs buckets rather than the main static bucket.
  (this was not desirable before on reddit.com due to CDN configuration)
- enforces a new restriction of custom (%%style%%) images only in
  stylesheets to make secure urls easier to resolve. existing subreddits
  are grandfathered in for now.
- writes, if possible as above, a second stylesheet that references
  subreddit images over https.

At some point in the future, the thumbs buckets should be directly
accessible on HTTPS via the same URLs which would remove the need for
the second stylesheet to be created and uploaded. The custom image
rules and other changes would still be good.
---
 r2/example.ini                |  2 +
 r2/r2/controllers/api.py      | 10 +++--
 r2/r2/lib/app_globals.py      |  1 +
 r2/r2/lib/cssfilter.py        | 47 +++++++++++++++++-----
 r2/r2/lib/media.py            | 12 ++++++
 r2/r2/lib/strings.py          |  1 +
 r2/r2/lib/template_helpers.py |  4 ++
 r2/r2/models/subreddit.py     | 75 +++++++++++++++++++++++++----------
 8 files changed, 119 insertions(+), 33 deletions(-)

diff --git a/r2/example.ini b/r2/example.ini
index 5ed30bea2..9c0bd6400 100644
--- a/r2/example.ini
+++ b/r2/example.ini
@@ -381,6 +381,8 @@ static_secure_pre_gzipped = false
 # which s3 bucket to place subreddit styles on (when empty, stylesheets will be served
 # from the local database instead.
 static_stylesheet_bucket =
+# whether or not to put subreddit stylesheets on the thumbnail s3 buckets
+subreddit_stylesheets_static = false
 
 # subreddit used for DMCA takedowns
 takedown_sr = _takedowns
diff --git a/r2/r2/controllers/api.py b/r2/r2/controllers/api.py
index 37a17483c..7e9544e9d 100755
--- a/r2/r2/controllers/api.py
+++ b/r2/r2/controllers/api.py
@@ -1579,9 +1579,7 @@ class ApiController(RedditController, OAuth2ResourceController):
         form.find('#conflict_box').hide()
         form.set_html(".errors ul", '')
 
-        stylesheet_contents_parsed = parsed or ''
         if op == 'save':
-            c.site.stylesheet_contents = stylesheet_contents_parsed
             try:
                 wr = c.site.change_css(stylesheet_contents, parsed, prevstyle)
                 form.find('.conflict_box').hide()
@@ -1607,7 +1605,13 @@ class ApiController(RedditController, OAuth2ResourceController):
                 c.errors.add(errors.BAD_REVISION, field="prevstyle")
                 form.has_errors("prevstyle", errors.BAD_REVISION)
                 return
-        jquery.apply_stylesheet(stylesheet_contents_parsed)
+
+        parsed_http, parsed_https = parsed
+        if c.secure:
+            jquery.apply_stylesheet(parsed_https)
+        else:
+            jquery.apply_stylesheet(parsed_http)
+
         if op == 'preview':
             # try to find a link to use, otherwise give up and
             # return
diff --git a/r2/r2/lib/app_globals.py b/r2/r2/lib/app_globals.py
index b7d7d40e0..a845f216c 100755
--- a/r2/r2/lib/app_globals.py
+++ b/r2/r2/lib/app_globals.py
@@ -160,6 +160,7 @@ class Globals(object):
            'trust_local_proxies',
            'shard_link_vote_queues',
            'shard_commentstree_queues',
+           'subreddit_stylesheets_static',
        ],
 
        ConfigValue.tuple: [
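
The api.py hunk above boils down to serving one of two pre-validated stylesheets depending on whether the request came in over TLS. A minimal standalone sketch of that selection, with a hypothetical is_secure flag standing in for c.secure and a plain (http_css, https_css) tuple standing in for parsed:

    def pick_stylesheet(parsed, is_secure):
        """Return the stylesheet variant matching the request scheme.

        `parsed` is the (http_css, https_css) pair produced by validation;
        `is_secure` plays the role of c.secure in the controller.
        """
        parsed_http, parsed_https = parsed
        return parsed_https if is_secure else parsed_http

    # usage sketch
    styles = ("body { background: url(http://example.invalid/bg.png) }",
              "body { background: url(https://example.invalid/bg.png) }")
    print(pick_stylesheet(styles, is_secure=True))
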
diff --git a/r2/r2/lib/cssfilter.py b/r2/r2/lib/cssfilter.py
index 1b41580ca..b7b4cf8a0 100644
--- a/r2/r2/lib/cssfilter.py
+++ b/r2/r2/lib/cssfilter.py
@@ -38,7 +38,7 @@
 from r2.lib import s3cp
 from r2.lib.media import upload_media
 
-from r2.lib.template_helpers import s3_https_if_secure
+from r2.lib.template_helpers import s3_direct_https
 
 import re
 from urlparse import urlparse
@@ -184,7 +184,7 @@ local_urls = re.compile(r'\A/static/[a-z./-]+\Z')
 # substitutable urls will be css-valid labels surrounded by "%%"
 custom_img_urls = re.compile(r'%%([a-zA-Z0-9\-]+)%%')
 valid_url_schemes = ('http', 'https')
-def valid_url(prop,value,report):
+def valid_url(prop, value, report, generate_https_urls, enforce_custom_images_only):
     """
     checks url(...) arguments in CSS, ensuring that the contents are
     officially sanctioned.  Sanctioned urls include:
@@ -199,6 +199,10 @@ def valid_url(prop,value,report):
         raise
     # local urls are allowed
     if local_urls.match(url):
+        if enforce_custom_images_only:
+            report.append(ValidationError(msgs["custom_images_only"], value))
+            return
+
         t_url = None
         while url != t_url:
             t_url, url = url, filters.url_unescape(url)
@@ -215,7 +219,10 @@ def valid_url(prop,value,report):
         images = ImagesByWikiPage.get_images(c.site, "config/stylesheet")
 
         if name in images:
-            url = s3_https_if_secure(images[name])
+            if not generate_https_urls:
+                url = images[name]
+            else:
+                url = s3_direct_https(images[name])
             value._setCssText("url(%s)"%url)
         else:
             # unknown image label -> error
@@ -223,6 +230,10 @@ def valid_url(prop,value,report):
                                           % dict(brokenurl = value.cssText),
                                           value))
     else:
+        if enforce_custom_images_only:
+            report.append(ValidationError(msgs["custom_images_only"], value))
+            return
+
         try:
             u = urlparse(url)
             valid_scheme = u.scheme and u.scheme in valid_url_schemes
@@ -245,7 +256,7 @@ def strip_browser_prefix(prop):
     t = prefix_regex.split(prop, maxsplit=1)
     return t[len(t) - 1]
 
-def valid_value(prop,value,report):
+def valid_value(prop, value, report, generate_https_urls, enforce_custom_images_only):
     prop_name = strip_browser_prefix(prop.name) # Remove browser-specific prefixes eg: -moz-border-radius becomes border-radius
     if not (value.valid and value.wellformed):
         if (value.wellformed
@@ -280,11 +291,17 @@ def valid_value(prop,value,report):
             report.append(ValidationError(error,value))
 
     if value.primitiveType == CSSPrimitiveValue.CSS_URI:
-        valid_url(prop,value,report)
+        valid_url(
+            prop,
+            value,
+            report,
+            generate_https_urls,
+            enforce_custom_images_only,
+        )
 
 error_message_extract_re = re.compile('.*\\[([0-9]+):[0-9]*:.*\\]\Z')
 only_whitespace = re.compile('\A\s*\Z')
-def validate_css(string):
+def validate_css(string, generate_https_urls, enforce_custom_images_only):
     p = CSSParser(raiseExceptions = True)
 
     if not string or only_whitespace.match(string):
@@ -330,13 +347,25 @@ def validate_css(string):
                 if prop.cssValue.cssValueType == CSSValue.CSS_VALUE_LIST:
                     for i in range(prop.cssValue.length):
-                        valid_value(prop,prop.cssValue.item(i),report)
+                        valid_value(
+                            prop,
+                            prop.cssValue.item(i),
+                            report,
+                            generate_https_urls,
+                            enforce_custom_images_only,
+                        )
 
                     if not (prop.cssValue.valid and prop.cssValue.wellformed):
                         report.append(ValidationError(msgs['invalid_property_list']
                                                       % dict(proplist = prop.cssText),
                                                       prop.cssValue))
                 elif prop.cssValue.cssValueType == CSSValue.CSS_PRIMITIVE_VALUE:
-                    valid_value(prop,prop.cssValue,report)
+                    valid_value(
+                        prop,
+                        prop.cssValue,
+                        report,
+                        generate_https_urls,
+                        enforce_custom_images_only,
+                    )
 
                 # cssutils bug: because valid values might be marked
                 # as invalid, we can't trust cssutils to properly
@@ -358,7 +387,7 @@ def validate_css(string):
                                           % dict(ruletype = rule.cssText),
                                           rule))
 
-    return parsed,report
+    return parsed.cssText if parsed else "", report
 
 def find_preview_comments(sr):
     from r2.lib.db.queries import get_sr_comments, get_all_comments
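
The cssfilter.py changes above hinge on the %%imagename%% convention: when enforce_custom_images_only is set, url() values must be %%label%% references, and each label resolves either to the stored image URL (the http pass) or to an https-safe rewrite of it (the https pass). A rough standalone sketch of that lookup, assuming a plain dict in place of ImagesByWikiPage.get_images and a naive scheme swap in place of s3_direct_https:

    import re

    custom_img_urls = re.compile(r'%%([a-zA-Z0-9\-]+)%%')

    def resolve_custom_image(url, images, generate_https_urls):
        """Resolve a %%label%% reference to a concrete image URL.

        `images` maps label -> stored http URL (a stand-in for
        ImagesByWikiPage.get_images); the https rewrite below is only a
        placeholder for what s3_direct_https does for real.
        """
        match = custom_img_urls.match(url)
        if not match:
            raise ValueError("only uploaded %%imagename%% images are allowed")
        name = match.group(1)
        if name not in images:
            raise KeyError("unknown image label: %s" % name)
        resolved = images[name]
        if generate_https_urls:
            resolved = resolved.replace("http://", "https://", 1)
        return resolved

    images = {"header": "http://thumbs.example.invalid/sr_header.png"}
    print(resolve_custom_image("%%header%%", images, generate_https_urls=True))
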
diff --git a/r2/r2/lib/media.py b/r2/r2/lib/media.py
index 7e0bc3c11..769ca214d 100644
--- a/r2/r2/lib/media.py
+++ b/r2/r2/lib/media.py
@@ -254,6 +254,18 @@ def upload_media(image, never_expire=True, file_type='.jpg'):
     return url
 
 
+def upload_stylesheet(content):
+    file_name = get_filename_from_content(content)
+
+    return s3_upload_media(
+        content,
+        file_name=file_name,
+        file_type=".css",
+        mime_type="text/css",
+        never_expire=True,
+    )
+
+
 def _set_media(embedly_services, link, force=False):
     if link.is_self:
         return
diff --git a/r2/r2/lib/strings.py b/r2/r2/lib/strings.py
index 5552f5e51..e301d03cc 100644
--- a/r2/r2/lib/strings.py
+++ b/r2/r2/lib/strings.py
@@ -106,6 +106,7 @@ string_dict = dict(
         syntax_error = _('syntax error: "%(syntaxerror)s"'),
         no_imports = _('@imports are not allowed'),
         invalid_property_list = _('invalid CSS property list "%(proplist)s"'),
+        custom_images_only = _('only uploaded images are allowed; reference them with the %%imagename%% system below'),
         unknown_rule_type = _('unknown CSS rule type "%(ruletype)s"')
     ),
     permalink_title = _("%(author)s comments on %(title)s"),
diff --git a/r2/r2/lib/template_helpers.py b/r2/r2/lib/template_helpers.py
index d6741f517..73d48d047 100755
--- a/r2/r2/lib/template_helpers.py
+++ b/r2/r2/lib/template_helpers.py
@@ -127,6 +127,10 @@ def s3_https_if_secure(url):
     # In the event that more media sources (other than s3) are added, this function should be corrected
     if not c.secure:
         return url
+    return s3_direct_https(url)
+
+
+def s3_direct_https(url):
     replace = "https://"
     if not url.startswith("http://%s" % s3_direct_url):
         replace = "https://%s/" % s3_direct_url
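
The new s3_direct_https helper above always rewrites an http S3 media URL to https, unlike s3_https_if_secure, which only does so when the current request is secure. A sketch of the kind of rewrite involved, assuming s3_direct_url names the plain S3 endpoint; the helper's remaining lines fall outside this hunk, so the return statement here is an approximation rather than the actual implementation:

    s3_direct_url = "s3.amazonaws.com"  # assumed value of the real setting

    def s3_direct_https_sketch(url):
        """Rewrite an http S3 media URL to an https one.

        Bucket-vhost URLs (http://bucket.example.com/key) are routed
        through the direct S3 endpoint so the TLS certificate can match;
        URLs already on the direct endpoint just get their scheme
        upgraded. This mirrors the intent of s3_direct_https, not its
        exact code.
        """
        replace = "https://"
        if not url.startswith("http://%s" % s3_direct_url):
            replace = "https://%s/" % s3_direct_url
        return url.replace("http://", replace, 1)

    print(s3_direct_https_sketch("http://thumbs.example-bucket.invalid/style.css"))
    # -> https://s3.amazonaws.com/thumbs.example-bucket.invalid/style.css
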
diff --git a/r2/r2/models/subreddit.py b/r2/r2/models/subreddit.py
index 9e5e0d873..307a55b56 100644
--- a/r2/r2/models/subreddit.py
+++ b/r2/r2/models/subreddit.py
@@ -50,7 +50,6 @@ from r2.models.wiki import WikiPage
 from r2.lib.merge import ConflictException
 from r2.lib.cache import CL_ONE
 from r2.lib.contrib.rcssmin import cssmin
-from r2.lib import s3cp
 from r2.models.query_cache import MergedCachedQuery
 
 import pycassa
@@ -496,13 +495,44 @@ class Subreddit(Thing, Printable, BaseSite):
         from r2.lib import cssfilter
         if g.css_killswitch or (verify and not self.can_change_stylesheet(c.user)):
             return (None, None)
-
-        parsed, report = cssfilter.validate_css(content)
-        parsed = parsed.cssText if parsed else ''
-        return (report, parsed)
+
+        # in the new world order, you can only use %%custom%% images so that we
+        # can manage https urls more easily. however, we'll only hold people to
+        # the new rules if they were already abiding by them.
+        is_empty = not self.stylesheet_hash and not self.stylesheet_url_http
+        is_already_secure = bool(self.stylesheet_url_https)
+        enforce_img_restriction = is_empty or is_already_secure
+
+        # parse in regular old http mode
+        parsed_http, report_http = cssfilter.validate_css(
+            content,
+            generate_https_urls=False,
+            enforce_custom_images_only=enforce_img_restriction,
+        )
+
+        # parse and resolve images with https-safe urls
+        parsed_https, report_https = cssfilter.validate_css(
+            content,
+            generate_https_urls=True,
+            enforce_custom_images_only=True,
+        )
+
+        # the above https parsing was optimistic. if the subreddit isn't
+        # subject to the new "custom images only" rule and their stylesheet
+        # doesn't validate with it turned on, we'll just ignore the error and
+        # silently throw out the https parsing.
+        if not enforce_img_restriction and report_https.errors:
+            parsed_https = ""
+
+        # the two reports should be identical except in the already handled
+        # case of using non-custom images, so we'll just return the http one.
+        return (report_http, (parsed_http, parsed_https))
 
     def change_css(self, content, parsed, prev=None, reason=None, author=None, force=False):
         from r2.models import ModAction
+        from r2.lib.template_helpers import s3_direct_https
+        from r2.lib.media import upload_stylesheet
+
         author = author if author else c.user._id36
         if content is None:
             content = ''
@@ -512,29 +542,32 @@ class Subreddit(Thing, Printable, BaseSite):
             wiki = WikiPage.create(self, 'config/stylesheet')
         wr = wiki.revise(content, previous=prev, author=author, reason=reason, force=force)
 
-        minified = cssmin(parsed)
-        if minified:
-            if g.static_stylesheet_bucket:
-                digest = hashlib.sha1(minified).digest()
-                self.stylesheet_hash = (base64.urlsafe_b64encode(digest)
-                                        .rstrip("="))
-
-                s3cp.send_file(g.static_stylesheet_bucket,
-                               self.static_stylesheet_name,
-                               minified,
-                               content_type="text/css",
-                               never_expire=True,
-                               replace=False,
-                              )
+        minified_http, minified_https = map(cssmin, parsed)
+        if minified_http or minified_https:
+            if g.subreddit_stylesheets_static:
+                self.stylesheet_url_http = upload_stylesheet(minified_http)
+                if minified_https:
+                    self.stylesheet_url_https = s3_direct_https(
+                        upload_stylesheet(minified_https))
+                else:
+                    self.stylesheet_url_https = ""
+                self.stylesheet_hash = ""
 
                 self.stylesheet_contents = ""
+                self.stylesheet_contents_secure = ""
                 self.stylesheet_modified = None
             else:
-                self.stylesheet_hash = hashlib.md5(minified).hexdigest()
-                self.stylesheet_contents = minified
+                self.stylesheet_url_http = ""
+                self.stylesheet_url_https = ""
+                self.stylesheet_hash = hashlib.md5(minified_https).hexdigest()
+                self.stylesheet_contents = minified_http
+                self.stylesheet_contents_secure = minified_https
                 self.stylesheet_modified = datetime.datetime.now(g.tz)
         else:
+            self.stylesheet_url_http = ""
+            self.stylesheet_url_https = ""
             self.stylesheet_contents = ""
+            self.stylesheet_contents_secure = ""
             self.stylesheet_hash = ""
             self.stylesheet_modified = datetime.datetime.now(g.tz)
         self.stylesheet_contents_user = ""  # reads from wiki; ensure pg clean
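
Taken together, the subreddit.py changes implement the policy from the commit message: parse the stylesheet once in http mode, parse it again with https URLs and the custom-image restriction always on, and quietly drop the https variant for grandfathered subreddits whose stylesheets fail the stricter pass. A condensed, hypothetical sketch of that control flow, where validate_fn stands in for cssfilter.validate_css and the validation report is reduced to a plain list of errors:

    def build_stylesheets(content, validate_fn, is_grandfathered):
        """Produce (report, (http_css, https_css)) along the lines of the
        model change above.

        `validate_fn(content, generate_https_urls, enforce_custom_images_only)`
        is assumed to return (css_text, errors); subreddits that predate
        the custom-image rule (is_grandfathered=True) are not held to it
        for the http pass.
        """
        enforce = not is_grandfathered
        parsed_http, report_http = validate_fn(content, False, enforce)

        # the https pass is optimistic: it always demands custom images only
        parsed_https, report_https = validate_fn(content, True, True)
        if is_grandfathered and report_https:
            parsed_https = ""  # old-style sheet; skip the secure variant

        return report_http, (parsed_http, parsed_https)

In the actual method the grandfathering decision is derived from stored state (no stylesheet_hash or stylesheet_url_http yet, or a stylesheet_url_https already present), so brand-new and already-converted subreddits are held to the custom-image rule immediately.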