diff --git a/r2/example.ini b/r2/example.ini index 502673056..dd9f81d6a 100755 --- a/r2/example.ini +++ b/r2/example.ini @@ -312,7 +312,7 @@ takedown_sr = _takedowns png_optimizer = /usr/bin/env optipng # bad words that should be *'d out profanity_wordlist = -# which markdown backend to use (c = discount, py = markdown.py, snudown = snudown) +# which markdown backend to use (c = discount, snudown = snudown) markdown_backend = snudown # -- search -- diff --git a/r2/r2/lib/contrib/markdown.py b/r2/r2/lib/contrib/markdown.py deleted file mode 100644 index 854ff0128..000000000 --- a/r2/r2/lib/contrib/markdown.py +++ /dev/null @@ -1,687 +0,0 @@ -#!/usr/bin/python -import re, md5, sys, string - -"""markdown.py: A Markdown-styled-text to HTML converter in Python. - -Usage: - ./markdown.py textfile.markdown - -Calling: - import markdown - somehtml = markdown.markdown(sometext) -""" - -__version__ = '1.0.1-2' # port of 1.0.1 -__license__ = "GNU GPL 2" -__author__ = [ - 'John Gruber ', - 'Tollef Fog Heen ', - 'Aaron Swartz ' -] - -def htmlquote(text): - """Encodes `text` for raw use in HTML.""" - text = text.replace("&", "&") # Must be done first! - text = text.replace("<", "<") - text = text.replace(">", ">") - text = text.replace("'", "'") - text = text.replace('"', """) - return text - -def mangle_text(text): - from pylons import g - return md5.new(text + g.SECRET).hexdigest() - -def semirandom(seed): - from pylons import g - x = 0 - for c in md5.new(seed + g.SECRET).digest(): x += ord(c) - return x / (255*16.) - -class _Markdown: - emptyelt = " />" - tabwidth = 4 - - escapechars = '\\`*_{}[]()>#+-.!' - escapetable = {} - for char in escapechars: - escapetable[char] = mangle_text(char) - - r_multiline = re.compile("\n{2,}") - r_stripspace = re.compile(r"^[ \t]+$", re.MULTILINE) - def parse(self, text): - self.urls = {} - self.titles = {} - self.html_blocks = {} - self.list_level = 0 - - text = text.replace("\r\n", "\n") - text = text.replace("\r", "\n") - text += "\n\n" - text = self._Detab(text) - text = self.r_stripspace.sub("", text) - text = self._HashHTMLBlocks(text) - text = self._StripLinkDefinitions(text) - text = self._RunBlockGamut(text) - text = self._UnescapeSpecialChars(text) - return text - - r_StripLinkDefinitions = re.compile(r""" - ^[ ]{0,%d}\[(.+)\]: # id = $1 - [ \t]*\n?[ \t]* - ? # url = $2 - [ \t]*\n?[ \t]* - (?: - (?<=\s) # lookbehind for whitespace - [\"\(] # " is backlashed so it colorizes our code right - (.+?) # title = $3 - [\"\)] - [ \t]* - )? # title is optional - (?:\n+|\Z) - """ % (tabwidth-1), re.MULTILINE|re.VERBOSE) - def _StripLinkDefinitions(self, text): - def replacefunc(matchobj): - (t1, t2, t3) = matchobj.groups() - #@@ case sensitivity? - self.urls[t1.lower()] = self._EncodeAmpsAndAngles(t2) - if t3 is not None: - self.titles[t1.lower()] = t3.replace('"', '"') - return "" - - text = self.r_StripLinkDefinitions.sub(replacefunc, text) - return text - - blocktagsb = r"p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|math" - blocktagsa = blocktagsb + "|ins|del" - - r_HashHTMLBlocks1 = re.compile(r""" - ( # save in $1 - ^ # start of line (with /m) - <(%s) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|$) # followed by a newline or end of document - ) - """ % blocktagsa, re.MULTILINE | re.VERBOSE) - - r_HashHTMLBlocks2 = re.compile(r""" - ( # save in $1 - ^ # start of line (with /m) - <(%s) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - .* # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - """ % blocktagsb, re.MULTILINE | re.VERBOSE) - - r_HashHR = re.compile(r""" - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,%d} - <(hr) # start tag = $2 - \b # word break - ([^<>])*? # - /?> # the matching end tag - [ \t]* - (?=\n{2,}|\Z)# followed by a blank line or end of document - ) - """ % (tabwidth-1), re.VERBOSE) - r_HashComment = re.compile(r""" - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,%d} - (?: - - ) - [ \t]* - (?=\n{2,}|\Z)# followed by a blank line or end of document - ) - """ % (tabwidth-1), re.VERBOSE) - - def _HashHTMLBlocks(self, text): - def handler(m): - key = m.group(1) - try: - key = key.encode('utf8') - except UnicodeDecodeError: - key = ''.join(k for k in key if ord(k) < 128) - key = mangle_text(key) - self.html_blocks[key] = m.group(1) - return "\n\n%s\n\n" % key - - text = self.r_HashHTMLBlocks1.sub(handler, text) - text = self.r_HashHTMLBlocks2.sub(handler, text) - oldtext = text - text = self.r_HashHR.sub(handler, text) - text = self.r_HashComment.sub(handler, text) - return text - - #@@@ wrong! - r_hr1 = re.compile(r'^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$', re.M) - r_hr2 = re.compile(r'^[ ]{0,2}([ ]?-[ ]?){3,}[ \t]*$', re.M) - r_hr3 = re.compile(r'^[ ]{0,2}([ ]?_[ ]?){3,}[ \t]*$', re.M) - - def _RunBlockGamut(self, text): - text = self._DoHeaders(text) - for x in [self.r_hr1, self.r_hr2, self.r_hr3]: - text = x.sub("\ns. - text = self._HashHTMLBlocks(text) - text = self._FormParagraphs(text) - return text - - r_NewLine = re.compile(" {2,}\n") - def _RunSpanGamut(self, text): - text = self._DoCodeSpans(text) - text = self._EscapeSpecialChars(text) - text = self._DoImages(text) - text = self._DoAnchors(text) - text = self._DoAutoLinks(text) - text = self._EncodeAmpsAndAngles(text) - text = self._DoItalicsAndBold(text) - text = self.r_NewLine.sub(" ? # href = $3 - [ \t]* - ( # $4 - ([\'\"]) # quote char = $5 - (.*?) # Title = $6 - \5 # matching quote - )? # title is optional - \) - ) - """, re.S|re.VERBOSE) - def _DoAnchors(self, text): - # We here don't do the same as the perl version, as python's regex - # engine gives us no way to match brackets. - - def handler1(m): - whole_match = m.group(1) - link_text = m.group(2) - link_id = m.group(3).lower() - if not link_id: link_id = link_text.lower() - title = self.titles.get(link_id, None) - - - if self.urls.has_key(link_id): - url = self.urls[link_id] - url = url.replace("*", self.escapetable["*"]) - url = url.replace("_", self.escapetable["_"]) - url = url.replace("[", self.escapetable["["]) - res = '? # src url = $3 - [ \t]* - ( # $4 - ([\'\"]) # quote char = $5 - (.*?) # title = $6 - \5 # matching quote - [ \t]* - )? # title is optional - \) - ) - """, re.VERBOSE|re.S) - - def _DoImages(self, text): - def handler1(m): - whole_match = m.group(1) - alt_text = m.group(2) - link_id = m.group(3).lower() - - if not link_id: - link_id = alt_text.lower() - - alt_text = alt_text.replace('"', """) - if self.urls.has_key(link_id): - url = self.urls[link_id] - url = url.replace("*", self.escapetable["*"]) - url = url.replace("_", self.escapetable["_"]) - res = '''%s= len(textl): continue - count = textl[i].strip().count(c) - if count > 0 and count == len(textl[i].strip()) and textl[i+1].strip() == '' and textl[i-1].strip() != '': - textl = textl[:i] + textl[i+1:] - textl[i-1] = ''+self._RunSpanGamut(textl[i-1])+'' - textl = textl[:i] + textl[i+1:] - text = '\n'.join(textl) - return text - - def handler(m): - level = len(m.group(1)) - header = self._RunSpanGamut(m.group(2)) - return "%s\n\n" % (level, header, level) - - text = findheader(text, '=', '1') - text = findheader(text, '-', '2') - text = self.r_DoHeaders.sub(handler, text) - return text - - rt_l = r""" - ( - ( - [ ]{0,%d} - ([*+-]|\d+[.]) - [ \t]+ - ) - (?:.+?) - ( - \Z - | - \n{2,} - (?=\S) - (?![ \t]* ([*+-]|\d+[.])[ \t]+) - ) - ) - """ % (tabwidth - 1) - r_DoLists = re.compile('^'+rt_l, re.M | re.VERBOSE | re.S) - r_DoListsTop = re.compile( - r'(?:\A\n?|(?<=\n\n))'+rt_l, re.M | re.VERBOSE | re.S) - - def _DoLists(self, text): - def handler(m): - list_type = "ol" - if m.group(3) in [ "*", "-", "+" ]: - list_type = "ul" - listn = m.group(1) - listn = self.r_multiline.sub("\n\n\n", listn) - res = self._ProcessListItems(listn) - res = "<%s>\n%s\n" % (list_type, res, list_type) - return res - - if self.list_level: - text = self.r_DoLists.sub(handler, text) - else: - text = self.r_DoListsTop.sub(handler, text) - return text - - r_multiend = re.compile(r"\n{2,}\Z") - r_ProcessListItems = re.compile(r""" - (\n)? # leading line = $1 - (^[ \t]*) # leading whitespace = $2 - ([*+-]|\d+[.]) [ \t]+ # list marker = $3 - ((?:.+?) # list item text = $4 - (\n{1,2})) - (?= \n* (\Z | \2 ([*+-]|\d+[.]) [ \t]+)) - """, re.VERBOSE | re.M | re.S) - - def _ProcessListItems(self, text): - self.list_level += 1 - text = self.r_multiend.sub("\n", text) - - def handler(m): - item = m.group(4) - leading_line = m.group(1) - leading_space = m.group(2) - - if leading_line or self.r_multiline.search(item): - item = self._RunBlockGamut(self._Outdent(item)) - else: - item = self._DoLists(self._Outdent(item)) - if item[-1] == "\n": item = item[:-1] # chomp - item = self._RunSpanGamut(item) - return "
  • %s
  • \n" % item - - text = self.r_ProcessListItems.sub(handler, text) - self.list_level -= 1 - return text - - r_DoCodeBlocks = re.compile(r""" - (?:\n\n|\A) - ( # $1 = the code block - (?: - (?:[ ]{%d} | \t) # Lines must start with a tab or equiv - .*\n+ - )+ - ) - ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space/end of doc - """ % (tabwidth, tabwidth), re.M | re.VERBOSE) - def _DoCodeBlocks(self, text): - def handler(m): - codeblock = m.group(1) - codeblock = self._EncodeCode(self._Outdent(codeblock)) - codeblock = self._Detab(codeblock) - codeblock = codeblock.lstrip("\n") - codeblock = codeblock.rstrip() - res = "\n\n
    %s\n
    \n\n" % codeblock - return res - - text = self.r_DoCodeBlocks.sub(handler, text) - return text - r_DoCodeSpans = re.compile(r""" - (`+) # $1 = Opening run of ` - (.+?) # $2 = The code block - (?%s" % c - - text = self.r_DoCodeSpans.sub(handler, text) - return text - - def _EncodeCode(self, text): - text = text.replace("&","&") - text = text.replace("<","<") - text = text.replace(">",">") - for c in "*_{}[]\\": - text = text.replace(c, self.escapetable[c]) - return text - - - r_DoBold = re.compile(r"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1", re.VERBOSE | re.S) - r_DoItalics = re.compile(r"(\*|_) (?=\S) (.+?) (?<=\S) \1", re.VERBOSE | re.S) - def _DoItalicsAndBold(self, text): - text = self.r_DoBold.sub(r"\2", text) - text = self.r_DoItalics.sub(r"\2", text) - return text - - r_start = re.compile(r"^", re.M) - ####r_DoBlockQuotes1 = re.compile(r"^[ \t]*>[ \t]?", re.M) - r_DoBlockQuotes1 = re.compile(r"^[ \t]*>[ \t]?", re.M) - r_DoBlockQuotes2 = re.compile(r"^[ \t]+$", re.M) - r_DoBlockQuotes3 = re.compile(r""" - ( # Wrap whole match in $1 - ( - ^[ \t]*>[ \t]? # '>' at the start of a line - .+\n # rest of the first line - (.+\n)* # subsequent consecutive lines - \n* # blanks - )+ - )""", re.M | re.VERBOSE) - r_protectpre = re.compile(r'(\s*
    .+?
    )', re.S) - r_propre = re.compile(r'^ ', re.M) - - def _DoBlockQuotes(self, text): - def prehandler(m): - return self.r_propre.sub('', m.group(1)) - - def handler(m): - bq = m.group(1) - bq = self.r_DoBlockQuotes1.sub("", bq) - bq = self.r_DoBlockQuotes2.sub("", bq) - bq = self._RunBlockGamut(bq) - bq = self.r_start.sub(" ", bq) - bq = self.r_protectpre.sub(prehandler, bq) - return "
    \n%s\n
    \n\n" % bq - - text = self.r_DoBlockQuotes3.sub(handler, text) - return text - - r_tabbed = re.compile(r"^([ \t]*)") - def _FormParagraphs(self, text): - text = text.strip("\n") - grafs = self.r_multiline.split(text) - - for g in xrange(len(grafs)): - t = grafs[g].strip() #@@? - if not self.html_blocks.has_key(t): - t = self._RunSpanGamut(t) - t = self.r_tabbed.sub(r"

    ", t) - t += "

    " - grafs[g] = t - - for g in xrange(len(grafs)): - t = grafs[g].strip() - if self.html_blocks.has_key(t): - grafs[g] = self.html_blocks[t] - - return "\n\n".join(grafs) - - r_EncodeAmps = re.compile(r"&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)") - r_EncodeAngles = re.compile(r"<(?![a-z/?\$!])") - def _EncodeAmpsAndAngles(self, text): - text = self.r_EncodeAmps.sub("&", text) - text = self.r_EncodeAngles.sub("<", text) - return text - - def _EncodeBackslashEscapes(self, text): - for char in self.escapechars: - text = text.replace("\\" + char, self.escapetable[char]) - return text - - r_link = re.compile(r"<((https?|ftp):[^\'\">\s]+)>", re.I) - r_email = re.compile(r""" - < - (?:mailto:)? - ( - [-.\w]+ - \@ - [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ - ) - >""", re.VERBOSE|re.I) - def _DoAutoLinks(self, text): - text = self.r_link.sub(r'
    \1', text) - - def handler(m): - l = m.group(1) - return self._EncodeEmailAddress(self._UnescapeSpecialChars(l)) - - text = self.r_email.sub(handler, text) - return text - - r_EncodeEmailAddress = re.compile(r">.+?:") - def _EncodeEmailAddress(self, text): - encode = [ - lambda x: "&#%s;" % ord(x), - lambda x: "&#x%X;" % ord(x), - lambda x: x - ] - - text = "mailto:" + text - addr = "" - for c in text: - if c == ':': addr += c; continue - - r = semirandom(addr) - if r < 0.45: - addr += encode[1](c) - elif r > 0.9 and c != '@': - addr += encode[2](c) - else: - addr += encode[0](c) - - text = '%s' % (addr, addr) - text = self.r_EncodeEmailAddress.sub('>', text) - return text - - def _UnescapeSpecialChars(self, text): - for key in self.escapetable.keys(): - text = text.replace(self.escapetable[key], key) - return text - - tokenize_depth = 6 - tokenize_nested_tags = '|'.join([r'(?:<[a-z/!$](?:[^<>]'] * tokenize_depth) + (')*>)' * tokenize_depth) - r_TokenizeHTML = re.compile( - r"""(?: ) | # comment - (?: <\? .*? \?> ) | # processing instruction - %s # nested tags - """ % tokenize_nested_tags, re.I|re.VERBOSE) - def _TokenizeHTML(self, text): - pos = 0 - tokens = [] - matchobj = self.r_TokenizeHTML.search(text, pos) - while matchobj: - whole_tag = matchobj.string[matchobj.start():matchobj.end()] - sec_start = matchobj.end() - tag_start = sec_start - len(whole_tag) - if pos < tag_start: - tokens.append(["text", matchobj.string[pos:tag_start]]) - - tokens.append(["tag", whole_tag]) - pos = sec_start - matchobj = self.r_TokenizeHTML.search(text, pos) - - if pos < len(text): - tokens.append(["text", text[pos:]]) - return tokens - - r_Outdent = re.compile(r"""^(\t|[ ]{1,%d})""" % tabwidth, re.M) - def _Outdent(self, text): - text = self.r_Outdent.sub("", text) - return text - - def _Detab(self, text): return text.expandtabs(self.tabwidth) - -def Markdown(*args, **kw): return _Markdown().parse(*args, **kw) -markdown = Markdown - -if __name__ == '__main__': - if len(sys.argv) > 1: - print Markdown(open(sys.argv[1]).read()) - else: - print Markdown(sys.stdin.read()) diff --git a/r2/r2/lib/filters.py b/r2/r2/lib/filters.py index ac0d3838d..c46b15250 100644 --- a/r2/r2/lib/filters.py +++ b/r2/r2/lib/filters.py @@ -209,7 +209,6 @@ def markdown_souptest(text, nofollow=False, target=None, lang=None): #@memoize('markdown') def safemarkdown(text, nofollow=False, target=None, lang=None, wrap=True): from r2.lib.c_markdown import c_markdown - from r2.lib.py_markdown import py_markdown if c.user.pref_no_profanity: text = profanity_filter(text) @@ -227,8 +226,6 @@ def safemarkdown(text, nofollow=False, target=None, lang=None, wrap=True): text = snudown.markdown(_force_utf8(text), nofollow, target) elif lang == "c": text = c_markdown(text, nofollow, target) - elif lang == "py": - text = py_markdown(text, nofollow, target) else: raise ValueError("weird lang [%s]" % lang) diff --git a/r2/r2/lib/pages/pages.py b/r2/r2/lib/pages/pages.py index 5f79b28d4..205f2bb53 100644 --- a/r2/r2/lib/pages/pages.py +++ b/r2/r2/lib/pages/pages.py @@ -37,7 +37,6 @@ from pylons.controllers.util import abort from r2.lib import promote from r2.lib.traffic import load_traffic, load_summary from r2.lib.captcha import get_iden -from r2.lib.contrib.markdown import markdown from r2.lib.filters import spaceCompress, _force_unicode, _force_utf8 from r2.lib.filters import unsafe, websafe, SC_ON, SC_OFF, websafe_json from r2.lib.menus import NavButton, NamedButton, NavMenu, PageNameNav, JsButton @@ -53,6 +52,7 @@ from r2.lib.scraper import get_media_embed from r2.lib.log import log_text from r2.lib.memoize import memoize from r2.lib.utils import trunc_string as _truncate +from r2.lib.filters import safemarkdown import sys, random, datetime, locale, calendar, simplejson, re, time import graph, pycountry, time @@ -1453,9 +1453,7 @@ class Thanks(Templated): if g.lounge_reddit: lounge_url = "/r/" + g.lounge_reddit - lounge_html = (SC_OFF + - markdown(strings.lounge_msg % dict(link=lounge_url)) - + SC_ON) + lounge_html = safemarkdown(strings.lounge_msg % dict(link=lounge_url)) else: lounge_html = None Templated.__init__(self, status=status, secret=secret, @@ -1670,10 +1668,7 @@ class SearchBar(Templated): class SearchFail(Templated): """Search failure page.""" def __init__(self, **kw): - md = SC_OFF + markdown(strings.search_failed % dict( - link="javascript:tryagain\(\)")) + SC_ON - - self.errmsg = md + self.errmsg = strings.search_failed Templated.__init__(self) diff --git a/r2/r2/lib/py_markdown.py b/r2/r2/lib/py_markdown.py deleted file mode 100644 index 312964736..000000000 --- a/r2/r2/lib/py_markdown.py +++ /dev/null @@ -1,59 +0,0 @@ -from contrib.markdown import markdown -import re - -r_url = re.compile('(?', re.I | re.S) -img = re.compile('', re.I | re.S) -href_re = re.compile('([^<]+)') -a_re = re.compile('>([^<]+)') -fix_url = re.compile('<(http://[^\s\'\"\]\)]+)>') - -def code_handler(m): - l = m.group(1) - return '%s' % l.replace('&','&') - -#unescape double escaping in links -def inner_a_handler(m): - l = m.group(1) - return '>%s' % l.replace('&','&') - -def py_markdown(text, nofollow=False, target=None): - # increase escaping of &, < and > once - text = text.replace("&", "&").replace("<", "<").replace(">", ">") - - #wrap urls in "<>" so that markdown will handle them as urls - text = r_url.sub(r'<\1>', text) - - text = markdown(text) - - text = img.sub('', text) #remove images - # remove the "&" escaping in urls - text = code_re.sub(code_handler, text) - text = a_re.sub(inner_a_handler, text) - - #remove images - text = img.sub('', text) - - #wipe malicious javascript - text = jscript_url.sub('', text) - - # remove the "&" escaping in urls - def href_handler(m): - url = m.group(1).replace('&', '&') - link = ' - var searchfail_timeout = new Date(); - - function tryagain() { - elapsed = new Date() - searchfail_timeout; - - seconds = elapsed / 1000; - - if (seconds < 10) { - alert("Please don't pound our servers! " + - "Give them a few minutes to cool off."); - } else { - window.location.reload(); - } - } - -
    ${unsafe(thing.errmsg)}