mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-01-26 07:19:25 -05:00
Remove markdown.py and replace scattered uses of it.
It was used directly in two places in pages.py; I've replaced those uses with safemarkdown calls. In the case of the search fail page, I just removed the JavaScript try-again link, since it wouldn't be doable through safemarkdown and that page isn't hit very frequently anymore anyway.
This commit is contained in:
@@ -312,7 +312,7 @@ takedown_sr = _takedowns
|
||||
png_optimizer = /usr/bin/env optipng
|
||||
# bad words that should be *'d out
|
||||
profanity_wordlist =
|
||||
# which markdown backend to use (c = discount, py = markdown.py, snudown = snudown)
|
||||
# which markdown backend to use (c = discount, snudown = snudown)
|
||||
markdown_backend = snudown
|
||||
|
||||
# -- search --
|
||||
|
||||
@@ -1,687 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
import re, md5, sys, string
|
||||
|
||||
"""markdown.py: A Markdown-styled-text to HTML converter in Python.
|
||||
|
||||
Usage:
|
||||
./markdown.py textfile.markdown
|
||||
|
||||
Calling:
|
||||
import markdown
|
||||
somehtml = markdown.markdown(sometext)
|
||||
"""
|
||||
|
||||
__version__ = '1.0.1-2' # port of 1.0.1
|
||||
__license__ = "GNU GPL 2"
|
||||
__author__ = [
|
||||
'John Gruber <http://daringfireball.net/>',
|
||||
'Tollef Fog Heen <tfheen@err.no>',
|
||||
'Aaron Swartz <me@aaronsw.com>'
|
||||
]
|
||||
|
||||
def htmlquote(text):
    """Encode `text` for raw use in HTML.

    Replaces the five characters that are significant in HTML markup and
    attribute values (``&``, ``<``, ``>``, ``'`` and ``"``) with character
    references and returns the resulting string.
    """
    # The ampersand must be handled first; otherwise the '&' introduced by
    # the other references below would itself be re-encoded.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    # Numeric reference for the apostrophe: "&apos;" is not defined in HTML 4.
    text = text.replace("'", "&#39;")
    text = text.replace('"', "&quot;")
    return text
|
||||
|
||||
def mangle_text(text):
    """Return the hexadecimal MD5 digest of `text` salted with g.SECRET.

    `g` is imported from pylons at call time rather than at module load.
    """
    from pylons import g
    salted = text + g.SECRET
    return md5.new(salted).hexdigest()
|
||||
|
||||
def semirandom(seed):
    """Derive a repeatable float from `seed` and g.SECRET.

    The character codes of the raw MD5 digest of ``seed + g.SECRET`` are
    summed and divided by ``255*16.``; the same seed and secret always
    yield the same value.
    """
    from pylons import g
    digest = md5.new(seed + g.SECRET).digest()
    total = sum(ord(ch) for ch in digest)
    return total / (255*16.)
|
||||
|
||||
class _Markdown:
|
||||
emptyelt = " />"
|
||||
tabwidth = 4
|
||||
|
||||
escapechars = '\\`*_{}[]()>#+-.!'
|
||||
escapetable = {}
|
||||
for char in escapechars:
|
||||
escapetable[char] = mangle_text(char)
|
||||
|
||||
r_multiline = re.compile("\n{2,}")
|
||||
r_stripspace = re.compile(r"^[ \t]+$", re.MULTILINE)
|
||||
def parse(self, text):
|
||||
self.urls = {}
|
||||
self.titles = {}
|
||||
self.html_blocks = {}
|
||||
self.list_level = 0
|
||||
|
||||
text = text.replace("\r\n", "\n")
|
||||
text = text.replace("\r", "\n")
|
||||
text += "\n\n"
|
||||
text = self._Detab(text)
|
||||
text = self.r_stripspace.sub("", text)
|
||||
text = self._HashHTMLBlocks(text)
|
||||
text = self._StripLinkDefinitions(text)
|
||||
text = self._RunBlockGamut(text)
|
||||
text = self._UnescapeSpecialChars(text)
|
||||
return text
|
||||
|
||||
r_StripLinkDefinitions = re.compile(r"""
|
||||
^[ ]{0,%d}\[(.+)\]: # id = $1
|
||||
[ \t]*\n?[ \t]*
|
||||
<?(\S+?)>? # url = $2
|
||||
[ \t]*\n?[ \t]*
|
||||
(?:
|
||||
(?<=\s) # lookbehind for whitespace
|
||||
[\"\(] # " is backlashed so it colorizes our code right
|
||||
(.+?) # title = $3
|
||||
[\"\)]
|
||||
[ \t]*
|
||||
)? # title is optional
|
||||
(?:\n+|\Z)
|
||||
""" % (tabwidth-1), re.MULTILINE|re.VERBOSE)
|
||||
def _StripLinkDefinitions(self, text):
|
||||
def replacefunc(matchobj):
|
||||
(t1, t2, t3) = matchobj.groups()
|
||||
#@@ case sensitivity?
|
||||
self.urls[t1.lower()] = self._EncodeAmpsAndAngles(t2)
|
||||
if t3 is not None:
|
||||
self.titles[t1.lower()] = t3.replace('"', '"')
|
||||
return ""
|
||||
|
||||
text = self.r_StripLinkDefinitions.sub(replacefunc, text)
|
||||
return text
|
||||
|
||||
blocktagsb = r"p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|math"
|
||||
blocktagsa = blocktagsb + "|ins|del"
|
||||
|
||||
r_HashHTMLBlocks1 = re.compile(r"""
|
||||
( # save in $1
|
||||
^ # start of line (with /m)
|
||||
<(%s) # start tag = $2
|
||||
\b # word break
|
||||
(.*\n)*? # any number of lines, minimally matching
|
||||
</\2> # the matching end tag
|
||||
[ \t]* # trailing spaces/tabs
|
||||
(?=\n+|$) # followed by a newline or end of document
|
||||
)
|
||||
""" % blocktagsa, re.MULTILINE | re.VERBOSE)
|
||||
|
||||
r_HashHTMLBlocks2 = re.compile(r"""
|
||||
( # save in $1
|
||||
^ # start of line (with /m)
|
||||
<(%s) # start tag = $2
|
||||
\b # word break
|
||||
(.*\n)*? # any number of lines, minimally matching
|
||||
.*</\2> # the matching end tag
|
||||
[ \t]* # trailing spaces/tabs
|
||||
(?=\n+|\Z) # followed by a newline or end of document
|
||||
)
|
||||
""" % blocktagsb, re.MULTILINE | re.VERBOSE)
|
||||
|
||||
r_HashHR = re.compile(r"""
|
||||
(?:
|
||||
(?<=\n\n) # Starting after a blank line
|
||||
| # or
|
||||
\A\n? # the beginning of the doc
|
||||
)
|
||||
( # save in $1
|
||||
[ ]{0,%d}
|
||||
<(hr) # start tag = $2
|
||||
\b # word break
|
||||
([^<>])*? #
|
||||
/?> # the matching end tag
|
||||
[ \t]*
|
||||
(?=\n{2,}|\Z)# followed by a blank line or end of document
|
||||
)
|
||||
""" % (tabwidth-1), re.VERBOSE)
|
||||
r_HashComment = re.compile(r"""
|
||||
(?:
|
||||
(?<=\n\n) # Starting after a blank line
|
||||
| # or
|
||||
\A\n? # the beginning of the doc
|
||||
)
|
||||
( # save in $1
|
||||
[ ]{0,%d}
|
||||
(?:
|
||||
<!
|
||||
(--.*?--\s*)+
|
||||
>
|
||||
)
|
||||
[ \t]*
|
||||
(?=\n{2,}|\Z)# followed by a blank line or end of document
|
||||
)
|
||||
""" % (tabwidth-1), re.VERBOSE)
|
||||
|
||||
def _HashHTMLBlocks(self, text):
|
||||
def handler(m):
|
||||
key = m.group(1)
|
||||
try:
|
||||
key = key.encode('utf8')
|
||||
except UnicodeDecodeError:
|
||||
key = ''.join(k for k in key if ord(k) < 128)
|
||||
key = mangle_text(key)
|
||||
self.html_blocks[key] = m.group(1)
|
||||
return "\n\n%s\n\n" % key
|
||||
|
||||
text = self.r_HashHTMLBlocks1.sub(handler, text)
|
||||
text = self.r_HashHTMLBlocks2.sub(handler, text)
|
||||
oldtext = text
|
||||
text = self.r_HashHR.sub(handler, text)
|
||||
text = self.r_HashComment.sub(handler, text)
|
||||
return text
|
||||
|
||||
#@@@ wrong!
|
||||
r_hr1 = re.compile(r'^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$', re.M)
|
||||
r_hr2 = re.compile(r'^[ ]{0,2}([ ]?-[ ]?){3,}[ \t]*$', re.M)
|
||||
r_hr3 = re.compile(r'^[ ]{0,2}([ ]?_[ ]?){3,}[ \t]*$', re.M)
|
||||
|
||||
def _RunBlockGamut(self, text):
|
||||
text = self._DoHeaders(text)
|
||||
for x in [self.r_hr1, self.r_hr2, self.r_hr3]:
|
||||
text = x.sub("\n<hr%s\n" % self.emptyelt, text);
|
||||
text = self._DoLists(text)
|
||||
text = self._DoCodeBlocks(text)
|
||||
text = self._DoBlockQuotes(text)
|
||||
|
||||
# We did this in parse()
|
||||
# to escape the source
|
||||
# now it's stuff _we_ made
|
||||
# so we don't wrap it in <p>s.
|
||||
text = self._HashHTMLBlocks(text)
|
||||
text = self._FormParagraphs(text)
|
||||
return text
|
||||
|
||||
r_NewLine = re.compile(" {2,}\n")
|
||||
def _RunSpanGamut(self, text):
|
||||
text = self._DoCodeSpans(text)
|
||||
text = self._EscapeSpecialChars(text)
|
||||
text = self._DoImages(text)
|
||||
text = self._DoAnchors(text)
|
||||
text = self._DoAutoLinks(text)
|
||||
text = self._EncodeAmpsAndAngles(text)
|
||||
text = self._DoItalicsAndBold(text)
|
||||
text = self.r_NewLine.sub(" <br%s\n" % self.emptyelt, text)
|
||||
return text
|
||||
|
||||
def _EscapeSpecialChars(self, text):
|
||||
tokens = self._TokenizeHTML(text)
|
||||
text = ""
|
||||
for cur_token in tokens:
|
||||
if cur_token[0] == "tag":
|
||||
cur_token[1] = cur_token[1].replace('*', self.escapetable["*"])
|
||||
cur_token[1] = cur_token[1].replace('_', self.escapetable["_"])
|
||||
text += cur_token[1]
|
||||
else:
|
||||
text += self._EncodeBackslashEscapes(cur_token[1])
|
||||
return text
|
||||
|
||||
r_DoAnchors1 = re.compile(
|
||||
r""" ( # wrap whole match in $1
|
||||
\[
|
||||
(.*?) # link text = $2
|
||||
# [for bracket nesting, see below]
|
||||
\]
|
||||
|
||||
[ ]? # one optional space
|
||||
(?:\n[ ]*)? # one optional newline followed by spaces
|
||||
|
||||
\[
|
||||
(.*?) # id = $3
|
||||
\]
|
||||
)
|
||||
""", re.S|re.VERBOSE)
|
||||
r_DoAnchors2 = re.compile(
|
||||
r""" ( # wrap whole match in $1
|
||||
\[
|
||||
(.*?) # link text = $2
|
||||
\]
|
||||
\( # literal paren
|
||||
[ \t]*
|
||||
<?(.+?)>? # href = $3
|
||||
[ \t]*
|
||||
( # $4
|
||||
([\'\"]) # quote char = $5
|
||||
(.*?) # Title = $6
|
||||
\5 # matching quote
|
||||
)? # title is optional
|
||||
\)
|
||||
)
|
||||
""", re.S|re.VERBOSE)
|
||||
def _DoAnchors(self, text):
|
||||
# We here don't do the same as the perl version, as python's regex
|
||||
# engine gives us no way to match brackets.
|
||||
|
||||
def handler1(m):
|
||||
whole_match = m.group(1)
|
||||
link_text = m.group(2)
|
||||
link_id = m.group(3).lower()
|
||||
if not link_id: link_id = link_text.lower()
|
||||
title = self.titles.get(link_id, None)
|
||||
|
||||
|
||||
if self.urls.has_key(link_id):
|
||||
url = self.urls[link_id]
|
||||
url = url.replace("*", self.escapetable["*"])
|
||||
url = url.replace("_", self.escapetable["_"])
|
||||
url = url.replace("[", self.escapetable["["])
|
||||
res = '<a href="%s"' % htmlquote(url)
|
||||
|
||||
if title:
|
||||
title = title.replace("*", self.escapetable["*"])
|
||||
title = title.replace("_", self.escapetable["_"])
|
||||
res += ' title="%s"' % htmlquote(title)
|
||||
res += ">%s</a>" % htmlquote(link_text)
|
||||
else:
|
||||
res = whole_match
|
||||
|
||||
return res
|
||||
|
||||
def handler2(m):
|
||||
whole_match = m.group(1)
|
||||
link_text = m.group(2)
|
||||
url = m.group(3)
|
||||
title = m.group(6)
|
||||
|
||||
url = url.replace("*", self.escapetable["*"])
|
||||
url = url.replace("_", self.escapetable["_"])
|
||||
url = url.replace("[", self.escapetable["["])
|
||||
res = '''<a href="%s"''' % htmlquote(url)
|
||||
|
||||
if title:
|
||||
title = title.replace('"', '"')
|
||||
title = title.replace("*", self.escapetable["*"])
|
||||
title = title.replace("_", self.escapetable["_"])
|
||||
res += ' title="%s"' % htmlquote(title)
|
||||
res += ">%s</a>" % htmlquote(link_text)
|
||||
return res
|
||||
|
||||
#text = self.r_DoAnchors1.sub(handler1, text)
|
||||
text = self.r_DoAnchors2.sub(handler2, text)
|
||||
return text
|
||||
|
||||
r_DoImages1 = re.compile(
|
||||
r""" ( # wrap whole match in $1
|
||||
!\[
|
||||
(.*?) # alt text = $2
|
||||
\]
|
||||
|
||||
[ ]? # one optional space
|
||||
(?:\n[ ]*)? # one optional newline followed by spaces
|
||||
|
||||
\[
|
||||
(.*?) # id = $3
|
||||
\]
|
||||
|
||||
)
|
||||
""", re.VERBOSE|re.S)
|
||||
|
||||
r_DoImages2 = re.compile(
|
||||
r""" ( # wrap whole match in $1
|
||||
!\[
|
||||
(.*?) # alt text = $2
|
||||
\]
|
||||
\( # literal paren
|
||||
[ \t]*
|
||||
<?(\S+?)>? # src url = $3
|
||||
[ \t]*
|
||||
( # $4
|
||||
([\'\"]) # quote char = $5
|
||||
(.*?) # title = $6
|
||||
\5 # matching quote
|
||||
[ \t]*
|
||||
)? # title is optional
|
||||
\)
|
||||
)
|
||||
""", re.VERBOSE|re.S)
|
||||
|
||||
def _DoImages(self, text):
    """Convert Markdown image syntax in `text` to <img> tags.

    Two forms are handled, in this order:

    * reference style, ``![alt][id]`` -- the URL and optional title are
      looked up in self.urls / self.titles; when the id is unknown the
      original text is left untouched;
    * inline style, ``![alt](url "title")``.

    Returns the text with every recognised image replaced.
    """
    def protect_emphasis(value):
        # Swap '*' and '_' for their escapetable placeholders; the
        # placeholders are turned back by _UnescapeSpecialChars.
        value = value.replace("*", self.escapetable["*"])
        return value.replace("_", self.escapetable["_"])

    def handler1(m):
        whole_match = m.group(1)
        alt_text = m.group(2)
        link_id = m.group(3).lower()

        # An empty id ("![alt][]") means the alt text doubles as the id.
        if not link_id:
            link_id = alt_text.lower()

        if link_id not in self.urls:
            return whole_match

        url = protect_emphasis(self.urls[link_id])
        # htmlquote() already encodes double quotes, so alt/title are not
        # pre-escaped here -- doing both would double-encode them.
        res = '<img src="%s" alt="%s"' % (htmlquote(url), htmlquote(alt_text))
        if link_id in self.titles:
            title = protect_emphasis(self.titles[link_id])
            res += ' title="%s"' % htmlquote(title)
        res += self.emptyelt
        return res

    def handler2(m):
        alt_text = m.group(2)
        url = protect_emphasis(m.group(3))
        title = m.group(6)  # None when no title was written

        res = '<img src="%s" alt="%s"' % (htmlquote(url), htmlquote(alt_text))
        # Only emit a title attribute when the source actually had one.
        if title is not None:
            res += ' title="%s"' % htmlquote(protect_emphasis(title))
        res += self.emptyelt
        return res

    text = self.r_DoImages1.sub(handler1, text)
    text = self.r_DoImages2.sub(handler2, text)
    return text
|
||||
|
||||
r_DoHeaders = re.compile(r"^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+", re.VERBOSE|re.M)
|
||||
def _DoHeaders(self, text):
|
||||
def findheader(text, c, n):
|
||||
textl = text.split('\n')
|
||||
for i in xrange(len(textl)):
|
||||
if i >= len(textl): continue
|
||||
count = textl[i].strip().count(c)
|
||||
if count > 0 and count == len(textl[i].strip()) and textl[i+1].strip() == '' and textl[i-1].strip() != '':
|
||||
textl = textl[:i] + textl[i+1:]
|
||||
textl[i-1] = '<h'+n+'>'+self._RunSpanGamut(textl[i-1])+'</h'+n+'>'
|
||||
textl = textl[:i] + textl[i+1:]
|
||||
text = '\n'.join(textl)
|
||||
return text
|
||||
|
||||
def handler(m):
|
||||
level = len(m.group(1))
|
||||
header = self._RunSpanGamut(m.group(2))
|
||||
return "<h%s>%s</h%s>\n\n" % (level, header, level)
|
||||
|
||||
text = findheader(text, '=', '1')
|
||||
text = findheader(text, '-', '2')
|
||||
text = self.r_DoHeaders.sub(handler, text)
|
||||
return text
|
||||
|
||||
rt_l = r"""
|
||||
(
|
||||
(
|
||||
[ ]{0,%d}
|
||||
([*+-]|\d+[.])
|
||||
[ \t]+
|
||||
)
|
||||
(?:.+?)
|
||||
(
|
||||
\Z
|
||||
|
|
||||
\n{2,}
|
||||
(?=\S)
|
||||
(?![ \t]* ([*+-]|\d+[.])[ \t]+)
|
||||
)
|
||||
)
|
||||
""" % (tabwidth - 1)
|
||||
r_DoLists = re.compile('^'+rt_l, re.M | re.VERBOSE | re.S)
|
||||
r_DoListsTop = re.compile(
|
||||
r'(?:\A\n?|(?<=\n\n))'+rt_l, re.M | re.VERBOSE | re.S)
|
||||
|
||||
def _DoLists(self, text):
|
||||
def handler(m):
|
||||
list_type = "ol"
|
||||
if m.group(3) in [ "*", "-", "+" ]:
|
||||
list_type = "ul"
|
||||
listn = m.group(1)
|
||||
listn = self.r_multiline.sub("\n\n\n", listn)
|
||||
res = self._ProcessListItems(listn)
|
||||
res = "<%s>\n%s</%s>\n" % (list_type, res, list_type)
|
||||
return res
|
||||
|
||||
if self.list_level:
|
||||
text = self.r_DoLists.sub(handler, text)
|
||||
else:
|
||||
text = self.r_DoListsTop.sub(handler, text)
|
||||
return text
|
||||
|
||||
r_multiend = re.compile(r"\n{2,}\Z")
|
||||
r_ProcessListItems = re.compile(r"""
|
||||
(\n)? # leading line = $1
|
||||
(^[ \t]*) # leading whitespace = $2
|
||||
([*+-]|\d+[.]) [ \t]+ # list marker = $3
|
||||
((?:.+?) # list item text = $4
|
||||
(\n{1,2}))
|
||||
(?= \n* (\Z | \2 ([*+-]|\d+[.]) [ \t]+))
|
||||
""", re.VERBOSE | re.M | re.S)
|
||||
|
||||
def _ProcessListItems(self, text):
|
||||
self.list_level += 1
|
||||
text = self.r_multiend.sub("\n", text)
|
||||
|
||||
def handler(m):
|
||||
item = m.group(4)
|
||||
leading_line = m.group(1)
|
||||
leading_space = m.group(2)
|
||||
|
||||
if leading_line or self.r_multiline.search(item):
|
||||
item = self._RunBlockGamut(self._Outdent(item))
|
||||
else:
|
||||
item = self._DoLists(self._Outdent(item))
|
||||
if item[-1] == "\n": item = item[:-1] # chomp
|
||||
item = self._RunSpanGamut(item)
|
||||
return "<li>%s</li>\n" % item
|
||||
|
||||
text = self.r_ProcessListItems.sub(handler, text)
|
||||
self.list_level -= 1
|
||||
return text
|
||||
|
||||
r_DoCodeBlocks = re.compile(r"""
|
||||
(?:\n\n|\A)
|
||||
( # $1 = the code block
|
||||
(?:
|
||||
(?:[ ]{%d} | \t) # Lines must start with a tab or equiv
|
||||
.*\n+
|
||||
)+
|
||||
)
|
||||
((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space/end of doc
|
||||
""" % (tabwidth, tabwidth), re.M | re.VERBOSE)
|
||||
def _DoCodeBlocks(self, text):
|
||||
def handler(m):
|
||||
codeblock = m.group(1)
|
||||
codeblock = self._EncodeCode(self._Outdent(codeblock))
|
||||
codeblock = self._Detab(codeblock)
|
||||
codeblock = codeblock.lstrip("\n")
|
||||
codeblock = codeblock.rstrip()
|
||||
res = "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock
|
||||
return res
|
||||
|
||||
text = self.r_DoCodeBlocks.sub(handler, text)
|
||||
return text
|
||||
r_DoCodeSpans = re.compile(r"""
|
||||
(`+) # $1 = Opening run of `
|
||||
(.+?) # $2 = The code block
|
||||
(?<!`)
|
||||
\1 # Matching closer
|
||||
(?!`)
|
||||
""", re.I|re.VERBOSE)
|
||||
def _DoCodeSpans(self, text):
|
||||
def handler(m):
|
||||
c = m.group(2)
|
||||
c = c.strip()
|
||||
c = self._EncodeCode(c)
|
||||
return "<code>%s</code>" % c
|
||||
|
||||
text = self.r_DoCodeSpans.sub(handler, text)
|
||||
return text
|
||||
|
||||
def _EncodeCode(self, text):
    """Make the contents of a code span or code block literal.

    HTML-significant characters are turned into character references, and
    the Markdown-significant characters ``* _ { } [ ] \\`` are replaced by
    their self.escapetable placeholders. Returns the encoded text.
    """
    # Ampersand first, so the references added below are not re-encoded.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    for c in "*_{}[]\\":
        text = text.replace(c, self.escapetable[c])
    return text
|
||||
|
||||
|
||||
r_DoBold = re.compile(r"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1", re.VERBOSE | re.S)
|
||||
r_DoItalics = re.compile(r"(\*|_) (?=\S) (.+?) (?<=\S) \1", re.VERBOSE | re.S)
|
||||
def _DoItalicsAndBold(self, text):
|
||||
text = self.r_DoBold.sub(r"<strong>\2</strong>", text)
|
||||
text = self.r_DoItalics.sub(r"<em>\2</em>", text)
|
||||
return text
|
||||
|
||||
r_start = re.compile(r"^", re.M)
|
||||
####r_DoBlockQuotes1 = re.compile(r"^[ \t]*>[ \t]?", re.M)
|
||||
r_DoBlockQuotes1 = re.compile(r"^[ \t]*>[ \t]?", re.M)
|
||||
r_DoBlockQuotes2 = re.compile(r"^[ \t]+$", re.M)
|
||||
r_DoBlockQuotes3 = re.compile(r"""
|
||||
( # Wrap whole match in $1
|
||||
(
|
||||
^[ \t]*>[ \t]? # '>' at the start of a line
|
||||
.+\n # rest of the first line
|
||||
(.+\n)* # subsequent consecutive lines
|
||||
\n* # blanks
|
||||
)+
|
||||
)""", re.M | re.VERBOSE)
|
||||
r_protectpre = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
|
||||
r_propre = re.compile(r'^ ', re.M)
|
||||
|
||||
def _DoBlockQuotes(self, text):
|
||||
def prehandler(m):
|
||||
return self.r_propre.sub('', m.group(1))
|
||||
|
||||
def handler(m):
|
||||
bq = m.group(1)
|
||||
bq = self.r_DoBlockQuotes1.sub("", bq)
|
||||
bq = self.r_DoBlockQuotes2.sub("", bq)
|
||||
bq = self._RunBlockGamut(bq)
|
||||
bq = self.r_start.sub(" ", bq)
|
||||
bq = self.r_protectpre.sub(prehandler, bq)
|
||||
return "<blockquote>\n%s\n</blockquote>\n\n" % bq
|
||||
|
||||
text = self.r_DoBlockQuotes3.sub(handler, text)
|
||||
return text
|
||||
|
||||
r_tabbed = re.compile(r"^([ \t]*)")
|
||||
def _FormParagraphs(self, text):
|
||||
text = text.strip("\n")
|
||||
grafs = self.r_multiline.split(text)
|
||||
|
||||
for g in xrange(len(grafs)):
|
||||
t = grafs[g].strip() #@@?
|
||||
if not self.html_blocks.has_key(t):
|
||||
t = self._RunSpanGamut(t)
|
||||
t = self.r_tabbed.sub(r"<p>", t)
|
||||
t += "</p>"
|
||||
grafs[g] = t
|
||||
|
||||
for g in xrange(len(grafs)):
|
||||
t = grafs[g].strip()
|
||||
if self.html_blocks.has_key(t):
|
||||
grafs[g] = self.html_blocks[t]
|
||||
|
||||
return "\n\n".join(grafs)
|
||||
|
||||
r_EncodeAmps = re.compile(r"&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)")
|
||||
r_EncodeAngles = re.compile(r"<(?![a-z/?\$!])")
|
||||
def _EncodeAmpsAndAngles(self, text):
    """Encode stray ``&`` and ``<`` characters in `text`.

    An ampersand is replaced with ``&amp;`` unless self.r_EncodeAmps sees
    that it already starts an entity-like sequence; a ``<`` is replaced
    with ``&lt;`` unless self.r_EncodeAngles sees that it is followed by a
    character that can begin a tag. Returns the encoded text.
    """
    text = self.r_EncodeAmps.sub("&amp;", text)
    text = self.r_EncodeAngles.sub("&lt;", text)
    return text
|
||||
|
||||
def _EncodeBackslashEscapes(self, text):
|
||||
for char in self.escapechars:
|
||||
text = text.replace("\\" + char, self.escapetable[char])
|
||||
return text
|
||||
|
||||
r_link = re.compile(r"<((https?|ftp):[^\'\">\s]+)>", re.I)
|
||||
r_email = re.compile(r"""
|
||||
<
|
||||
(?:mailto:)?
|
||||
(
|
||||
[-.\w]+
|
||||
\@
|
||||
[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
|
||||
)
|
||||
>""", re.VERBOSE|re.I)
|
||||
def _DoAutoLinks(self, text):
|
||||
text = self.r_link.sub(r'<a href="\1">\1</a>', text)
|
||||
|
||||
def handler(m):
|
||||
l = m.group(1)
|
||||
return self._EncodeEmailAddress(self._UnescapeSpecialChars(l))
|
||||
|
||||
text = self.r_email.sub(handler, text)
|
||||
return text
|
||||
|
||||
r_EncodeEmailAddress = re.compile(r">.+?:")
|
||||
def _EncodeEmailAddress(self, text):
|
||||
encode = [
|
||||
lambda x: "&#%s;" % ord(x),
|
||||
lambda x: "&#x%X;" % ord(x),
|
||||
lambda x: x
|
||||
]
|
||||
|
||||
text = "mailto:" + text
|
||||
addr = ""
|
||||
for c in text:
|
||||
if c == ':': addr += c; continue
|
||||
|
||||
r = semirandom(addr)
|
||||
if r < 0.45:
|
||||
addr += encode[1](c)
|
||||
elif r > 0.9 and c != '@':
|
||||
addr += encode[2](c)
|
||||
else:
|
||||
addr += encode[0](c)
|
||||
|
||||
text = '<a href="%s">%s</a>' % (addr, addr)
|
||||
text = self.r_EncodeEmailAddress.sub('>', text)
|
||||
return text
|
||||
|
||||
def _UnescapeSpecialChars(self, text):
|
||||
for key in self.escapetable.keys():
|
||||
text = text.replace(self.escapetable[key], key)
|
||||
return text
|
||||
|
||||
tokenize_depth = 6
|
||||
tokenize_nested_tags = '|'.join([r'(?:<[a-z/!$](?:[^<>]'] * tokenize_depth) + (')*>)' * tokenize_depth)
|
||||
r_TokenizeHTML = re.compile(
|
||||
r"""(?: <! ( -- .*? -- \s* )+ > ) | # comment
|
||||
(?: <\? .*? \?> ) | # processing instruction
|
||||
%s # nested tags
|
||||
""" % tokenize_nested_tags, re.I|re.VERBOSE)
|
||||
def _TokenizeHTML(self, text):
|
||||
pos = 0
|
||||
tokens = []
|
||||
matchobj = self.r_TokenizeHTML.search(text, pos)
|
||||
while matchobj:
|
||||
whole_tag = matchobj.string[matchobj.start():matchobj.end()]
|
||||
sec_start = matchobj.end()
|
||||
tag_start = sec_start - len(whole_tag)
|
||||
if pos < tag_start:
|
||||
tokens.append(["text", matchobj.string[pos:tag_start]])
|
||||
|
||||
tokens.append(["tag", whole_tag])
|
||||
pos = sec_start
|
||||
matchobj = self.r_TokenizeHTML.search(text, pos)
|
||||
|
||||
if pos < len(text):
|
||||
tokens.append(["text", text[pos:]])
|
||||
return tokens
|
||||
|
||||
r_Outdent = re.compile(r"""^(\t|[ ]{1,%d})""" % tabwidth, re.M)
|
||||
def _Outdent(self, text):
|
||||
text = self.r_Outdent.sub("", text)
|
||||
return text
|
||||
|
||||
def _Detab(self, text): return text.expandtabs(self.tabwidth)
|
||||
|
||||
def Markdown(*args, **kw): return _Markdown().parse(*args, **kw)
|
||||
markdown = Markdown
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) > 1:
|
||||
print Markdown(open(sys.argv[1]).read())
|
||||
else:
|
||||
print Markdown(sys.stdin.read())
|
||||
@@ -209,7 +209,6 @@ def markdown_souptest(text, nofollow=False, target=None, lang=None):
|
||||
#@memoize('markdown')
|
||||
def safemarkdown(text, nofollow=False, target=None, lang=None, wrap=True):
|
||||
from r2.lib.c_markdown import c_markdown
|
||||
from r2.lib.py_markdown import py_markdown
|
||||
|
||||
if c.user.pref_no_profanity:
|
||||
text = profanity_filter(text)
|
||||
@@ -227,8 +226,6 @@ def safemarkdown(text, nofollow=False, target=None, lang=None, wrap=True):
|
||||
text = snudown.markdown(_force_utf8(text), nofollow, target)
|
||||
elif lang == "c":
|
||||
text = c_markdown(text, nofollow, target)
|
||||
elif lang == "py":
|
||||
text = py_markdown(text, nofollow, target)
|
||||
else:
|
||||
raise ValueError("weird lang [%s]" % lang)
|
||||
|
||||
|
||||
@@ -37,7 +37,6 @@ from pylons.controllers.util import abort
|
||||
from r2.lib import promote
|
||||
from r2.lib.traffic import load_traffic, load_summary
|
||||
from r2.lib.captcha import get_iden
|
||||
from r2.lib.contrib.markdown import markdown
|
||||
from r2.lib.filters import spaceCompress, _force_unicode, _force_utf8
|
||||
from r2.lib.filters import unsafe, websafe, SC_ON, SC_OFF, websafe_json
|
||||
from r2.lib.menus import NavButton, NamedButton, NavMenu, PageNameNav, JsButton
|
||||
@@ -53,6 +52,7 @@ from r2.lib.scraper import get_media_embed
|
||||
from r2.lib.log import log_text
|
||||
from r2.lib.memoize import memoize
|
||||
from r2.lib.utils import trunc_string as _truncate
|
||||
from r2.lib.filters import safemarkdown
|
||||
|
||||
import sys, random, datetime, locale, calendar, simplejson, re, time
|
||||
import graph, pycountry, time
|
||||
@@ -1453,9 +1453,7 @@ class Thanks(Templated):
|
||||
|
||||
if g.lounge_reddit:
|
||||
lounge_url = "/r/" + g.lounge_reddit
|
||||
lounge_html = (SC_OFF +
|
||||
markdown(strings.lounge_msg % dict(link=lounge_url))
|
||||
+ SC_ON)
|
||||
lounge_html = safemarkdown(strings.lounge_msg % dict(link=lounge_url))
|
||||
else:
|
||||
lounge_html = None
|
||||
Templated.__init__(self, status=status, secret=secret,
|
||||
@@ -1670,10 +1668,7 @@ class SearchBar(Templated):
|
||||
class SearchFail(Templated):
|
||||
"""Search failure page."""
|
||||
def __init__(self, **kw):
|
||||
md = SC_OFF + markdown(strings.search_failed % dict(
|
||||
link="javascript:tryagain\(\)")) + SC_ON
|
||||
|
||||
self.errmsg = md
|
||||
self.errmsg = strings.search_failed
|
||||
|
||||
Templated.__init__(self)
|
||||
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
from contrib.markdown import markdown
|
||||
import re
|
||||
|
||||
r_url = re.compile('(?<![\(\[])(http://[^\s\'\"\]\)]+)')
|
||||
jscript_url = re.compile('<a href="(?!http|ftp|mailto|/).*</a>', re.I | re.S)
|
||||
img = re.compile('<img.*?>', re.I | re.S)
|
||||
href_re = re.compile('<a href="([^"]+)"', re.I)
|
||||
code_re = re.compile('<code>([^<]+)</code>')
|
||||
a_re = re.compile('>([^<]+)</a>')
|
||||
fix_url = re.compile('<(http://[^\s\'\"\]\)]+)>')
|
||||
|
||||
def code_handler(m):
    """Regex callback: undo one level of ``&amp;`` escaping inside <code>.

    `m` is a match whose group 1 is the text between ``<code>`` and
    ``</code>``; the rebuilt element is returned.
    """
    l = m.group(1)
    return '<code>%s</code>' % l.replace('&amp;', '&')
|
||||
|
||||
#unescape double escaping in links
|
||||
def inner_a_handler(m):
    """Regex callback: undo one level of ``&amp;`` escaping in link text.

    `m` is a match whose group 1 is the text between ``>`` and ``</a>``;
    the rebuilt ``>text</a>`` fragment is returned.
    """
    l = m.group(1)
    return '>%s</a>' % l.replace('&amp;', '&')
|
||||
|
||||
def py_markdown(text, nofollow=False, target=None):
    """Render `text` with the pure-Python markdown backend and clean it up.

    The input is HTML-escaped once, bare http:// URLs are wrapped in
    ``<>`` so markdown auto-links them, and the rendered HTML is then
    post-processed: images are removed, anchors whose href does not start
    with http, ftp, mailto or ``/`` are removed, and the extra ``&amp;``
    escaping is undone in hrefs, <code> bodies and link text.

    nofollow -- when true, add rel="nofollow" to every generated link.
    target   -- when set, add target="<target>" to every generated link.

    Returns the resulting HTML string.
    """
    # increase escaping of &, < and > once
    text = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    #wrap urls in "<>" so that markdown will handle them as urls
    text = r_url.sub(r'<\1>', text)

    text = markdown(text)

    text = img.sub('', text) #remove images
    # remove the "&amp;" escaping in code spans and link text
    text = code_re.sub(code_handler, text)
    text = a_re.sub(inner_a_handler, text)

    #remove images
    text = img.sub('', text)

    #wipe malicious javascript
    text = jscript_url.sub('', text)

    # remove the "&amp;" escaping in urls
    def href_handler(m):
        url = m.group(1).replace('&amp;', '&')
        link = '<a href="%s"' % url

        if target:
            link += ' target="%s"' % target

        if nofollow:
            link += ' rel="nofollow"'

        return link

    text = href_re.sub(href_handler, text)
    text = code_re.sub(code_handler, text)
    text = a_re.sub(inner_a_handler, text)
    # unwrap any "<http://...>" that markdown did not turn into a link
    text = fix_url.sub(r'\1', text)

    return text
|
||||
@@ -136,7 +136,7 @@ string_dict = dict(
|
||||
verify_email_submit = _("you'll be able to submit more frequently once you verify your email address"),
|
||||
email_verified = _("your email address has been verfied"),
|
||||
email_verify_failed = _("Verification failed. Please try that again"),
|
||||
search_failed = _("Our search machines are under too much load to handle your request right now. :( Sorry for the inconvenience. [Try again](%(link)s) in a little bit -- but please don't mash reload; that only makes the problem worse."),
|
||||
search_failed = _("Our search machines are under too much load to handle your request right now. :( Sorry for the inconvenience. Try again in a little bit -- but please don't mash reload; that only makes the problem worse."),
|
||||
invalid_search_query = _("I couldn't understand your query, so I simplified it and searched for \"%(clean_query)s\" instead."),
|
||||
completely_invalid_search_query = _("I couldn't understand your search query. Please try again."),
|
||||
generic_quota_msg = _("You've submitted too many links recently. Please try again in an hour."),
|
||||
|
||||
@@ -20,23 +20,6 @@
|
||||
## CondeNet, Inc. All Rights Reserved.
|
||||
################################################################################
|
||||
|
||||
<script type="text/javascript">
|
||||
var searchfail_timeout = new Date();
|
||||
|
||||
function tryagain() {
|
||||
elapsed = new Date() - searchfail_timeout;
|
||||
|
||||
seconds = elapsed / 1000;
|
||||
|
||||
if (seconds < 10) {
|
||||
alert("Please don't pound our servers! " +
|
||||
"Give them a few minutes to cool off.");
|
||||
} else {
|
||||
window.location.reload();
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="searchfail">
|
||||
${unsafe(thing.errmsg)}
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user