nicer and more efficient sanitisation
This commit is contained in:
parent
8c7c76feb6
commit
1b9f7860c5
3 changed files with 14 additions and 4 deletions
|
@ -396,9 +396,9 @@ class Submission(Base):
|
||||||
@lazy
|
@lazy
|
||||||
def realtitle(self, v):
|
def realtitle(self, v):
|
||||||
if self.title_html:
|
if self.title_html:
|
||||||
return self.title_html
|
return self.title_html
|
||||||
else:
|
else:
|
||||||
return self.title
|
return self.title
|
||||||
|
|
||||||
@lazy
|
@lazy
|
||||||
def plaintitle(self, v):
|
def plaintitle(self, v):
|
||||||
|
|
|
@ -478,6 +478,13 @@ spoiler_regex = re.compile('''\|\|(.+)\|\|''', flags=re.A)
|
||||||
reddit_regex = re.compile('(^|\s|<p>)\/?((r|u)\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A)
|
reddit_regex = re.compile('(^|\s|<p>)\/?((r|u)\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A)
|
||||||
sub_regex = re.compile('(^|\s|<p>)\/?(h\/(\w|-){3,25})', flags=re.A)
|
sub_regex = re.compile('(^|\s|<p>)\/?(h\/(\w|-){3,25})', flags=re.A)
|
||||||
|
|
||||||
|
# Bytes that shouldn't be allowed in user-submitted text
|
||||||
|
# U+200E is LTR toggle, U+200F is RTL toggle, U+200B and U+FEFF are Zero-Width Spaces,
|
||||||
|
# and U+1242A is a massive and terrifying cuneiform numeral
|
||||||
|
unwanted_bytes_regex = re.compile("\u200e|\u200f|\u200b|\ufeff|\U0001242a")
|
||||||
|
|
||||||
|
whitespace_regex = re.compile('\s+')
|
||||||
|
|
||||||
strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A)
|
strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A)
|
||||||
|
|
||||||
mute_regex = re.compile("/mute @([a-z0-9_\-]{3,25}) ([0-9])+", flags=re.A)
|
mute_regex = re.compile("/mute @([a-z0-9_\-]{3,25}) ([0-9])+", flags=re.A)
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import functools
|
import functools
|
||||||
|
import html
|
||||||
import bleach
|
import bleach
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from bleach.linkifier import LinkifyFilter, build_url_re
|
from bleach.linkifier import LinkifyFilter, build_url_re
|
||||||
|
@ -166,7 +167,7 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
|
||||||
sanitized = strikethrough_regex.sub(r'<del>\1</del>', sanitized)
|
sanitized = strikethrough_regex.sub(r'<del>\1</del>', sanitized)
|
||||||
|
|
||||||
# remove left-to-right mark; remove zero width space; remove zero width no-break space; remove Cuneiform Numeric Sign Eight;
|
# remove left-to-right mark; remove zero width space; remove zero width no-break space; remove Cuneiform Numeric Sign Eight;
|
||||||
sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("𒐪","")
|
sanitized = unwanted_bytes_regex.sub('', sanitized)
|
||||||
|
|
||||||
if alert:
|
if alert:
|
||||||
matches = { g.group(1):g for g in mention_regex2.finditer(sanitized) if g }
|
matches = { g.group(1):g for g in mention_regex2.finditer(sanitized) if g }
|
||||||
|
@ -340,7 +341,9 @@ def allowed_attributes_emojis(tag, name, value):
|
||||||
@with_sigalrm_timeout(1)
|
@with_sigalrm_timeout(1)
|
||||||
def filter_emojis_only(title, edit=False, graceful=False):
|
def filter_emojis_only(title, edit=False, graceful=False):
|
||||||
|
|
||||||
title = title.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("𒐪","").replace("\n", "").replace("\r", "").replace("\t", "").replace("&", "&amp;").replace('<','&lt;').replace('>','&gt;').replace('"', '&quot;').replace("'", "&#039;").strip()
|
title = unwanted_bytes_regex.sub('', title)
|
||||||
|
title = whitespace_regex.sub(' ', title)
|
||||||
|
title = html.escape(title, quote=True)
|
||||||
|
|
||||||
# title = render_emoji(title, emoji_regex3, edit)
|
# title = render_emoji(title, emoji_regex3, edit)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue