80 lines
3.2 KiB
Python
80 lines
3.2 KiB
Python
import re
|
|
|
|
# usernames
|
|
|
|
# Body of a regex character class (no surrounding brackets): ASCII letters,
# digits, underscore and hyphen.  Written to be interpolated inside ``[...]``.
valid_username_chars = r'a-zA-Z0-9_\-'
|
|
# Whole-string username check: 3 to 25 ASCII letters, digits, underscores or
# hyphens.  The end anchor is ``\Z`` rather than ``$`` because ``$`` also
# matches just before a trailing newline, which allowed "name\n" to validate.
valid_username_regex = re.compile(r"^[a-zA-Z0-9_\-]{3,25}\Z", flags=re.A)
|
|
# An "@name" mention located at the start of the text, after whitespace, or
# directly after "<p>".  Group 1 is that leading context and group 2 is the
# name (1-25 characters); group 3 only ever holds the name's final character.
mention_regex = re.compile(
    r'(^|\s|<p>)@(([a-zA-Z0-9_\-]){1,25})',
    re.ASCII,
)
|
|
# An "@name" mention that immediately follows a "<p>" tag.  Group 1 is the
# name (1-25 characters); group 2 only ever holds the name's final character.
mention_regex2 = re.compile(
    r'<p>@(([a-zA-Z0-9_\-]){1,25})',
    re.ASCII,
)
|
|
|
|
# Whole-string password check: 8 to 100 characters, none of them a newline
# (``.`` does not match "\n").  Anchored with ``\Z`` instead of ``$`` so that
# a single trailing newline is rejected like any other newline; ``$`` would
# have matched just before it.
valid_password_regex = re.compile(r"^.{8,100}\Z", flags=re.A)
|
|
|
|
# A run of 1 to 30 lowercase ASCII letters or digits.  The pattern is not
# anchored, so search()/match() will accept a run inside a longer string.
marsey_regex = re.compile(r"[a-z0-9]{1,30}", re.ASCII)
|
|
|
|
# A run of 1 to 200 characters drawn from lowercase ASCII letters, digits,
# colon and space.  Unanchored.
tags_regex = re.compile(r"[a-z0-9: ]{1,200}", re.ASCII)
|
|
|
|
# Whole-string check for a sub name: 3 to 20 ASCII letters, digits,
# underscores or hyphens.  The end anchor is ``\Z`` rather than ``$`` because
# ``$`` also matches just before a trailing newline, which allowed "name\n"
# to validate.
valid_sub_regex = re.compile(r"^[a-zA-Z0-9_\-]{3,20}\Z", flags=re.A)
|
|
|
|
# A "key:value" token: group 1 is one or more ASCII word characters before
# the colon, group 2 is the run of non-whitespace that follows it.
query_regex = re.compile(r"(\w+):(\S+)", re.ASCII)
|
|
|
|
# Any single character that is neither an ASCII word character nor a space.
# Under the ASCII flag, non-ASCII letters count as "not a word character".
title_regex = re.compile(r"[^\w ]", re.ASCII)
|
|
|
|
# Case-insensitive "based and <x>-pilled" / "based and <x> pilled".
# Group 1 is <x> (1-20 characters, matched lazily); group 2 is the separator
# in front of "pilled", either a hyphen or a space.
based_regex = re.compile(
    r"based and (.{1,20}?)(-| )pilled",
    re.IGNORECASE | re.ASCII,
)
|
|
|
|
# An https://old.reddit.com/r/<name>/comments/<rest> URL that is preceded by
# one of `"`, `>` or space and followed by one of `"`, `<` or space.
# Group 1 is the URL without those delimiters; <name> is 3-20 characters and
# <rest> is 5-250 characters.
controversial_regex = re.compile(
    r'["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]',
    re.ASCII,
)
|
|
|
|
# "http://" or "https://" followed by a run of non-whitespace characters.
fishylinks_regex = re.compile(r"https?://\S+", re.ASCII)
|
|
|
|
# Text wrapped in double pipes: ||like this||.  Group 1 is the wrapped text,
# matched lazily and never spanning a newline.
spoiler_regex = re.compile(r'\|\|(.+?)\|\|', re.ASCII)
|
|
# An "r/name" or "u/name" reference, with an optional leading slash, located
# at the start of the text, after whitespace, or directly after "<p>".
# Group 2 is the reference without the leading slash and group 3 is the "r"
# or "u" prefix.  The trailing negative lookahead rejects a candidate when
# the next tag after it is a closing </code>, </pre> or </a>.
reddit_regex = re.compile(
    r'(^|\s|<p>)\/?((r|u)\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)',
    re.ASCII,
)
|
|
# An "h/name" reference, with an optional leading slash, located at the start
# of the text, after whitespace, or directly after "<p>".  Group 2 is the
# reference without the leading slash; the name part is 3-25 word characters
# or hyphens.
sub_regex = re.compile(
    r'(^|\s|<p>)\/?(h\/(\w|-){3,25})',
    re.ASCII,
)
|
|
|
|
|
|
# Matches any single one of five characters.  The pattern is assembled from
# adjacent string literals purely so each alternative can carry its own label;
# the resulting pattern text is one plain alternation.
unwanted_bytes_regex = re.compile(
    "\u200e|"      # U+200E LEFT-TO-RIGHT MARK
    "\u200f|"      # U+200F RIGHT-TO-LEFT MARK
    "\u200b|"      # U+200B ZERO WIDTH SPACE
    "\ufeff|"      # U+FEFF ZERO WIDTH NO-BREAK SPACE (byte order mark)
    "\U0001242a"   # U+1242A, in the Cuneiform Numbers and Punctuation block
)
|
|
'''
|
|
Characters (Unicode code points, not raw bytes) that shouldn't be allowed in user-submitted text
|
|
U+200E is the left-to-right mark, U+200F is the right-to-left mark, U+200B and U+FEFF are Zero-Width
|
|
Spaces, and U+1242A is a massive and terrifying cuneiform numeral
|
|
'''
|
|
|
|
# One or more consecutive whitespace characters.  No ASCII flag is set, so
# Unicode whitespace such as U+00A0 is included.
whitespace_regex = re.compile(r'\s+')
|
|
|
|
# Text wrapped in one or two tildes on each side: ~like this~ or ~~this~~.
# Group 1 is the wrapped text, which may not itself contain a tilde.
strikethrough_regex = re.compile(r'~{1,2}([^~]+)~{1,2}', re.ASCII)
|
|
|
|
# A "/mute @<name> <number>" command.  Group 1 is the name (3-25 lowercase
# ASCII letters, digits, underscores or hyphens) and group 2 is the whole
# number.  The quantifier sits INSIDE group 2: when it was outside, as
# ``([0-9])+``, the group was re-captured per digit and ended up holding only
# the last digit of the number.
# NOTE(review): uppercase letters in the name do not match; confirm callers
# lowercase the text first if that matters.
mute_regex = re.compile(r"/mute @([a-z0-9_\-]{3,25}) ([0-9]+)", flags=re.A)
|
|
|
|
# One or more ":name:" tokens (each optionally prefixed by up to three of
# "!", "#", "@" and optionally followed by whitespace) that fill the space
# between a ">" and the next "</".  The character before that ">" must not be
# the letter "a".  The name alphabet comes from valid_username_chars.
emoji_regex = re.compile(
    rf"[^a]>\s*(:[!#@]{{0,3}}[{valid_username_chars}]+:\s*)+<\/",
    re.ASCII,
)
|
|
# A ":name:" token whose opening colon is not directly preceded by a double
# quote.  Group 1 is the name: 1-31 characters (matched lazily) drawn from
# "!", "#", "@" and the valid_username_chars alphabet.
emoji_regex2 = re.compile(
    rf'(?<!"):([!#@{valid_username_chars}]{{1,31}}?):',
    re.ASCII,
)
|
|
# A ":name:" token whose opening colon is not directly preceded by a double
# quote.  Group 1 is the name: 1-31 characters (matched lazily) drawn from
# "!", "@" and the valid_username_chars alphabet ("#" is not accepted here).
emoji_regex3 = re.compile(
    rf'(?<!"):([!@{valid_username_chars}]{{1,31}}?):',
    re.ASCII,
)
|
|
|
|
# A complete anchor element with exactly this attribute layout:
#   <a href="URL" rel="nofollow noopener noreferrer" target="_blank">TEXT</a>
# Group 1 is URL (http or https, a lowercase first host label, a dot, then
# 5-250 further URL characters); group 2 is TEXT (5-250 URL characters).
# The pattern is split across adjacent raw literals for readability only.
snappy_url_regex = re.compile(
    r'<a href=\"(https?:\/\/[a-z]{1,20}\.[\w:~,()\-.#&\/=?@%;+]{5,250})\"'
    r' rel=\"nofollow noopener noreferrer\" target=\"_blank\">'
    r'([\w:~,()\-.#&\/=?@%;+]{5,250})<\/a>',
    re.ASCII,
)
|
|
|
|
# Deliberately loose address shape: something, one "@", something, a dot,
# something, where "something" is one or more characters other than "@".
email_regex = re.compile(r'[^@]+@[^@]+\.[^@]+', re.ASCII)
|
|
'''
|
|
Regex to use for email addresses.
|
|
|
|
.. note::
|
|
Technically this allows stuff that is not a valid email address, but
|
|
realistically we care "does this email go to the correct person" rather
|
|
than "is this email address syntactically valid", so if we care we should
|
|
be sending a confirmation link, and otherwise should be pretty liberal in
|
|
what we accept here.
|
|
'''
|
|
|
|
# A lowercase "utm_<key>=<value>" query parameter together with the "&" that
# follows it.
utm_regex = re.compile(r'utm_[a-z]+=[a-z0-9_]+&', re.ASCII)
|
|
# A lowercase "utm_<key>=<value>" query parameter together with the "?" or
# "&" that precedes it.
utm_regex2 = re.compile(r'[?&]utm_[a-z]+=[a-z0-9_]+', re.ASCII)
|
|
|
|
|
|
# urls
|
|
|
|
# A https://youtube.com/watch?v=<id> URL appearing inside a "<p>" before any
# other tag opens.  Group 1 is "<p>" plus the text in front of the URL,
# group 2 is the full URL including trailing query characters, and group 3 is
# the 5-20 character video id.  Case-insensitive.
youtube_regex = re.compile(
    r'(<p>[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)',
    re.IGNORECASE | re.ASCII,
)
|
|
|
|
# A run of 5 to 20 ASCII letters (either case), digits, hyphens or
# underscores.  Unanchored.
yt_id_regex = re.compile(r'[a-z0-9-_]{5,20}', re.IGNORECASE | re.ASCII)
|
|
|
|
# A standalone https URL (bounded by start/end of text or whitespace) that
# ends in .png, .jpg, .jpeg, .gif, .webp, "maxwidth=9999" or "fidelity=high".
# Group 2 is the URL and group 3 is that ending.  Case-insensitive.
image_regex = re.compile(
    r"(^|\s)(https:\/\/[\w\-.#&/=\?@%;+]{5,250}(\.png|\.jpg|\.jpeg|\.gif|\.webp|maxwidth=9999|fidelity=high))($|\s)",
    re.IGNORECASE | re.ASCII,
)
|
|
|
|
# A single newline with a non-newline character on each side.  Groups 1 and
# 2 are those neighbouring characters.
linefeeds_regex = re.compile(r"([^\n])\n([^\n])", re.ASCII)
|
|
|
|
# A <title>...</title> element, tag names case-insensitive.  Group 1 is the
# content: 1-200 characters with no newline among them.
html_title_regex = re.compile(r"<title>(.{1,200})</title>", re.IGNORECASE)
|
|
|
|
# A CSS url(...) token, "url" case-insensitive.  Group 1 is the argument with
# surrounding whitespace and an optional single or double quote on each side
# stripped off (the argument itself is matched lazily).
css_url_regex = re.compile(
    r'url\(\s*[\'"]?(.*?)[\'"]?\s*\)',
    re.IGNORECASE | re.ASCII,
)
|