
* Integrate chat from upstream Substantially borrowed from upstream ref: 13a208ee88e55 (before they started editing generated artefacts instead of source). Integrated, including: - Remove previously removed features: emoji, hats, and name colors - Compensate for lack of unified root template - Add React build process to Dockerfile and `bootstrap/init.sh` - Preliminary integration of chat websocket workers For testing, modify `supervisord.conf.dev` to put chat on port 80 and the site service on some other port. Then visit: http://localhost/chat Still to do: - Access control for specific small-groups (and admins probably): Set the values somewhere (site_settings.json? Redis?) and use for authorization in `chat_is_allowed`. - Proxying only /chat to the websocket workers - Chat persistence across restarts: either Redis devops or to DB * Add nginx server to do appropriate redirection. * Add necessary columns to User. * Wire up chat permissions. * Reload chat on source change. * Add a better structure for slash commands and add/remove functionality. * Stop putting up previews of slash commands. * We require more whitespace. * Strip DMs out entirely, I currently do not want to deal with them. * Change "Users Online" to just "Users". * Clean up a little more DM detritus. * Save chat history in database. * Remove unnecessary hefty query to the DB. * Clean up optimistic messages. * Initial implementation of notification icon. * Update readme a little bit. * Fix notification highlight (mostly). * Remove chat version number that will never be updated. * Fix: Errors on logged-out users. * Add function to nuke the chat state. * Update DB. * Add a dedicated deployable docker image. * Fix: init_build.sh execute bit not set. * Whoops, screwed up the abort() call. * Relax chat rate limiter. * Remove a somewhat silly comment. * Remove an unnecessary g.db.add(). --------- Co-authored-by: TLSM <duolsm@outlook.com>
80 lines
3.2 KiB
Python
80 lines
3.2 KiB
Python
import re
|
|
|
|
# usernames
|
|
|
|
# Body of a regex character class (without the surrounding brackets) listing
# the characters allowed in a username: ASCII letters, digits, "_" and "-".
# Kept as a plain string so it can be interpolated into larger patterns.
valid_username_chars = r"a-zA-Z0-9_\-"
|
|
# Validates a complete username: 3-25 characters drawn from ASCII letters,
# digits, underscore and hyphen.
# The end anchor is \Z rather than $ because $ also matches just before a
# trailing newline, which would let "name\n" pass a .match() check.
valid_username_regex = re.compile(
    r"^[a-zA-Z0-9_\-]{3,25}\Z",
    flags=re.ASCII,
)
|
|
# Finds an "@name" mention that sits at the start of the string, after
# whitespace, or right after a "<p>" tag.
#   group 1: the leading context (empty, whitespace, or "<p>")
#   group 2: the mentioned name, 1-25 username characters
#   group 3: only the last character of the name (repeated inner group)
mention_regex = re.compile(
    r"(^|\s|<p>)@(([a-zA-Z0-9_\-]){1,25})",
    flags=re.ASCII,
)
|
|
# Finds an "@name" mention that immediately follows a "<p>" tag.
#   group 1: the mentioned name, 1-25 username characters
#   group 2: only the last character of the name (repeated inner group)
mention_regex2 = re.compile(
    r"<p>@(([a-zA-Z0-9_\-]){1,25})",
    flags=re.ASCII,
)
|
|
|
|
# Accepts a password made of 8 to 100 characters, none of which may be a
# newline ("." does not match "\n").
valid_password_regex = re.compile(
    r"^.{8,100}$",
    flags=re.ASCII,
)
|
|
|
|
# Unanchored: matches a run of 1-30 lowercase ASCII letters or digits.
# NOTE(review): presumably the allowed shape of a marsey name -- confirm
# against callers.
marsey_regex = re.compile(
    r"[a-z0-9]{1,30}",
    flags=re.ASCII,
)
|
|
|
|
# Unanchored: matches a run of 1-200 characters, each a lowercase ASCII
# letter, a digit, a colon, or a space.
tags_regex = re.compile(
    r"[a-z0-9: ]{1,200}",
    flags=re.ASCII,
)
|
|
|
|
# Validates a complete sub name: 3-20 characters drawn from ASCII letters,
# digits, underscore and hyphen.
# The end anchor is \Z rather than $ because $ also matches just before a
# trailing newline, which would let "name\n" pass a .match() check.
valid_sub_regex = re.compile(
    r"^[a-zA-Z0-9_\-]{3,20}\Z",
    flags=re.ASCII,
)
|
|
|
|
# Finds "key:value" tokens: group 1 is a run of ASCII word characters before
# the colon, group 2 is the run of non-whitespace characters after it.
query_regex = re.compile(
    r"(\w+):(\S+)",
    flags=re.ASCII,
)
|
|
|
|
# Matches any single character that is neither an ASCII word character
# (letter, digit, underscore) nor a space.
title_regex = re.compile(
    r"[^\w ]",
    flags=re.ASCII,
)
|
|
|
|
# Case-insensitively matches "based and <X>-pilled" / "based and <X> pilled".
#   group 1: <X>, the shortest run of 1-20 characters that works
#   group 2: the separator before "pilled" ("-" or a space)
based_regex = re.compile(
    r"based and (.{1,20}?)(-| )pilled",
    flags=re.IGNORECASE | re.ASCII,
)
|
|
|
|
# Finds an old.reddit.com comments link that is delimited on the left by a
# double quote, ">" or space and on the right by a double quote, "<" or space.
#   group 1: the URL itself (https://old.reddit.com/r/<sub>/comments/<rest>)
controversial_regex = re.compile(
    r'["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]',
    flags=re.ASCII,
)
|
|
|
|
# Matches an http:// or https:// URL up to the next whitespace character.
fishylinks_regex = re.compile(
    r"https?://\S+",
    flags=re.ASCII,
)
|
|
|
|
# Matches "||text||" spoiler markup; group 1 is the shortest non-empty text
# between the two pairs of pipes.
spoiler_regex = re.compile(
    r"\|\|(.+?)\|\|",
    flags=re.ASCII,
)
|
|
# Finds "r/name" or "u/name" references (optionally written with a leading
# slash) at the start of the string, after whitespace, or after "<p>".
# The trailing negative lookahead rejects a match when a closing </code>,
# </pre> or </a> tag follows before any other "<".
#   group 2: the reference without the leading slash, e.g. "r/python"
#   group 3: "r" or "u"
reddit_regex = re.compile(
    r"(^|\s|<p>)\/?((r|u)\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)",
    flags=re.ASCII,
)
|
|
# Finds "h/name" references (optionally written with a leading slash) at the
# start of the string, after whitespace, or after "<p>".
#   group 2: the reference without the leading slash, e.g. "h/cats"
sub_regex = re.compile(
    r"(^|\s|<p>)\/?(h\/(\w|-){3,25})",
    flags=re.ASCII,
)
|
|
|
|
|
|
# Matches any single one of the listed characters; the alternation is
# assembled from one entry per character so each can be annotated.
unwanted_bytes_regex = re.compile(
    "|".join(
        (
            "\u200e",      # U+200E
            "\u200f",      # U+200F
            "\u200b",      # U+200B
            "\ufeff",      # U+FEFF
            "\U0001242a",  # U+1242A
        )
    )
)
|
|
'''
|
|
Bytes that shouldn't be allowed in user-submitted text
|
|
U+200E is LTR toggle, U+200F is RTL toggle, U+200B and U+FEFF are Zero-Width
|
|
Spaces, and U+1242A is a massive and terrifying cuneiform numeral
|
|
'''
|
|
|
|
# Matches a run of one or more whitespace characters. No ASCII flag is set,
# so Unicode whitespace (e.g. U+00A0) is included.
whitespace_regex = re.compile(r"\s+")
|
|
|
|
# Matches "~text~" or "~~text~~" strikethrough markup; group 1 is the run of
# non-tilde characters between the tildes.
strikethrough_regex = re.compile(
    r"~{1,2}([^~]+)~{1,2}",
    flags=re.ASCII,
)
|
|
|
|
# Parses a slash command at the start of a chat message.
#   group 1: the command word (ASCII word characters after the "/")
#   group 2: the rest of the line after one optional whitespace character
chat_command_regex = re.compile(
    r"^/(\w+)\s?(.*)",
    flags=re.ASCII,
)
|
|
|
|
# Matches one or more consecutive ":name:" tokens (each name optionally
# prefixed by up to three of "!", "#", "@", and each token optionally followed
# by whitespace) sitting between a ">" that is not preceded by "a" and the
# next "</". Name characters come from valid_username_chars.
emoji_regex = re.compile(
    rf"[^a]>\s*(:[!#@]{{0,3}}[{valid_username_chars}]+:\s*)+<\/",
    flags=re.ASCII,
)
|
|
# Matches ":token:" where the opening colon is not directly preceded by a
# double quote. group 1 is the token: the shortest run of 1-31 characters
# drawn from "!", "#", "@" and valid_username_chars.
emoji_regex2 = re.compile(
    rf'(?<!"):([!#@{valid_username_chars}]{{1,31}}?):',
    flags=re.ASCII,
)
|
|
# Matches ":token:" where the opening colon is not directly preceded by a
# double quote. group 1 is the token: the shortest run of 1-31 characters
# drawn from "!", "@" and valid_username_chars ("#" is not allowed here).
emoji_regex3 = re.compile(
    rf'(?<!"):([!@{valid_username_chars}]{{1,31}}?):',
    flags=re.ASCII,
)
|
|
|
|
# Matches an anchor tag of exactly this rendered form:
#   <a href="URL" rel="nofollow noopener noreferrer" target="_blank">TEXT</a>
#   group 1: URL  (http/https, host starting with 1-20 lowercase letters + ".")
#   group 2: TEXT (5-250 URL-ish characters)
snappy_url_regex = re.compile(
    r'<a href=\"(https?:\/\/[a-z]{1,20}\.[\w:~,()\-.#&\/=?@%;+]{5,250})\" rel=\"nofollow noopener noreferrer\" target=\"_blank\">([\w:~,()\-.#&\/=?@%;+]{5,250})<\/a>',
    flags=re.ASCII,
)
|
|
|
|
# Deliberately loose email shape: something, "@", something, ".", something,
# with no "@" inside any of the three parts.
email_regex = re.compile(
    r"[^@]+@[^@]+\.[^@]+",
    flags=re.ASCII,
)
|
|
'''
|
|
Regex to use for email addresses.
|
|
|
|
.. note::
|
|
Technically this allows stuff that is not a valid email address, but
|
|
realistically we care "does this email go to the correct person" rather
|
|
than "is this email address syntactically valid", so if we care we should
|
|
be sending a confirmation link, and otherwise should be pretty liberal in
|
|
what we accept here.
|
|
'''
|
|
|
|
# Matches a "utm_<name>=<value>&" query parameter, including the "&" that
# follows it (name: lowercase letters; value: lowercase letters, digits, "_").
utm_regex = re.compile(
    r"utm_[a-z]+=[a-z0-9_]+&",
    flags=re.ASCII,
)
|
|
# Matches a "utm_<name>=<value>" query parameter together with the "?" or "&"
# that precedes it (name: lowercase letters; value: lowercase letters,
# digits, "_").
utm_regex2 = re.compile(
    r"[?&]utm_[a-z]+=[a-z0-9_]+",
    flags=re.ASCII,
)
|
|
|
|
|
|
# urls
|
|
|
|
# Case-insensitively finds a https://youtube.com/watch?v=<id> link inside a
# paragraph, with no other tag between the "<p>" and the link.
#   group 1: "<p>" plus any text before the link
#   group 2: the full URL including trailing query characters
#   group 3: the video id (5-20 letters, digits, "-" or "_")
youtube_regex = re.compile(
    r"(<p>[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)",
    flags=re.IGNORECASE | re.ASCII,
)
|
|
|
|
# Case-insensitive, unanchored: matches a run of 5-20 ASCII letters, digits,
# "-" or "_".
yt_id_regex = re.compile(
    r"[a-z0-9-_]{5,20}",
    flags=re.IGNORECASE | re.ASCII,
)
|
|
|
|
# Case-insensitively finds a bare https:// image link that is bounded by
# whitespace or the string edges on both sides.
#   group 2: the full URL
#   group 3: what marked it as an image -- one of the extensions .png, .jpg,
#            .jpeg, .gif, .webp, or the suffix "maxwidth=9999" / "fidelity=high"
image_regex = re.compile(
    r"(^|\s)(https:\/\/[\w\-.#&/=\?@%;+]{5,250}(\.png|\.jpg|\.jpeg|\.gif|\.webp|maxwidth=9999|fidelity=high))($|\s)",
    flags=re.IGNORECASE | re.ASCII,
)
|
|
|
|
# Matches a single newline that has a non-newline character on each side.
#   group 1: the character before the newline
#   group 2: the character after the newline
linefeeds_regex = re.compile(
    r"([^\n])\n([^\n])",
    flags=re.ASCII,
)
|
|
|
|
# Case-insensitively matches an attribute-less <title>...</title> element on
# one line; group 1 is the 1-200 characters between the tags.
html_title_regex = re.compile(
    r"<title>(.{1,200})</title>",
    flags=re.IGNORECASE,
)
|
|
|
|
# Case-insensitively matches a CSS url(...) expression; group 1 is the shortest
# text between the parentheses, with surrounding whitespace and one optional
# quote character on each side left out of the group.
css_url_regex = re.compile(
    r'''url\(\s*[\'"]?(.*?)[\'"]?\s*\)''',
    flags=re.IGNORECASE | re.ASCII,
)
|