From b46ada9f72bf8af6d217ccbebd2b8a582bcabc27 Mon Sep 17 00:00:00 2001 From: FatherInire <105288522+FatherInire@users.noreply.github.com> Date: Fri, 28 Oct 2022 14:15:48 +1100 Subject: [PATCH] Fix some deprecations and factor out a helper function (#387) --- .gitignore | 1 + files/__main__.py | 6 +++++ files/helpers/const.py | 50 +++++++++++++++++++-------------------- files/helpers/get.py | 10 ++++---- files/helpers/sanitize.py | 1 + files/helpers/strings.py | 4 ++++ files/routes/front.py | 2 +- files/routes/login.py | 9 +++---- files/routes/posts.py | 5 ++-- files/routes/search.py | 9 +++---- files/routes/settings.py | 3 ++- files/routes/users.py | 3 ++- 12 files changed, 59 insertions(+), 44 deletions(-) create mode 100644 files/helpers/strings.py diff --git a/.gitignore b/.gitignore index 1e4c03064..5b41b0b2d 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ venv/ flask_session/ .DS_Store .venv +*.pyc diff --git a/files/__main__.py b/files/__main__.py index 2523be385..e7675f6ee 100644 --- a/files/__main__.py +++ b/files/__main__.py @@ -1,4 +1,5 @@ +from pathlib import Path import gevent.monkey gevent.monkey.patch_all() from os import environ, path @@ -19,12 +20,17 @@ from sys import stdout, argv import faulthandler import json + app = Flask(__name__, template_folder='templates') app.url_map.strict_slashes = False app.jinja_env.cache = {} app.jinja_env.auto_reload = True faulthandler.enable() +if environ.get("SITE_ID") is None: + from dotenv import load_dotenv + load_dotenv(dotenv_path=Path("env")) + if environ.get("FLASK_PROFILER_ENDPOINT"): app.config["flask_profiler"] = { "enabled": True, diff --git a/files/helpers/const.py b/files/helpers/const.py index 2a14cbf8c..ba80087f1 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -447,14 +447,14 @@ marseys_const = [x[0] for x in db.query(Marsey.name).filter(Marsey.name!='chudse marseys_const2 = marseys_const + ['chudsey','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','0','1','2','3','4','5','6','7','8','9','exclamationpoint','period','questionmark'] db.close() -valid_username_chars = 'a-zA-Z0-9_\-' -valid_username_regex = re.compile("^[a-zA-Z0-9_\-]{3,25}$", flags=re.A) -mention_regex = re.compile('(^|\s|

)@(([a-zA-Z0-9_\-]){1,25})', flags=re.A) -mention_regex2 = re.compile('

@(([a-zA-Z0-9_\-]){1,25})', flags=re.A) +valid_username_chars = 'a-zA-Z0-9_\\-' +valid_username_regex = re.compile("^[a-zA-Z0-9_\\-]{3,25}$", flags=re.A) +mention_regex = re.compile('(^|\\s|

)@(([a-zA-Z0-9_\\-]){1,25})', flags=re.A) +mention_regex2 = re.compile('

@(([a-zA-Z0-9_\\-]){1,25})', flags=re.A) valid_password_regex = re.compile("^.{8,100}$", flags=re.A) -marseyaward_body_regex = re.compile(">[^<\s+]|[^>\s+]<", flags=re.A) +marseyaward_body_regex = re.compile(">[^<\\s+]|[^>\\s+]<", flags=re.A) marseyaward_title_regex = re.compile("( *]+>)+", flags=re.A) @@ -462,44 +462,44 @@ marsey_regex = re.compile("[a-z0-9]{1,30}", flags=re.A) tags_regex = re.compile("[a-z0-9: ]{1,200}", flags=re.A) -valid_sub_regex = re.compile("^[a-zA-Z0-9_\-]{3,20}$", flags=re.A) +valid_sub_regex = re.compile("^[a-zA-Z0-9_\\-]{3,20}$", flags=re.A) -query_regex = re.compile("(\w+):(\S+)", flags=re.A) +query_regex = re.compile("(\\w+):(\\S+)", flags=re.A) -title_regex = re.compile("[^\w ]", flags=re.A) +title_regex = re.compile("[^\\w ]", flags=re.A) based_regex = re.compile("based and (.{1,20}?)(-| )pilled", flags=re.I|re.A) -controversial_regex = re.compile('["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]', flags=re.A) +controversial_regex = re.compile('["> ](https:\\/\\/old\\.reddit\\.com/r/[a-zA-Z0-9_]{3,20}\\/comments\\/[\\w\\-.#&/=\\?@%+]{5,250})["< ]', flags=re.A) -fishylinks_regex = re.compile("https?://\S+", flags=re.A) +fishylinks_regex = re.compile("https?://\\S+", flags=re.A) -spoiler_regex = re.compile('''\|\|(.+)\|\|''', flags=re.A) -reddit_regex = re.compile('(^|\s|

)\/?((r|u)\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A) -sub_regex = re.compile('(^|\s|

)\/?(h\/(\w|-){3,25})', flags=re.A) +spoiler_regex = re.compile('''\\|\\|(.+)\\|\\|''', flags=re.A) +reddit_regex = re.compile('(^|\\s|

)\\/?((r|u)\\/(\\w|-){3,25})(?![^<]*<\\/(code|pre|a)>)', flags=re.A) +sub_regex = re.compile('(^|\\s|

)\\/?(h\\/(\\w|-){3,25})', flags=re.A) # Bytes that shouldn't be allowed in user-submitted text # U+200E is LTR toggle, U+200F is RTL toggle, U+200B and U+FEFF are Zero-Width Spaces, # and U+1242A is a massive and terrifying cuneiform numeral unwanted_bytes_regex = re.compile("\u200e|\u200f|\u200b|\ufeff|\U0001242a") -whitespace_regex = re.compile('\s+') +whitespace_regex = re.compile('\\s+') strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A) -mute_regex = re.compile("/mute @([a-z0-9_\-]{3,25}) ([0-9])+", flags=re.A) +mute_regex = re.compile("/mute @([a-z0-9_\\-]{3,25}) ([0-9])+", flags=re.A) -emoji_regex = re.compile(f"[^a]>\s*(:[!#@]{{0,3}}[{valid_username_chars}]+:\s*)+<\/", flags=re.A) +emoji_regex = re.compile(f"[^a]>\\s*(:[!#@]{{0,3}}[{valid_username_chars}]+:\\s*)+<\\/", flags=re.A) emoji_regex2 = re.compile(f"(?([\w:~,()\-.#&\/=?@%;+]{5,250})<\/a>', flags=re.A) +snappy_url_regex = re.compile('([\\w:~,()\\-.#&\\/=?@%;+]{5,250})<\\/a>', flags=re.A) # Technically this allows stuff that is not a valid email address, but realistically # we care "does this email go to the correct person" rather than "is this email # address syntactically valid", so if we care we should be sending a confirmation # link, and otherwise should be pretty liberal in what we accept here. -email_regex = re.compile('[^@]+@[^@]+\.[^@]+', flags=re.A) +email_regex = re.compile('[^@]+@[^@]+\\.[^@]+', flags=re.A) utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A) utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A) @@ -564,22 +564,22 @@ approved_embed_hosts = [ 'redditmedia.com' ] -hosts = "|".join(approved_embed_hosts).replace('.','\.') +hosts = "|".join(approved_embed_hosts).replace('.','\\.') -image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\/|\/)).*?)\)', flags=re.A) +image_check_regex = re.compile(f'!\\[\\]\\(((?!(https:\\/\\/([a-z0-9-]+\\.)*({hosts})\\/|\\/)).*?)\\)', flags=re.A) -embed_fullmatch_regex = re.compile(f'https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*', flags=re.A) +embed_fullmatch_regex = re.compile(f'https:\\/\\/([a-z0-9-]+\\.)*({hosts})\\/[\\w:~,()\\-.#&\\/=?@%;+]*', flags=re.A) -video_sub_regex = re.compile(f'(

[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=re.A) +video_sub_regex = re.compile(f'(

[^<]*)(https:\\/\\/([a-z0-9-]+\\.)*({hosts})\\/[\\w:~,()\\-.#&\\/=?@%;+]*?\\.(mp4|webm|mov))', flags=re.A) -youtube_regex = re.compile('(

[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A) +youtube_regex = re.compile('(

[^<]*)(https:\\/\\/youtube\\.com\\/watch\\?v\\=([a-z0-9-_]{5,20})[\\w\\-.#&/=\\?@%+]*)', flags=re.I|re.A) yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A) -image_regex = re.compile("(^|\s)(https:\/\/[\w\-.#&/=\?@%;+]{5,250}(\.png|\.jpg|\.jpeg|\.gif|\.webp|maxwidth=9999|fidelity=high))($|\s)", flags=re.I|re.A) +image_regex = re.compile("(^|\\s)(https:\\/\\/[\\w\\-.#&/=\\?@%;+]{5,250}(\\.png|\\.jpg|\\.jpeg|\\.gif|\\.webp|maxwidth=9999|fidelity=high))($|\\s)", flags=re.I|re.A) procoins_li = (0,2500,5000,10000,25000,50000,125000,250000) -linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A) +linefeeds_regex = re.compile("([^\\n])\\n([^\\n])", flags=re.A) def make_name(*args, **kwargs): return request.base_url diff --git a/files/helpers/get.py b/files/helpers/get.py index de464469f..1e8ada2ed 100644 --- a/files/helpers/get.py +++ b/files/helpers/get.py @@ -1,10 +1,11 @@ from files.classes import * +from files.helpers.strings import sql_ilike_clean from flask import g def get_id(username, v=None, graceful=False): - username = username.replace('\\', '').replace('_', '\_').replace('%', '').strip() + username = sql_ilike_clean(username) user = g.db.query( User.id @@ -30,7 +31,7 @@ def get_user(username, v=None, graceful=False): if not graceful: abort(404) else: return None - username = username.replace('\\', '').replace('_', '\_').replace('%', '').strip() + username = sql_ilike_clean(username) user = g.db.query( User @@ -68,10 +69,7 @@ def get_users(usernames, v=None, graceful=False): if not graceful: abort(404) else: return [] - def clean(n): - return n.replace('\\', '').replace('_', '\_').replace('%', '').strip() - - usernames = [ clean(n) for n in usernames ] + usernames = [ sql_ilike_clean(n) for n in usernames ] users = g.db.query(User).filter( or_( diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index c168747aa..7e622abc8 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -22,6 +22,7 @@ allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5' if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: allowed_tags += ('img', 'lite-youtube', 'video', 'source',) + def allowed_attributes(tag, name, value): if name == 'style': return True diff --git a/files/helpers/strings.py b/files/helpers/strings.py new file mode 100644 index 000000000..2fdbdf804 --- /dev/null +++ b/files/helpers/strings.py @@ -0,0 +1,4 @@ + +# clean strings for searching +def sql_ilike_clean(my_str): + return my_str.replace(r'\\', '').replace('_', r'\_').replace('%', '').strip() \ No newline at end of file diff --git a/files/routes/front.py b/files/routes/front.py index 6004df8b5..ab5539602 100644 --- a/files/routes/front.py +++ b/files/routes/front.py @@ -334,7 +334,7 @@ def frontlist(v=None, sort='new', page=1, t="all", ids_only=True, ccmode="false" if v and filter_words: for word in filter_words: - word = word.replace('\\', '').replace('_', '\_').replace('%', '\%').strip() + word = word.replace(r'\\', '').replace('_', r'\_').replace('%', r'\%').strip() posts=posts.filter(not_(Submission.title.ilike(f'%{word}%'))) if not (v and v.shadowbanned): diff --git a/files/routes/login.py b/files/routes/login.py index 8a5dc57df..fcf2928f5 100644 --- a/files/routes/login.py +++ b/files/routes/login.py @@ -2,6 +2,7 @@ from urllib.parse import urlencode from files.mail import * from files.__main__ import app, limiter from files.helpers.const import * +from files.helpers.strings import sql_ilike_clean import requests @app.get("/login") @@ -87,7 +88,7 @@ def login_post(): username = request.values.get("username") if not username: abort(400) - username = username.lstrip('@').replace('\\', '').replace('_', '\_').replace('%', '').strip() + username = sql_ilike_clean(username.lstrip('@')) if not username: abort(400) if username.startswith('@'): username = username[1:] @@ -192,7 +193,7 @@ def sign_up_get(v): ref = request.values.get("ref") if ref: - ref = ref.replace('\\', '').replace('_', '\_').replace('%', '').strip() + ref = sql_ilike_clean(ref) ref_user = g.db.query(User).filter(User.username.ilike(ref)).one_or_none() else: @@ -390,8 +391,8 @@ def post_forgot(): return render_template("forgot_password.html", error="Invalid email.") - username = username.lstrip('@').replace('\\', '').replace('_', '\_').replace('%', '').strip() - email = email.replace('\\', '').replace('_', '\_').replace('%', '').strip() + username = sql_ilike_clean(username.lstrip('@')) + email = sql_ilike_clean(email) user = g.db.query(User).filter( User.username.ilike(username), diff --git a/files/routes/posts.py b/files/routes/posts.py index 40732e43f..e06ee4f80 100644 --- a/files/routes/posts.py +++ b/files/routes/posts.py @@ -2,6 +2,7 @@ import time import gevent from files.helpers.wrappers import * from files.helpers.sanitize import * +from files.helpers.strings import sql_ilike_clean from files.helpers.alerts import * from files.helpers.discord import send_discord_message, send_cringetopia_message from files.helpers.const import * @@ -712,7 +713,7 @@ def api_is_repost(): if url.endswith('/'): url = url[:-1] - search_url = url.replace('%', '').replace('\\', '').replace('_', '\_').strip() + search_url = url.replace('%', '').replace(r'\\', '').replace('_', r'\_').strip() repost = g.db.query(Submission).filter( Submission.url.ilike(search_url), Submission.deleted_utc == 0, @@ -798,7 +799,7 @@ def submit_post(v, sub=None): if url.endswith('/'): url = url[:-1] - search_url = url.replace('%', '').replace('\\', '').replace('_', '\_').strip() + search_url = sql_ilike_clean(url) repost = g.db.query(Submission).filter( Submission.url.ilike(search_url), Submission.deleted_utc == 0, diff --git a/files/routes/search.py b/files/routes/search.py index 56204467c..ecab91432 100644 --- a/files/routes/search.py +++ b/files/routes/search.py @@ -3,6 +3,7 @@ import re from sqlalchemy import * from flask import * from files.__main__ import app +from files.helpers.strings import sql_ilike_clean valid_params=[ @@ -88,7 +89,7 @@ def searchposts(v=None): if 'q' in criteria: words=criteria['q'].split() - words = criteria['q'].replace('\\', '').replace('_', '\_').replace('%', '\%').strip().split() + words = criteria['q'].replace(r'\\', '').replace('_', r'\_').replace('%', r'\%').strip().split() words=[Submission.title.ilike('%'+x+'%') for x in words] posts=posts.filter(*words) @@ -97,7 +98,7 @@ def searchposts(v=None): if 'domain' in criteria: domain=criteria['domain'] - domain = domain.replace('\\', '').replace('_', '\_').replace('%', '').strip() + domain = sql_ilike_clean(domain) posts=posts.filter( or_( @@ -202,7 +203,7 @@ def searchcomments(v=None): else: comments = comments.filter(Comment.author_id == author.id) if 'q' in criteria: - words = criteria['q'].replace('\\', '').replace('_', '\_').replace('%', '\%').strip().split() + words = criteria['q'].replace(r'\\', '').replace('_', r'\_').replace('%', r'\%').strip().split() words = [Comment.body.ilike('%'+x+'%') for x in words] comments = comments.filter(*words) @@ -274,7 +275,7 @@ def searchusers(v=None): sort = request.values.get("sort", "new").lower() t = request.values.get('t', 'all').lower() term=query.lstrip('@') - term = term.replace('\\','').replace('_','\_').replace('%','') + term = sql_ilike_clean(term) users=g.db.query(User).filter(User.username.ilike(f'%{term}%')) diff --git a/files/routes/settings.py b/files/routes/settings.py index e53fd9aba..99ab9e0b8 100644 --- a/files/routes/settings.py +++ b/files/routes/settings.py @@ -9,6 +9,7 @@ import youtube_dl from .front import frontlist import os from files.helpers.sanitize import filter_emojis_only +from files.helpers.strings import sql_ilike_clean from files.helpers.discord import add_role from shutil import copyfile import requests @@ -687,7 +688,7 @@ def settings_name_change(v): v=v, error="This isn't a valid username.") - search_name = new_name.replace('\\', '').replace('_','\_').replace('%','') + search_name = sql_ilike_clean(new_name) x= g.db.query(User).filter( or_( diff --git a/files/routes/users.py b/files/routes/users.py index 9189478c7..6070ad7e2 100644 --- a/files/routes/users.py +++ b/files/routes/users.py @@ -5,6 +5,7 @@ import math from files.classes.views import ViewerRelationship from files.helpers.alerts import * from files.helpers.sanitize import * +from files.helpers.strings import sql_ilike_clean from files.helpers.const import * from files.helpers.assetcache import assetcache_path from files.mail import * @@ -736,7 +737,7 @@ def api_is_available(name): if len(name)<3 or len(name)>25: return {name:False} - name2 = name.replace('\\', '').replace('_','\_').replace('%','') + name2 = sql_ilike_clean(name) x= g.db.query(User).filter( or_(