Fix some deprecations and factor out a helper function (#387)

2022-10-28 14:15:48 +11:00 · 2022-10-28 14:15:48 +11:00 · b46ada9f72
commit b46ada9f72
parent 4bdfe28a35
12 changed files with 59 additions and 44 deletions
--- a/.gitignore
+++ b/.gitignore
@ -11,3 +11,4 @@ venv/
 flask_session/
 .DS_Store
 .venv
+*.pyc
--- a/files/main.py
+++ b/files/main.py
@ -1,4 +1,5 @@

+from pathlib import Path
 import gevent.monkey
 gevent.monkey.patch_all()
 from os import environ, path
@ -19,12 +20,17 @@ from sys import stdout, argv
 import faulthandler
 import json

+
 app = Flask(__name__, template_folder='templates')
 app.url_map.strict_slashes = False
 app.jinja_env.cache = {}
 app.jinja_env.auto_reload = True
 faulthandler.enable()

+if environ.get("SITE_ID") is None:
+	from dotenv import load_dotenv
+	load_dotenv(dotenv_path=Path("env"))
+
 if environ.get("FLASK_PROFILER_ENDPOINT"):
 	app.config["flask_profiler"] = {
 		"enabled": True,
--- a/files/helpers/const.py
+++ b/files/helpers/const.py
@ -447,14 +447,14 @@ marseys_const = [x[0] for x in db.query(Marsey.name).filter(Marsey.name!='chudse
 marseys_const2 = marseys_const + ['chudsey','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','0','1','2','3','4','5','6','7','8','9','exclamationpoint','period','questionmark']
 db.close()

-valid_username_chars = 'a-zA-Z0-9_\-'
-valid_username_regex = re.compile("^[a-zA-Z0-9_\-]{3,25}$", flags=re.A)
-mention_regex = re.compile('(^|\s|<p>)@(([a-zA-Z0-9_\-]){1,25})', flags=re.A)
-mention_regex2 = re.compile('<p>@(([a-zA-Z0-9_\-]){1,25})', flags=re.A)
+valid_username_chars = 'a-zA-Z0-9_\\-'
+valid_username_regex = re.compile("^[a-zA-Z0-9_\\-]{3,25}$", flags=re.A)
+mention_regex = re.compile('(^|\\s|<p>)@(([a-zA-Z0-9_\\-]){1,25})', flags=re.A)
+mention_regex2 = re.compile('<p>@(([a-zA-Z0-9_\\-]){1,25})', flags=re.A)

 valid_password_regex = re.compile("^.{8,100}$", flags=re.A)

-marseyaward_body_regex = re.compile(">[^<\s+]|[^>\s+]<", flags=re.A)
+marseyaward_body_regex = re.compile(">[^<\\s+]|[^>\\s+]<", flags=re.A)

 marseyaward_title_regex = re.compile("( *<img[^>]+>)+", flags=re.A)

@ -462,44 +462,44 @@ marsey_regex = re.compile("[a-z0-9]{1,30}", flags=re.A)

 tags_regex = re.compile("[a-z0-9: ]{1,200}", flags=re.A)

-valid_sub_regex = re.compile("^[a-zA-Z0-9_\-]{3,20}$", flags=re.A)
+valid_sub_regex = re.compile("^[a-zA-Z0-9_\\-]{3,20}$", flags=re.A)

-query_regex = re.compile("(\w+):(\S+)", flags=re.A)
+query_regex = re.compile("(\\w+):(\\S+)", flags=re.A)

-title_regex = re.compile("[^\w ]", flags=re.A)
+title_regex = re.compile("[^\\w ]", flags=re.A)

 based_regex = re.compile("based and (.{1,20}?)(-| )pilled", flags=re.I|re.A)

-controversial_regex = re.compile('["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]', flags=re.A)
+controversial_regex = re.compile('["> ](https:\\/\\/old\\.reddit\\.com/r/[a-zA-Z0-9_]{3,20}\\/comments\\/[\\w\\-.#&/=\\?@%+]{5,250})["< ]', flags=re.A)

-fishylinks_regex = re.compile("https?://\S+", flags=re.A)
+fishylinks_regex = re.compile("https?://\\S+", flags=re.A)

-spoiler_regex = re.compile('''\|\|(.+)\|\|''', flags=re.A)
-reddit_regex = re.compile('(^|\s|<p>)\/?((r|u)\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A)
-sub_regex = re.compile('(^|\s|<p>)\/?(h\/(\w|-){3,25})', flags=re.A)
+spoiler_regex = re.compile('''\\|\\|(.+)\\|\\|''', flags=re.A)
+reddit_regex = re.compile('(^|\\s|<p>)\\/?((r|u)\\/(\\w|-){3,25})(?![^<]*<\\/(code|pre|a)>)', flags=re.A)
+sub_regex = re.compile('(^|\\s|<p>)\\/?(h\\/(\\w|-){3,25})', flags=re.A)

 # Bytes that shouldn't be allowed in user-submitted text
 # U+200E is LTR toggle,  U+200F is RTL toggle, U+200B and U+FEFF are Zero-Width Spaces,
 # and U+1242A is a massive and terrifying cuneiform numeral
 unwanted_bytes_regex = re.compile("\u200e|\u200f|\u200b|\ufeff|\U0001242a")

-whitespace_regex = re.compile('\s+')
+whitespace_regex = re.compile('\\s+')

 strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A)

-mute_regex = re.compile("/mute @([a-z0-9_\-]{3,25}) ([0-9])+", flags=re.A)
+mute_regex = re.compile("/mute @([a-z0-9_\\-]{3,25}) ([0-9])+", flags=re.A)

-emoji_regex = re.compile(f"[^a]>\s*(:[!#@]{{0,3}}[{valid_username_chars}]+:\s*)+<\/", flags=re.A)
+emoji_regex = re.compile(f"[^a]>\\s*(:[!#@]{{0,3}}[{valid_username_chars}]+:\\s*)+<\\/", flags=re.A)
 emoji_regex2 = re.compile(f"(?<!\"):([!#@{valid_username_chars}]{{1,31}}?):", flags=re.A)
 emoji_regex3 = re.compile(f"(?<!\"):([!@{valid_username_chars}]{{1,31}}?):", flags=re.A)

-snappy_url_regex = re.compile('<a href=\"(https?:\/\/[a-z]{1,20}\.[\w:~,()\-.#&\/=?@%;+]{5,250})\" rel=\"nofollow noopener noreferrer\" target=\"_blank\">([\w:~,()\-.#&\/=?@%;+]{5,250})<\/a>', flags=re.A)
+snappy_url_regex = re.compile('<a href=\\"(https?:\\/\\/[a-z]{1,20}\\.[\\w:~,()\\-.#&\\/=?@%;+]{5,250})\\" rel=\\"nofollow noopener noreferrer\\" target=\\"_blank\\">([\\w:~,()\\-.#&\\/=?@%;+]{5,250})<\\/a>', flags=re.A)

 # Technically this allows stuff that is not a valid email address, but realistically
 # we care "does this email go to the correct person" rather than "is this email
 # address syntactically valid", so if we care we should be sending a confirmation
 # link, and otherwise should be pretty liberal in what we accept here.
-email_regex = re.compile('[^@]+@[^@]+\.[^@]+', flags=re.A)
+email_regex = re.compile('[^@]+@[^@]+\\.[^@]+', flags=re.A)

 utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A)
 utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A)
@ -564,22 +564,22 @@ approved_embed_hosts = [
 	'redditmedia.com'
 	]

-hosts = "|".join(approved_embed_hosts).replace('.','\.')
+hosts = "|".join(approved_embed_hosts).replace('.','\\.')

-image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\/|\/)).*?)\)', flags=re.A)
+image_check_regex = re.compile(f'!\\[\\]\\(((?!(https:\\/\\/([a-z0-9-]+\\.)*({hosts})\\/|\\/)).*?)\\)', flags=re.A)

-embed_fullmatch_regex = re.compile(f'https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*', flags=re.A)
+embed_fullmatch_regex = re.compile(f'https:\\/\\/([a-z0-9-]+\\.)*({hosts})\\/[\\w:~,()\\-.#&\\/=?@%;+]*', flags=re.A)

-video_sub_regex = re.compile(f'(<p>[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=re.A)
+video_sub_regex = re.compile(f'(<p>[^<]*)(https:\\/\\/([a-z0-9-]+\\.)*({hosts})\\/[\\w:~,()\\-.#&\\/=?@%;+]*?\\.(mp4|webm|mov))', flags=re.A)

-youtube_regex = re.compile('(<p>[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A)
+youtube_regex = re.compile('(<p>[^<]*)(https:\\/\\/youtube\\.com\\/watch\\?v\\=([a-z0-9-_]{5,20})[\\w\\-.#&/=\\?@%+]*)', flags=re.I|re.A)

 yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A)

-image_regex = re.compile("(^|\s)(https:\/\/[\w\-.#&/=\?@%;+]{5,250}(\.png|\.jpg|\.jpeg|\.gif|\.webp|maxwidth=9999|fidelity=high))($|\s)", flags=re.I|re.A)
+image_regex = re.compile("(^|\\s)(https:\\/\\/[\\w\\-.#&/=\\?@%;+]{5,250}(\\.png|\\.jpg|\\.jpeg|\\.gif|\\.webp|maxwidth=9999|fidelity=high))($|\\s)", flags=re.I|re.A)

 procoins_li = (0,2500,5000,10000,25000,50000,125000,250000)

-linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A)
+linefeeds_regex = re.compile("([^\\n])\\n([^\\n])", flags=re.A)

 def make_name(*args, **kwargs): return request.base_url
--- a/files/helpers/get.py
+++ b/files/helpers/get.py
@ -1,10 +1,11 @@
 from files.classes import *
+from files.helpers.strings import sql_ilike_clean
 from flask import g


 def get_id(username, v=None, graceful=False):
 	
-	username = username.replace('\\', '').replace('_', '\_').replace('%', '').strip()
+	username = sql_ilike_clean(username)

 	user = g.db.query(
 		User.id
@ -30,7 +31,7 @@ def get_user(username, v=None, graceful=False):
 		if not graceful: abort(404)
 		else: return None

-	username = username.replace('\\', '').replace('_', '\_').replace('%', '').strip()
+	username = sql_ilike_clean(username)

 	user = g.db.query(
 		User
@ -68,10 +69,7 @@ def get_users(usernames, v=None, graceful=False):
 		if not graceful: abort(404)
 		else: return []

-	def clean(n):
-		return n.replace('\\', '').replace('_', '\_').replace('%', '').strip()
-
-	usernames = [ clean(n) for n in usernames ]
+	usernames = [ sql_ilike_clean(n) for n in usernames ]

 	users = g.db.query(User).filter(
 		or_(
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@ -22,6 +22,7 @@ allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5'
 if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
 	allowed_tags += ('img', 'lite-youtube', 'video', 'source',)

+
 def allowed_attributes(tag, name, value):

 	if name == 'style': return True
--- a/files/helpers/strings.py
+++ b/files/helpers/strings.py
@ -0,0 +1,4 @@
+# Sanitize user-supplied text before it is used as a SQL ILIKE operand.
+def sql_ilike_clean(my_str):
+	"""Return my_str with every backslash and '%' removed, each '_' escaped as backslash-underscore, and surrounding whitespace stripped."""
+	return my_str.replace('\\', '').replace('_', r'\_').replace('%', '').strip()
--- a/files/routes/front.py
+++ b/files/routes/front.py
@ -334,7 +334,7 @@ def frontlist(v=None, sort='new', page=1, t="all", ids_only=True, ccmode="false"

 	if v and filter_words:
 		for word in filter_words:
-			word  = word.replace('\\', '').replace('_', '\_').replace('%', '\%').strip()
+			word  = word.replace('\\', '').replace('_', r'\_').replace('%', r'\%').strip()  # '\\' is ONE backslash (r'\\' would only match doubled ones); drop them, then escape the ILIKE wildcards
 			posts=posts.filter(not_(Submission.title.ilike(f'%{word}%')))

 	if not (v and v.shadowbanned):
--- a/files/routes/login.py
+++ b/files/routes/login.py
@ -2,6 +2,7 @@ from urllib.parse import urlencode
 from files.mail import *
 from files.__main__ import app, limiter
 from files.helpers.const import *
+from files.helpers.strings import sql_ilike_clean
 import requests

@app.get("/login")
@ -87,7 +88,7 @@ def login_post():
 	username = request.values.get("username")

 	if not username: abort(400)
-	username  = username.lstrip('@').replace('\\', '').replace('_', '\_').replace('%', '').strip()
+	username  = sql_ilike_clean(username.lstrip('@'))

 	if not username: abort(400)
 	if username.startswith('@'): username = username[1:]
@ -192,7 +193,7 @@ def sign_up_get(v):
 	ref = request.values.get("ref")

 	if ref:
-		ref  = ref.replace('\\', '').replace('_', '\_').replace('%', '').strip()
+		ref  = sql_ilike_clean(ref)
 		ref_user = g.db.query(User).filter(User.username.ilike(ref)).one_or_none()

 	else:
@ -390,8 +391,8 @@ def post_forgot():
 		return render_template("forgot_password.html", error="Invalid email.")


-	username  = username.lstrip('@').replace('\\', '').replace('_', '\_').replace('%', '').strip()
-	email  = email.replace('\\', '').replace('_', '\_').replace('%', '').strip()
+	username  = sql_ilike_clean(username.lstrip('@'))
+	email  = sql_ilike_clean(email)

 	user = g.db.query(User).filter(
 		User.username.ilike(username),
--- a/files/routes/posts.py
+++ b/files/routes/posts.py
@ -2,6 +2,7 @@ import time
 import gevent
 from files.helpers.wrappers import *
 from files.helpers.sanitize import *
+from files.helpers.strings import sql_ilike_clean
 from files.helpers.alerts import *
 from files.helpers.discord import send_discord_message, send_cringetopia_message
 from files.helpers.const import *
@ -712,7 +713,7 @@ def api_is_repost():

 	if url.endswith('/'): url = url[:-1]

-	search_url = url.replace('%', '').replace('\\', '').replace('_', '\_').strip()
+	search_url = url.replace('%', '').replace('\\', '').replace('_', r'\_').strip()  # '\\' is ONE backslash (r'\\' would only match doubled ones); strip '%' and backslashes, escape '_' for ILIKE
 	repost = g.db.query(Submission).filter(
 		Submission.url.ilike(search_url),
 		Submission.deleted_utc == 0,
@ -798,7 +799,7 @@ def submit_post(v, sub=None):

 		if url.endswith('/'): url = url[:-1]

-		search_url = url.replace('%', '').replace('\\', '').replace('_', '\_').strip()
+		search_url = sql_ilike_clean(url)
 		repost = g.db.query(Submission).filter(
 			Submission.url.ilike(search_url),
 			Submission.deleted_utc == 0,
--- a/files/routes/search.py
+++ b/files/routes/search.py
@ -3,6 +3,7 @@ import re
 from sqlalchemy import *
 from flask import *
 from files.__main__ import app
+from files.helpers.strings import sql_ilike_clean


 valid_params=[
@ -88,7 +89,7 @@ def searchposts(v=None):

 	if 'q' in criteria:
 		words=criteria['q'].split()
-		words = criteria['q'].replace('\\', '').replace('_', '\_').replace('%', '\%').strip().split()
+		words = criteria['q'].replace('\\', '').replace('_', r'\_').replace('%', r'\%').strip().split()  # '\\' is ONE backslash (r'\\' would only match doubled ones); drop them, then escape the ILIKE wildcards
 		words=[Submission.title.ilike('%'+x+'%') for x in words]
 		posts=posts.filter(*words)
 		
@ -97,7 +98,7 @@ def searchposts(v=None):
 	if 'domain' in criteria:
 		domain=criteria['domain']

-		domain = domain.replace('\\', '').replace('_', '\_').replace('%', '').strip()
+		domain = sql_ilike_clean(domain)

 		posts=posts.filter(
 			or_(
@ -202,7 +203,7 @@ def searchcomments(v=None):
 		else: comments = comments.filter(Comment.author_id == author.id)

 	if 'q' in criteria:
-		words = criteria['q'].replace('\\', '').replace('_', '\_').replace('%', '\%').strip().split()
+		words = criteria['q'].replace('\\', '').replace('_', r'\_').replace('%', r'\%').strip().split()  # '\\' is ONE backslash (r'\\' would only match doubled ones); drop them, then escape the ILIKE wildcards

 		words = [Comment.body.ilike('%'+x+'%') for x in words]
 		comments = comments.filter(*words)
@ -274,7 +275,7 @@ def searchusers(v=None):
 	sort = request.values.get("sort", "new").lower()
 	t = request.values.get('t', 'all').lower()
 	term=query.lstrip('@')
-	term = term.replace('\\','').replace('_','\_').replace('%','')
+	term = sql_ilike_clean(term)
 	
 	users=g.db.query(User).filter(User.username.ilike(f'%{term}%'))
 	
--- a/files/routes/settings.py
+++ b/files/routes/settings.py
@ -9,6 +9,7 @@ import youtube_dl
 from .front import frontlist
 import os
 from files.helpers.sanitize import filter_emojis_only
+from files.helpers.strings import sql_ilike_clean
 from files.helpers.discord import add_role
 from shutil import copyfile
 import requests
@ -687,7 +688,7 @@ def settings_name_change(v):
 						   v=v,
 						   error="This isn't a valid username.")

-	search_name = new_name.replace('\\', '').replace('_','\_').replace('%','')
+	search_name = sql_ilike_clean(new_name)

 	x= g.db.query(User).filter(
 		or_(
--- a/files/routes/users.py
+++ b/files/routes/users.py
@ -5,6 +5,7 @@ import math
 from files.classes.views import ViewerRelationship
 from files.helpers.alerts import *
 from files.helpers.sanitize import *
+from files.helpers.strings import sql_ilike_clean
 from files.helpers.const import *
 from files.helpers.assetcache import assetcache_path
 from files.mail import *
@ -736,7 +737,7 @@ def api_is_available(name):
 	if len(name)<3 or len(name)>25:
 		return {name:False}
 		
-	name2 = name.replace('\\', '').replace('_','\_').replace('%','')
+	name2 = sql_ilike_clean(name)

 	x= g.db.query(User).filter(
 		or_(