From cbcc2aac6ff7d4a3798ceb5dde290b4983e46a5a Mon Sep 17 00:00:00 2001
From: painejohn <109989267+painejohn@users.noreply.github.com>
Date: Sun, 7 Aug 2022 02:30:47 -0400
Subject: [PATCH] 139: Disabled multimedia embedding

This change disables multimedia embedding:

- In comments and comment replies.
- In new submissions.
- In comment and submission previews.

All of this can be toggled via the MULTIMEDIA_EMBEDDING_ENABLED envvar,
except for the JS bits; those carry a comment linking to the GitHub
issue, so they should be easy to find in the future.

The way it works is:

- strips markdown image/video syntax,
  e.g. `![](https://example.org/someimage.jpg)` becomes ``
- changes link text into anchors, eg.
  `https://example.org/someimage.jpg` into
  `[https://example.org/someimage.jpg](https://example.org/someimage.jpg)`
- strips html img/video/audio tags, e.g.
  `<img src="https://example.org/someimage.jpg" />` becomes ``
- when embedding gifs via the giphy modal in "new submission", it will
  insert only an anchor to the gif
- when attaching an image, it will upload the image, then add only an
  anchor to the post/comment body

I tested this manually, but I'm not sure I covered all the test cases.
What I checked was:

- create comment w/ image/video/audio media using markdown -> success
- create comment reply w/ image/video/audio media using markdown ->
  success
- create comment w/ link to img/imgur/youtube/audio -> success
- create comment w/ attachment -> success
- create comment reply w/ attachment -> success
- create comment w/ img/video tag -> success
- create comment reply w/ image/video tag -> success
- create post submission w/ image/video/media using markdown -> success
- create post submission w/ link to img/imgur/youtube/audio -> success
- create post submission w/ attachment -> success
- create post submission w/ giphy gif -> success

Also, updated the formatting page.

Co-authored-by: Ben Rog-Wilhelm <zorba-github@pavlovian.net>
---
 env                              |  1 +
 files/__main__.py                |  1 +
 files/assets/js/gif_modal.js     |  7 +++-
 files/assets/js/marked.custom.js |  6 ++-
 files/helpers/sanitize.py        | 63 ++++++++++++++++++++------------
 files/routes/comments.py         | 10 ++++-
 files/routes/posts.py            | 21 +++++++++--
 files/templates/comments.html    |  2 +-
 files/templates/formatting.html  |  8 ++--
 files/templates/gif_modal.html   |  2 +-
 files/templates/submit.html      |  2 +-
 files/templates/userpage.html    |  2 +-
 12 files changed, 84 insertions(+), 41 deletions(-)
diff --git a/env b/env
index 0be062740..60295303f 100644
--- a/env
+++ b/env
@@ -34,3 +34,4 @@ CF_KEY=blahblahblah
 CF_ZONE=blahblahblah
 DEBIAN_FRONTEND=noninteractive
 MENTION_LIMIT=100
+MULTIMEDIA_EMBEDDING_ENABLED=False
diff --git a/files/__main__.py b/files/__main__.py
index 9cc2a4cfc..762f0de8d 100644
--- a/files/__main__.py
+++ b/files/__main__.py
@@ -61,6 +61,7 @@ app.config['DESCRIPTION'] = environ.get("DESCRIPTION", "DESCRIPTION GOES HERE").
 app.config['SETTINGS'] = {}
 app.config['SQLALCHEMY_DATABASE_URI'] = app.config['DATABASE_URL']
 app.config['MENTION_LIMIT'] = int(environ.get('MENTION_LIMIT', 100))
+app.config['MULTIMEDIA_EMBEDDING_ENABLED'] = environ.get('MULTIMEDIA_EMBEDDING_ENABLED', "false").lower() == "true"
 
 r=redis.Redis(host=environ.get("REDIS_URL", "redis://localhost"), decode_responses=True, ssl_cert_reqs=None)
 
diff --git a/files/assets/js/gif_modal.js b/files/assets/js/gif_modal.js
index ba6d57470..94746bf09 100644
--- a/files/assets/js/gif_modal.js
+++ b/files/assets/js/gif_modal.js
@@ -43,7 +43,7 @@ async function getGif(searchTerm) {
 
 		let response = await fetch("/giphy?searchTerm=" + searchTerm + "&limit=48");
 		let data = await response.json()
-		var max = data.length - 1
+		var max = data.data?.length === undefined ? 0 : data.data.length - 1
 		data = data.data
 					var gifURL = [];
 
@@ -70,7 +70,10 @@ async function getGif(searchTerm) {
 
 function insertGIF(url,form) {
 
-	var gif = "\n\n![](" + url +")";
+	// https://github.com/themotte/rDrama/issues/139
+	// when MULTIMEDIA_EMBEDDING_ENABLED == False, we want to insert an anchor, NOT an img
+	//var gif = "\n\n![](" + url +")";
+	var gif = '\n\n[' + url + '](' + url + ')';
 
 	var commentBox = document.getElementById(form);
 
diff --git a/files/assets/js/marked.custom.js b/files/assets/js/marked.custom.js
index 53a83b9ef..4f8d9df6f 100644
--- a/files/assets/js/marked.custom.js
+++ b/files/assets/js/marked.custom.js
@@ -65,7 +65,9 @@ function markdown(first, second) {
 			dest.removeChild(dest.children[i]);
 		}
 		const html = marked.parse(input.value);
-		dest.innerHTML = DOMPurify.sanitize(html);
+		// https://github.com/themotte/rDrama/issues/139
+		// Remove disallowed media tags (img/video/audio) completely so the preview shows no embeds.
+		dest.innerHTML = DOMPurify.sanitize(html, {FORBID_TAGS: ['img', 'video', 'audio', 'source']});
 	}
 }
 
@@ -87,4 +89,4 @@ function charLimit(form, content) {
 	text.innerText = length + ' / ' + maxLength;
 }
 
-setTimeout(() => markdown('post-text','preview'), 200);
\ No newline at end of file
+setTimeout(() => markdown('post-text','preview'), 200);
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 8fb933a16..a56249e1d 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -13,9 +13,12 @@ import time
 import requests
 from files.__main__ import app
 
-TLDS = ('ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at','au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br','bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl','club','cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec','edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf','gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn','hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo','jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk','lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo','mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name','nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg','ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw','sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st','su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp','tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn','vu','wf','win','ws','xn','xxx','xyz','ye','yt','yu','za','zm','zw')
+TLDS = ('ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at','au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br','bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl','club','cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec','edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf','gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn','hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo','jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk','lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo','mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name','nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg','ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw','sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st','su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp','tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn','vu','wf','win','ws','xn','xxx','xyz','ye','yt','yu','za','zm','zw', 'moe')
 
-allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i','li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul','marquee','a','span','ruby','rp','rt','spoiler','img','lite-youtube','video','source')
+allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i','li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul','marquee','a','span','ruby','rp','rt','spoiler',)
+
+if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+	allowed_tags += ('img', 'lite-youtube', 'video', 'source',)
 
 def allowed_attributes(tag, name, value):
 
@@ -132,16 +135,26 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
 	signal.signal(signal.SIGALRM, handler)
 	signal.alarm(1)
 
+	# double newlines, eg. hello\nworld becomes hello\n\nworld, which later becomes <p>hello</p><p>world</p>
 	sanitized = linefeeds_regex.sub(r'\1\n\n\2', sanitized)
 
-	sanitized = image_regex.sub(r'\1![](\2)\4', sanitized)
+	if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+		# turn eg. https://wikipedia.org/someimage.jpg into ![](https://wikipedia.org/someimage.jpg)
+		sanitized = image_regex.sub(r'\1![](\2)\4', sanitized)
 
+	# if image url in whitelist, do nothing
+	# eg. ![](https://wikipedia.org/someimage.jpg) turns into ![](https://wikipedia.org/someimage.jpg)
+	# but if not, then extract url
+	# eg ![](https://example.org/someimage.jpg) turns into https://example.org/someimage.jpg
 	sanitized = image_check_regex.sub(r'\1', sanitized)
 
+	# transform markdown into html
 	sanitized = markdown(sanitized)
 
+	# turn ~something~ or ~~something~~  into <del>something</del>
 	sanitized = strikethrough_regex.sub(r'<del>\1</del>', sanitized)
 
+	# remove left-to-right mark; remove zero width space; remove zero width no-break space; remove Cuneiform Numeric Sign Eight;
 	sanitized = sanitized.replace('‎','').replace('​','').replace("\ufeff", "").replace("𒐪","")
 
 	if alert:
@@ -181,13 +194,14 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
 
 	soup = BeautifulSoup(sanitized, 'lxml')
 
-	for tag in soup.find_all("img"):
-		if tag.get("src") and not tag["src"].startswith('/pp/'):
-			tag["loading"] = "lazy"
-			tag["data-src"] = tag["src"]
-			tag["src"] = "/assets/images/loading.webp"
-			tag['alt'] = f'![]({tag["data-src"]})'
-			tag['referrerpolicy'] = "no-referrer"
+	if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+		for tag in soup.find_all("img"):
+			if tag.get("src") and not tag["src"].startswith('/pp/'):
+				tag["loading"] = "lazy"
+				tag["data-src"] = tag["src"]
+				tag["src"] = "/assets/images/loading.webp"
+				tag['alt'] = f'![]({tag["data-src"]})'
+				tag['referrerpolicy'] = "no-referrer"
 
 	for tag in soup.find_all("a"):
 		if tag.get("href") and fishylinks_regex.fullmatch(str(tag.string)):
@@ -229,22 +243,24 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
 
 	if "https://youtube.com/watch?v=" in sanitized: sanitized = sanitized.replace("?t=", "&t=")
 
-	captured = []
-	for i in youtube_regex.finditer(sanitized):
-		if i.group(0) in captured: continue
-		captured.append(i.group(0))
+	if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+		captured = []
+		for i in youtube_regex.finditer(sanitized):
+			if i.group(0) in captured: continue
+			captured.append(i.group(0))
 
-		params = parse_qs(urlparse(i.group(2).replace('&amp;','&')).query)
-		t = params.get('t', params.get('start', [0]))[0]
-		if isinstance(t, str): t = t.replace('s','')
+			params = parse_qs(urlparse(i.group(2).replace('&amp;','&')).query)
+			t = params.get('t', params.get('start', [0]))[0]
+			if isinstance(t, str): t = t.replace('s','')
 
-		htmlsource = f'{i.group(1)}<lite-youtube videoid="{i.group(3)}" params="autoplay=1&modestbranding=1'
-		if t: htmlsource += f'&start={t}'
-		htmlsource += '"></lite-youtube>'
+			htmlsource = f'{i.group(1)}<lite-youtube videoid="{i.group(3)}" params="autoplay=1&modestbranding=1'
+			if t: htmlsource += f'&start={t}'
+			htmlsource += '"></lite-youtube>'
 
-		sanitized = sanitized.replace(i.group(0), htmlsource)
+			sanitized = sanitized.replace(i.group(0), htmlsource)
 
-	sanitized = video_sub_regex.sub(r'\1<video controls preload="none"><source src="\2"></video>', sanitized)
+	if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+		sanitized = video_sub_regex.sub(r'\1<video controls preload="none"><source src="\2"></video>', sanitized)
 
 	if comment:
 		for marsey in g.db.query(Marsey).filter(Marsey.name.in_(marseys_used)).all():
@@ -264,7 +280,8 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
 								attributes=allowed_attributes,
 								protocols=['http', 'https'],
 								styles=['color', 'background-color', 'font-weight', 'text-align'],
-								filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[callback], url_re=url_re)]
+								filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[callback], url_re=url_re)],
+								strip=True,
 								).clean(sanitized)
 
 
diff --git a/files/routes/comments.py b/files/routes/comments.py
index 70771d6a9..62f077750 100644
--- a/files/routes/comments.py
+++ b/files/routes/comments.py
@@ -232,7 +232,10 @@ def api_comment(v):
 							requests.post(f'https://api.cloudflare.com/client/v4/zones/{CF_ZONE}/purge_cache', headers=CF_HEADERS, data={'files': [f"https://{request.host}/assets/images/badges/{badge.id}.webp"]}, timeout=5)
 						except Exception as e:
 							return {"error": str(e)}, 400
-				body += f"\n\n![]({image})"
+				if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+					body += f"\n\n![]({image})"
+				else:
+					body += f'\n\n<a href="{image}">{image}</a>'
 			elif file.content_type.startswith('video/'):
 				file.save("video.mp4")
 				with open("video.mp4", 'rb') as f:
@@ -244,7 +247,10 @@ def api_comment(v):
 						if error == 'File exceeds max duration': error += ' (60 seconds)'
 						return {"error": error}, 400
 				if url.endswith('.'): url += 'mp4'
-				body += f"\n\n{url}"
+				if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+					body += f"\n\n{url}"
+				else:
+					body += f'\n\n<a href="{url}">{url}</a>'
 			else: return {"error": "Image/Video files only"}, 400
 
 	body_html = sanitize(body, comment=True)
diff --git a/files/routes/posts.py b/files/routes/posts.py
index 0c6ef2e0e..0ea9c9d10 100644
--- a/files/routes/posts.py
+++ b/files/routes/posts.py
@@ -457,7 +457,10 @@ def edit_post(pid, v):
 				name = f'/images/{time.time()}'.replace('.','') + '.webp'
 				file.save(name)
 				url = process_image(name)
-				body += f"\n\n![]({url})"
+				if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+					body += f"\n\n![]({url})"
+				else:
+					body += f'\n\n<a href="{url}">{url}</a>'
 			elif file.content_type.startswith('video/'):
 				file.save("video.mp4")
 				with open("video.mp4", 'rb') as f:
@@ -469,7 +472,10 @@ def edit_post(pid, v):
 						if error == 'File exceeds max duration': error += ' (60 seconds)'
 						return {"error": error}, 400
 				if url.endswith('.'): url += 'mp4'
-				body += f"\n\n{url}"
+				if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+					body += f"\n\n{url}"  # bare URL, as before this patch and as in comments.py; ![]() is image syntax
+				else:
+					body += f'\n\n<a href="{url}">{url}</a>'
 			else: return {"error": "Image/Video files only"}, 400
 
 	body_html = sanitize(body, edit=True)
@@ -902,7 +908,11 @@ def submit_post(v, sub=None):
 			if file.content_type.startswith('image/'):
 				name = f'/images/{time.time()}'.replace('.','') + '.webp'
 				file.save(name)
-				body += f"\n\n![]({process_image(name)})"
+				image = process_image(name)
+				if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+					body += f"\n\n![]({image})"
+				else:
+					body += f'\n\n<a href="{image}">{image}</a>'
 			elif file.content_type.startswith('video/'):
 				file.save("video.mp4")
 				with open("video.mp4", 'rb') as f:
@@ -914,7 +924,10 @@ def submit_post(v, sub=None):
 						if err == 'File exceeds max duration': err += ' (60 seconds)'
 						return error(err)
 				if url.endswith('.'): url += 'mp4'
-				body += f"\n\n{url}"
+				if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
+					body += f"\n\n{url}"  # bare URL, as before this patch and as in comments.py; ![]() is image syntax
+				else:
+					body += f'\n\n<a href="{url}">{url}</a>'
 			else:
 				return error("Image/Video files only.")
 
diff --git a/files/templates/comments.html b/files/templates/comments.html
index bec90bc97..031b68854 100644
--- a/files/templates/comments.html
+++ b/files/templates/comments.html
@@ -841,7 +841,7 @@
 	{% if v %}
 		<script src="/assets/js/vendor/purify.min.js?v=251"></script>
 		<script src="/assets/js/vendor/marked.min.js?v=251"></script>
-		<script src="/assets/js/marked.custom.js?v=251"></script>
+		<script src="/assets/js/marked.custom.js?v=252"></script>
 		<script src="/assets/js/comments_v.js?v=267"></script>
 		<script src="/assets/js/award_modal.js?v=1"></script>
 	{% endif %}
diff --git a/files/templates/formatting.html b/files/templates/formatting.html
index fa8d7897f..592b555a5 100644
--- a/files/templates/formatting.html
+++ b/files/templates/formatting.html
@@ -69,17 +69,17 @@ Text 2
 		<tr>
 			<td>Images</td>
 			<td>https://i.imgur.com/SwVuagI_d.webp</td>
-			<td><img loading="lazy" alt="example image" referrerpolicy="no-referrer" src="https://i.imgur.com/SwVuagI_d.webp"></td>
+			<td><a href="https://i.imgur.com/SwVuagI_d.webp">https://i.imgur.com/SwVuagI_d.webp</a></td>
 		</tr>
 		<tr>
 			<td>Youtube Videos</td>
 			<td>https://youtube.com/watch?v=3Hecr51ByE4</td>
-			<td><lite-youtube videoid="3Hecr51ByE4" params="autoplay=1&modestbranding=1"></lite-youtube></td>
+			<td><a href="https://youtube.com/watch?v=3Hecr51ByE4">https://youtube.com/watch?v=3Hecr51ByE4</a></td>
 		</tr>
 		<tr>
 			<td>Video Files</td>
 			<td>https://files.catbox.moe/v4om92.mp4</td>
-			<td><video controls preload="none" class="vid"><source referrerpolicy="no-referrer" src="https://files.catbox.moe/v4om92.mp4" type="video/mp4"></video></td>
+			<td><a href="https://files.catbox.moe/v4om92.mp4">https://files.catbox.moe/v4om92.mp4</a></td>
 		</tr>
 		<tr>
 			<td>Poll Options (can select multiple options)</td>
@@ -458,7 +458,7 @@ line breaks
 				&lt;img referrerpolicy="no-referrer" src="https://i.imgur.com/SwVuagI_d.webp" width="200"&gt;
 			</td>
 			<td>
-				<img loading="lazy" alt="example image" referrerpolicy="no-referrer" src="https://i.imgur.com/SwVuagI_d.webp" width="200">
+				Nothing!
 			</td>
 		</tr>
 	</tbody>
diff --git a/files/templates/gif_modal.html b/files/templates/gif_modal.html
index b2f056602..170d5f790 100644
--- a/files/templates/gif_modal.html
+++ b/files/templates/gif_modal.html
@@ -26,4 +26,4 @@
 	</div>
 </div>
 
-<script src="/assets/js/gif_modal.js?v=244"></script>
+<script src="/assets/js/gif_modal.js?v=245"></script>
diff --git a/files/templates/submit.html b/files/templates/submit.html
index 91e4651e5..6faf70c74 100644
--- a/files/templates/submit.html
+++ b/files/templates/submit.html
@@ -171,7 +171,7 @@
 
 		<script src="/assets/js/vendor/purify.min.js?v=251"></script>
 		<script src="/assets/js/vendor/marked.min.js?v=251"></script>
-		<script src="/assets/js/marked.custom.js?v=251"></script>
+		<script src="/assets/js/marked.custom.js?v=252"></script>
 		<script src="/assets/js/formatting.js?v=240"></script>
 		<script src="/assets/js/submit.js?v=255"></script>
 
diff --git a/files/templates/userpage.html b/files/templates/userpage.html
index bf9bf80c5..0aeafa1f4 100644
--- a/files/templates/userpage.html
+++ b/files/templates/userpage.html
@@ -698,7 +698,7 @@
 {% endif %}
 
 <script src="/assets/js/vendor/purify.min.js?v=251"></script>
-<script src="/assets/js/vendor/marked.min.js?v=251"></script>
+<script src="/assets/js/vendor/marked.min.js?v=252"></script>
 <script src="/assets/js/marked.custom.js?v=251"></script>
 
 {% endblock %}