From cbcc2aac6ff7d4a3798ceb5dde290b4983e46a5a Mon Sep 17 00:00:00 2001 From: painejohn <109989267+painejohn@users.noreply.github.com> Date: Sun, 7 Aug 2022 02:30:47 -0400 Subject: [PATCH] 139: Disabled multimedia embedding This change disables multimedia embedding: - In comments and comment replies. - In new submissions. - In comment & submission previews. And it's all toggle-able via an envvar, except for the JS bits, but I linked those to the github issue, so they should be easy to find in the future. The way it works is: - removes markdown image/video syntax, eg. `![](https://example.org/someimage.jpg)` into `` - changes link text into anchors, eg. `https://example.org/someimage.jpg` into `[https://example.org/someimage.jpg](https://example.org/someimage.jpg)` - removes html img/video/audio tags, eg. `<img src="https://example.org/someimage.jpg">` into `` - when embedding gifs via the giphy modal in "new submission", it will insert only an anchor to the gif - when attaching an image, it will upload the image, then add only an anchor to the post/comment body I tested this manually, but I'm not sure if I got all the test cases. What I checked was: - create comment w/ image/video/audio media using markdown -> success - create comment reply w/ image/video/audio media using markdown -> success - create comment w/ link to img/imgur/youtube/audio -> success - create comment w/ attachment -> success - create comment reply w/ attachment -> success - create comment w/ img/video tag -> success - create comment reply w/ image/video tag -> success - create post submission w/ image/video/media using markdown -> success - create post submission w/ link to img/imgur/youtube/audio -> success - create post submission w/ attachment -> success - create post submission w/ giphy gif -> success Also, updated the formatting page. 
Co-authored-by: Ben Rog-Wilhelm --- env | 1 + files/__main__.py | 1 + files/assets/js/gif_modal.js | 7 +++- files/assets/js/marked.custom.js | 6 ++- files/helpers/sanitize.py | 63 ++++++++++++++++++++------------ files/routes/comments.py | 10 ++++- files/routes/posts.py | 21 +++++++++-- files/templates/comments.html | 2 +- files/templates/formatting.html | 8 ++-- files/templates/gif_modal.html | 2 +- files/templates/submit.html | 2 +- files/templates/userpage.html | 2 +- 12 files changed, 84 insertions(+), 41 deletions(-) diff --git a/env b/env index 0be062740..60295303f 100644 --- a/env +++ b/env @@ -34,3 +34,4 @@ CF_KEY=blahblahblah CF_ZONE=blahblahblah DEBIAN_FRONTEND=noninteractive MENTION_LIMIT=100 +MULTIMEDIA_EMBEDDING_ENABLED=False diff --git a/files/__main__.py b/files/__main__.py index 9cc2a4cfc..762f0de8d 100644 --- a/files/__main__.py +++ b/files/__main__.py @@ -61,6 +61,7 @@ app.config['DESCRIPTION'] = environ.get("DESCRIPTION", "DESCRIPTION GOES HERE"). app.config['SETTINGS'] = {} app.config['SQLALCHEMY_DATABASE_URI'] = app.config['DATABASE_URL'] app.config['MENTION_LIMIT'] = int(environ.get('MENTION_LIMIT', 100)) +app.config['MULTIMEDIA_EMBEDDING_ENABLED'] = environ.get('MULTIMEDIA_EMBEDDING_ENABLED', "false").lower() == "true" r=redis.Redis(host=environ.get("REDIS_URL", "redis://localhost"), decode_responses=True, ssl_cert_reqs=None) diff --git a/files/assets/js/gif_modal.js b/files/assets/js/gif_modal.js index ba6d57470..94746bf09 100644 --- a/files/assets/js/gif_modal.js +++ b/files/assets/js/gif_modal.js @@ -43,7 +43,7 @@ async function getGif(searchTerm) { let response = await fetch("/giphy?searchTerm=" + searchTerm + "&limit=48"); let data = await response.json() - var max = data.length - 1 + var max = data.data?.length === undefined ? 
0 : data.data.length - 1 data = data.data var gifURL = []; @@ -70,7 +70,10 @@ async function getGif(searchTerm) { function insertGIF(url,form) { - var gif = "\n\n![](" + url +")"; + // https://github.com/themotte/rDrama/issues/139 + // when MULTIMEDIA_EMBEDDING_ENABLED == False, we want to insert an anchor, NOT an img + //var gif = "\n\n![](" + url +")"; + var gif = '\n\n[' + url + '](' + url + ')'; var commentBox = document.getElementById(form); diff --git a/files/assets/js/marked.custom.js b/files/assets/js/marked.custom.js index 53a83b9ef..4f8d9df6f 100644 --- a/files/assets/js/marked.custom.js +++ b/files/assets/js/marked.custom.js @@ -65,7 +65,9 @@ function markdown(first, second) { dest.removeChild(dest.children[i]); } const html = marked.parse(input.value); - dest.innerHTML = DOMPurify.sanitize(html); + // https://github.com/themotte/rDrama/issues/139 + // Remove disallowed tags completely. + dest.innerHTML = DOMPurify.sanitize(html, {FORBID_TAGS: ['img', 'video', 'source']}); } } @@ -87,4 +89,4 @@ function charLimit(form, content) { text.innerText = length + ' / ' + maxLength; } -setTimeout(() => markdown('post-text','preview'), 200); \ No newline at end of file +setTimeout(() => markdown('post-text','preview'), 200); diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 8fb933a16..a56249e1d 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -13,9 +13,12 @@ import time import requests from files.__main__ import app -TLDS = 
('ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at','au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br','bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl','club','cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec','edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf','gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn','hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo','jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk','lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo','mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name','nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg','ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw','sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st','su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp','tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn','vu','wf','win','ws','xn','xxx','xyz','ye','yt','yu','za','zm','zw') +TLDS = 
('ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at','au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br','bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl','club','cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec','edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf','gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn','hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo','jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk','lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo','mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name','nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg','ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw','sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st','su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp','tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn','vu','wf','win','ws','xn','xxx','xyz','ye','yt','yu','za','zm','zw', 'moe') -allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i','li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul','marquee','a','span','ruby','rp','rt','spoiler','img','lite-youtube','video','source') +allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i','li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul','marquee','a','span','ruby','rp','rt','spoiler',) + +if 
app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + allowed_tags += ('img', 'lite-youtube', 'video', 'source',) def allowed_attributes(tag, name, value): @@ -132,16 +135,26 @@ def sanitize(sanitized, alert=False, comment=False, edit=False): signal.signal(signal.SIGALRM, handler) signal.alarm(1) + # double newlines, eg. hello\nworld becomes hello\n\nworld, which later becomes

<p>hello</p><p>world</p>

sanitized = linefeeds_regex.sub(r'\1\n\n\2', sanitized) - sanitized = image_regex.sub(r'\1![](\2)\4', sanitized) + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + # turn eg. https://wikipedia.org/someimage.jpg into ![](https://wikipedia.org/someimage.jpg) + sanitized = image_regex.sub(r'\1![](\2)\4', sanitized) + # if image url in whitelist, do nothing + # eg. ![](https://wikipedia.org/someimage.jpg) turns into ![](https://wikipedia.org/someimage.jpg) + # but if not, then extract url + # eg ![](https://example.org/someimage.jpg) turns into https://example.org/someimage.jpg sanitized = image_check_regex.sub(r'\1', sanitized) + # transform markdown into html sanitized = markdown(sanitized) + # turn ~something~ or ~~something~~ into something sanitized = strikethrough_regex.sub(r'\1', sanitized) + # remove left-to-right mark; remove zero width space; remove zero width no-break space; remove Cuneiform Numeric Sign Eight; sanitized = sanitized.replace('‎','').replace('​','').replace("\ufeff", "").replace("𒐪","") if alert: @@ -181,13 +194,14 @@ def sanitize(sanitized, alert=False, comment=False, edit=False): soup = BeautifulSoup(sanitized, 'lxml') - for tag in soup.find_all("img"): - if tag.get("src") and not tag["src"].startswith('/pp/'): - tag["loading"] = "lazy" - tag["data-src"] = tag["src"] - tag["src"] = "/assets/images/loading.webp" - tag['alt'] = f'![]({tag["data-src"]})' - tag['referrerpolicy'] = "no-referrer" + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + for tag in soup.find_all("img"): + if tag.get("src") and not tag["src"].startswith('/pp/'): + tag["loading"] = "lazy" + tag["data-src"] = tag["src"] + tag["src"] = "/assets/images/loading.webp" + tag['alt'] = f'![]({tag["data-src"]})' + tag['referrerpolicy'] = "no-referrer" for tag in soup.find_all("a"): if tag.get("href") and fishylinks_regex.fullmatch(str(tag.string)): @@ -229,22 +243,24 @@ def sanitize(sanitized, alert=False, comment=False, edit=False): if "https://youtube.com/watch?v=" in sanitized: 
sanitized = sanitized.replace("?t=", "&t=") - captured = [] - for i in youtube_regex.finditer(sanitized): - if i.group(0) in captured: continue - captured.append(i.group(0)) + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + captured = [] + for i in youtube_regex.finditer(sanitized): + if i.group(0) in captured: continue + captured.append(i.group(0)) - params = parse_qs(urlparse(i.group(2).replace('&','&')).query) - t = params.get('t', params.get('start', [0]))[0] - if isinstance(t, str): t = t.replace('s','') + params = parse_qs(urlparse(i.group(2).replace('&','&')).query) + t = params.get('t', params.get('start', [0]))[0] + if isinstance(t, str): t = t.replace('s','') - htmlsource = f'{i.group(1)}' + htmlsource = f'{i.group(1)}' - sanitized = sanitized.replace(i.group(0), htmlsource) + sanitized = sanitized.replace(i.group(0), htmlsource) - sanitized = video_sub_regex.sub(r'\1', sanitized) + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + sanitized = video_sub_regex.sub(r'\1', sanitized) if comment: for marsey in g.db.query(Marsey).filter(Marsey.name.in_(marseys_used)).all(): @@ -264,7 +280,8 @@ def sanitize(sanitized, alert=False, comment=False, edit=False): attributes=allowed_attributes, protocols=['http', 'https'], styles=['color', 'background-color', 'font-weight', 'text-align'], - filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[callback], url_re=url_re)] + filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[callback], url_re=url_re)], + strip=True, ).clean(sanitized) diff --git a/files/routes/comments.py b/files/routes/comments.py index 70771d6a9..62f077750 100644 --- a/files/routes/comments.py +++ b/files/routes/comments.py @@ -232,7 +232,10 @@ def api_comment(v): requests.post(f'https://api.cloudflare.com/client/v4/zones/{CF_ZONE}/purge_cache', headers=CF_HEADERS, data={'files': [f"https://{request.host}/assets/images/badges/{badge.id}.webp"]}, timeout=5) except Exception as e: return {"error": 
str(e)}, 400 - body += f"\n\n![]({image})" + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + body += f"\n\n![]({image})" + else: + body += f'\n\n{image}' elif file.content_type.startswith('video/'): file.save("video.mp4") with open("video.mp4", 'rb') as f: @@ -244,7 +247,10 @@ def api_comment(v): if error == 'File exceeds max duration': error += ' (60 seconds)' return {"error": error}, 400 if url.endswith('.'): url += 'mp4' - body += f"\n\n{url}" + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + body += f"\n\n{url}" + else: + body += f'\n\n{url}' else: return {"error": "Image/Video files only"}, 400 body_html = sanitize(body, comment=True) diff --git a/files/routes/posts.py b/files/routes/posts.py index 0c6ef2e0e..0ea9c9d10 100644 --- a/files/routes/posts.py +++ b/files/routes/posts.py @@ -457,7 +457,10 @@ def edit_post(pid, v): name = f'/images/{time.time()}'.replace('.','') + '.webp' file.save(name) url = process_image(name) - body += f"\n\n![]({url})" + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + body += f"\n\n![]({url})" + else: + body += f'\n\n{url}' elif file.content_type.startswith('video/'): file.save("video.mp4") with open("video.mp4", 'rb') as f: @@ -469,7 +472,10 @@ def edit_post(pid, v): if error == 'File exceeds max duration': error += ' (60 seconds)' return {"error": error}, 400 if url.endswith('.'): url += 'mp4' - body += f"\n\n{url}" + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + body += f"\n\n![]({url})" + else: + body += f'\n\n{url}' else: return {"error": "Image/Video files only"}, 400 body_html = sanitize(body, edit=True) @@ -902,7 +908,11 @@ def submit_post(v, sub=None): if file.content_type.startswith('image/'): name = f'/images/{time.time()}'.replace('.','') + '.webp' file.save(name) - body += f"\n\n![]({process_image(name)})" + image = process_image(name) + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + body += f"\n\n![]({image})" + else: + body += f'\n\n{image}' elif file.content_type.startswith('video/'): 
file.save("video.mp4") with open("video.mp4", 'rb') as f: @@ -914,7 +924,10 @@ def submit_post(v, sub=None): if err == 'File exceeds max duration': err += ' (60 seconds)' return error(err) if url.endswith('.'): url += 'mp4' - body += f"\n\n{url}" + if app.config['MULTIMEDIA_EMBEDDING_ENABLED']: + body += f"\n\n![]({url})" + else: + body += f'\n\n{url}' else: return error("Image/Video files only.") diff --git a/files/templates/comments.html b/files/templates/comments.html index bec90bc97..031b68854 100644 --- a/files/templates/comments.html +++ b/files/templates/comments.html @@ -841,7 +841,7 @@ {% if v %} - + {% endif %} diff --git a/files/templates/formatting.html b/files/templates/formatting.html index fa8d7897f..592b555a5 100644 --- a/files/templates/formatting.html +++ b/files/templates/formatting.html @@ -69,17 +69,17 @@ Text 2 Images https://i.imgur.com/SwVuagI_d.webp - example image + https://i.imgur.com/SwVuagI_d.webp Youtube Videos https://youtube.com/watch?v=3Hecr51ByE4 - + https://youtube.com/watch?v=3Hecr51ByE4 Video Files https://files.catbox.moe/v4om92.mp4 - + https://files.catbox.moe/v4om92.mp4 Poll Options (can select multiple options) @@ -458,7 +458,7 @@ line breaks <img referrerpolicy="no-referrer" src="https://i.imgur.com/SwVuagI_d.webp" width="200"> - example image + Nothing! diff --git a/files/templates/gif_modal.html b/files/templates/gif_modal.html index b2f056602..170d5f790 100644 --- a/files/templates/gif_modal.html +++ b/files/templates/gif_modal.html @@ -26,4 +26,4 @@ - + diff --git a/files/templates/submit.html b/files/templates/submit.html index 91e4651e5..6faf70c74 100644 --- a/files/templates/submit.html +++ b/files/templates/submit.html @@ -171,7 +171,7 @@ - + diff --git a/files/templates/userpage.html b/files/templates/userpage.html index bf9bf80c5..0aeafa1f4 100644 --- a/files/templates/userpage.html +++ b/files/templates/userpage.html @@ -698,7 +698,7 @@ {% endif %} - + {% endblock %}