139: Disabled multimedia embedding
This change disables multimedia embedding: - In comments and comment replies. - In new submissions. - In comment & submission preview. And it's all toggle-able via an envvar, except for the JS bits, but I linked those to the github issue, so they should be easy to find in the future. The way it works is: - removes markdown image/video syntax, eg. `![](https://example.org/someimage.jpg)` into `https://example.org/someimage.jpg` - changes link text into anchors, eg. `https://example.org/someimage.jpg` into `[https://example.org/someimage.jpg](https://example.org/someimage.jpg)` - removes html img/video/audio tags, eg. `<img href="https://example.org/someimage.jpg" />` into `` (nothing) - when embedding gifs via the giphy modal in "new submission", it will insert only an anchor to the gif - when attaching an image, it will upload the image, then add only an anchor to the post/comment body I tested this manually, but I'm not sure if I got all the test cases. What I checked was: - create comment w/ image/video/audio media using markdown -> success - create comment reply w/ image/video/audio media using markdown -> success - create comment w/ link to img/imgur/youtube/audio -> success - create comment w/ attachment -> success - create comment reply w/ attachment -> success - create comment w/ img/video tag -> success - create comment reply w/ image/video tag -> success - create post submission w/ image/video/media using markdown -> success - create post submission w/ link to img/imgur/youtube/audio -> success - create post submission w/ attachment -> success - create post submission w/ giphy gif -> success Also, updated the formatting page. Co-authored-by: Ben Rog-Wilhelm <zorba-github@pavlovian.net>
This commit is contained in:
parent
8463a9ebbe
commit
cbcc2aac6f
12 changed files with 84 additions and 41 deletions
|
@ -13,9 +13,12 @@ import time
|
|||
import requests
|
||||
from files.__main__ import app
|
||||
|
||||
TLDS = ('ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at','au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br','bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl','club','cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec','edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf','gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn','hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo','jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk','lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo','mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name','nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg','ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw','sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st','su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp','tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn','vu','wf','win','ws','xn','xxx','xyz','ye','yt','yu','za','zm','zw')
|
||||
TLDS = ('ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at','au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br','bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl','club','cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec','edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf','gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn','hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo','jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk','lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo','mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name','nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg','ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw','sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st','su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp','tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn','vu','wf','win','ws','xn','xxx','xyz','ye','yt','yu','za','zm','zw', 'moe')
|
||||
|
||||
allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i','li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul','marquee','a','span','ruby','rp','rt','spoiler','img','lite-youtube','video','source')
|
||||
allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i','li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul','marquee','a','span','ruby','rp','rt','spoiler',)
|
||||
|
||||
if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
|
||||
allowed_tags += ('img', 'lite-youtube', 'video', 'source',)
|
||||
|
||||
def allowed_attributes(tag, name, value):
|
||||
|
||||
|
@ -132,16 +135,26 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
|
|||
signal.signal(signal.SIGALRM, handler)
|
||||
signal.alarm(1)
|
||||
|
||||
# double newlines, eg. hello\nworld becomes hello\n\nworld, which later becomes <p>hello</p><p>world</p>
|
||||
sanitized = linefeeds_regex.sub(r'\1\n\n\2', sanitized)
|
||||
|
||||
sanitized = image_regex.sub(r'\1\4', sanitized)
|
||||
if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
|
||||
# turn eg. https://wikipedia.org/someimage.jpg into ![](https://wikipedia.org/someimage.jpg)
|
||||
sanitized = image_regex.sub(r'\1\4', sanitized)
|
||||
|
||||
# if image url in whitelist, do nothing
|
||||
# eg. ![](https://<whitelisted-host>/someimage.jpg) turns into ![](https://<whitelisted-host>/someimage.jpg)
|
||||
# but if not, then extract url
|
||||
# eg. ![](https://example.org/someimage.jpg) turns into https://example.org/someimage.jpg
|
||||
sanitized = image_check_regex.sub(r'\1', sanitized)
|
||||
|
||||
# transform markdown into html
|
||||
sanitized = markdown(sanitized)
|
||||
|
||||
# turn ~something~ or ~~something~~ into <del>something</del>
|
||||
sanitized = strikethrough_regex.sub(r'<del>\1</del>', sanitized)
|
||||
|
||||
# remove left-to-right mark; remove zero width space; remove zero width no-break space; remove Cuneiform Numeric Sign Eight;
|
||||
sanitized = sanitized.replace('','').replace('','').replace("\ufeff", "").replace("𒐪","")
|
||||
|
||||
if alert:
|
||||
|
@ -181,13 +194,14 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
|
|||
|
||||
soup = BeautifulSoup(sanitized, 'lxml')
|
||||
|
||||
for tag in soup.find_all("img"):
|
||||
if tag.get("src") and not tag["src"].startswith('/pp/'):
|
||||
tag["loading"] = "lazy"
|
||||
tag["data-src"] = tag["src"]
|
||||
tag["src"] = "/assets/images/loading.webp"
|
||||
tag['alt'] = f''
|
||||
tag['referrerpolicy'] = "no-referrer"
|
||||
if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
|
||||
for tag in soup.find_all("img"):
|
||||
if tag.get("src") and not tag["src"].startswith('/pp/'):
|
||||
tag["loading"] = "lazy"
|
||||
tag["data-src"] = tag["src"]
|
||||
tag["src"] = "/assets/images/loading.webp"
|
||||
tag['alt'] = f''
|
||||
tag['referrerpolicy'] = "no-referrer"
|
||||
|
||||
for tag in soup.find_all("a"):
|
||||
if tag.get("href") and fishylinks_regex.fullmatch(str(tag.string)):
|
||||
|
@ -229,22 +243,24 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
|
|||
|
||||
if "https://youtube.com/watch?v=" in sanitized: sanitized = sanitized.replace("?t=", "&t=")
|
||||
|
||||
captured = []
|
||||
for i in youtube_regex.finditer(sanitized):
|
||||
if i.group(0) in captured: continue
|
||||
captured.append(i.group(0))
|
||||
if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
|
||||
captured = []
|
||||
for i in youtube_regex.finditer(sanitized):
|
||||
if i.group(0) in captured: continue
|
||||
captured.append(i.group(0))
|
||||
|
||||
params = parse_qs(urlparse(i.group(2).replace('&','&')).query)
|
||||
t = params.get('t', params.get('start', [0]))[0]
|
||||
if isinstance(t, str): t = t.replace('s','')
|
||||
params = parse_qs(urlparse(i.group(2).replace('&','&')).query)
|
||||
t = params.get('t', params.get('start', [0]))[0]
|
||||
if isinstance(t, str): t = t.replace('s','')
|
||||
|
||||
htmlsource = f'{i.group(1)}<lite-youtube videoid="{i.group(3)}" params="autoplay=1&modestbranding=1'
|
||||
if t: htmlsource += f'&start={t}'
|
||||
htmlsource += '"></lite-youtube>'
|
||||
htmlsource = f'{i.group(1)}<lite-youtube videoid="{i.group(3)}" params="autoplay=1&modestbranding=1'
|
||||
if t: htmlsource += f'&start={t}'
|
||||
htmlsource += '"></lite-youtube>'
|
||||
|
||||
sanitized = sanitized.replace(i.group(0), htmlsource)
|
||||
sanitized = sanitized.replace(i.group(0), htmlsource)
|
||||
|
||||
sanitized = video_sub_regex.sub(r'\1<video controls preload="none"><source src="\2"></video>', sanitized)
|
||||
if app.config['MULTIMEDIA_EMBEDDING_ENABLED']:
|
||||
sanitized = video_sub_regex.sub(r'\1<video controls preload="none"><source src="\2"></video>', sanitized)
|
||||
|
||||
if comment:
|
||||
for marsey in g.db.query(Marsey).filter(Marsey.name.in_(marseys_used)).all():
|
||||
|
@ -264,7 +280,8 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
|
|||
attributes=allowed_attributes,
|
||||
protocols=['http', 'https'],
|
||||
styles=['color', 'background-color', 'font-weight', 'text-align'],
|
||||
filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[callback], url_re=url_re)]
|
||||
filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[callback], url_re=url_re)],
|
||||
strip=True,
|
||||
).clean(sanitized)
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue