rDrama/files/routes/posts.py
2023-04-03 03:30:57 -06:00

742 lines
20 KiB
Python

import sys
import time
import urllib.parse
from io import BytesIO
from urllib.parse import ParseResult, urlparse
import gevent
import requests
import werkzeug.wrappers
from PIL import Image as PILimage
from sqlalchemy.orm import Query
import files.helpers.validators as validators
from files.__main__ import app, db_session, limiter
from files.classes import *
from files.helpers.alerts import *
from files.helpers.caching import invalidate_cache
from files.helpers.config.const import *
from files.helpers.content import canonicalize_url2
from files.helpers.contentsorting import sort_objects
from files.helpers.media import process_image
from files.helpers.sanitize import *
from files.helpers.strings import sql_ilike_clean
from files.helpers.wrappers import *
from files.routes.importstar import *
# Browser-like request headers; passed as `headers=` by get_post_title when it
# fetches a remote page to read its <title>.
titleheaders = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Safari/537.36"
}
# Upper bound on a submission title's length; submit_post asserts it as an invariant.
MAX_TITLE_LENGTH = 500
# NOTE(review): not referenced in the visible part of this file; presumably the
# maximum accepted URL length -- confirm against the validators module.
MAX_URL_LENGTH = 2048
@app.post("/toggle_club/<pid>")
@auth_required
def toggle_club(pid, v):
    """Flip the club-only flag of a post.

    Only the post's author or a user whose admin level is at least 2 may do
    this; anyone else receives a 403. The change is committed and a message
    describing the resulting state is returned.
    """
    target = get_post(pid)

    if target.author_id != v.id and v.admin_level < 2:
        abort(403)

    target.club = not target.club
    g.db.add(target)
    g.db.commit()

    message = (
        "Post has been marked as club-only!"
        if target.club
        else "Post has been unmarked as club-only!"
    )
    return {"message": message}
@app.post("/publish/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def publish(pid, v):
    """Publish a private (draft) post.

    A post that is not private is reported as published without being
    touched. Otherwise only the author may proceed (403 for anyone else):
    the private flag is cleared, the creation timestamp is reset to the
    current time, and the client is redirected to the post's permalink.
    """
    draft = get_post(pid)

    if not draft.private:
        return {"message": "Post published!"}

    if draft.author_id != v.id:
        abort(403)

    draft.private = False
    # Publishing restarts the post's age from the moment it goes public.
    draft.created_utc = int(time.time())
    g.db.add(draft)
    draft.publish()
    g.db.commit()

    return redirect(draft.permalink)
@app.get("/submit")
@auth_required
def submit_get(v):
    """Render the blank submission form for the logged-in user."""
    page = render_template("submit.html", v=v)
    return page
@app.get("/post/<pid>")
@app.get("/post/<pid>/<anything>")
@auth_desired
def post_id(pid, anything=None, v=None):
    """Render a post together with the first page of its comment trees.

    Args:
        pid: id of the post, taken from the URL.
        anything: optional trailing URL segment; ignored.
        v: the logged-in user, or None for anonymous visitors.

    Behaviour:
        * Over-18 posts show the NSFW interstitial (or a 403 for API/XHR
          requests) unless the user or the session has opted in.
        * Club posts are restricted to users with paid dues and the author.
        * Top-level comments are taken in sorted order until the running
          total of comments plus their descendants reaches the configured
          page size; `offset` is set to 1 when the listing was cut short.
        * The view counter is incremented and committed.

    Returns the post's JSON for requests carrying an Authorization header,
    otherwise the rendered submission template.
    """
    post = get_post(pid, v=v)

    if post.over_18 and not (v and v.over_18) and session.get('over_18', 0) < int(time.time()):
        if request.headers.get("Authorization") or request.headers.get("xhr"):
            abort(403, "Must be 18+ to view")
        return render_template("errors/nsfw.html", v=v)

    default_sort = v.defaultsortingcomments if v else "new"
    sort = request.values.get("sort", default_sort)

    if post.club and not (v and (v.paid_dues or v.id == post.author_id)):
        abort(403)

    limit = app.config['RESULTS_PER_PAGE_COMMENTS']
    offset = 0

    top_comments = g.db.query(Comment.id, Comment.descendant_count).filter(
        Comment.parent_submission == post.id,
        Comment.level == 1,
    ).order_by(Comment.is_pinned.desc().nulls_last())
    top_comments = sort_objects(top_comments, sort, Comment)

    # Collect whole top-level trees until the page budget is used up.
    pg_top_comment_ids = []
    pg_comment_qty = 0
    for tc_id, tc_children_qty in top_comments.all():
        if pg_comment_qty >= limit:
            offset = 1
            break
        pg_comment_qty += tc_children_qty + 1
        pg_top_comment_ids.append(tc_id)

    def comment_tree_filter(q: Query) -> Query:
        return q.filter(Comment.top_comment_id.in_(pg_top_comment_ids))

    _, comment_tree = get_comment_trees_eager(comment_tree_filter, sort, v)
    post.replies = comment_tree[None]  # parent=None -> top-level comments
    ids = {c.id for c in post.replies}

    post.views += 1
    # Expiry on commit is switched off for this one commit; the flag is
    # restored in `finally` so a failed commit cannot leave it disabled.
    g.db.expire_on_commit = False
    try:
        g.db.add(post)
        g.db.commit()
    finally:
        g.db.expire_on_commit = True

    if request.headers.get("Authorization"):
        return post.json

    if post.is_banned and not (v and (v.admin_level > 1 or post.author_id == v.id)):
        template = "submission_banned.html"
    else:
        template = "submission.html"
    return render_template(template, v=v, p=post, ids=list(ids), sort=sort, render_replies=True, offset=offset)
@app.get("/viewmore/<pid>/<sort>/<offset>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_desired
def viewmore(v, pid, sort, offset):
    """Render the next page of top-level comment trees for a post.

    Args:
        v: the logged-in user, or None.
        pid: id of the post.
        sort: comment sort order name, forwarded to `sort_objects`.
        offset: page counter from the previous response; must be an integer.

    The `ids` request value is a comma-separated list of integer comment ids
    that are excluded from this page. A malformed `offset` or `ids` value
    yields a 400; club posts yield a 403 for users who are neither the author
    nor have paid dues.
    """
    post = get_post(pid, v=v)
    if post.club and not (v and (v.paid_dues or v.id == post.author_id)):
        abort(403)

    # Both values come straight from the client: reject anything non-numeric
    # (or a missing `ids`) with a 400 instead of an unhandled exception.
    try:
        offset_prev = int(offset)
        ids = {int(x) for x in request.values.get("ids").split(',')}
    except (AttributeError, TypeError, ValueError):
        abort(400)

    limit = app.config['RESULTS_PER_PAGE_COMMENTS']
    offset = 0

    # TODO: Unify with common post_id logic
    top_comments = g.db.query(Comment.id, Comment.descendant_count).filter(
        Comment.parent_submission == post.id,
        Comment.level == 1,
        Comment.id.notin_(ids),
        Comment.is_pinned == None,
    ).order_by(Comment.is_pinned.desc().nulls_last())

    if sort == "new":
        newest_created_utc = g.db.query(Comment.created_utc).filter(
            Comment.id.in_(ids),
            Comment.is_pinned == None,
        ).order_by(Comment.created_utc.desc()).limit(1).scalar()

        # Needs to be <=, not just <, to support seed_db data which has many identical
        # created_utc values. Shouldn't cause duplication in real data because of the
        # `NOT IN :ids` in top_comments.
        top_comments = top_comments.filter(Comment.created_utc <= newest_created_utc)

    top_comments = sort_objects(top_comments, sort, Comment)

    # Collect whole top-level trees until the page budget is used up.
    pg_top_comment_ids = []
    pg_comment_qty = 0
    for tc_id, tc_children_qty in top_comments.all():
        if pg_comment_qty >= limit:
            offset = offset_prev + 1
            break
        pg_comment_qty += tc_children_qty + 1
        pg_top_comment_ids.append(tc_id)

    def comment_tree_filter(q: Query) -> Query:
        return q.filter(Comment.top_comment_id.in_(pg_top_comment_ids))

    _, comment_tree = get_comment_trees_eager(comment_tree_filter, sort, v)
    comments = comment_tree[None]  # parent=None -> top-level comments
    ids |= {c.id for c in comments}

    return render_template("comments.html", v=v, comments=comments, p=post, ids=list(ids), render_replies=True, pid=pid, sort=sort, offset=offset, ajax=True)
@app.get("/morecomments/<cid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_desired
def morecomments(v, cid):
    """Render the direct replies of a comment.

    Args:
        v: the logged-in user, or None.
        cid: id of the parent comment; must be an integer (400 otherwise).

    Responds with 404 when no comment has that id. For logged-in users the
    replies are loaded together with the viewer's vote and block relations;
    anonymous viewers get the comment's replies via `Comment.replies`.
    """
    try:
        cid = int(cid)
    except (TypeError, ValueError):
        abort(400)

    # one_or_none() returns None for an unknown id; indexing it directly
    # would raise TypeError, so answer with a 404 instead.
    tc_row = g.db.query(Comment.top_comment_id).filter_by(id=cid).one_or_none()
    if tc_row is None:
        abort(404)
    tcid = tc_row[0]

    if v:
        votes = g.db.query(CommentVote).filter_by(user_id=v.id).subquery()
        blocking = v.blocking.subquery()
        blocked = v.blocked.subquery()

        rows = g.db.query(
            Comment,
            votes.c.vote_type,
            blocking.c.target_id,
            blocked.c.target_id,
        ).filter(Comment.top_comment_id == tcid, Comment.level > RENDER_DEPTH_LIMIT).join(
            votes,
            votes.c.comment_id == Comment.id,
            isouter=True
        ).join(
            blocking,
            blocking.c.target_id == Comment.author_id,
            isouter=True
        ).join(
            blocked,
            blocked.c.user_id == Comment.author_id,
            isouter=True
        )

        comments = []
        for comment, vote_type, blocking_id, blocked_id in rows.all():
            # Viewer-specific attributes are set on every loaded comment,
            # not only on the direct children that are returned.
            comment.voted = vote_type or 0
            comment.is_blocking = blocking_id or 0
            comment.is_blocked = blocked_id or 0
            if comment.parent_comment_id == cid:
                comments.append(comment)
    else:
        c = g.db.query(Comment).filter_by(id=cid).one_or_none()
        if c is None:
            abort(404)
        comments = c.replies(None)

    p = comments[0].post if comments else None

    return render_template("comments.html", v=v, comments=comments, p=p, render_replies=True, ajax=True)
@app.post("/edit_post/<int:pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def edit_post(pid, v):
    """Apply an edit to a post's title and/or body.

    Args:
        pid: id of the post being edited.
        v: the logged-in user.

    Responds with 403 when `v.can_edit` refuses, and with 400 when the
    submitted data fails validation or changes nothing. An author's edit sets
    `edited_utc` once the post is more than three minutes old; an edit by
    anyone else is recorded as an "edit_post" ModAction. Redirects to the
    post's permalink on success.
    """
    p = get_post(pid)
    if not v.can_edit(p):
        abort(403)

    # submit_post treats ValueError from the validator as a user error; do
    # the same here rather than letting it surface as an unhandled exception.
    try:
        validated_post: validators.ValidatedSubmissionLike = \
            validators.ValidatedSubmissionLike.from_flask_request(
                request,
                allow_embedding=MULTIMEDIA_EMBEDDING_ENABLED,
                allow_media_url_upload=False,
                embed_url_file_key="file",
                edit=True
            )
    except ValueError as e:
        abort(400, str(e))

    changed: bool = False

    if validated_post.title != p.title:
        p.title = validated_post.title
        p.title_html = validated_post.title_html
        changed = True

    if validated_post.body != p.body:
        p.body = validated_post.body
        p.body_html = validated_post.body_html
        changed = True

    if not changed:
        abort(400, "You need to change something")

    p.publish()

    if v.id == p.author_id:
        # Edits within the first three minutes are not stamped as edits.
        if int(time.time()) - p.created_utc > 60 * 3:
            p.edited_utc = int(time.time())
        g.db.add(p)
    else:
        ma = ModAction(
            kind="edit_post",
            user_id=v.id,
            target_submission_id=p.id
        )
        g.db.add(ma)

    g.db.commit()

    return redirect(p.permalink)
def archiveorg(url):
    """Ask the Wayback Machine to save `url`; best effort.

    Any request failure (connection error, timeout, invalid URL, ...) is
    ignored on purpose. Only `requests` errors are suppressed, so interpreter
    or greenlet shutdown signals are no longer swallowed.
    """
    try:
        requests.get(
            f'https://web.archive.org/save/{url}',
            headers={'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'},
            timeout=100,
        )
    except requests.RequestException:
        pass
def thumbnail_thread(pid):
    """Find, download and store a thumbnail for the link post `pid`.

    Opens its own database session. The post's URL is fetched; for an HTML
    page, candidate images are taken from a fixed list of <meta> tags and
    then from every <img src>, and the first one that downloads as a
    non-SVG image of at least 30x30 pixels is used. A URL that is itself an
    image is used directly. Images larger than 8 MiB are skipped. The chosen
    image is written under /images/ and passed to `process_image`, whose
    result is stored as `post.thumburl`.

    Every failure ends the task silently; the session is always closed.
    """
    def expand_url(post_url, fragment_url):
        """Turn a URL found in the page into an absolute https:// URL."""
        if fragment_url.startswith("https://"):
            return fragment_url
        if fragment_url.startswith("http://"):
            # Upgrade plain-http references to https.
            return f"https://{fragment_url[len('http://'):]}"
        if fragment_url.startswith('//'):
            return f"https:{fragment_url}"
        if fragment_url.startswith('/'):
            return f"https://{urlparse(post_url).netloc}{fragment_url}"
        separator = '' if post_url.endswith('/') else '/'
        return f"{post_url}{separator}{fragment_url}"

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Safari/537.36"}

    db = db_session()
    try:
        post = db.query(Submission).filter_by(id=pid).one_or_none()
        if not post or not post.url:
            # Retry once after a short wait if the post is not visible yet.
            time.sleep(5)
            post = db.query(Submission).filter_by(id=pid).one_or_none()
        if not post or not post.url:
            return

        fetch_url = post.url
        if fetch_url.startswith('/'):
            fetch_url = f"{SITE_FULL}{fetch_url}"

        # Background best-effort task: any fetch failure just ends it.
        try:
            x = requests.get(fetch_url, headers=headers, timeout=5, proxies=proxies)
        except Exception:
            return
        if x.status_code != 200:
            return

        content_type = x.headers.get("Content-Type", "")
        if content_type.startswith("text/html"):
            soup = BeautifulSoup(x.content, 'lxml')

            thumb_candidate_urls = []
            meta_tags = [
                "themotte:thumbnail",
                "twitter:image",
                "og:image",
                "thumbnail"
            ]
            for tag_name in meta_tags:
                tag = soup.find('meta', attrs={"name": tag_name, "content": True})
                if not tag:
                    tag = soup.find('meta', attrs={'property': tag_name, 'content': True})
                if tag:
                    # Resolve against the URL that was actually fetched so
                    # site-relative posts still get a host name.
                    thumb_candidate_urls.append(expand_url(fetch_url, tag['content']))

            for tag in soup.find_all("img", attrs={'src': True}):
                thumb_candidate_urls.append(expand_url(fetch_url, tag['src']))

            for url in thumb_candidate_urls:
                # Candidate URLs come from arbitrary HTML; skip any that
                # cannot be fetched or decoded.
                try:
                    image_req = requests.get(url, headers=headers, timeout=5, proxies=proxies)
                except Exception:
                    continue

                if image_req.status_code >= 400:
                    continue

                candidate_type = image_req.headers.get("Content-Type", "")
                if not candidate_type.startswith("image/"):
                    continue
                if candidate_type.startswith("image/svg"):
                    continue

                try:
                    image = PILimage.open(BytesIO(image_req.content))
                except Exception:
                    continue
                if image.width < 30 or image.height < 30:
                    continue

                break
            else:
                # No candidate qualified.
                return
        elif content_type.startswith("image/"):
            image_req = x
            try:
                PILimage.open(BytesIO(x.content))
            except Exception:
                return
        else:
            return

        # Measure the downloaded payload itself.
        if len(image_req.content) > 8 * 1024 * 1024:
            return

        name = f'/images/{time.time()}'.replace('.', '') + '.webp'
        with open(name, "wb") as file:
            for chunk in image_req.iter_content(1024):
                file.write(chunk)

        post.thumburl = process_image(name, resize=100)
        db.add(post)
        db.commit()
    finally:
        db.close()

    sys.stdout.flush()
    return
@app.post("/is_repost")
def api_is_repost():
    """Report whether a URL has already been submitted.

    Reads the `url` request value (400 when missing or empty), canonicalizes
    it, drops one trailing slash and looks for a non-deleted, non-banned
    submission whose URL matches case-insensitively. Returns the matching
    post's permalink, or an empty string when there is none.
    """
    submitted = request.values.get('url')
    if not submitted:
        abort(400)

    canonical = canonicalize_url2(submitted, httpsify=True).geturl()
    if canonical.endswith('/'):
        canonical = canonical[:-1]

    pattern = sql_ilike_clean(canonical)
    existing = g.db.query(Submission).filter(
        Submission.url.ilike(pattern),
        Submission.deleted_utc == 0,
        Submission.is_banned == False
    ).first()

    permalink = existing.permalink if existing else ''
    return {'permalink': permalink}
def _do_antispam_submission_check(v:User, validated:validators.ValidatedSubmissionLike):
    """Ban the user for a day if the new submission looks like repeated spam.

    Looks at the user's own submissions from the last 24 hours whose title
    (and, when the new post has a URL, whose URL) is within the configured
    similarity threshold of the new one. The allowed count comes from
    SPAM_SIMILAR_COUNT_THRESHOLD, tripled for accounts at least a week old
    and doubled for accounts at least a day old.

    Returns None when the count stays under the threshold. Otherwise the
    user is notified and banned for one day, every matching post is banned
    with a "ban_post" ModAction attributed to AUTOJANNY_ID, the session is
    committed and the request is aborted with 403.
    """
    now = int(time.time())
    cutoff = now - 60 * 60 * 24

    # NOTE(review): `<->` is presumably the pg_trgm distance operator -- confirm.
    similar_posts = g.db.query(Submission).filter(
        Submission.author_id == v.id,
        Submission.title.op('<->')(validated.title) < app.config["SPAM_SIMILARITY_THRESHOLD"],
        Submission.created_utc > cutoff
    ).all()

    if validated.url:
        similar_urls = g.db.query(Submission).filter(
            Submission.author_id == v.id,
            Submission.url.op('<->')(validated.url) < app.config["SPAM_URL_SIMILARITY_THRESHOLD"],
            Submission.created_utc > cutoff
        ).all()
    else:
        similar_urls = []

    threshold = app.config["SPAM_SIMILAR_COUNT_THRESHOLD"]
    if v.age_seconds >= (60 * 60 * 24 * 7):
        threshold *= 3
    elif v.age_seconds >= (60 * 60 * 24):
        threshold *= 2

    if max(len(similar_urls), len(similar_posts)) < threshold:
        return

    text = "Your account has been banned for **1 day** for the following reason:\n\n> Too much spam!"
    send_repeatable_notification(v.id, text)

    v.ban(reason="Spamming.", days=1)

    # A post can match on both title and URL; handle each post once so it
    # does not get two ModAction entries. Insertion order is preserved.
    flagged = {}
    for post in similar_posts + similar_urls:
        flagged.setdefault(post.id, post)

    for post in flagged.values():
        post.is_banned = True
        post.is_pinned = False
        post.ban_reason = "AutoJanny"
        g.db.add(post)
        ma = ModAction(
            user_id=AUTOJANNY_ID,
            target_submission_id=post.id,
            kind="ban_post",
            _note="spam"
        )
        g.db.add(ma)

    g.db.commit()
    abort(403)
def _execute_domain_ban_check(parsed_url:ParseResult):
    """Abort with 403 if the URL's host, or host plus path, is a banned domain.

    The bare network location is looked up first; the host-plus-path form is
    tried only when that finds nothing. Returns None when neither is banned.
    """
    host: str = parsed_url.netloc

    banned = get_domain(host) or get_domain(host + parsed_url.path)
    if not banned:
        return

    abort(403, f"Remove the {banned.domain} link from your post and try again. {banned.reason}")
def _duplicate_check(search_url:Optional[str]) -> Optional[werkzeug.wrappers.Response]:
    """Redirect to an existing live post with the same URL, if there is one.

    Returns None when `search_url` is empty, when no non-deleted, non-banned
    submission has that URL (compared case-insensitively), or when the site
    is 'localhost'.
    """
    if not search_url:
        return None

    existing = g.db.query(Submission).filter(
        func.lower(Submission.url) == search_url.lower(),
        Submission.deleted_utc == 0,
        Submission.is_banned == False
    ).first()

    if not existing or SITE == 'localhost':
        return None
    return redirect(existing.permalink)
def _duplicate_check2(
        user_id:int,
        validated_post:validators.ValidatedSubmissionLike) -> Optional[werkzeug.wrappers.Response]:
    """Redirect to the user's own identical, non-deleted post, if one exists.

    A post counts as identical when title, URL and body all match exactly.
    Returns None when there is no such post or when the site is 'localhost'.
    """
    # first() rather than one_or_none(): several identical posts can exist
    # (duplicates are let through on localhost), and one_or_none() would
    # raise MultipleResultsFound in that case.
    dup = g.db.query(Submission).filter(
        Submission.author_id == user_id,
        Submission.deleted_utc == 0,
        Submission.title == validated_post.title,
        Submission.url == validated_post.url,
        Submission.body == validated_post.body
    ).first()

    if dup and SITE != 'localhost':
        return redirect(dup.permalink)
    return None
@app.post("/submit")
@limiter.limit("1/second;2/minute;10/hour;50/day")
@auth_required
def submit_post(v):
    """Create a new submission from the posted form.

    Steps, in order: reject suspended users, validate the form, redirect to
    an existing post with the same URL, reject banned domains, redirect to
    the user's own identical post, run the anti-spam check, then create,
    publish and commit the post. A thumbnail task is spawned when the post
    has a URL but no thumbnail.

    Returns the post's JSON for requests carrying an Authorization header,
    otherwise the rendered submission page. Validation problems re-render the
    form with a 400 (or abort with 400 for API/XHR requests).
    """
    def error(error):
        # Hand the submitted values back so the form can be shown pre-filled.
        form_title: str = request.values.get("title", "")
        form_body: str = request.values.get("body", "")
        form_url: str = request.values.get("url", "")
        if request.headers.get("Authorization") or request.headers.get("xhr"):
            abort(400, error)
        page = render_template("submit.html", v=v, error=error, title=form_title, url=form_url, body=form_body)
        return page, 400

    if v.is_suspended:
        return error("You can't perform this action while banned.")

    try:
        validated_post: validators.ValidatedSubmissionLike = \
            validators.ValidatedSubmissionLike.from_flask_request(
                request,
                allow_embedding=MULTIMEDIA_EMBEDDING_ENABLED,
            )
    except ValueError as e:
        return error(str(e))

    url_repost: Optional[werkzeug.wrappers.Response] = \
        _duplicate_check(validated_post.repost_search_url)
    if url_repost:
        return url_repost

    canonical_url: Optional[ParseResult] = validated_post.url_canonical
    if canonical_url:
        _execute_domain_ban_check(canonical_url)

    own_repost: Optional[werkzeug.wrappers.Response] = \
        _duplicate_check2(v.id, validated_post)
    if own_repost:
        return own_repost

    _do_antispam_submission_check(v, validated_post)

    club = bool(request.values.get("club", ""))
    is_bot = bool(request.headers.get("Authorization"))

    # Invariant: these values are guarded and obey the length bound
    assert len(validated_post.title) <= MAX_TITLE_LENGTH
    assert len(validated_post.body) <= SUBMISSION_BODY_LENGTH_MAXIMUM

    post = Submission(
        private=bool(request.values.get("private", "")),
        club=club,
        author_id=v.id,
        over_18=bool(request.values.get("over_18", "")),
        app_id=v.client.application.id if v.client else None,
        is_bot=is_bot,
        url=validated_post.url,
        body=validated_post.body,
        body_html=validated_post.body_html,
        embed_url=validated_post.embed_slow,
        title=validated_post.title,
        title_html=validated_post.title_html,
        ghost=False,
        filter_state='filtered' if v.admin_level == 0 and app.config['SETTINGS']['FilterNewPosts'] else 'normal',
        thumburl=validated_post.thumburl
    )
    post.submit(g.db)

    needs_thumbnail = not post.thumburl and post.url
    if needs_thumbnail:
        gevent.spawn(thumbnail_thread, post.id)

    post.publish()
    g.db.commit()

    if request.headers.get("Authorization"):
        return post.json

    post.voted = 1
    sort = 'new' if 'megathread' in post.title.lower() else v.defaultsortingcomments
    return render_template('submission.html', v=v, p=post, sort=sort, render_replies=True, offset=0, success=True)
@app.post("/delete_post/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def delete_post_pid(pid, v):
    """Mark the user's own post as deleted.

    Anyone but the author gets a 403. The deletion timestamp is set to the
    current time, the post is unpinned and unstickied, the front-page
    listing cache is invalidated and the change is committed.
    """
    target = get_post(pid)

    is_owner = target.author_id == v.id
    if not is_owner:
        abort(403)

    target.deleted_utc = int(time.time())
    target.is_pinned = False
    target.stickied = None
    g.db.add(target)

    invalidate_cache(frontlist=True)
    g.db.commit()

    return {"message": "Post deleted!"}
@app.post("/undelete_post/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def undelete_post_pid(pid, v):
    """Restore the user's own deleted post.

    Anyone but the author gets a 403. The deletion timestamp is reset to 0,
    the front-page listing cache is invalidated and the change is committed.
    """
    target = get_post(pid)

    is_owner = target.author_id == v.id
    if not is_owner:
        abort(403)

    target.deleted_utc = 0
    g.db.add(target)

    invalidate_cache(frontlist=True)
    g.db.commit()

    return {"message": "Post undeleted!"}
@app.post("/toggle_comment_nsfw/<cid>")
@auth_required
def toggle_comment_nsfw(cid, v):
    """Flip the over-18 flag of a comment.

    Responds with 404 when no comment has the given id, and with 403 unless
    the user is the comment's author or has an admin level above 1.
    """
    comment = g.db.query(Comment).filter_by(id=cid).one_or_none()
    # one_or_none() yields None for an unknown id; answer 404 instead of
    # failing on the attribute access below.
    if comment is None:
        abort(404)

    if comment.author_id != v.id and not v.admin_level > 1:
        abort(403)

    comment.over_18 = not comment.over_18
    g.db.add(comment)
    g.db.commit()

    if comment.over_18:
        return {"message": "Comment has been marked as +18!"}
    return {"message": "Comment has been unmarked as +18!"}
@app.post("/toggle_post_nsfw/<pid>")
@auth_required
def toggle_post_nsfw(pid, v):
    """Flip the over-18 flag of a post.

    Allowed for the post's author and for users with an admin level above 1
    (403 otherwise). When someone other than the author makes the change, a
    "set_nsfw" / "unset_nsfw" ModAction is recorded.
    """
    target = get_post(pid)

    by_someone_else = target.author_id != v.id
    if by_someone_else and not v.admin_level > 1:
        abort(403)

    target.over_18 = not target.over_18
    g.db.add(target)

    if by_someone_else:
        action_kind = "set_nsfw" if target.over_18 else "unset_nsfw"
        log_entry = ModAction(
            kind=action_kind,
            user_id=v.id,
            target_submission_id=target.id,
        )
        g.db.add(log_entry)

    g.db.commit()

    message = (
        "Post has been marked as +18!"
        if target.over_18
        else "Post has been unmarked as +18!"
    )
    return {"message": message}
@app.post("/save_post/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def save_post(pid, v):
    """Save a post for the logged-in user; a no-op if it is already saved."""
    target = get_post(pid)

    existing = g.db.query(SaveRelationship).filter_by(
        user_id=v.id,
        submission_id=target.id,
    ).one_or_none()

    if not existing:
        g.db.add(SaveRelationship(user_id=v.id, submission_id=target.id))
        g.db.commit()

    return {"message": "Post saved!"}
@app.post("/unsave_post/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def unsave_post(pid, v):
    """Remove a post from the user's saved list; a no-op if it is not saved."""
    target = get_post(pid)

    existing = g.db.query(SaveRelationship).filter_by(
        user_id=v.id,
        submission_id=target.id,
    ).one_or_none()

    if existing:
        g.db.delete(existing)
        g.db.commit()

    return {"message": "Post unsaved!"}
@app.post("/pin/<post_id>")
@auth_required
def api_pin_post(post_id, v):
    """Toggle whether the user's own post is pinned.

    Only the author may do this (403 otherwise). The user-page listing cache
    is invalidated and the change committed; the returned message reflects
    the resulting state.
    """
    target = get_post(post_id)

    if target.author_id != v.id:
        abort(403, "Only the post author's can do that!")

    target.is_pinned = not target.is_pinned
    g.db.add(target)

    invalidate_cache(userpagelisting=True)
    g.db.commit()

    message = "Post pinned!" if target.is_pinned else "Post unpinned!"
    return {"message": message}
@app.get("/submit/title")
@limiter.limit("6/minute")
@auth_required
def get_post_title(v):
    """Fetch a remote page and return its <title> for the submit form.

    Reads the `url` request value. Responds with 400 when the URL is missing,
    contains a backslash, does not start with "http", ends in one of
    NO_TITLE_EXTENSIONS (query string ignored), cannot be fetched within the
    timeout, is not served as text/html, or has no title that
    `html_title_regex` can find.

    Returns a dict with the stripped `url` and the HTML-unescaped `title`.
    """
    POST_TITLE_TIMEOUT = 5

    url = request.values.get("url")
    if not url or '\\' in url:
        abort(400)

    url = url.strip()
    if not url.startswith('http'):
        abort(400)

    # The URL is lower-cased first, so the percent-encoded '?' has to be
    # matched in lower case as well.
    checking_url = url.lower().split('?')[0].split('%3f')[0]
    if any(checking_url.endswith(f'.{x}') for x in NO_TITLE_EXTENSIONS):
        abort(400)

    # gevent.Timeout derives from BaseException, so it is listed explicitly;
    # everything else that can go wrong while fetching is an Exception.
    try:
        x = gevent.with_timeout(POST_TITLE_TIMEOUT, requests.get,
            url, headers=titleheaders, timeout=POST_TITLE_TIMEOUT,
            proxies=proxies)
    except (Exception, gevent.Timeout):
        abort(400)

    content_type = x.headers.get("Content-Type")
    if not content_type or "text/html" not in content_type:
        abort(400)

    match = html_title_regex.search(x.text)
    # lastindex is None when no group took part in the match.
    if not match or not match.lastindex:
        abort(400)
    title = html.unescape(match.group(1))

    return {"url": url, "title": title}