import sys
import time
import urllib.parse
from io import BytesIO
from urllib.parse import ParseResult, urlparse

import gevent
import requests
import werkzeug.wrappers
from PIL import Image as PILimage
from sqlalchemy.orm import Query

import files.helpers.validators as validators
from files.__main__ import app, db_session, limiter
from files.classes import *
from files.helpers.alerts import *
from files.helpers.caching import invalidate_cache
from files.helpers.config.const import *
from files.helpers.content import canonicalize_url2
from files.helpers.contentsorting import sort_objects
from files.helpers.media import process_image
from files.helpers.sanitize import *
from files.helpers.strings import sql_ilike_clean
from files.helpers.wrappers import *
from files.routes.importstar import *

# Browser-like headers sent with outbound page/image fetches.
titleheaders = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Safari/537.36"
}

MAX_TITLE_LENGTH = 500
MAX_URL_LENGTH = 2048


def _select_top_comment_page(top_comments, limit):
    """Pick the top-level comment ids that make up one page of comments.

    Iterates ``(comment_id, descendant_count)`` rows from *top_comments* in
    query order and accumulates whole threads until at least *limit* comments
    (top-level plus descendants) have been collected.

    Returns a ``(ids, has_more)`` tuple: the selected top-level comment ids
    and whether at least one further top-level comment was left unselected.
    """
    page_ids = []
    comment_qty = 0
    for tc_id, tc_children_qty in top_comments.all():
        if comment_qty >= limit:
            return page_ids, True
        comment_qty += tc_children_qty + 1
        page_ids.append(tc_id)
    return page_ids, False


@app.post("/toggle_club/<pid>")
@auth_required
def toggle_club(pid, v):
    """Flip the club-only flag of a post.

    Allowed for the post's author or a user with ``admin_level >= 2``;
    anyone else gets a 403.
    """
    post = get_post(pid)
    if post.author_id != v.id and v.admin_level < 2:
        abort(403)

    post.club = not post.club
    g.db.add(post)
    g.db.commit()

    if post.club:
        return {"message": "Post has been marked as club-only!"}
    return {"message": "Post has been unmarked as club-only!"}


@app.post("/publish/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def publish(pid, v):
    """Publish a private (draft) post and redirect to its permalink.

    A post that is already public short-circuits with a success message.
    Only the author may publish; others get a 403.
    """
    post = get_post(pid)
    if not post.private:
        return {"message": "Post published!"}

    if post.author_id != v.id:
        abort(403)

    post.private = False
    # The creation time is reset to the moment of publication.
    post.created_utc = int(time.time())
    g.db.add(post)

    post.publish()
    g.db.commit()

    return redirect(post.permalink)


@app.get("/submit")
@auth_required
def submit_get(v):
    """Render the empty submission form."""
    return render_template("submit.html", v=v)


@app.get("/post/<pid>")
@app.get("/post/<pid>/<anything>")
@auth_desired
def post_id(pid, anything=None, v=None):
    """Show a post together with the first page of its comment tree.

    Returns ``post.json`` when an ``Authorization`` header is present,
    otherwise the rendered submission template. *anything* is an ignored
    trailing URL segment.
    """
    post = get_post(pid, v=v)

    # NSFW gate: the viewer needs the over_18 setting or an unexpired
    # session-level confirmation timestamp.
    if post.over_18 and not (v and v.over_18) and session.get('over_18', 0) < int(time.time()):
        if request.headers.get("Authorization") or request.headers.get("xhr"):
            abort(403, "Must be 18+ to view")
        return render_template("errors/nsfw.html", v=v)

    if v:
        defaultsortingcomments = v.defaultsortingcomments
    else:
        defaultsortingcomments = "new"
    sort = request.values.get("sort", defaultsortingcomments)

    if post.club and not (v and (v.paid_dues or v.id == post.author_id)):
        abort(403)

    limit = app.config['RESULTS_PER_PAGE_COMMENTS']

    top_comments = g.db.query(Comment.id, Comment.descendant_count).filter(
        Comment.parent_submission == post.id,
        Comment.level == 1,
    ).order_by(Comment.is_pinned.desc().nulls_last())
    top_comments = sort_objects(top_comments, sort, Comment)

    pg_top_comment_ids, has_more = _select_top_comment_page(top_comments, limit)
    # A non-zero offset is passed to the template when more pages exist.
    offset = 1 if has_more else 0

    def comment_tree_filter(q: Query) -> Query:
        q = q.filter(Comment.top_comment_id.in_(pg_top_comment_ids))
        return q

    # `comments` is intentionally kept bound even though it is not read below.
    # NOTE(review): presumably this keeps the loaded Comment objects alive
    # while the template renders -- confirm before removing.
    comments, comment_tree = get_comment_trees_eager(comment_tree_filter, sort, v)
    post.replies = comment_tree[None]  # parent=None -> top-level comments
    ids = {c.id for c in post.replies}

    post.views += 1
    # Keep loaded attributes usable after the commit so rendering below does
    # not need to reload them.
    g.db.expire_on_commit = False
    g.db.add(post)
    g.db.commit()
    g.db.expire_on_commit = True

    if request.headers.get("Authorization"):
        return post.json

    if post.is_banned and not (v and (v.admin_level > 1 or post.author_id == v.id)):
        template = "submission_banned.html"
    else:
        template = "submission.html"
    return render_template(template, v=v, p=post, ids=list(ids), sort=sort, render_replies=True, offset=offset)


@app.get("/viewmore/<pid>/<sort>/<offset>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_desired
def viewmore(v, pid, sort, offset):
    """Render the next page of top-level comment threads for a post.

    The request must carry ``ids``: a comma-separated list of top-level
    comment ids that were already rendered. Malformed ``ids`` or *offset*
    yield a 400.
    """
    post = get_post(pid, v=v)
    if post.club and not (v and (v.paid_dues or v.id == post.author_id)):
        abort(403)

    try:
        offset_prev = int(offset)
        ids = {int(x) for x in request.values.get("ids", "").split(',')}
    except ValueError:
        abort(400)

    limit = app.config['RESULTS_PER_PAGE_COMMENTS']

    # TODO: Unify with common post_id logic
    top_comments = g.db.query(Comment.id, Comment.descendant_count).filter(
        Comment.parent_submission == post.id,
        Comment.level == 1,
        Comment.id.notin_(ids),
        Comment.is_pinned == None,
    ).order_by(Comment.is_pinned.desc().nulls_last())

    if sort == "new":
        newest_created_utc = g.db.query(Comment.created_utc).filter(
            Comment.id.in_(ids),
            Comment.is_pinned == None,
        ).order_by(Comment.created_utc.desc()).limit(1).scalar()

        # Needs to be <=, not just <, to support seed_db data which has many identical
        # created_utc values. Shouldn't cause duplication in real data because of the
        # `NOT IN :ids` in top_comments.
        top_comments = top_comments.filter(Comment.created_utc <= newest_created_utc)

    top_comments = sort_objects(top_comments, sort, Comment)

    pg_top_comment_ids, has_more = _select_top_comment_page(top_comments, limit)
    offset = offset_prev + 1 if has_more else 0

    def comment_tree_filter(q: Query) -> Query:
        q = q.filter(Comment.top_comment_id.in_(pg_top_comment_ids))
        return q

    _, comment_tree = get_comment_trees_eager(comment_tree_filter, sort, v)
    comments = comment_tree[None]  # parent=None -> top-level comments
    ids |= {c.id for c in comments}

    return render_template("comments.html", v=v, comments=comments, p=post, ids=list(ids), render_replies=True, pid=pid, sort=sort, offset=offset, ajax=True)


@app.get("/morecomments/<cid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_desired
def morecomments(v, cid):
    """Render the replies to comment *cid* that lie past the render depth limit.

    Responds 400 for a non-integer id and 404 when the comment does not exist.
    """
    try:
        cid = int(cid)
    except ValueError:
        abort(400)

    row = g.db.query(Comment.top_comment_id).filter_by(id=cid).one_or_none()
    if row is None:
        abort(404)
    tcid = row[0]

    if v:
        votes = g.db.query(CommentVote).filter_by(user_id=v.id).subquery()

        blocking = v.blocking.subquery()
        blocked = v.blocked.subquery()

        comments = g.db.query(
            Comment,
            votes.c.vote_type,
            blocking.c.target_id,
            blocked.c.target_id,
        ).filter(Comment.top_comment_id == tcid, Comment.level > RENDER_DEPTH_LIMIT).join(
            votes,
            votes.c.comment_id == Comment.id,
            isouter=True
        ).join(
            blocking,
            blocking.c.target_id == Comment.author_id,
            isouter=True
        ).join(
            blocked,
            blocked.c.user_id == Comment.author_id,
            isouter=True
        )

        output = []
        # NOTE(review): `dump` is never read, but it holds strong references
        # to the non-direct descendants annotated below; presumably that keeps
        # those annotated objects alive while the template walks the reply
        # tree. Confirm before removing.
        dump = []
        for c in comments.all():
            comment = c[0]
            comment.voted = c[1] or 0
            comment.is_blocking = c[2] or 0
            comment.is_blocked = c[3] or 0
            if comment.parent_comment_id == cid:
                output.append(comment)
            else:
                dump.append(comment)
        comments = output
    else:
        c = g.db.query(Comment).filter_by(id=cid).one_or_none()
        comments = c.replies(None)

    if comments:
        p = comments[0].post
    else:
        p = None

    return render_template("comments.html", v=v, comments=comments, p=p, render_replies=True, ajax=True)


@app.post("/edit_post/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def edit_post(pid, v):
    """Apply a title and/or body edit to a post and redirect to it.

    Responds 403 when ``v.can_edit`` refuses and 400 when nothing changed.
    Edits by someone other than the author are recorded as a ModAction.
    """
    p = get_post(pid)
    if not v.can_edit(p):
        abort(403)

    validated_post: validators.ValidatedSubmissionLike = \
        validators.ValidatedSubmissionLike.from_flask_request(
            request,
            allow_embedding=MULTIMEDIA_EMBEDDING_ENABLED,
            allow_media_url_upload=False,
            embed_url_file_key="file",
            edit=True
        )
    changed: bool = False

    if validated_post.title != p.title:
        p.title = validated_post.title
        p.title_html = validated_post.title_html
        changed = True

    if validated_post.body != p.body:
        p.body = validated_post.body
        p.body_html = validated_post.body_html
        changed = True

    if not changed:
        abort(400, "You need to change something")

    p.publish()

    if v.id == p.author_id:
        # Edits within the first three minutes do not mark the post as edited.
        if int(time.time()) - p.created_utc > 60 * 3:
            p.edited_utc = int(time.time())
        g.db.add(p)
    else:
        ma = ModAction(
            kind="edit_post",
            user_id=v.id,
            target_submission_id=p.id
        )
        g.db.add(ma)

    g.db.commit()

    return redirect(p.permalink)


def archiveorg(url):
    """Ask web.archive.org to snapshot *url*; failures are deliberately ignored."""
    try:
        requests.get(f'https://web.archive.org/save/{url}', headers={'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}, timeout=100)
    except Exception:
        # Best-effort: archiving must never break the caller.
        pass


def thumbnail_thread(pid):
    """Fetch a thumbnail for post *pid* and store its URL on the post.

    Opens its own DB session (always closed on exit). For an HTML page,
    candidate images are taken from well-known meta tags and then from every
    ``<img src>``; the first candidate that is a non-SVG image at least 30x30
    pixels wins. A post URL that is itself an image is used directly. Images
    over 8 MiB are skipped.
    """
    db = db_session()

    def expand_url(post_url, fragment_url):
        """Resolve an image reference found on the page to an absolute https URL."""
        if fragment_url.startswith("https://"):
            return fragment_url
        elif fragment_url.startswith("http://"):
            # Upgrade plain http references to https.
            return f"https://{fragment_url[len('http://'):]}"
        elif fragment_url.startswith('//'):
            return f"https:{fragment_url}"
        elif fragment_url.startswith('/'):
            parsed_url = urlparse(post_url)
            return f"https://{parsed_url.netloc}{fragment_url}"
        else:
            return f"{post_url}{'/' if not post_url.endswith('/') else ''}{fragment_url}"

    try:
        post = db.query(Submission).filter_by(id=pid).one_or_none()

        if not post or not post.url:
            # The row may not be visible yet; wait once and retry.
            time.sleep(5)
            post = db.query(Submission).filter_by(id=pid).one_or_none()

        if not post or not post.url:
            return

        fetch_url = post.url
        if fetch_url.startswith('/'):
            fetch_url = f"{SITE_FULL}{fetch_url}"

        headers = titleheaders

        try:
            x = requests.get(fetch_url, headers=headers, timeout=5, proxies=proxies)
        except Exception:
            return

        if x.status_code != 200:
            return

        content_type = x.headers.get("Content-Type", "")

        if content_type.startswith("text/html"):
            soup = BeautifulSoup(x.content, 'lxml')

            thumb_candidate_urls = []

            # Meta tags are checked in priority order, by `name` then `property`.
            meta_tags = [
                "themotte:thumbnail",
                "twitter:image",
                "og:image",
                "thumbnail"
            ]

            for tag_name in meta_tags:
                tag = soup.find(
                    'meta',
                    attrs={
                        "name": tag_name,
                        "content": True
                    }
                )
                if not tag:
                    tag = soup.find(
                        'meta',
                        attrs={
                            'property': tag_name,
                            'content': True
                        }
                    )
                if tag:
                    thumb_candidate_urls.append(expand_url(post.url, tag['content']))

            for tag in soup.find_all("img", attrs={'src': True}):
                thumb_candidate_urls.append(expand_url(post.url, tag['src']))

            for url in thumb_candidate_urls:
                try:
                    image_req = requests.get(url, headers=headers, timeout=5, proxies=proxies)
                except Exception:
                    continue

                if image_req.status_code >= 400:
                    continue

                candidate_type = image_req.headers.get("Content-Type", "")
                if not candidate_type.startswith("image/"):
                    continue
                if candidate_type.startswith("image/svg"):
                    continue

                try:
                    image = PILimage.open(BytesIO(image_req.content))
                except Exception:
                    # Undecodable image data: try the next candidate.
                    continue

                if image.width < 30 or image.height < 30:
                    continue

                break
            else:
                # No candidate qualified.
                return

        elif content_type.startswith("image/"):
            image_req = x
            try:
                # Opened only to confirm the payload is a decodable image.
                PILimage.open(BytesIO(x.content))
            except Exception:
                return
        else:
            return

        if len(image_req.content) > 8 * 1024 * 1024:
            return

        name = f'/images/{time.time()}'.replace('.', '') + '.webp'

        with open(name, "wb") as file:
            file.write(image_req.content)

        post.thumburl = process_image(name, resize=100)
        db.add(post)
        db.commit()
        sys.stdout.flush()
    finally:
        db.close()


@app.post("/is_repost")
def api_is_repost():
    """Return the permalink of an existing live post with the same URL, or ''."""
    url = request.values.get('url')
    if not url:
        abort(400)

    url = canonicalize_url2(url, httpsify=True).geturl()
    if url.endswith('/'):
        url = url[:-1]

    search_url = sql_ilike_clean(url)
    repost = g.db.query(Submission).filter(
        Submission.url.ilike(search_url),
        Submission.deleted_utc == 0,
        Submission.is_banned == False
    ).first()

    if repost:
        return {'permalink': repost.permalink}
    return {'permalink': ''}


def _do_antispam_submission_check(v: User, validated: validators.ValidatedSubmissionLike):
    """Ban *v* for a day if they posted too many similar submissions in 24h.

    Similarity is measured separately on title and URL with the ``<->``
    operator against configured thresholds. The allowed count is doubled for
    accounts older than a day and tripled for accounts older than a week.
    When the limit is hit, the user is notified and banned, every similar
    post is removed with a ModAction, and the request is aborted with 403.
    Returns ``None`` otherwise.
    """
    now = int(time.time())
    cutoff = now - 60 * 60 * 24

    similar_posts = g.db.query(Submission).filter(
        Submission.author_id == v.id,
        Submission.title.op('<->')(validated.title) < app.config["SPAM_SIMILARITY_THRESHOLD"],
        Submission.created_utc > cutoff
    ).all()

    if validated.url:
        similar_urls = g.db.query(Submission).filter(
            Submission.author_id == v.id,
            Submission.url.op('<->')(validated.url) < app.config["SPAM_URL_SIMILARITY_THRESHOLD"],
            Submission.created_utc > cutoff
        ).all()
    else:
        similar_urls = []

    threshold = app.config["SPAM_SIMILAR_COUNT_THRESHOLD"]
    if v.age_seconds >= (60 * 60 * 24 * 7):
        threshold *= 3
    elif v.age_seconds >= (60 * 60 * 24):
        threshold *= 2

    if max(len(similar_urls), len(similar_posts)) < threshold:
        return

    text = "Your account has been banned for **1 day** for the following reason:\n\n> Too much spam!"
    send_repeatable_notification(v.id, text)

    v.ban(reason="Spamming.", days=1)

    # A post can match on both title and URL; act on each post only once so
    # it does not get two identical ModAction entries.
    offending = {post.id: post for post in similar_posts + similar_urls}
    for post in offending.values():
        post.is_banned = True
        post.is_pinned = False
        post.ban_reason = "AutoJanny"
        g.db.add(post)
        ma = ModAction(
            user_id=AUTOJANNY_ID,
            target_submission_id=post.id,
            kind="ban_post",
            _note="spam"
        )
        g.db.add(ma)
    g.db.commit()
    abort(403)


def _execute_domain_ban_check(parsed_url: ParseResult):
    """Abort with 403 if the URL's domain (or domain + path) is banned."""
    domain: str = parsed_url.netloc
    domain_obj = get_domain(domain)
    if not domain_obj:
        domain_obj = get_domain(domain + parsed_url.path)
    if not domain_obj:
        return
    abort(403, f"Remove the {domain_obj.domain} link from your post and try again. {domain_obj.reason}")


def _duplicate_check(search_url: Optional[str]) -> Optional[werkzeug.wrappers.Response]:
    """Redirect to a live post with the same URL (case-insensitive), if any.

    Returns ``None`` when there is no URL, no match, or SITE is 'localhost'.
    """
    if not search_url:
        return None
    repost = g.db.query(Submission).filter(
        func.lower(Submission.url) == search_url.lower(),
        Submission.deleted_utc == 0,
        Submission.is_banned == False
    ).first()
    if repost and SITE != 'localhost':
        return redirect(repost.permalink)
    return None


def _duplicate_check2(
        user_id: int,
        validated_post: validators.ValidatedSubmissionLike) -> Optional[werkzeug.wrappers.Response]:
    """Redirect to the user's own undeleted post with identical title, URL and body.

    Returns ``None`` when there is no such post or SITE is 'localhost'.
    """
    # .first() rather than .one_or_none(): several identical posts may already
    # exist, and that must not raise MultipleResultsFound.
    dup = g.db.query(Submission).filter(
        Submission.author_id == user_id,
        Submission.deleted_utc == 0,
        Submission.title == validated_post.title,
        Submission.url == validated_post.url,
        Submission.body == validated_post.body
    ).first()

    if dup and SITE != 'localhost':
        return redirect(dup.permalink)
    return None


@app.post("/submit")
@limiter.limit("1/second;2/minute;10/hour;50/day")
@auth_required
def submit_post(v):
    """Validate and create a new submission.

    Runs, in order: suspension check, request validation, URL duplicate
    check, banned-domain check, exact-duplicate check and the anti-spam
    check. On success returns ``post.json`` for API clients (``Authorization``
    header present) or the rendered submission page.
    """
    def error(message):
        """Report a validation failure: abort for API/xhr, re-render the form otherwise."""
        title: str = request.values.get("title", "")
        body: str = request.values.get("body", "")
        url: str = request.values.get("url", "")

        if request.headers.get("Authorization") or request.headers.get("xhr"):
            abort(400, message)
        return render_template("submit.html", v=v, error=message, title=title, url=url, body=body), 400

    if v.is_suspended:
        return error("You can't perform this action while banned.")

    try:
        validated_post: validators.ValidatedSubmissionLike = \
            validators.ValidatedSubmissionLike.from_flask_request(
                request,
                allow_embedding=MULTIMEDIA_EMBEDDING_ENABLED,
            )
    except ValueError as e:
        return error(str(e))

    duplicate: Optional[werkzeug.wrappers.Response] = \
        _duplicate_check(validated_post.repost_search_url)
    if duplicate:
        return duplicate

    parsed_url: Optional[ParseResult] = validated_post.url_canonical
    if parsed_url:
        _execute_domain_ban_check(parsed_url)

    duplicate = _duplicate_check2(v.id, validated_post)
    if duplicate:
        return duplicate

    _do_antispam_submission_check(v, validated_post)

    club = bool(request.values.get("club", ""))
    is_bot = bool(request.headers.get("Authorization"))

    # Invariant: these values are guarded and obey the length bound
    assert len(validated_post.title) <= MAX_TITLE_LENGTH
    assert len(validated_post.body) <= SUBMISSION_BODY_LENGTH_MAXIMUM

    post = Submission(
        private=bool(request.values.get("private", "")),
        club=club,
        author_id=v.id,
        over_18=bool(request.values.get("over_18", "")),
        app_id=v.client.application.id if v.client else None,
        is_bot=is_bot,
        url=validated_post.url,
        body=validated_post.body,
        body_html=validated_post.body_html,
        embed_url=validated_post.embed_slow,
        title=validated_post.title,
        title_html=validated_post.title_html,
        ghost=False,
        filter_state='filtered' if v.admin_level == 0 and app.config['SETTINGS']['FilterNewPosts'] else 'normal',
        thumburl=validated_post.thumburl
    )
    post.submit(g.db)

    if not post.thumburl and post.url:
        # Thumbnail fetching is spawned in a greenlet so it does not delay the response.
        gevent.spawn(thumbnail_thread, post.id)

    post.publish()
    g.db.commit()

    if request.headers.get("Authorization"):
        return post.json

    post.voted = 1
    if 'megathread' in post.title.lower():
        sort = 'new'
    else:
        sort = v.defaultsortingcomments
    return render_template('submission.html', v=v, p=post, sort=sort, render_replies=True, offset=0, success=True)


@app.post("/delete_post/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def delete_post_pid(pid, v):
    """Soft-delete the caller's own post and clear its pinned/stickied state."""
    post = get_post(pid)
    if post.author_id != v.id:
        abort(403)

    post.deleted_utc = int(time.time())
    post.is_pinned = False
    post.stickied = None

    g.db.add(post)
    invalidate_cache(frontlist=True)
    g.db.commit()

    return {"message": "Post deleted!"}


@app.post("/undelete_post/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def undelete_post_pid(pid, v):
    """Restore the caller's own soft-deleted post."""
    post = get_post(pid)
    if post.author_id != v.id:
        abort(403)

    post.deleted_utc = 0
    g.db.add(post)
    invalidate_cache(frontlist=True)
    g.db.commit()

    return {"message": "Post undeleted!"}


@app.post("/toggle_comment_nsfw/<cid>")
@auth_required
def toggle_comment_nsfw(cid, v):
    """Flip the over-18 flag of a comment (author or ``admin_level > 1`` only).

    Responds 404 when the comment does not exist.
    """
    comment = g.db.query(Comment).filter_by(id=cid).one_or_none()
    if not comment:
        abort(404)
    if comment.author_id != v.id and not v.admin_level > 1:
        abort(403)

    comment.over_18 = not comment.over_18
    g.db.add(comment)
    g.db.commit()

    if comment.over_18:
        return {"message": "Comment has been marked as +18!"}
    return {"message": "Comment has been unmarked as +18!"}


@app.post("/toggle_post_nsfw/<pid>")
@auth_required
def toggle_post_nsfw(pid, v):
    """Flip the over-18 flag of a post (author or ``admin_level > 1`` only).

    A change made by someone other than the author is recorded as a ModAction.
    """
    post = get_post(pid)
    if post.author_id != v.id and not v.admin_level > 1:
        abort(403)

    post.over_18 = not post.over_18
    g.db.add(post)

    if post.author_id != v.id:
        ma = ModAction(
            kind="set_nsfw" if post.over_18 else "unset_nsfw",
            user_id=v.id,
            target_submission_id=post.id,
        )
        g.db.add(ma)

    g.db.commit()

    if post.over_18:
        return {"message": "Post has been marked as +18!"}
    return {"message": "Post has been unmarked as +18!"}


@app.post("/save_post/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def save_post(pid, v):
    """Bookmark a post for the caller; a no-op if it is already saved."""
    post = get_post(pid)

    save = g.db.query(SaveRelationship).filter_by(user_id=v.id, submission_id=post.id).one_or_none()

    if not save:
        new_save = SaveRelationship(user_id=v.id, submission_id=post.id)
        g.db.add(new_save)
        g.db.commit()

    return {"message": "Post saved!"}


@app.post("/unsave_post/<pid>")
@limiter.limit("1/second;30/minute;200/hour;1000/day")
@auth_required
def unsave_post(pid, v):
    """Remove the caller's bookmark on a post; a no-op if it is not saved."""
    post = get_post(pid)

    save = g.db.query(SaveRelationship).filter_by(user_id=v.id, submission_id=post.id).one_or_none()

    if save:
        g.db.delete(save)
        g.db.commit()

    return {"message": "Post unsaved!"}


@app.post("/pin/<post_id>")
@auth_required
def api_pin_post(post_id, v):
    """Flip the pinned flag of the caller's own post."""
    post = get_post(post_id)
    if v.id != post.author_id:
        abort(403, "Only the post author's can do that!")

    post.is_pinned = not post.is_pinned
    g.db.add(post)
    invalidate_cache(userpagelisting=True)
    g.db.commit()

    if post.is_pinned:
        return {"message": "Post pinned!"}
    return {"message": "Post unpinned!"}


@app.get("/submit/title")
@limiter.limit("6/minute")
@auth_required
def get_post_title(v):
    """Fetch a remote page and return its HTML ``<title>`` as a suggested post title.

    Responds 400 for a missing or non-http URL, a URL whose path ends in one
    of NO_TITLE_EXTENSIONS, a failed or timed-out fetch, a non-HTML response,
    or a page without a title.
    """
    POST_TITLE_TIMEOUT = 5

    # NOTE(review): this fetches a user-supplied URL from the server; confirm
    # that `proxies` or the network layer blocks access to internal hosts.
    url = request.values.get("url")
    if not url or '\\' in url:
        abort(400)
    url = url.strip()
    if not url.startswith('http'):
        abort(400)

    # Drop the query string (literal or percent-encoded '?') before looking at
    # the extension. The URL is lowercased first, so the encoded form is '%3f'.
    checking_url = url.lower().split('?')[0].split('%3f')[0]
    if any(checking_url.endswith(f'.{x}') for x in NO_TITLE_EXTENSIONS):
        abort(400)

    try:
        x = gevent.with_timeout(POST_TITLE_TIMEOUT, requests.get, url, headers=titleheaders, timeout=POST_TITLE_TIMEOUT, proxies=proxies)
    except (Exception, gevent.Timeout):
        # gevent.Timeout derives from BaseException, so it is listed explicitly.
        abort(400)

    content_type = x.headers.get("Content-Type")
    if not content_type or "text/html" not in content_type:
        abort(400)

    match = html_title_regex.search(x.text)
    if match and (match.lastindex or 0) >= 1:
        title = html.unescape(match.group(1))
    else:
        abort(400)

    return {"url": url, "title": title}