From afe209d5d8034d19ea85b33c9133ab81e649f716 Mon Sep 17 00:00:00 2001
From: TLSM
Date: Mon, 28 Nov 2022 17:47:54 -0500
Subject: [PATCH] Eager load comments for post rendering.

Before:

GET /post/1/clever-unique-post-title-number-0
|----------|--------|--------|--------|--------|--------|------------|
| Database | SELECT | INSERT | UPDATE | DELETE | Totals | Duplicates |
|----------|--------|--------|--------|--------|--------|------------|
| default  | 942    | 0      | 1      | 0      | 943    | 921        |
|----------|--------|--------|--------|--------|--------|------------|
Total queries: 943 in 0.377s
# request time in browser 17249ms

After:

GET /post/1/clever-unique-post-title-number-0
|----------|--------|--------|--------|--------|--------|------------|
| Database | SELECT | INSERT | UPDATE | DELETE | Totals | Duplicates |
|----------|--------|--------|--------|--------|--------|------------|
| default  | 58     | 0      | 1      | 0      | 59     | 35         |
|----------|--------|--------|--------|--------|--------|------------|
Total queries: 59 in 0.0423s
# request time in browser 544ms

Also fixes seed_db not populating top_comment_id on generated comments.
If you want to test locally with seed_db test data, you need to reseed.
--- files/classes/comment.py | 12 ++++-- files/commands/seed_db.py | 8 +++- files/helpers/contentsorting.py | 34 ++++++++++++++++ files/helpers/get.py | 72 +++++++++++++++++++++++++++++++++ files/routes/posts.py | 7 +++- 5 files changed, 125 insertions(+), 8 deletions(-) diff --git a/files/classes/comment.py b/files/classes/comment.py index 8e3a899cf..81049e244 100644 --- a/files/classes/comment.py +++ b/files/classes/comment.py @@ -58,8 +58,13 @@ class Comment(Base): senttouser = relationship("User", primaryjoin="User.id==Comment.sentto", viewonly=True) parent_comment = relationship("Comment", remote_side=[id], viewonly=True) child_comments = relationship("Comment", lazy="dynamic", remote_side=[parent_comment_id], viewonly=True) - awards = relationship("AwardRelationship", viewonly=True) - reports = relationship("CommentFlag", viewonly=True) + awards = relationship("AwardRelationship", + primaryjoin="AwardRelationship.comment_id == Comment.id", + viewonly=True) + reports = relationship("CommentFlag", + primaryjoin="CommentFlag.comment_id == Comment.id", + order_by="CommentFlag.created_utc", + viewonly=True) notes = relationship("UserNote", back_populates="comment") def __init__(self, *args, **kwargs): @@ -70,7 +75,6 @@ class Comment(Base): super().__init__(*args, **kwargs) def __repr__(self): - return f"" @property @@ -87,7 +91,7 @@ class Comment(Base): @lazy def flags(self, v): - flags = g.db.query(CommentFlag).filter_by(comment_id=self.id).order_by(CommentFlag.created_utc).all() + flags = self.reports if not (v and (v.shadowbanned or v.admin_level > 2)): for flag in flags: if flag.user.shadowbanned: diff --git a/files/commands/seed_db.py b/files/commands/seed_db.py index 00b41cf73..6ea27008b 100644 --- a/files/commands/seed_db.py +++ b/files/commands/seed_db.py @@ -130,8 +130,12 @@ def seed_db(): db.session.add(comment) comments.append(comment) - db.session.commit() db.session.flush() + for c in comments: + c.top_comment_id = c.id + db.session.add(c) + + 
db.session.commit() print(f"Creating {NUM_REPLY_COMMENTS} reply comments") for i in range(NUM_REPLY_COMMENTS): @@ -143,6 +147,7 @@ def seed_db(): author_id=user.id, parent_submission=str(parent.post.id), parent_comment_id=parent.id, + top_comment_id=parent.top_comment_id, level=parent.level + 1, over_18=False, is_bot=False, @@ -155,7 +160,6 @@ def seed_db(): comments.append(comment) db.session.commit() - db.session.flush() print("Updating comment counts for all posts") for post in posts: diff --git a/files/helpers/contentsorting.py b/files/helpers/contentsorting.py index ec3bfb513..bdb4fb1db 100644 --- a/files/helpers/contentsorting.py +++ b/files/helpers/contentsorting.py @@ -51,3 +51,37 @@ def sort_objects(objects, sort, cls): return objects.order_by(cls.created_utc) else: # default, or sort == 'new' return objects.order_by(cls.created_utc.desc()) + + +# Presently designed around files.helpers.get.get_comment_trees_eager +# Behavior should parallel that of sort_objects above. TODO: Unify someday? 
+def sort_comment_results(comments, sort): + DESC = (2 << 30) - 1 # descending sorts, Y2038 problem, change before then + if sort == 'hot': + ti = int(time.time()) + 3600 + key_func = lambda c: ( + -100000 + * (c.upvotes + 1) + / (pow(((ti - c.created_utc) / 1000), 1.23)), + DESC - c.created_utc + ) + elif sort == 'controversial': + key_func = lambda c: ( + (c.upvotes + 1) / (c.downvotes + 1) + + (c.downvotes + 1) / (c.upvotes + 1), + DESC - c.downvotes, + DESC - c.created_utc + ) + elif sort == 'top': + key_func = lambda c: (c.downvotes - c.upvotes, DESC - c.created_utc) + elif sort == 'bottom': + key_func = lambda c: (c.upvotes - c.downvotes, DESC - c.created_utc) + elif sort == 'old': + key_func = lambda c: c.created_utc + else: # default, or sort == 'new' + key_func = lambda c: DESC - c.created_utc + + key_func_pinned = lambda c: ( + (c.is_pinned is None, c.is_pinned == '', c.is_pinned), # sort None last + key_func(c)) + return sorted(comments, key=key_func_pinned) diff --git a/files/helpers/get.py b/files/helpers/get.py index c68679d3f..886b31677 100644 --- a/files/helpers/get.py +++ b/files/helpers/get.py @@ -1,3 +1,4 @@ +from collections import defaultdict from typing import Iterable, List, Optional, Type, Union from flask import g @@ -6,6 +7,7 @@ from sqlalchemy.orm import selectinload from files.classes import * from files.helpers.const import AUTOJANNY_ID +from files.helpers.contentsorting import sort_comment_results from files.helpers.strings import sql_ilike_clean @@ -276,6 +278,76 @@ def get_comments( return sorted(output, key=lambda x: cids.index(x.id)) +# TODO: There is probably some way to unify this with get_comments. However, in +# the interim, it's a hot path and benefits from having tailored code. 
+def get_comment_trees_eager( + top_comment_ids:Iterable[int], + sort:str="old", + v:Optional[User]=None) -> List[Comment]: + + if v: + votes = g.db.query(CommentVote).filter_by(user_id=v.id).subquery() + blocking = v.blocking.subquery() + blocked = v.blocked.subquery() + + query = g.db.query( + Comment, + votes.c.vote_type, + blocking.c.target_id, + blocked.c.target_id, + ).join( + votes, votes.c.comment_id==Comment.id, isouter=True + ).join( + blocking, + blocking.c.target_id == Comment.author_id, + isouter=True + ).join( + blocked, + blocked.c.user_id == Comment.author_id, + isouter=True + ) + else: + query = g.db.query(Comment) + + query = query.filter(Comment.top_comment_id.in_(top_comment_ids)) + query = query.options( + selectinload(Comment.author).options( + selectinload(User.badges), + selectinload(User.notes), + ), + selectinload(Comment.reports).options( + selectinload(CommentFlag.user), + ), + selectinload(Comment.awards), + ) + results = query.all() + + if v: + comments = [c[0] for c in results] + for i in range(len(comments)): + comments[i].voted = results[i][1] or 0 + comments[i].is_blocking = results[i][2] or 0 + comments[i].is_blocked = results[i][3] or 0 + else: + comments = results + + comments_map = {} + comments_map_parent = defaultdict(lambda: list()) + for c in comments: + c.replies2 = [] + comments_map[c.id] = c + comments_map_parent[c.parent_comment_id].append(c) + + for parent_id in comments_map_parent: + if parent_id is None: continue + + comments_map_parent[parent_id] = sort_comment_results( + comments_map_parent[parent_id], sort) + comments_map[parent_id].replies2 = comments_map_parent[parent_id] + + return [comments_map[tcid] for tcid in top_comment_ids] + + # TODO: This function was concisely inlined into posts.py in upstream. # Think it involved adding `tldextract` as a dependency. 
def get_domain(s:str) -> Optional[BannedDomain]: diff --git a/files/routes/posts.py b/files/routes/posts.py index c066383fa..de2c55300 100644 --- a/files/routes/posts.py +++ b/files/routes/posts.py @@ -231,11 +231,14 @@ def post_id(pid, anything=None, v=None, sub=None): g.db.add(pin) pinned.remove(pin) - post.replies = pinned + comments + top_comments = pinned + comments + top_comment_ids = [c.id for c in top_comments] + post.replies = get_comment_trees_eager(top_comment_ids, sort, v) post.views += 1 g.db.add(post) - g.db.commit() + g.db.flush() + if request.headers.get("Authorization"): return post.json else: if post.is_banned and not (v and (v.admin_level > 1 or post.author_id == v.id)): template = "submission_banned.html"