Eager load comments for post rendering.

Before (comments lazily loaded):
GET /post/1/clever-unique-post-title-number-0
|----------|--------|--------|--------|--------|--------|------------|
| Database | SELECT | INSERT | UPDATE | DELETE | Totals | Duplicates |
|----------|--------|--------|--------|--------|--------|------------|
| default  |  942   |   0    |   1    |   0    |  943   |    921     |
|----------|--------|--------|--------|--------|--------|------------|
Total queries: 943 in 0.377s # request time in browser 17249ms

After (comments eagerly loaded):
GET /post/1/clever-unique-post-title-number-0
|----------|--------|--------|--------|--------|--------|------------|
| Database | SELECT | INSERT | UPDATE | DELETE | Totals | Duplicates |
|----------|--------|--------|--------|--------|--------|------------|
| default  |   58   |   0    |   1    |   0    |   59   |     35     |
|----------|--------|--------|--------|--------|--------|------------|
Total queries: 59 in 0.0423s # request time in browser 544ms

Also fixes seed_db not populating top_comment_id on generated
comments. To test locally with seed_db test data, you need to
reseed the database.
This commit is contained in:
TLSM 2022-11-28 17:47:54 -05:00
parent 5aaef144cf
commit afe209d5d8
No known key found for this signature in database
GPG key ID: E745A82778055C7E
5 changed files with 125 additions and 8 deletions

View file

@ -58,8 +58,13 @@ class Comment(Base):
senttouser = relationship("User", primaryjoin="User.id==Comment.sentto", viewonly=True)
parent_comment = relationship("Comment", remote_side=[id], viewonly=True)
child_comments = relationship("Comment", lazy="dynamic", remote_side=[parent_comment_id], viewonly=True)
awards = relationship("AwardRelationship", viewonly=True)
reports = relationship("CommentFlag", viewonly=True)
awards = relationship("AwardRelationship",
primaryjoin="AwardRelationship.comment_id == Comment.id",
viewonly=True)
reports = relationship("CommentFlag",
primaryjoin="CommentFlag.comment_id == Comment.id",
order_by="CommentFlag.created_utc",
viewonly=True)
notes = relationship("UserNote", back_populates="comment")
def __init__(self, *args, **kwargs):
@ -70,7 +75,6 @@ class Comment(Base):
super().__init__(*args, **kwargs)
def __repr__(self):
return f"<Comment(id={self.id})>"
@property
@ -87,7 +91,7 @@ class Comment(Base):
@lazy
def flags(self, v):
flags = g.db.query(CommentFlag).filter_by(comment_id=self.id).order_by(CommentFlag.created_utc).all()
flags = self.reports
if not (v and (v.shadowbanned or v.admin_level > 2)):
for flag in flags:
if flag.user.shadowbanned:

View file

@ -130,8 +130,12 @@ def seed_db():
db.session.add(comment)
comments.append(comment)
db.session.commit()
db.session.flush()
for c in comments:
c.top_comment_id = c.id
db.session.add(c)
db.session.commit()
print(f"Creating {NUM_REPLY_COMMENTS} reply comments")
for i in range(NUM_REPLY_COMMENTS):
@ -143,6 +147,7 @@ def seed_db():
author_id=user.id,
parent_submission=str(parent.post.id),
parent_comment_id=parent.id,
top_comment_id=parent.top_comment_id,
level=parent.level + 1,
over_18=False,
is_bot=False,
@ -155,7 +160,6 @@ def seed_db():
comments.append(comment)
db.session.commit()
db.session.flush()
print("Updating comment counts for all posts")
for post in posts:

View file

@ -51,3 +51,37 @@ def sort_objects(objects, sort, cls):
return objects.order_by(cls.created_utc)
else: # default, or sort == 'new'
return objects.order_by(cls.created_utc.desc())
# Presently designed around files.helpers.get.get_comment_trees_eager
# Behavior should parallel that of sort_objects above. TODO: Unify someday?
def sort_comment_results(comments, sort):
    """Return ``comments`` as a new list ordered by ``sort``, pinned first.

    Works on comment objects that are already in memory; the orderings are
    intended to parallel the query-level ``sort_objects`` helper.

    Args:
        comments: Iterable of objects exposing ``upvotes``, ``downvotes``,
            ``created_utc`` and ``is_pinned``.
        sort: ``'hot'``, ``'controversial'``, ``'top'``, ``'bottom'`` or
            ``'old'``. Any other value (including ``'new'``) sorts newest
            first.

    Returns:
        A new list. Comments with a non-empty ``is_pinned`` come first
        (ordered by that value), then those with an empty string, then those
        with ``None``; within each group the requested sort applies.
    """
    # Descending order is expressed by negating the value. Python integers
    # do not overflow, so no "largest timestamp" constant is needed.
    if sort == 'hot':
        # Taken once so every comment is scored against the same instant.
        ti = int(time.time()) + 3600

        def key_func(c):
            # Clamp the age to at least one second: an age of zero would
            # divide by zero, and a negative age (timestamp ahead of ``ti``)
            # makes pow() return a complex number, which cannot be ordered.
            age = max(ti - c.created_utc, 1)
            return (
                -100000 * (c.upvotes + 1) / pow(age / 1000, 1.23),
                -c.created_utc,
            )
    elif sort == 'controversial':
        def key_func(c):
            ups = c.upvotes + 1
            downs = c.downvotes + 1
            # ups/downs + downs/ups is smallest (2) when votes are evenly
            # split, so the most contested comments sort first.
            return (ups / downs + downs / ups, -c.downvotes, -c.created_utc)
    elif sort == 'top':
        def key_func(c):
            return (c.downvotes - c.upvotes, -c.created_utc)
    elif sort == 'bottom':
        def key_func(c):
            return (c.upvotes - c.downvotes, -c.created_utc)
    elif sort == 'old':
        def key_func(c):
            return c.created_utc
    else:  # default, or sort == 'new'
        def key_func(c):
            return -c.created_utc

    def key_func_pinned(c):
        pin = c.is_pinned
        # The two leading booleans push None and '' behind real pin values,
        # and guarantee that ``pin`` itself is only ever compared against a
        # value of the same kind (None with None, str with str).
        return ((pin is None, pin == '', pin), key_func(c))

    return sorted(comments, key=key_func_pinned)

View file

@ -1,3 +1,4 @@
from collections import defaultdict
from typing import Iterable, List, Optional, Type, Union
from flask import g
@ -6,6 +7,7 @@ from sqlalchemy.orm import selectinload
from files.classes import *
from files.helpers.const import AUTOJANNY_ID
from files.helpers.contentsorting import sort_comment_results
from files.helpers.strings import sql_ilike_clean
@ -276,6 +278,76 @@ def get_comments(
return sorted(output, key=lambda x: cids.index(x.id))
# TODO: There is probably some way to unify this with get_comments. However, in
# the interim, it's a hot path and benefits from having tailored code.
def get_comment_trees_eager(
        top_comment_ids:Iterable[int],
        sort:str="old",
        v:Optional[User]=None) -> List[Comment]:
    """Load whole comment trees in bulk and wire up their replies.

    Fetches every comment whose ``top_comment_id`` is one of
    ``top_comment_ids`` in a single query, with select-in loading of each
    comment's author (plus the author's badges and notes), reports (plus the
    reporting user) and awards.

    Args:
        top_comment_ids: Ids of the root comments. May be any iterable,
            including a one-shot iterator.
        sort: Ordering applied to the replies under each parent; passed to
            ``sort_comment_results``. The root comments themselves are NOT
            re-sorted.
        v: The viewing user, if any. When given, each comment also gets
            ``voted``, ``is_blocking`` and ``is_blocked`` set from the
            viewer's votes and block lists (0 when there is no match).

    Returns:
        The root comments, in the same order as ``top_comment_ids``. Every
        loaded comment has ``replies2`` set to its sorted list of direct
        replies (empty list for leaves).

    Raises:
        KeyError: if a requested root id, or the parent of a loaded reply,
            is not among the rows returned by the query.
    """
    # The ids are consumed twice (IN filter and final ordering), so a
    # generator argument must be materialised or the second pass sees nothing.
    top_comment_ids = list(top_comment_ids)
    if not top_comment_ids:
        # Nothing requested: skip the query rather than emit an empty IN ().
        return []

    if v:
        votes = g.db.query(CommentVote).filter_by(user_id=v.id).subquery()
        blocking = v.blocking.subquery()
        blocked = v.blocked.subquery()
        query = g.db.query(
            Comment,
            votes.c.vote_type,
            blocking.c.target_id,
            blocked.c.target_id,
        ).join(
            votes, votes.c.comment_id==Comment.id, isouter=True
        ).join(
            blocking,
            blocking.c.target_id == Comment.author_id,
            isouter=True
        ).join(
            blocked,
            blocked.c.user_id == Comment.author_id,
            isouter=True
        )
    else:
        query = g.db.query(Comment)

    query = query.filter(Comment.top_comment_id.in_(top_comment_ids))
    query = query.options(
        selectinload(Comment.author).options(
            selectinload(User.badges),
            selectinload(User.notes),
        ),
        selectinload(Comment.reports).options(
            selectinload(CommentFlag.user),
        ),
        selectinload(Comment.awards),
    )
    results = query.all()

    if v:
        # Each row is (Comment, vote_type, blocking target, blocked target);
        # the outer joins leave None where the viewer has no matching row.
        comments = []
        for comment, vote_type, blocking_id, blocked_id in results:
            comment.voted = vote_type or 0
            comment.is_blocking = blocking_id or 0
            comment.is_blocked = blocked_id or 0
            comments.append(comment)
    else:
        comments = results

    comments_map = {}
    children_by_parent = defaultdict(list)
    for c in comments:
        c.replies2 = []
        comments_map[c.id] = c
        children_by_parent[c.parent_comment_id].append(c)

    for parent_id, children in children_by_parent.items():
        if parent_id is None:
            # Root comments have no parent to attach to.
            continue
        comments_map[parent_id].replies2 = sort_comment_results(children, sort)

    return [comments_map[tcid] for tcid in top_comment_ids]
# TODO: This function was concisely inlined into posts.py in upstream.
# Think it involved adding `tldextract` as a dependency.
def get_domain(s:str) -> Optional[BannedDomain]:

View file

@ -231,11 +231,14 @@ def post_id(pid, anything=None, v=None, sub=None):
g.db.add(pin)
pinned.remove(pin)
post.replies = pinned + comments
top_comments = pinned + comments
top_comment_ids = [c.id for c in top_comments]
post.replies = get_comment_trees_eager(top_comment_ids, sort, v)
post.views += 1
g.db.add(post)
g.db.commit()
g.db.flush()
if request.headers.get("Authorization"): return post.json
else:
if post.is_banned and not (v and (v.admin_level > 1 or post.author_id == v.id)): template = "submission_banned.html"