Optimize comment pagination in post_id, viewmore

Against a clean seeded DB, reduces `GET /post/1/` from 63 queries to
26 by removing redundancies and slow lazy-loaded queries during
top comment pagination.

Also applies eager loading to /viewmore/ with the expected reduction
from 5*(N comments) queries to ~12/request.

For testing locally, use a newly seeded DB to ensure
Comment.descendant_count is populated.

Ref: #485
This commit is contained in:
TLSM 2023-02-07 14:05:16 -05:00 committed by Ben Rog-Wilhelm
parent 946ee6291d
commit 007f0a3f02
4 changed files with 94 additions and 172 deletions

View file

@ -4,6 +4,7 @@ import sqlalchemy
from werkzeug.security import generate_password_hash
from files.__main__ import app
from files.classes import User, Submission, Comment, Vote, CommentVote
from files.helpers.comments import bulk_recompute_descendant_counts
from flask_sqlalchemy import SQLAlchemy
db = SQLAlchemy(app)
@ -262,5 +263,8 @@ def seed_db():
comment.realupvotes = comment.upvotes - comment.downvotes
db.session.add(comment)
print("Computing comment descendant_count")
bulk_recompute_descendant_counts(db=db.session)
db.session.commit()
db.session.flush()

View file

@ -1,15 +1,15 @@
from pusher_push_notifications import PushNotifications
from files.classes import Comment, Notification, Subscription
from files.classes import Comment, Notification, Subscription, User
from files.helpers.alerts import NOTIFY_USERS
from files.helpers.const import PUSHER_ID, PUSHER_KEY, SITE_ID, SITE_FULL
from files.helpers.assetcache import assetcache_path
from flask import g
from sqlalchemy import select, update
from sqlalchemy.sql.expression import func, text, alias
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Query, aliased
from sys import stdout
import gevent
import typing
from typing import Optional
if PUSHER_ID != 'blahblahblah':
beams_client = PushNotifications(instance_id=PUSHER_ID, secret_key=PUSHER_KEY)
@ -217,3 +217,16 @@ def comment_on_unpublish(comment:Comment):
reflect the comments users will actually see.
"""
update_stateful_counters(comment, -1)
def comment_filter_moderated(q: Query, v: Optional[User]) -> Query:
if not (v and v.shadowbanned) and not (v and v.admin_level > 2):
q = q.join(User, User.id == Comment.author_id) \
.filter(User.shadowbanned == None)
if not v or v.admin_level < 2:
q = q.filter(
((Comment.filter_state != 'filtered')
& (Comment.filter_state != 'removed'))
| (Comment.author_id == ((v and v.id) or 0))
)
return q

View file

@ -1,9 +1,9 @@
from collections import defaultdict
from typing import Iterable, List, Optional, Type, Union
from typing import Callable, Iterable, List, Optional, Type, Union
from flask import g
from sqlalchemy import and_, or_, func
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Query, selectinload
from files.classes import *
from files.helpers.const import AUTOJANNY_ID
@ -277,7 +277,7 @@ def get_comments(
# TODO: There is probably some way to unify this with get_comments. However, in
# the interim, it's a hot path and benefits from having tailored code.
def get_comment_trees_eager(
top_comment_ids:Iterable[int],
query_filter_callable: Callable[[Query], Query],
sort: str="old",
v: Optional[User]=None) -> List[Comment]:
@ -305,7 +305,7 @@ def get_comment_trees_eager(
else:
query = g.db.query(Comment)
query = query.filter(Comment.top_comment_id.in_(top_comment_ids))
query = query_filter_callable(query)
query = query.options(
selectinload(Comment.author).options(
selectinload(User.badges),
@ -335,13 +335,12 @@ def get_comment_trees_eager(
comments_map_parent[c.parent_comment_id].append(c)
for parent_id in comments_map_parent:
if parent_id is None: continue
comments_map_parent[parent_id] = sort_comment_results(
comments_map_parent[parent_id], sort)
if parent_id in comments_map:
comments_map[parent_id].replies2 = comments_map_parent[parent_id]
return [comments_map[tcid] for tcid in top_comment_ids]
return comments, comments_map_parent
# TODO: This function was concisely inlined into posts.py in upstream.

View file

@ -3,6 +3,7 @@ import gevent
from files.helpers.wrappers import *
from files.helpers.sanitize import *
from files.helpers.alerts import *
from files.helpers.comments import comment_filter_moderated
from files.helpers.contentsorting import sort_objects
from files.helpers.const import *
from files.helpers.strings import sql_ilike_clean
@ -17,6 +18,7 @@ from os import path
import requests
from shutil import copyfile
from sys import stdout
from sqlalchemy.orm import Query
snappyquotes = [f':#{x}:' for x in marseys_const2]
@ -139,93 +141,33 @@ def post_id(pid, anything=None, v=None, sub=None):
if post.club and not (v and (v.paid_dues or v.id == post.author_id)): abort(403)
if v:
votes = g.db.query(CommentVote).filter_by(user_id=v.id).subquery()
blocking = v.blocking.subquery()
blocked = v.blocked.subquery()
comments = g.db.query(
Comment,
votes.c.vote_type,
blocking.c.target_id,
blocked.c.target_id,
)
if not (v and v.shadowbanned) and not (v and v.admin_level > 2):
comments = comments.join(User, User.id == Comment.author_id).filter(User.shadowbanned == None)
if v.admin_level < 2:
filter_clause = ((Comment.filter_state != 'filtered') & (Comment.filter_state != 'removed')) | (Comment.author_id == v.id)
comments = comments.filter(filter_clause)
comments=comments.filter(Comment.parent_submission == post.id).join(
votes,
votes.c.comment_id == Comment.id,
isouter=True
).join(
blocking,
blocking.c.target_id == Comment.author_id,
isouter=True
).join(
blocked,
blocked.c.user_id == Comment.author_id,
isouter=True
)
output = []
for c in comments.all():
comment = c[0]
comment.voted = c[1] or 0
comment.is_blocking = c[2] or 0
comment.is_blocked = c[3] or 0
output.append(comment)
pinned = [c[0] for c in comments.filter(Comment.is_pinned != None).all()]
comments = comments.filter(Comment.level == 1, Comment.is_pinned == None)
comments = sort_objects(comments, sort, Comment)
comments = [c[0] for c in comments.all()]
else:
pinned = g.db.query(Comment).filter(Comment.parent_submission == post.id, Comment.is_pinned != None).all()
comments = g.db.query(Comment).join(User, User.id == Comment.author_id).filter(User.shadowbanned == None, Comment.parent_submission == post.id, Comment.level == 1, Comment.is_pinned == None)
comments = sort_objects(comments, sort, Comment)
filter_clause = (Comment.filter_state != 'filtered') & (Comment.filter_state != 'removed')
comments = comments.filter(filter_clause)
comments = comments.all()
offset = 0
ids = set()
limit = app.config['RESULTS_PER_PAGE_COMMENTS']
offset = 0
if post.comment_count > limit and not request.headers.get("Authorization") and not request.values.get("all"):
comments2 = []
count = 0
for comment in comments:
comments2.append(comment)
ids.add(comment.id)
count += g.db.query(Comment.id).filter_by(parent_submission=post.id, top_comment_id=comment.id).count() + 1
if count > limit: break
top_comments = g.db.query(Comment.id, Comment.descendant_count).filter(
Comment.parent_submission == post.id,
Comment.level == 1,
).order_by(Comment.is_pinned.desc().nulls_last())
top_comments = comment_filter_moderated(top_comments, v)
top_comments = sort_objects(top_comments, sort, Comment)
if len(comments) == len(comments2): offset = 0
else: offset = 1
comments = comments2
pg_top_comment_ids = []
pg_comment_qty = 0
for tc_id, tc_children_qty in top_comments.all():
if pg_comment_qty >= limit:
offset = 1
break
pg_comment_qty += tc_children_qty + 1
pg_top_comment_ids.append(tc_id)
for pin in pinned:
if pin.is_pinned_utc and int(time.time()) > pin.is_pinned_utc:
pin.is_pinned = None
pin.is_pinned_utc = None
g.db.add(pin)
pinned.remove(pin)
def comment_tree_filter(q: Query) -> Query:
q = q.filter(Comment.top_comment_id.in_(pg_top_comment_ids))
q = comment_filter_moderated(q, v)
return q
top_comments = pinned + comments
top_comment_ids = [c.id for c in top_comments]
post.replies = get_comment_trees_eager(top_comment_ids, sort, v)
comments, comment_tree = get_comment_trees_eager(comment_tree_filter, sort, v)
post.replies = comment_tree[None] # parent=None -> top-level comments
ids = {c.id for c in post.replies}
post.views += 1
g.db.expire_on_commit = False
@ -246,88 +188,52 @@ def viewmore(v, pid, sort, offset):
post = get_post(pid, v=v)
if post.club and not (v and (v.paid_dues or v.id == post.author_id)): abort(403)
offset = int(offset)
offset_prev = int(offset)
try: ids = set(int(x) for x in request.values.get("ids").split(','))
except: abort(400)
if sort == "new":
newest = g.db.query(Comment).filter(Comment.id.in_(ids)).order_by(Comment.created_utc.desc()).first()
if v:
votes = g.db.query(CommentVote).filter_by(user_id=v.id).subquery()
blocking = v.blocking.subquery()
blocked = v.blocked.subquery()
comments = g.db.query(
Comment,
votes.c.vote_type,
blocking.c.target_id,
blocked.c.target_id,
).filter(Comment.parent_submission == pid, Comment.is_pinned == None, Comment.id.notin_(ids))
if not (v and v.shadowbanned) and not (v and v.admin_level > 2):
comments = comments.join(User, User.id == Comment.author_id).filter(User.shadowbanned == None)
if not v or v.admin_level < 2:
filter_clause = (Comment.filter_state != 'filtered') & (Comment.filter_state != 'removed')
if v:
filter_clause = filter_clause | (Comment.author_id == v.id)
comments = comments.filter(filter_clause)
comments=comments.join(
votes,
votes.c.comment_id == Comment.id,
isouter=True
).join(
blocking,
blocking.c.target_id == Comment.author_id,
isouter=True
).join(
blocked,
blocked.c.user_id == Comment.author_id,
isouter=True
)
output = []
for c in comments.all():
comment = c[0]
comment.voted = c[1] or 0
comment.is_blocking = c[2] or 0
comment.is_blocked = c[3] or 0
output.append(comment)
comments = comments.filter(Comment.level == 1)
if sort == "new":
comments = comments.filter(Comment.created_utc < newest.created_utc)
comments = sort_objects(comments, sort, Comment)
comments = [c[0] for c in comments.all()]
else:
comments = g.db.query(Comment).join(User, User.id == Comment.author_id).filter(User.shadowbanned == None, Comment.parent_submission == pid, Comment.level == 1, Comment.is_pinned == None, Comment.id.notin_(ids))
if sort == "new":
comments = comments.filter(Comment.created_utc < newest.created_utc)
comments = sort_objects(comments, sort, Comment)
comments = comments.all()
comments = comments[offset:]
limit = app.config['RESULTS_PER_PAGE_COMMENTS']
comments2 = []
count = 0
offset = 0
for comment in comments:
comments2.append(comment)
ids.add(comment.id)
count += g.db.query(Comment.id).filter_by(parent_submission=post.id, top_comment_id=comment.id).count() + 1
if count > limit: break
# TODO: Unify with common post_id logic
top_comments = g.db.query(Comment.id, Comment.descendant_count).filter(
Comment.parent_submission == post.id,
Comment.level == 1,
Comment.id.notin_(ids),
Comment.is_pinned == None,
).order_by(Comment.is_pinned.desc().nulls_last())
if len(comments) == len(comments2): offset = 0
else: offset += 1
comments = comments2
if sort == "new":
newest_created_utc = g.db.query(Comment.created_utc).filter(
Comment.id.in_(ids),
Comment.is_pinned == None,
).order_by(Comment.created_utc.desc()).limit(1).scalar()
# Needs to be <=, not just <, to support seed_db data which has many identical
# created_utc values. Shouldn't cause duplication in real data because of the
# `NOT IN :ids` in top_comments.
top_comments = top_comments.filter(Comment.created_utc <= newest_created_utc)
top_comments = comment_filter_moderated(top_comments, v)
top_comments = sort_objects(top_comments, sort, Comment)
pg_top_comment_ids = []
pg_comment_qty = 0
for tc_id, tc_children_qty in top_comments.all():
if pg_comment_qty >= limit:
offset = offset_prev + 1
break
pg_comment_qty += tc_children_qty + 1
pg_top_comment_ids.append(tc_id)
def comment_tree_filter(q: Query) -> Query:
q = q.filter(Comment.top_comment_id.in_(pg_top_comment_ids))
q = comment_filter_moderated(q, v)
return q
_, comment_tree = get_comment_trees_eager(comment_tree_filter, sort, v)
comments = comment_tree[None] # parent=None -> top-level comments
ids |= {c.id for c in comments}
return render_template("comments.html", v=v, comments=comments, p=post, ids=list(ids), render_replies=True, pid=pid, sort=sort, offset=offset, ajax=True)