Optimize comment pagination in post_id, viewmore
Against a clean seeded DB, reduces `GET /post/1/` from 63 queries to 26 by removing redundancies and slow lazy-loaded queries during top comment pagination. Also applies eager loading to /viewmore/ with the expected reduction from 5*(N comments) queries to ~12/request. For testing locally, use a newly seeded DB to ensure Comment.descendant_count is populated. Ref: #485
This commit is contained in:
parent
946ee6291d
commit
007f0a3f02
4 changed files with 94 additions and 172 deletions
|
@ -4,6 +4,7 @@ import sqlalchemy
|
|||
from werkzeug.security import generate_password_hash
|
||||
from files.__main__ import app
|
||||
from files.classes import User, Submission, Comment, Vote, CommentVote
|
||||
from files.helpers.comments import bulk_recompute_descendant_counts
|
||||
from flask_sqlalchemy import SQLAlchemy
|
||||
|
||||
db = SQLAlchemy(app)
|
||||
|
@ -262,5 +263,8 @@ def seed_db():
|
|||
comment.realupvotes = comment.upvotes - comment.downvotes
|
||||
db.session.add(comment)
|
||||
|
||||
print("Computing comment descendant_count")
|
||||
bulk_recompute_descendant_counts(db=db.session)
|
||||
|
||||
db.session.commit()
|
||||
db.session.flush()
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
from pusher_push_notifications import PushNotifications
|
||||
from files.classes import Comment, Notification, Subscription
|
||||
from files.classes import Comment, Notification, Subscription, User
|
||||
from files.helpers.alerts import NOTIFY_USERS
|
||||
from files.helpers.const import PUSHER_ID, PUSHER_KEY, SITE_ID, SITE_FULL
|
||||
from files.helpers.assetcache import assetcache_path
|
||||
from flask import g
|
||||
from sqlalchemy import select, update
|
||||
from sqlalchemy.sql.expression import func, text, alias
|
||||
from sqlalchemy.orm import aliased
|
||||
from sqlalchemy.orm import Query, aliased
|
||||
from sys import stdout
|
||||
import gevent
|
||||
import typing
|
||||
from typing import Optional
|
||||
|
||||
if PUSHER_ID != 'blahblahblah':
|
||||
beams_client = PushNotifications(instance_id=PUSHER_ID, secret_key=PUSHER_KEY)
|
||||
|
@ -217,3 +217,16 @@ def comment_on_unpublish(comment:Comment):
|
|||
reflect the comments users will actually see.
|
||||
"""
|
||||
update_stateful_counters(comment, -1)
|
||||
|
||||
|
||||
def comment_filter_moderated(q: Query, v: Optional[User]) -> Query:
|
||||
if not (v and v.shadowbanned) and not (v and v.admin_level > 2):
|
||||
q = q.join(User, User.id == Comment.author_id) \
|
||||
.filter(User.shadowbanned == None)
|
||||
if not v or v.admin_level < 2:
|
||||
q = q.filter(
|
||||
((Comment.filter_state != 'filtered')
|
||||
& (Comment.filter_state != 'removed'))
|
||||
| (Comment.author_id == ((v and v.id) or 0))
|
||||
)
|
||||
return q
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
from collections import defaultdict
|
||||
from typing import Iterable, List, Optional, Type, Union
|
||||
from typing import Callable, Iterable, List, Optional, Type, Union
|
||||
|
||||
from flask import g
|
||||
from sqlalchemy import and_, or_, func
|
||||
from sqlalchemy.orm import selectinload
|
||||
from sqlalchemy.orm import Query, selectinload
|
||||
|
||||
from files.classes import *
|
||||
from files.helpers.const import AUTOJANNY_ID
|
||||
|
@ -277,7 +277,7 @@ def get_comments(
|
|||
# TODO: There is probably some way to unify this with get_comments. However, in
|
||||
# the interim, it's a hot path and benefits from having tailored code.
|
||||
def get_comment_trees_eager(
|
||||
top_comment_ids:Iterable[int],
|
||||
query_filter_callable: Callable[[Query], Query],
|
||||
sort: str="old",
|
||||
v: Optional[User]=None) -> List[Comment]:
|
||||
|
||||
|
@ -305,7 +305,7 @@ def get_comment_trees_eager(
|
|||
else:
|
||||
query = g.db.query(Comment)
|
||||
|
||||
query = query.filter(Comment.top_comment_id.in_(top_comment_ids))
|
||||
query = query_filter_callable(query)
|
||||
query = query.options(
|
||||
selectinload(Comment.author).options(
|
||||
selectinload(User.badges),
|
||||
|
@ -335,13 +335,12 @@ def get_comment_trees_eager(
|
|||
comments_map_parent[c.parent_comment_id].append(c)
|
||||
|
||||
for parent_id in comments_map_parent:
|
||||
if parent_id is None: continue
|
||||
|
||||
comments_map_parent[parent_id] = sort_comment_results(
|
||||
comments_map_parent[parent_id], sort)
|
||||
if parent_id in comments_map:
|
||||
comments_map[parent_id].replies2 = comments_map_parent[parent_id]
|
||||
|
||||
return [comments_map[tcid] for tcid in top_comment_ids]
|
||||
return comments, comments_map_parent
|
||||
|
||||
|
||||
# TODO: This function was concisely inlined into posts.py in upstream.
|
||||
|
|
|
@ -3,6 +3,7 @@ import gevent
|
|||
from files.helpers.wrappers import *
|
||||
from files.helpers.sanitize import *
|
||||
from files.helpers.alerts import *
|
||||
from files.helpers.comments import comment_filter_moderated
|
||||
from files.helpers.contentsorting import sort_objects
|
||||
from files.helpers.const import *
|
||||
from files.helpers.strings import sql_ilike_clean
|
||||
|
@ -17,6 +18,7 @@ from os import path
|
|||
import requests
|
||||
from shutil import copyfile
|
||||
from sys import stdout
|
||||
from sqlalchemy.orm import Query
|
||||
|
||||
|
||||
snappyquotes = [f':#{x}:' for x in marseys_const2]
|
||||
|
@ -139,93 +141,33 @@ def post_id(pid, anything=None, v=None, sub=None):
|
|||
|
||||
if post.club and not (v and (v.paid_dues or v.id == post.author_id)): abort(403)
|
||||
|
||||
if v:
|
||||
votes = g.db.query(CommentVote).filter_by(user_id=v.id).subquery()
|
||||
|
||||
blocking = v.blocking.subquery()
|
||||
|
||||
blocked = v.blocked.subquery()
|
||||
|
||||
comments = g.db.query(
|
||||
Comment,
|
||||
votes.c.vote_type,
|
||||
blocking.c.target_id,
|
||||
blocked.c.target_id,
|
||||
)
|
||||
|
||||
if not (v and v.shadowbanned) and not (v and v.admin_level > 2):
|
||||
comments = comments.join(User, User.id == Comment.author_id).filter(User.shadowbanned == None)
|
||||
|
||||
if v.admin_level < 2:
|
||||
filter_clause = ((Comment.filter_state != 'filtered') & (Comment.filter_state != 'removed')) | (Comment.author_id == v.id)
|
||||
comments = comments.filter(filter_clause)
|
||||
|
||||
comments=comments.filter(Comment.parent_submission == post.id).join(
|
||||
votes,
|
||||
votes.c.comment_id == Comment.id,
|
||||
isouter=True
|
||||
).join(
|
||||
blocking,
|
||||
blocking.c.target_id == Comment.author_id,
|
||||
isouter=True
|
||||
).join(
|
||||
blocked,
|
||||
blocked.c.user_id == Comment.author_id,
|
||||
isouter=True
|
||||
)
|
||||
|
||||
output = []
|
||||
for c in comments.all():
|
||||
comment = c[0]
|
||||
comment.voted = c[1] or 0
|
||||
comment.is_blocking = c[2] or 0
|
||||
comment.is_blocked = c[3] or 0
|
||||
output.append(comment)
|
||||
|
||||
pinned = [c[0] for c in comments.filter(Comment.is_pinned != None).all()]
|
||||
|
||||
comments = comments.filter(Comment.level == 1, Comment.is_pinned == None)
|
||||
comments = sort_objects(comments, sort, Comment)
|
||||
comments = [c[0] for c in comments.all()]
|
||||
else:
|
||||
pinned = g.db.query(Comment).filter(Comment.parent_submission == post.id, Comment.is_pinned != None).all()
|
||||
|
||||
comments = g.db.query(Comment).join(User, User.id == Comment.author_id).filter(User.shadowbanned == None, Comment.parent_submission == post.id, Comment.level == 1, Comment.is_pinned == None)
|
||||
comments = sort_objects(comments, sort, Comment)
|
||||
|
||||
filter_clause = (Comment.filter_state != 'filtered') & (Comment.filter_state != 'removed')
|
||||
comments = comments.filter(filter_clause)
|
||||
|
||||
comments = comments.all()
|
||||
|
||||
offset = 0
|
||||
ids = set()
|
||||
|
||||
limit = app.config['RESULTS_PER_PAGE_COMMENTS']
|
||||
offset = 0
|
||||
|
||||
if post.comment_count > limit and not request.headers.get("Authorization") and not request.values.get("all"):
|
||||
comments2 = []
|
||||
count = 0
|
||||
for comment in comments:
|
||||
comments2.append(comment)
|
||||
ids.add(comment.id)
|
||||
count += g.db.query(Comment.id).filter_by(parent_submission=post.id, top_comment_id=comment.id).count() + 1
|
||||
if count > limit: break
|
||||
top_comments = g.db.query(Comment.id, Comment.descendant_count).filter(
|
||||
Comment.parent_submission == post.id,
|
||||
Comment.level == 1,
|
||||
).order_by(Comment.is_pinned.desc().nulls_last())
|
||||
top_comments = comment_filter_moderated(top_comments, v)
|
||||
top_comments = sort_objects(top_comments, sort, Comment)
|
||||
|
||||
if len(comments) == len(comments2): offset = 0
|
||||
else: offset = 1
|
||||
comments = comments2
|
||||
pg_top_comment_ids = []
|
||||
pg_comment_qty = 0
|
||||
for tc_id, tc_children_qty in top_comments.all():
|
||||
if pg_comment_qty >= limit:
|
||||
offset = 1
|
||||
break
|
||||
pg_comment_qty += tc_children_qty + 1
|
||||
pg_top_comment_ids.append(tc_id)
|
||||
|
||||
for pin in pinned:
|
||||
if pin.is_pinned_utc and int(time.time()) > pin.is_pinned_utc:
|
||||
pin.is_pinned = None
|
||||
pin.is_pinned_utc = None
|
||||
g.db.add(pin)
|
||||
pinned.remove(pin)
|
||||
def comment_tree_filter(q: Query) -> Query:
|
||||
q = q.filter(Comment.top_comment_id.in_(pg_top_comment_ids))
|
||||
q = comment_filter_moderated(q, v)
|
||||
return q
|
||||
|
||||
top_comments = pinned + comments
|
||||
top_comment_ids = [c.id for c in top_comments]
|
||||
post.replies = get_comment_trees_eager(top_comment_ids, sort, v)
|
||||
comments, comment_tree = get_comment_trees_eager(comment_tree_filter, sort, v)
|
||||
post.replies = comment_tree[None] # parent=None -> top-level comments
|
||||
ids = {c.id for c in post.replies}
|
||||
|
||||
post.views += 1
|
||||
g.db.expire_on_commit = False
|
||||
|
@ -246,88 +188,52 @@ def viewmore(v, pid, sort, offset):
|
|||
post = get_post(pid, v=v)
|
||||
if post.club and not (v and (v.paid_dues or v.id == post.author_id)): abort(403)
|
||||
|
||||
offset = int(offset)
|
||||
offset_prev = int(offset)
|
||||
try: ids = set(int(x) for x in request.values.get("ids").split(','))
|
||||
except: abort(400)
|
||||
|
||||
if sort == "new":
|
||||
newest = g.db.query(Comment).filter(Comment.id.in_(ids)).order_by(Comment.created_utc.desc()).first()
|
||||
|
||||
if v:
|
||||
votes = g.db.query(CommentVote).filter_by(user_id=v.id).subquery()
|
||||
|
||||
blocking = v.blocking.subquery()
|
||||
|
||||
blocked = v.blocked.subquery()
|
||||
|
||||
comments = g.db.query(
|
||||
Comment,
|
||||
votes.c.vote_type,
|
||||
blocking.c.target_id,
|
||||
blocked.c.target_id,
|
||||
).filter(Comment.parent_submission == pid, Comment.is_pinned == None, Comment.id.notin_(ids))
|
||||
|
||||
if not (v and v.shadowbanned) and not (v and v.admin_level > 2):
|
||||
comments = comments.join(User, User.id == Comment.author_id).filter(User.shadowbanned == None)
|
||||
|
||||
if not v or v.admin_level < 2:
|
||||
filter_clause = (Comment.filter_state != 'filtered') & (Comment.filter_state != 'removed')
|
||||
if v:
|
||||
filter_clause = filter_clause | (Comment.author_id == v.id)
|
||||
comments = comments.filter(filter_clause)
|
||||
|
||||
comments=comments.join(
|
||||
votes,
|
||||
votes.c.comment_id == Comment.id,
|
||||
isouter=True
|
||||
).join(
|
||||
blocking,
|
||||
blocking.c.target_id == Comment.author_id,
|
||||
isouter=True
|
||||
).join(
|
||||
blocked,
|
||||
blocked.c.user_id == Comment.author_id,
|
||||
isouter=True
|
||||
)
|
||||
|
||||
output = []
|
||||
for c in comments.all():
|
||||
comment = c[0]
|
||||
comment.voted = c[1] or 0
|
||||
comment.is_blocking = c[2] or 0
|
||||
comment.is_blocked = c[3] or 0
|
||||
output.append(comment)
|
||||
|
||||
comments = comments.filter(Comment.level == 1)
|
||||
|
||||
if sort == "new":
|
||||
comments = comments.filter(Comment.created_utc < newest.created_utc)
|
||||
comments = sort_objects(comments, sort, Comment)
|
||||
|
||||
comments = [c[0] for c in comments.all()]
|
||||
else:
|
||||
comments = g.db.query(Comment).join(User, User.id == Comment.author_id).filter(User.shadowbanned == None, Comment.parent_submission == pid, Comment.level == 1, Comment.is_pinned == None, Comment.id.notin_(ids))
|
||||
|
||||
if sort == "new":
|
||||
comments = comments.filter(Comment.created_utc < newest.created_utc)
|
||||
comments = sort_objects(comments, sort, Comment)
|
||||
|
||||
comments = comments.all()
|
||||
comments = comments[offset:]
|
||||
|
||||
limit = app.config['RESULTS_PER_PAGE_COMMENTS']
|
||||
comments2 = []
|
||||
count = 0
|
||||
offset = 0
|
||||
|
||||
for comment in comments:
|
||||
comments2.append(comment)
|
||||
ids.add(comment.id)
|
||||
count += g.db.query(Comment.id).filter_by(parent_submission=post.id, top_comment_id=comment.id).count() + 1
|
||||
if count > limit: break
|
||||
# TODO: Unify with common post_id logic
|
||||
top_comments = g.db.query(Comment.id, Comment.descendant_count).filter(
|
||||
Comment.parent_submission == post.id,
|
||||
Comment.level == 1,
|
||||
Comment.id.notin_(ids),
|
||||
Comment.is_pinned == None,
|
||||
).order_by(Comment.is_pinned.desc().nulls_last())
|
||||
|
||||
if len(comments) == len(comments2): offset = 0
|
||||
else: offset += 1
|
||||
comments = comments2
|
||||
if sort == "new":
|
||||
newest_created_utc = g.db.query(Comment.created_utc).filter(
|
||||
Comment.id.in_(ids),
|
||||
Comment.is_pinned == None,
|
||||
).order_by(Comment.created_utc.desc()).limit(1).scalar()
|
||||
|
||||
# Needs to be <=, not just <, to support seed_db data which has many identical
|
||||
# created_utc values. Shouldn't cause duplication in real data because of the
|
||||
# `NOT IN :ids` in top_comments.
|
||||
top_comments = top_comments.filter(Comment.created_utc <= newest_created_utc)
|
||||
|
||||
top_comments = comment_filter_moderated(top_comments, v)
|
||||
top_comments = sort_objects(top_comments, sort, Comment)
|
||||
|
||||
pg_top_comment_ids = []
|
||||
pg_comment_qty = 0
|
||||
for tc_id, tc_children_qty in top_comments.all():
|
||||
if pg_comment_qty >= limit:
|
||||
offset = offset_prev + 1
|
||||
break
|
||||
pg_comment_qty += tc_children_qty + 1
|
||||
pg_top_comment_ids.append(tc_id)
|
||||
|
||||
def comment_tree_filter(q: Query) -> Query:
|
||||
q = q.filter(Comment.top_comment_id.in_(pg_top_comment_ids))
|
||||
q = comment_filter_moderated(q, v)
|
||||
return q
|
||||
|
||||
_, comment_tree = get_comment_trees_eager(comment_tree_filter, sort, v)
|
||||
comments = comment_tree[None] # parent=None -> top-level comments
|
||||
ids |= {c.id for c in comments}
|
||||
|
||||
return render_template("comments.html", v=v, comments=comments, p=post, ids=list(ids), render_replies=True, pid=pid, sort=sort, offset=offset, ajax=True)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue