[themotte/rDrama#451] Extract the recomputation of descendant counts to its own function

This commit is contained in:
faul_sname 2023-01-13 19:12:16 -08:00
parent b75c93e600
commit e14d1b809c

View file

@ -4,8 +4,12 @@ from files.helpers.alerts import NOTIFY_USERS
from files.helpers.const import PUSHER_ID, PUSHER_KEY, SITE_ID, SITE_FULL from files.helpers.const import PUSHER_ID, PUSHER_KEY, SITE_ID, SITE_FULL
from files.helpers.assetcache import assetcache_path from files.helpers.assetcache import assetcache_path
from flask import g from flask import g
from sqlalchemy import select, update
from sqlalchemy.sql.expression import func, text, alias
from sqlalchemy.orm import aliased
from sys import stdout from sys import stdout
import gevent import gevent
import typing
if PUSHER_ID != 'blahblahblah': if PUSHER_ID != 'blahblahblah':
beams_client = PushNotifications(instance_id=PUSHER_ID, secret_key=PUSHER_KEY) beams_client = PushNotifications(instance_id=PUSHER_ID, secret_key=PUSHER_KEY)
@ -71,6 +75,93 @@ def update_ancestor_descendant_counts(comment, delta):
g.db.add(parent) g.db.add(parent)
update_ancestor_descendant_counts(parent, delta) update_ancestor_descendant_counts(parent, delta)
def bulk_recompute_descendant_counts(predicate: typing.Optional[typing.Callable] = None):
    """
    Recompute the descendant_count of a large number of comments.

    The descendant_count of a comment is the number of its direct child comments
    plus the sum of the descendant_count of those child comments.
    NOTE(review): the query built here counts every child row; it applies no
    visibility filter of its own. Confirm whether hidden comments should count.

    :param Callable predicate: If set, only update comments matching this predicate.
        It is called with the max-level query and with each UPDATE statement and
        must return the filtered object, so it should only use methods available
        on both (e.g. ``.where``).

    So for example

    >>> bulk_recompute_descendant_counts()

    will update all comments, while

    >>> bulk_recompute_descendant_counts(lambda q: q.where(Comment.parent_submission == 32))

    will only update the descendant counts of comments where parent_submission=32

    Internally, how this works is
    1. Find the maximum level of comments matching the predicate
    2. Starting from that level and going down, for each level update the descendant_counts

    Since the comments at the max level will always have 0 children, this means that we will perform
    `level` updates to update all comments.

    The update query looks like

        UPDATE comments
        SET descendant_count=descendant_counts.descendant_count
        FROM (
            SELECT
                parent_comments.id AS id,
                coalesce(sum(child_comments.descendant_count + 1), 0) AS descendant_count
            FROM comments AS parent_comments
            LEFT OUTER JOIN comments AS child_comments ON parent_comments.id = child_comments.parent_comment_id
            GROUP BY parent_comments.id
        ) AS descendant_counts
        WHERE comments.id = descendant_counts.id
        AND comments.level = :level_1
        <predicate goes here>
    """
    max_level_query = g.db.query(func.max(Comment.level))
    if predicate is not None:
        max_level_query = predicate(max_level_query)

    # max() over zero rows yields NULL/None; treat that as "nothing to update".
    max_level = max_level_query.scalar()
    if max_level is None:
        max_level = 0

    # The aggregate subquery does not depend on the level being processed,
    # so build it once and reuse it for every UPDATE below.
    parent_comments = alias(Comment, name="parent_comments")
    child_comments = alias(Comment, name="child_comments")
    parent_id = parent_comments.corresponding_column(Comment.id)
    child_parent_id = child_comments.corresponding_column(Comment.parent_comment_id)
    child_descendant_count = child_comments.corresponding_column(Comment.descendant_count)

    descendant_counts = aliased(
        Comment,
        (
            select(parent_comments)
            # Outer join so that childless parents still produce a row (count 0).
            .join(child_comments, parent_id == child_parent_id, isouter=True)
            .group_by(parent_id)
            .with_only_columns([
                parent_id,
                # Each child contributes itself (+1) plus its own descendants;
                # coalesce turns the NULL sum of a childless parent into 0.
                func.coalesce(
                    func.sum(child_descendant_count + text("1")),
                    text("0")
                ).label('descendant_count')
            ])
            .subquery(name='descendant_counts')
        ),
        adapt_on_names=True
    )

    # Deepest level first: a level's counts are derived from the already
    # recomputed counts of the level directly below it.
    for level in range(max_level, 0, -1):
        update_statement = (
            update(Comment)
            .values(descendant_count=descendant_counts.descendant_count)
            .execution_options(synchronize_session=False)
            .where(Comment.id == descendant_counts.id)
            .where(Comment.level == level)
        )
        if predicate is not None:
            update_statement = predicate(update_statement)
        g.db.execute(update_statement)

    # Commit once after all levels so the recompute lands as a single transaction.
    g.db.commit()
def comment_on_publish(comment:Comment): def comment_on_publish(comment:Comment):
""" """
Run when comment becomes visible: immediately for non-filtered comments, Run when comment becomes visible: immediately for non-filtered comments,
@ -78,7 +169,6 @@ def comment_on_publish(comment:Comment):
Should be used to update stateful counters, notifications, etc. that Should be used to update stateful counters, notifications, etc. that
reflect the comments users will actually see. reflect the comments users will actually see.
""" """
# TODO: Get this out of the routes and into a model eventually...
author = comment.author author = comment.author
# Shadowbanned users are invisible. This may lead to inconsistencies if # Shadowbanned users are invisible. This may lead to inconsistencies if