rDrama/files/commands/volunteer_janitor_recalc.py

373 lines
16 KiB
Python

import pprint
import sqlalchemy
from sqlalchemy.orm import Session
from alive_progress import alive_it
from collections import defaultdict
from files.classes import User, Comment, UserNote, UserTag
from files.classes.cron.tasks import TaskRunContext
from files.classes.volunteer_janitor import VolunteerJanitorRecord, VolunteerJanitorResult
from files.helpers.volunteer_janitor import evaluate_badness_of, userweight_from_user_accuracy, calculate_final_comment_badness, update_comment_badness
from files.helpers.math import saturate, remap, lerp
import logging
import random
from tabulate import tabulate
from files.__main__ import app, db_session
CONFIG_modhat_weight = 4
CONFIG_admin_volunteer_weight = 4
CONFIG_new_user_damping = 2
CONFIG_default_user_accuracy = 0.2
CONFIG_user_correctness_lerp = 0.2
def _compile_records(db):
vrecords = db.query(VolunteerJanitorRecord).order_by(VolunteerJanitorRecord.recorded_utc).all()
# get the info we need for all mentioned posts
reported_comment_ids = {record.comment_id for record in vrecords}
reported_comments = db.query(Comment).where(Comment.id.in_(reported_comment_ids)).options(sqlalchemy.orm.load_only('id', 'deleted_utc'))
reported_comments = {comment.id: comment for comment in reported_comments}
# get our compiled data
records_compiled = {}
for record in vrecords:
# we're just going to ignore deleted comments entirely
if reported_comments[record.comment_id].deleted_utc != 0:
continue
# unique identifier for user/comment report pair
uic = (record.user_id, record.comment_id)
if record.result == VolunteerJanitorResult.Pending:
if uic in records_compiled:
# something wonky happened, we went back to pending after getting a result?
records_compiled[uic]["status"] = "wonky"
else:
# fill out the pending field
records_compiled[uic] = {"status": "pending"}
else:
if not uic in records_compiled:
# something wonky happened, we never asked them for the info to begin with
records_compiled[uic] = {"status": "wonky"}
elif records_compiled[uic]["status"] != "pending":
# received two submissions; practically we'll just use their first submission
records_compiled[uic]["status"] = "resubmit"
else:
# actually got a result, yay
records_compiled[uic]["status"] = "submit"
records_compiled[uic]["result"] = record.result
# todo:
# filter out anything submitted *after* a mod chimed in
# filter out anything submitted too long after the request
users_compiled = defaultdict(lambda: {
"pending": 0,
"wonky": 0,
"submit": 0,
"resubmit": 0,
})
for key, result in records_compiled.items():
#pprint.pprint(key)
userid = key[0]
users_compiled[key[0]][result["status"]] += 1
#pprint.pprint(records_compiled)
#pprint.pprint(users_compiled)
# strip out invalid records
random_removal = -1 # this is sometimes useful for testing that our algorithm is somewhat stable; removing a random half of all responses shouldn't drastically invert people's quality scores, for example
records_compiled = {key: value for key, value in records_compiled.items() if "result" in value and random.random() > random_removal}
return records_compiled, users_compiled
def dbg_commentdump(cid, records_compiled, users, user_accuracy):
print(f"Dump for comment {cid}")
dats = []
for key, value in [(key, value) for key, value in records_compiled.items() if key[1] == cid]:
uid = key[0]
dats.append({
"vote": value["result"],
"username": users[uid]["username"],
"accuracy": user_accuracy[uid],
})
print(tabulate(dats, headers = "keys"))
def dbg_userdump(uid, records_compiled, users, comment_calculated_badness_user):
print(f"Dump for user {users[uid]['username']}")
dats = []
for key, value in [(key, value) for key, value in records_compiled.items() if key[0] == uid]:
cid = key[1]
bad, weight = evaluate_badness_of(value["result"])
dats.append({
"cid": cid,
"vote": value["result"],
"calculated": evaluate_badness_of(value["result"]),
"badness": comment_calculated_badness_user[cid],
"correctness": evaluate_correctness_single(bad, weight, comment_calculated_badness_user[cid]),
})
print(tabulate(dats, headers = "keys"))
# Calculates how correct a user is, based on whether they thought it was bad, how confident they were, and how bad we think it is
# Returns (IsCorrect, Confidence)
def evaluate_correctness_single(bad, user_weight, calculated_badness):
# Boolean for whether this comment is bad
calculated_badbool = calculated_badness > 0.5
# Boolean for whether the user was correct
correctness_result = (bad == calculated_badbool) and 1 or 0
# "how confident are we that this is bad/notbad", range [0, 0.5]
calculated_badness_confidence = abs(calculated_badness - 0.5)
# "how much do we want this to influence the user's correctness"
# there's a deadzone around not-confident where we just push it to 0 and don't make it relevant
calculated_badness_weight = saturate(remap(calculated_badness_confidence, 0.1, 0.5, 0, 1))
# see how correct we think the user is
user_correctness = user_weight * calculated_badness_weight
return correctness_result, user_correctness
def volunteer_janitor_recalc(db: Session, diagnostics: bool = False):
logging.info("Starting full janitor recalculation")
# Get our full list of data
records_compiled, users_compiled = _compile_records(db)
reporting_user_ids = {record[0] for record in records_compiled}
reported_comment_ids = {record[1] for record in records_compiled}
# Get some metadata for all reported comments
comments = db.query(Comment) \
.where(Comment.id.in_(reported_comment_ids)) \
.options(sqlalchemy.orm.load_only('id', 'created_utc', 'author_id'))
comments = {comment.id: comment for comment in comments}
reported_user_ids = {comment.author_id for comment in comments.values()}
# Get mod intervention data
modhats_raw = db.query(Comment) \
.where(Comment.parent_comment_id.in_(reported_comment_ids)) \
.where(Comment.distinguish_level > 0) \
.options(sqlalchemy.orm.load_only('parent_comment_id', 'created_utc'))
modhats = {}
# we jump through some hoops to deduplicate this; I guess we just pick the last one in our list for now
for modhat in modhats_raw:
modhats[modhat.parent_comment_id] = modhat
usernotes_raw = db.query(UserNote) \
.where(UserNote.tag.in_([UserTag.Warning, UserTag.Tempban, UserTag.Permban, UserTag.Spam, UserTag.Bot])) \
.options(sqlalchemy.orm.load_only('reference_user', 'created_utc', 'tag'))
# Here we're trying to figure out whether modhats are actually warnings/bans
# We don't have a formal connection between "a comment is bad" and "the user got a warning", so we're kind of awkwardly trying to derive it from our database
# In addition, sometimes someone posts a lot of bad comments and only gets modhatted for one of them
# That doesn't mean the other comments weren't bad
# It just means we picked the worst one
# So we ignore comments near the actual modhat time
commentresults = {}
for uid in reported_user_ids:
# For each user, figure out when modhats happened
# this is slow but whatever
modhat_times = []
for modhat in modhats.values():
if comments[modhat.parent_comment_id].author_id != uid:
continue
modhat_times.append(modhat.created_utc)
usernote_times = []
for usernote in usernotes_raw:
if usernote.reference_user != uid:
continue
usernote_times.append(usernote.created_utc)
# For each comment . . .
for comment in comments.values():
if comment.author_id != uid:
continue
if comment.id in modhats:
modhat_comment = modhats[comment.id]
else:
modhat_comment = None
# if the comment was modhatted *and* resulted in a negative usernote near the modhat time, it's bad
if modhat_comment is not None and next((time for time in usernote_times if abs(modhat_comment.created_utc - time) < 60 * 15), None) is not None:
commentresults[comment.id] = "bad"
# otherwise, if the comment was posted less than 48 hours before a negative usernote, we ignore it for processing on the assumption that it may just have been part of a larger warning
elif next((time for time in usernote_times if comment.created_utc < time and comment.created_utc + 48 * 60 * 60 > time), None) is not None:
commentresults[comment.id] = "ignored"
# otherwise, we call it not-bad
else:
commentresults[comment.id] = "notbad"
# get per-user metadata
users = db.query(User) \
.where(User.id.in_(reporting_user_ids)) \
.options(sqlalchemy.orm.load_only('id', 'username', 'admin_level'))
users = {user.id: {"username": user.username, "admin": user.admin_level != 0} for user in users}
user_accuracy = defaultdict(lambda: CONFIG_default_user_accuracy)
# Do an update loop!
for lid in range(0, 100):
# Accumulated weight/badness, taking admin flags into account
# This is used for training
comment_weight_admin = defaultdict(lambda: 0)
comment_badness_admin = defaultdict(lambda: 0)
# Accumulated weight/badness, not taking admin flags into account
# This is used for output and display
comment_weight_user = defaultdict(lambda: 0)
comment_badness_user = defaultdict(lambda: 0)
# accumulate modhat weights
for cid in reported_comment_ids:
result = commentresults[cid]
if result == "ignored":
# I guess we'll just let the users decide?
continue
if result == "bad":
comment_weight_admin[cid] += CONFIG_modhat_weight
comment_badness_admin[cid] += CONFIG_modhat_weight
if result == "notbad":
comment_weight_admin[cid] += CONFIG_modhat_weight
# accumulate volunteer weights
for key, value in records_compiled.items():
uid, cid = key
# Calculate how much to weight a user; highly inaccurate users are not inverted! They just don't get contribution
# (losers)
userweight_user = userweight_from_user_accuracy(user_accuracy[uid]);
if users[uid]["admin"]:
userweight_admin = CONFIG_admin_volunteer_weight
else:
userweight_admin = userweight_user
bad, weight = evaluate_badness_of(value["result"])
# Accumulate these to our buffers
comment_weight_admin[cid] += userweight_admin * weight
comment_weight_user[cid] += userweight_user * weight
if bad:
comment_badness_admin[cid] += userweight_admin * weight
comment_badness_user[cid] += userweight_user * weight
# Calculated badnesses, both taking admins into account and not doing so, and "theoretical idea" versus a conversative view designed to be more skeptical of low-weighted comments
comment_calculated_badness_admin = {cid: calculate_final_comment_badness(comment_badness_admin[cid], comment_weight_admin[cid], False) for cid in reported_comment_ids}
comment_calculated_badness_admin_conservative = {cid: calculate_final_comment_badness(comment_badness_admin[cid], comment_weight_admin[cid], True) for cid in reported_comment_ids}
comment_calculated_badness_user = {cid: calculate_final_comment_badness(comment_badness_user[cid], comment_weight_user[cid], False) for cid in reported_comment_ids}
comment_calculated_badness_user_conservative = {cid: calculate_final_comment_badness(comment_badness_user[cid], comment_weight_user[cid], True) for cid in reported_comment_ids}
# go through user submissions and count up how good users seem to be at this
user_correctness_weight = defaultdict(lambda: CONFIG_new_user_damping)
user_correctness_value = defaultdict(lambda: CONFIG_default_user_accuracy * user_correctness_weight[0])
for key, value in records_compiled.items():
uid, cid = key
# if this is "ignored", I don't trust that we have a real answer, so we just skip it for training purposes
if commentresults[cid] == "ignored":
continue
bad, weight = evaluate_badness_of(value["result"])
correctness, weight = evaluate_correctness_single(bad, weight, comment_calculated_badness_admin[cid])
user_correctness_weight[uid] += weight
user_correctness_value[uid] += correctness * weight
# calculate new correctnesses
for uid in reporting_user_ids:
target_user_correctness = user_correctness_value[uid] / user_correctness_weight[uid]
# lerp slowly to the new values
user_accuracy[uid] = lerp(user_accuracy[uid], target_user_correctness, CONFIG_user_correctness_lerp)
if diagnostics:
# debug print
commentscores = [{
"link": f"https://themotte.org/comment/{cid}",
"badness": comment_calculated_badness_admin[cid],
"badnessuser": comment_calculated_badness_user[cid],
"badnessusercons": comment_calculated_badness_user_conservative[cid],
"participation": comment_weight_user[cid],
"mh": commentresults[cid]} for cid in reported_comment_ids]
commentscores.sort(key = lambda item: item["badnessusercons"] + item["badnessuser"] / 100)
print(tabulate(commentscores, headers = "keys"))
results = [{
"user": f"https://themotte.org/@{users[uid]['username']}",
"accuracy": user_accuracy[uid],
"submit": users_compiled[uid]["submit"],
"nonsubmit": sum(users_compiled[uid].values()) - users_compiled[uid]["submit"],
"admin": users[uid]["admin"] and "Admin" or "",
} for uid in reporting_user_ids]
results.sort(key = lambda k: k["accuracy"])
print(tabulate(results, headers = "keys"))
dbg_commentdump(89681, records_compiled, users, user_accuracy)
print(calculate_final_comment_badness(comment_badness_user[89681], comment_weight_user[89681], True))
#dbg_userdump(131, records_compiled, users, comment_calculated_badness_user)
# Shove all this in the database, yaaay
# Conditional needed because sqlalchemy breaks if you try passing it zero data
if len(user_accuracy) > 0:
db.query(User) \
.where(User.id.in_([id for id in user_accuracy.keys()])) \
.update({
User.volunteer_janitor_correctness: sqlalchemy.sql.case(
user_accuracy,
value = User.id,
)
})
db.commit()
# We don't bother recalculating comment confidences here; it's a pain to do it and they shouldn't change much
logging.info("Finished full janitor recalculation")
@app.cli.command('volunteer_janitor_recalc')
def volunteer_janitor_recalc_cmd():
volunteer_janitor_recalc(db_session(), diagnostics = True)
def volunteer_janitor_recalc_cron(ctx:TaskRunContext):
volunteer_janitor_recalc(ctx.db)
def volunteer_janitor_recalc_all_comments(db: Session):
# may as well do this first
volunteer_janitor_recalc(db)
# I'm not sure of the details here, but there seems to be some session-related caching cruft left around
# so let's just nuke that
db.expire_all()
# going through all the comments piecemeal like this is hilariously efficient, but this entire system gets run exactly once ever, so, okay
for comment in alive_it(db.query(Comment).join(Comment.reports)):
update_comment_badness(db, comment.id)
db.commit()
@app.cli.command('volunteer_janitor_recalc_all_comments')
def volunteer_janitor_recalc_all_comments_cmd():
volunteer_janitor_recalc_all_comments(db_session())