import pprint

import sqlalchemy
from sqlalchemy.orm import Session

from alive_progress import alive_it
from collections import defaultdict
from files.classes import User, Comment, UserNote, UserTag
from files.classes.cron.tasks import TaskRunContext
from files.classes.volunteer_janitor import VolunteerJanitorRecord, VolunteerJanitorResult
from files.helpers.volunteer_janitor import evaluate_badness_of, userweight_from_user_accuracy, calculate_final_comment_badness, update_comment_badness
from files.helpers.math import saturate, remap, lerp

import logging
import random

from files.__main__ import app, db_session

CONFIG_modhat_weight = 4
CONFIG_admin_volunteer_weight = 4
CONFIG_new_user_damping = 2
CONFIG_default_user_accuracy = 0.2
CONFIG_user_correctness_lerp = 0.2
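# How the knobs above are used below (informal interpretation, see the code):
# - CONFIG_modhat_weight: weight a mod verdict contributes to a comment's badness tally
# - CONFIG_admin_volunteer_weight: fixed weight an admin's own volunteer vote gets during training
# - CONFIG_new_user_damping: phantom weight pulling users with little history toward the default accuracy
# - CONFIG_default_user_accuracy: accuracy assumed for a user with no history at all
# - CONFIG_user_correctness_lerp: fraction of the way accuracy moves toward its new estimate each iteration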

def _compile_records(db):
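    """Collapse raw VolunteerJanitorRecord rows into one record per (user, comment) pair.

    Returns (records_compiled, users_compiled): records_compiled maps
    (user_id, comment_id) to a dict holding the user's vote status and final
    result, and users_compiled maps user_id to counts of each record status.
    """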
    vrecords = db.query(VolunteerJanitorRecord).order_by(VolunteerJanitorRecord.recorded_utc).all()

    # get the info we need for all mentioned posts
    reported_comment_ids = {record.comment_id for record in vrecords}
    reported_comments = db.query(Comment).where(Comment.id.in_(reported_comment_ids)).options(sqlalchemy.orm.load_only('id', 'state_user_deleted_utc'))
    reported_comments = {comment.id: comment for comment in reported_comments}

    # get our compiled data
    records_compiled = {}
    for record in vrecords:
        # we're just going to ignore deleted comments entirely
        if reported_comments[record.comment_id].state_user_deleted_utc is not None:
            continue

        # unique identifier for user/comment report pair
        uic = (record.user_id, record.comment_id)

        if record.result == VolunteerJanitorResult.Pending:
            if uic in records_compiled:
                # something wonky happened, we went back to pending after getting a result?
                records_compiled[uic]["status"] = "wonky"
            else:
                # fill out the pending field
                records_compiled[uic] = {"status": "pending"}
        else:
            if uic not in records_compiled:
                # something wonky happened, we never asked them for the info to begin with
                records_compiled[uic] = {"status": "wonky"}
            elif records_compiled[uic]["status"] != "pending":
                # received two submissions; practically we'll just use their first submission
                records_compiled[uic]["status"] = "resubmit"
            else:
                # actually got a result, yay
                records_compiled[uic]["status"] = "submit"
                records_compiled[uic]["result"] = record.result

    # todo:
    # filter out anything submitted *after* a mod chimed in
    # filter out anything submitted too long after the request

    users_compiled = defaultdict(lambda: {
        "pending": 0,
        "wonky": 0,
        "submit": 0,
        "resubmit": 0,
    })
    for key, result in records_compiled.items():
        #pprint.pprint(key)
        userid = key[0]

        users_compiled[userid][result["status"]] += 1

    #pprint.pprint(records_compiled)
    #pprint.pprint(users_compiled)

    # strip out invalid records
    random_removal = -1  # this is sometimes useful for testing that our algorithm is somewhat stable; removing a random half of all responses shouldn't drastically invert people's quality scores, for example
    records_compiled = {key: value for key, value in records_compiled.items() if "result" in value and random.random() > random_removal}

    return records_compiled, users_compiled

def dbg_commentdump(cid, records_compiled, users, user_accuracy):
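    """Print every volunteer vote on comment `cid`, along with each voter's accuracy."""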
print(f"Dump for comment {cid}")
|
|
|
|
from tabulate import tabulate
|
|
|
|
dats = []
|
|
for key, value in [(key, value) for key, value in records_compiled.items() if key[1] == cid]:
|
|
uid = key[0]
|
|
dats.append({
|
|
"vote": value["result"],
|
|
"username": users[uid]["username"],
|
|
"accuracy": user_accuracy[uid],
|
|
})
|
|
print(tabulate(dats, headers = "keys"))
|
|

def dbg_userdump(uid, records_compiled, users, comment_calculated_badness_user):
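    """Print every vote user `uid` made, alongside the consensus badness of each comment."""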
print(f"Dump for user {users[uid]['username']}")
|
|
|
|
from tabulate import tabulate
|
|
|
|
dats = []
|
|
for key, value in [(key, value) for key, value in records_compiled.items() if key[0] == uid]:
|
|
cid = key[1]
|
|
bad, weight = evaluate_badness_of(value["result"])
|
|
dats.append({
|
|
"cid": cid,
|
|
"vote": value["result"],
|
|
"calculated": evaluate_badness_of(value["result"]),
|
|
"badness": comment_calculated_badness_user[cid],
|
|
"correctness": evaluate_correctness_single(bad, weight, comment_calculated_badness_user[cid]),
|
|
})
|
|
print(tabulate(dats, headers = "keys"))
|
|
|

# Calculates how correct a user is, based on whether they thought it was bad, how confident they were, and how bad we think it is
# Returns (IsCorrect, Confidence)
def evaluate_correctness_single(bad, user_weight, calculated_badness):
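    """Score one user's vote against the consensus badness of a comment.

    Worked example (assuming remap(x, 0.1, 0.5, 0, 1) maps [0.1, 0.5]
    linearly onto [0, 1] and saturate clamps to [0, 1]): with bad=True,
    user_weight=1, and calculated_badness=0.9, the comment counts as bad
    (0.9 > 0.5), so the user was correct; our confidence is
    abs(0.9 - 0.5) = 0.4, which remaps to 0.75, so this vote is scored
    as (1, 0.75).
    """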
    # Boolean for whether this comment is bad
    calculated_badbool = calculated_badness > 0.5

    # Boolean for whether the user was correct
    correctness_result = 1 if bad == calculated_badbool else 0

    # "how confident are we that this is bad/notbad", range [0, 0.5]
    calculated_badness_confidence = abs(calculated_badness - 0.5)

    # "how much do we want this to influence the user's correctness"
    # there's a deadzone around not-confident where we just push it to 0 and don't make it relevant
    calculated_badness_weight = saturate(remap(calculated_badness_confidence, 0.1, 0.5, 0, 1))

    # see how correct we think the user is
    user_correctness = user_weight * calculated_badness_weight

    return correctness_result, user_correctness

def volunteer_janitor_recalc(db: Session, diagnostics: bool = False):
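    """Recompute all volunteer accuracy scores from the full janitor-vote history.

    Runs the comment-badness/user-accuracy fixed-point iteration below, then
    writes each user's final accuracy to User.volunteer_janitor_correctness.
    """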
    logging.info("Starting full janitor recalculation")

    # Get our full list of data
    records_compiled, users_compiled = _compile_records(db)

    reporting_user_ids = {record[0] for record in records_compiled}
    reported_comment_ids = {record[1] for record in records_compiled}

    # Get some metadata for all reported comments
    comments = db.query(Comment) \
        .where(Comment.id.in_(reported_comment_ids)) \
        .options(sqlalchemy.orm.load_only('id', 'created_utc', 'author_id'))
    comments = {comment.id: comment for comment in comments}

    reported_user_ids = {comment.author_id for comment in comments.values()}

    # Get mod intervention data
    modhats_raw = db.query(Comment) \
        .where(Comment.parent_comment_id.in_(reported_comment_ids)) \
        .where(Comment.distinguish_level > 0) \
        .options(sqlalchemy.orm.load_only('parent_comment_id', 'created_utc'))

    modhats = {}
    # we jump through some hoops to deduplicate this; I guess we just pick the last one in our list for now
    for modhat in modhats_raw:
        modhats[modhat.parent_comment_id] = modhat

    usernotes_raw = db.query(UserNote) \
        .where(UserNote.tag.in_([UserTag.Warning, UserTag.Tempban, UserTag.Permban, UserTag.Spam, UserTag.Bot])) \
        .options(sqlalchemy.orm.load_only('reference_user', 'created_datetimez', 'tag'))

    # Here we're trying to figure out whether modhats are actually warnings/bans
    # We don't have a formal connection between "a comment is bad" and "the user got a warning", so we're kind of awkwardly trying to derive it from our database
    # In addition, sometimes someone posts a lot of bad comments and only gets modhatted for one of them
    # That doesn't mean the other comments weren't bad
    # It just means we picked the worst one
    # So we ignore comments near the actual modhat time
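    # Concretely (summarizing the rules below): a comment whose modhat lands
    # within 15 minutes of a warning/ban-type usernote is "bad"; a comment
    # posted in the 48 hours before such a usernote is "ignored"; everything
    # else is "notbad".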

    commentresults = {}
    for uid in reported_user_ids:
        # For each user, figure out when modhats happened
        # this is slow but whatever
        modhat_times = []
        for modhat in modhats.values():
            if comments[modhat.parent_comment_id].author_id != uid:
                continue

            modhat_times.append(modhat.created_utc)

        usernote_times = []
        for usernote in usernotes_raw:
            if usernote.reference_user != uid:
                continue

            usernote_times.append(usernote.created_utc)

        # For each comment . . .
        for comment in comments.values():
            if comment.author_id != uid:
                continue

            if comment.id in modhats:
                modhat_comment = modhats[comment.id]
            else:
                modhat_comment = None

            # if the comment was modhatted *and* resulted in a negative usernote near the modhat time, it's bad
            if modhat_comment is not None and next((time for time in usernote_times if abs(modhat_comment.created_utc - time) < 60 * 15), None) is not None:
                commentresults[comment.id] = "bad"
            # otherwise, if the comment was posted less than 48 hours before a negative usernote, we ignore it for processing on the assumption that it may just have been part of a larger warning
            elif next((time for time in usernote_times if comment.created_utc < time and comment.created_utc + 48 * 60 * 60 > time), None) is not None:
                commentresults[comment.id] = "ignored"
            # otherwise, we call it not-bad
            else:
                commentresults[comment.id] = "notbad"

    # get per-user metadata
    users = db.query(User) \
        .where(User.id.in_(reporting_user_ids)) \
        .options(sqlalchemy.orm.load_only('id', 'username', 'admin_level'))
    users = {user.id: {"username": user.username, "admin": user.admin_level != 0} for user in users}

    user_accuracy = defaultdict(lambda: CONFIG_default_user_accuracy)

    # Do an update loop!
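    # This is a fixed-point iteration: each pass recomputes comment badness
    # from the current user accuracies, then recomputes user accuracies from
    # the resulting comment badness, lerping slowly so the scores settle
    # instead of oscillating.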
    for lid in range(0, 100):

        # Accumulated weight/badness, taking admin flags into account
        # This is used for training
        comment_weight_admin = defaultdict(lambda: 0)
        comment_badness_admin = defaultdict(lambda: 0)

        # Accumulated weight/badness, not taking admin flags into account
        # This is used for output and display
        comment_weight_user = defaultdict(lambda: 0)
        comment_badness_user = defaultdict(lambda: 0)

        # accumulate modhat weights
        for cid in reported_comment_ids:
            result = commentresults[cid]

            if result == "ignored":
                # I guess we'll just let the users decide?
                continue

            if result == "bad":
                comment_weight_admin[cid] += CONFIG_modhat_weight
                comment_badness_admin[cid] += CONFIG_modhat_weight

            if result == "notbad":
                comment_weight_admin[cid] += CONFIG_modhat_weight

        # accumulate volunteer weights
        for key, value in records_compiled.items():
            uid, cid = key

            # Calculate how much to weight a user; highly inaccurate users are not inverted! They just don't get contribution
            # (losers)
            userweight_user = userweight_from_user_accuracy(user_accuracy[uid])

            if users[uid]["admin"]:
                userweight_admin = CONFIG_admin_volunteer_weight
            else:
                userweight_admin = userweight_user

            bad, weight = evaluate_badness_of(value["result"])

            # Accumulate these to our buffers
            comment_weight_admin[cid] += userweight_admin * weight
            comment_weight_user[cid] += userweight_user * weight

            if bad:
                comment_badness_admin[cid] += userweight_admin * weight
                comment_badness_user[cid] += userweight_user * weight

        # Calculated badnesses, both taking admins into account and not doing so, and a "theoretical ideal" versus a conservative view designed to be more skeptical of low-weighted comments
        comment_calculated_badness_admin = {cid: calculate_final_comment_badness(comment_badness_admin[cid], comment_weight_admin[cid], False) for cid in reported_comment_ids}
        comment_calculated_badness_admin_conservative = {cid: calculate_final_comment_badness(comment_badness_admin[cid], comment_weight_admin[cid], True) for cid in reported_comment_ids}
        comment_calculated_badness_user = {cid: calculate_final_comment_badness(comment_badness_user[cid], comment_weight_user[cid], False) for cid in reported_comment_ids}
        comment_calculated_badness_user_conservative = {cid: calculate_final_comment_badness(comment_badness_user[cid], comment_weight_user[cid], True) for cid in reported_comment_ids}

        # go through user submissions and count up how good users seem to be at this
        user_correctness_weight = defaultdict(lambda: CONFIG_new_user_damping)
        # seeded so that value/weight starts out at the default accuracy
        user_correctness_value = defaultdict(lambda: CONFIG_default_user_accuracy * CONFIG_new_user_damping)

        for key, value in records_compiled.items():
            uid, cid = key

            # if this is "ignored", I don't trust that we have a real answer, so we just skip it for training purposes
            if commentresults[cid] == "ignored":
                continue

            bad, weight = evaluate_badness_of(value["result"])

            correctness, weight = evaluate_correctness_single(bad, weight, comment_calculated_badness_admin[cid])

            user_correctness_weight[uid] += weight
            user_correctness_value[uid] += correctness * weight

        # calculate new correctnesses
        for uid in reporting_user_ids:
            target_user_correctness = user_correctness_value[uid] / user_correctness_weight[uid]

            # lerp slowly to the new values
            user_accuracy[uid] = lerp(user_accuracy[uid], target_user_correctness, CONFIG_user_correctness_lerp)

    if diagnostics:
        # debug print

        from tabulate import tabulate

        commentscores = [{
            "link": f"https://themotte.org/comment/{cid}",
            "badness": comment_calculated_badness_admin[cid],
            "badnessuser": comment_calculated_badness_user[cid],
            "badnessusercons": comment_calculated_badness_user_conservative[cid],
            "participation": comment_weight_user[cid],
            "mh": commentresults[cid]} for cid in reported_comment_ids]
        commentscores.sort(key = lambda item: item["badnessusercons"] + item["badnessuser"] / 100)
        print(tabulate(commentscores, headers = "keys"))

        results = [{
            "user": f"https://themotte.org/@{users[uid]['username']}",
            "accuracy": user_accuracy[uid],
            "submit": users_compiled[uid]["submit"],
            "nonsubmit": sum(users_compiled[uid].values()) - users_compiled[uid]["submit"],
            "admin": "Admin" if users[uid]["admin"] else "",
        } for uid in reporting_user_ids]
        results.sort(key = lambda k: k["accuracy"])
        print(tabulate(results, headers = "keys"))

        dbg_commentdump(89681, records_compiled, users, user_accuracy)
        print(calculate_final_comment_badness(comment_badness_user[89681], comment_weight_user[89681], True))

        #dbg_userdump(131, records_compiled, users, comment_calculated_badness_user)

    # Shove all this in the database, yaaay
    # Conditional needed because sqlalchemy breaks if you try passing it zero data
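    # The case() below should compile to one "CASE users.id WHEN <id> THEN
    # <accuracy> ... END" expression, so every user's new accuracy lands in a
    # single UPDATE statement rather than one round-trip per user.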
    if len(user_accuracy) > 0:
        db.query(User) \
            .where(User.id.in_(list(user_accuracy.keys()))) \
            .update({
                User.volunteer_janitor_correctness: sqlalchemy.sql.case(
                    user_accuracy,
                    value = User.id,
                )
            })
        db.commit()

    # We don't bother recalculating comment confidences here; it's a pain to do it and they shouldn't change much
    logging.info("Finished full janitor recalculation")

@app.cli.command('volunteer_janitor_recalc')
def volunteer_janitor_recalc_cmd():
    volunteer_janitor_recalc(db_session(), diagnostics = True)

def volunteer_janitor_recalc_cron(ctx: TaskRunContext):
    volunteer_janitor_recalc(ctx.db)

def volunteer_janitor_recalc_all_comments(db: Session):
    # may as well do this first
    volunteer_janitor_recalc(db)

    # I'm not sure of the details here, but there seems to be some session-related caching cruft left around
    # so let's just nuke that
    db.expire_all()

    # going through all the comments piecemeal like this is hilariously inefficient, but this entire system gets run exactly once ever, so, okay
    for comment in alive_it(db.query(Comment).join(Comment.reports)):
        update_comment_badness(db, comment.id)

    db.commit()

@app.cli.command('volunteer_janitor_recalc_all_comments')
def volunteer_janitor_recalc_all_comments_cmd():
    volunteer_janitor_recalc_all_comments(db_session())