Implement mod-viewable janitor-generated badness stats.
This commit is contained in:
parent
8933f77bf5
commit
9d264dcf3a
14 changed files with 1772 additions and 1112 deletions
|
@ -5325,3 +5325,41 @@ div[id^="reply-edit-"] li > p:first-child {
|
|||
.volunteer_janitor .choices {
    margin-top: 1rem;
}

/* Badge shown next to a reported comment with the janitor-computed verdict. */
.volunteer_janitor_result {
    border-radius: 1rem;
    padding: 0.2rem;
    color: var(--primary-dark1);
}

/* Not-bad verdicts: blue background, darker as the confidence tier (suffix 0-3) rises. */
.volunteer_janitor_result_notbad_0 {
    background-color: hsl(240, 100%, 99%);
}

.volunteer_janitor_result_notbad_1 {
    background-color: hsl(240, 100%, 98%);
}

.volunteer_janitor_result_notbad_2 {
    background-color: hsl(240, 100%, 94%);
}

.volunteer_janitor_result_notbad_3 {
    background-color: hsl(240, 100%, 90%);
}

/* Bad verdicts: red background, darker as the confidence tier rises. */
.volunteer_janitor_result_bad_0 {
    background-color: hsl(0, 100%, 98%);
}

.volunteer_janitor_result_bad_1 {
    background-color: hsl(0, 100%, 94%);
}

.volunteer_janitor_result_bad_2 {
    background-color: hsl(0, 100%, 88%);
}

.volunteer_janitor_result_bad_3 {
    background-color: hsl(0, 100%, 80%);
}
|
|
@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Literal, Optional
|
|||
from urllib.parse import parse_qs, urlencode, urlparse
|
||||
|
||||
from flask import g
|
||||
import math
|
||||
from sqlalchemy import *
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
|
@ -11,6 +12,7 @@ from files.helpers.config.environment import SCORE_HIDING_TIME_HOURS, SITE_FULL
|
|||
from files.helpers.content import (ModerationState, body_displayed,
|
||||
execute_shadowbanned_fake_votes)
|
||||
from files.helpers.lazy import lazy
|
||||
from files.helpers.math import clamp
|
||||
from files.helpers.time import format_age
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -48,6 +50,7 @@ class Comment(CreatedBase):
|
|||
body_html = Column(Text, nullable=False)
|
||||
ban_reason = Column(String)
|
||||
filter_state = Column(String, nullable=False)
|
||||
volunteer_janitor_badness = Column(Float, default=0.5, nullable=False)
|
||||
|
||||
Index('comment_parent_index', parent_comment_id)
|
||||
Index('comment_post_id_index', parent_submission)
|
||||
|
@ -441,3 +444,30 @@ class Comment(CreatedBase):
|
|||
@property
def moderation_state(self) -> ModerationState:
    """Aggregate moderation state of this comment.

    Delegates to ModerationState.from_submittable, which presumably derives
    the state from this comment's moderation-related columns — see that
    helper for the exact rules.
    """
    return ModerationState.from_submittable(self)
|
||||
|
||||
def volunteer_janitor_is_unknown(self):
    """True when the badness estimate sits in the ambiguous middle band (0.4, 0.6) exclusive.

    Uses a chained comparison instead of the original
    `x > 0.4 and x < 0.6`, avoiding the duplicated attribute lookup.
    """
    return 0.4 < self.volunteer_janitor_badness < 0.6
|
||||
|
||||
def volunteer_janitor_is_bad(self):
    """True when the badness estimate has crossed the upper decision threshold (>= 0.6)."""
    bad_threshold = 0.6
    return self.volunteer_janitor_badness >= bad_threshold
|
||||
|
||||
def volunteer_janitor_is_notbad(self):
    """True when the badness estimate has dropped to the lower decision threshold (<= 0.4)."""
    notbad_threshold = 0.4
    return self.volunteer_janitor_badness <= notbad_threshold
|
||||
|
||||
def volunteer_janitor_confidence(self):
    """Integer confidence score for the current badness estimate.

    0 means total uncertainty (badness == 0.5); the score grows without
    bound as badness approaches 0 or 1. Scaled so one "bit" of certainty
    is worth 10 points.
    """
    # Distance from total uncertainty (0.5), rescaled to [0, 1].
    certainty = abs(self.volunteer_janitor_badness - 0.5) * 2
    # Remaining uncertainty; log2 converts it into bits of confidence.
    remaining = 1 - certainty
    bits = -math.log(remaining, 2)
    return round(bits * 10)
|
||||
|
||||
def volunteer_janitor_css(self):
    """Return the "<category>_<strength>" suffix used to pick the janitor-result CSS class.

    Category is one of "unknown"/"bad"/"notbad"; strength is the
    confidence bucketed into tiers 0-3.
    """
    # The three predicates partition the whole badness range, so exactly one fires.
    verdicts = (
        (self.volunteer_janitor_is_unknown, "unknown"),
        (self.volunteer_janitor_is_bad, "bad"),
        (self.volunteer_janitor_is_notbad, "notbad"),
    )
    for predicate, label in verdicts:
        if predicate():
            category = label
            break

    # 10 confidence points per tier, capped at tier 3.
    tier = math.trunc(self.volunteer_janitor_confidence() / 10)
    strength = clamp(tier, 0, 3)

    return f"{category}_{strength}"
|
||||
|
|
|
@ -111,6 +111,7 @@ class User(CreatedBase):
|
|||
original_username = deferred(Column(String))
|
||||
referred_by = Column(Integer, ForeignKey("users.id"))
|
||||
volunteer_last_started_utc = Column(DateTime, nullable=True)
|
||||
volunteer_janitor_correctness = Column(Float, default=0, nullable=False)
|
||||
|
||||
Index(
|
||||
'users_original_username_trgm_idx',
|
||||
|
|
|
@ -4,6 +4,7 @@ from flask_sqlalchemy import SQLAlchemy
|
|||
from files.__main__ import app
|
||||
from files.commands.cron import cron_app_worker
|
||||
from files.commands.seed_db import seed_db
|
||||
from files.commands.volunteer_janitor_recalc import volunteer_janitor_recalc
|
||||
from files.commands.cron_setup import cron_setup
|
||||
import files.classes
|
||||
|
||||
|
|
|
@ -19,12 +19,12 @@ def cron_setup():
|
|||
|
||||
# I guess in theory we should load this from a file or something, but, ehhhh
|
||||
hardcoded_cron_jobs = {
|
||||
#'testjob': {
|
||||
#'frequency_day': DayOfWeek.ALL,
|
||||
#'time_of_day_utc': datetime.time(0, 0),
|
||||
#'import_path': 'files.commands.debug_printout',
|
||||
#'callable': 'printstuff',
|
||||
#},
|
||||
'volunteer_janitor_recalc': {
|
||||
'frequency_day': DayOfWeek.ALL,
|
||||
'time_of_day_utc': datetime.time(0, 0),
|
||||
'import_path': 'files.commands.volunteer_janitor_recalc',
|
||||
'callable': 'volunteer_janitor_recalc_cron',
|
||||
},
|
||||
}
|
||||
|
||||
print(f"{tasklist.count()} tasks")
|
||||
|
|
373
files/commands/volunteer_janitor_recalc.py
Normal file
373
files/commands/volunteer_janitor_recalc.py
Normal file
|
@ -0,0 +1,373 @@
|
|||
|
||||
import pprint
|
||||
|
||||
import sqlalchemy
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from alive_progress import alive_it
|
||||
from collections import defaultdict
|
||||
from files.classes import User, Comment, UserNote, UserTag
|
||||
from files.classes.cron.tasks import TaskRunContext
|
||||
from files.classes.volunteer_janitor import VolunteerJanitorRecord, VolunteerJanitorResult
|
||||
from files.helpers.volunteer_janitor import evaluate_badness_of, userweight_from_user_accuracy, calculate_final_comment_badness, update_comment_badness
|
||||
from files.helpers.math import saturate, remap, lerp
|
||||
|
||||
import logging
|
||||
import random
|
||||
from tabulate import tabulate
|
||||
|
||||
from files.__main__ import app, db_session
|
||||
|
||||
CONFIG_modhat_weight = 4
|
||||
CONFIG_admin_volunteer_weight = 4
|
||||
CONFIG_new_user_damping = 2
|
||||
CONFIG_default_user_accuracy = 0.2
|
||||
CONFIG_user_correctness_lerp = 0.2
|
||||
|
||||
def _compile_records(db):
    """Collapse the raw VolunteerJanitorRecord event log into per-(user, comment) results.

    Returns a pair:
      records_compiled — {(user_id, comment_id): {"status": ..., "result": ...}},
        filtered down to entries that actually carry a submitted result;
      users_compiled — {user_id: {"pending"/"wonky"/"submit"/"resubmit": count}},
        tallied over ALL state transitions (before the result filter).
    """
    # Events are replayed in recorded order so that state transitions make sense.
    vrecords = db.query(VolunteerJanitorRecord).order_by(VolunteerJanitorRecord.recorded_utc).all()

    # get the info we need for all mentioned posts
    reported_comment_ids = {record.comment_id for record in vrecords}
    reported_comments = db.query(Comment).where(Comment.id.in_(reported_comment_ids)).options(sqlalchemy.orm.load_only('id', 'deleted_utc'))
    reported_comments = {comment.id: comment for comment in reported_comments}

    # get our compiled data
    records_compiled = {}
    for record in vrecords:
        # we're just going to ignore deleted comments entirely
        if reported_comments[record.comment_id].deleted_utc != 0:
            continue

        # unique identifier for user/comment report pair
        uic = (record.user_id, record.comment_id)

        if record.result == VolunteerJanitorResult.Pending:
            if uic in records_compiled:
                # something wonky happened, we went back to pending after getting a result?
                records_compiled[uic]["status"] = "wonky"
            else:
                # fill out the pending field
                records_compiled[uic] = {"status": "pending"}
        else:
            if not uic in records_compiled:
                # something wonky happened, we never asked them for the info to begin with
                records_compiled[uic] = {"status": "wonky"}
            elif records_compiled[uic]["status"] != "pending":
                # received two submissions; practically we'll just use their first submission
                records_compiled[uic]["status"] = "resubmit"
            else:
                # actually got a result, yay
                records_compiled[uic]["status"] = "submit"
                records_compiled[uic]["result"] = record.result

    # todo:
    # filter out anything submitted *after* a mod chimed in
    # filter out anything submitted too long after the request

    # Per-user tally of how each assignment ended up.
    users_compiled = defaultdict(lambda: {
        "pending": 0,
        "wonky": 0,
        "submit": 0,
        "resubmit": 0,
    })
    for key, result in records_compiled.items():
        #pprint.pprint(key)
        # NOTE(review): userid is assigned but unused; the tally below indexes key[0] directly.
        userid = key[0]

        users_compiled[key[0]][result["status"]] += 1

    #pprint.pprint(records_compiled)
    #pprint.pprint(users_compiled)

    # strip out invalid records
    random_removal = -1 # this is sometimes useful for testing that our algorithm is somewhat stable; removing a random half of all responses shouldn't drastically invert people's quality scores, for example
    records_compiled = {key: value for key, value in records_compiled.items() if "result" in value and random.random() > random_removal}

    return records_compiled, users_compiled
|
||||
|
||||
def dbg_commentdump(cid, records_compiled, users, user_accuracy):
    """Debug helper: print a table of every janitor vote cast on comment `cid`.

    Each row shows the vote, the voter's username, and the voter's
    current accuracy score.
    """
    print(f"Dump for comment {cid}")

    rows = []
    for (uid, comment_id), record in records_compiled.items():
        if comment_id != cid:
            continue
        rows.append({
            "vote": record["result"],
            "username": users[uid]["username"],
            "accuracy": user_accuracy[uid],
        })
    print(tabulate(rows, headers = "keys"))
|
||||
|
||||
def dbg_userdump(uid, records_compiled, users, comment_calculated_badness_user):
    """Debug helper: print a table of every vote user `uid` cast.

    Each row shows the comment, the raw vote, the (is_bad, confidence)
    derived from the vote, the computed comment badness, and how correct
    the vote was judged against that badness.
    """
    print(f"Dump for user {users[uid]['username']}")

    dats = []
    for key, value in [(key, value) for key, value in records_compiled.items() if key[0] == uid]:
        cid = key[1]
        bad, weight = evaluate_badness_of(value["result"])
        dats.append({
            "cid": cid,
            "vote": value["result"],
            # Reuse the tuple computed above instead of calling
            # evaluate_badness_of a second time (the original recomputed it).
            "calculated": (bad, weight),
            "badness": comment_calculated_badness_user[cid],
            "correctness": evaluate_correctness_single(bad, weight, comment_calculated_badness_user[cid]),
        })
    print(tabulate(dats, headers = "keys"))
|
||||
|
||||
def evaluate_correctness_single(bad, user_weight, calculated_badness):
    """Score one janitor vote against the consensus badness of a comment.

    Args:
        bad: whether the user voted this comment bad.
        user_weight: the confidence attached to the user's vote.
        calculated_badness: the consensus badness in [0, 1].

    Returns:
        (is_correct, confidence) — is_correct is 1 if the vote agrees with
        the consensus verdict, else 0; confidence is how much this data
        point should influence the user's correctness score.
    """
    # Boolean for whether this comment is bad
    calculated_badbool = calculated_badness > 0.5

    # 1 if the user agreed with the consensus, else 0.
    # (Replaces the legacy `cond and 1 or 0` idiom with a conditional expression.)
    correctness_result = 1 if bad == calculated_badbool else 0

    # "how confident are we that this is bad/notbad", range [0, 0.5]
    calculated_badness_confidence = abs(calculated_badness - 0.5)

    # "how much do we want this to influence the user's correctness"
    # there's a deadzone around not-confident where we just push it to 0 and don't make it relevant
    calculated_badness_weight = saturate(remap(calculated_badness_confidence, 0.1, 0.5, 0, 1))

    # see how correct we think the user is
    user_correctness = user_weight * calculated_badness_weight

    return correctness_result, user_correctness
|
||||
|
||||
def volunteer_janitor_recalc(db: Session, diagnostics: bool = False):
    """Recompute every volunteer janitor's accuracy score from scratch.

    Replays all janitor reports, derives ground truth from mod actions
    where possible, then runs a fixed-point iteration in which comment
    badness is computed from accuracy-weighted votes and user accuracy is
    in turn re-estimated from agreement with that badness. Writes the
    resulting User.volunteer_janitor_correctness values back to the DB.

    Args:
        db: SQLAlchemy session to read from and commit to.
        diagnostics: when True, print debug tables of comment scores and
            user accuracies.
    """
    logging.info("Starting full janitor recalculation")

    # Get our full list of data
    records_compiled, users_compiled = _compile_records(db)

    reporting_user_ids = {record[0] for record in records_compiled}
    reported_comment_ids = {record[1] for record in records_compiled}

    # Get some metadata for all reported comments
    comments = db.query(Comment) \
        .where(Comment.id.in_(reported_comment_ids)) \
        .options(sqlalchemy.orm.load_only('id', 'created_utc', 'author_id'))
    comments = {comment.id: comment for comment in comments}

    reported_user_ids = {comment.author_id for comment in comments.values()}

    # Get mod intervention data
    # "modhat" = a distinguished (mod-voiced) reply to a reported comment.
    modhats_raw = db.query(Comment) \
        .where(Comment.parent_comment_id.in_(reported_comment_ids)) \
        .where(Comment.distinguish_level > 0) \
        .options(sqlalchemy.orm.load_only('parent_comment_id', 'created_utc'))

    modhats = {}
    # we jump through some hoops to deduplicate this; I guess we just pick the last one in our list for now
    for modhat in modhats_raw:
        modhats[modhat.parent_comment_id] = modhat

    usernotes_raw = db.query(UserNote) \
        .where(UserNote.tag.in_([UserTag.Warning, UserTag.Tempban, UserTag.Permban, UserTag.Spam, UserTag.Bot])) \
        .options(sqlalchemy.orm.load_only('reference_user', 'created_utc', 'tag'))

    # Here we're trying to figure out whether modhats are actually warnings/bans
    # We don't have a formal connection between "a comment is bad" and "the user got a warning", so we're kind of awkwardly trying to derive it from our database
    # In addition, sometimes someone posts a lot of bad comments and only gets modhatted for one of them
    # That doesn't mean the other comments weren't bad
    # It just means we picked the worst one
    # So we ignore comments near the actual modhat time

    # Ground truth per comment: "bad", "notbad", or "ignored" (no usable signal).
    commentresults = {}
    for uid in reported_user_ids:
        # For each user, figure out when modhats happened
        # this is slow but whatever
        modhat_times = []
        for modhat in modhats.values():
            if comments[modhat.parent_comment_id].author_id != uid:
                continue

            modhat_times.append(modhat.created_utc)

        usernote_times = []
        for usernote in usernotes_raw:
            if usernote.reference_user != uid:
                continue

            usernote_times.append(usernote.created_utc)

        # For each comment . . .
        for comment in comments.values():
            if comment.author_id != uid:
                continue

            if comment.id in modhats:
                modhat_comment = modhats[comment.id]
            else:
                modhat_comment = None

            # if the comment was modhatted *and* resulted in a negative usernote near the modhat time (within 15 minutes), it's bad
            if modhat_comment is not None and next((time for time in usernote_times if abs(modhat_comment.created_utc - time) < 60 * 15), None) is not None:
                commentresults[comment.id] = "bad"
            # otherwise, if the comment was posted less than 48 hours before a negative usernote, we ignore it for processing on the assumption that it may just have been part of a larger warning
            elif next((time for time in usernote_times if comment.created_utc < time and comment.created_utc + 48 * 60 * 60 > time), None) is not None:
                commentresults[comment.id] = "ignored"
            # otherwise, we call it not-bad
            else:
                commentresults[comment.id] = "notbad"

    # get per-user metadata
    users = db.query(User) \
        .where(User.id.in_(reporting_user_ids)) \
        .options(sqlalchemy.orm.load_only('id', 'username', 'admin_level'))
    users = {user.id: {"username": user.username, "admin": user.admin_level != 0} for user in users}

    # Every janitor starts at the default accuracy; refined each iteration below.
    user_accuracy = defaultdict(lambda: CONFIG_default_user_accuracy)

    # Do an update loop!
    # Fixed 100 relaxation iterations; `lid` itself is unused.
    for lid in range(0, 100):

        # Accumulated weight/badness, taking admin flags into account
        # This is used for training
        comment_weight_admin = defaultdict(lambda: 0)
        comment_badness_admin = defaultdict(lambda: 0)

        # Accumulated weight/badness, not taking admin flags into account
        # This is used for output and display
        comment_weight_user = defaultdict(lambda: 0)
        comment_badness_user = defaultdict(lambda: 0)

        # accumulate modhat weights
        for cid in reported_comment_ids:
            result = commentresults[cid]

            if result == "ignored":
                # I guess we'll just let the users decide?
                continue

            if result == "bad":
                comment_weight_admin[cid] += CONFIG_modhat_weight
                comment_badness_admin[cid] += CONFIG_modhat_weight

            if result == "notbad":
                comment_weight_admin[cid] += CONFIG_modhat_weight

        # accumulate volunteer weights
        for key, value in records_compiled.items():
            uid, cid = key

            # Calculate how much to weight a user; highly inaccurate users are not inverted! They just don't get contribution
            # (losers)
            userweight_user = userweight_from_user_accuracy(user_accuracy[uid]);

            if users[uid]["admin"]:
                userweight_admin = CONFIG_admin_volunteer_weight
            else:
                userweight_admin = userweight_user

            bad, weight = evaluate_badness_of(value["result"])

            # Accumulate these to our buffers
            comment_weight_admin[cid] += userweight_admin * weight
            comment_weight_user[cid] += userweight_user * weight

            if bad:
                comment_badness_admin[cid] += userweight_admin * weight
                comment_badness_user[cid] += userweight_user * weight

        # Calculated badnesses, both taking admins into account and not doing so, and "theoretical idea" versus a conservative view designed to be more skeptical of low-weighted comments
        comment_calculated_badness_admin = {cid: calculate_final_comment_badness(comment_badness_admin[cid], comment_weight_admin[cid], False) for cid in reported_comment_ids}
        comment_calculated_badness_admin_conservative = {cid: calculate_final_comment_badness(comment_badness_admin[cid], comment_weight_admin[cid], True) for cid in reported_comment_ids}
        comment_calculated_badness_user = {cid: calculate_final_comment_badness(comment_badness_user[cid], comment_weight_user[cid], False) for cid in reported_comment_ids}
        comment_calculated_badness_user_conservative = {cid: calculate_final_comment_badness(comment_badness_user[cid], comment_weight_user[cid], True) for cid in reported_comment_ids}

        # go through user submissions and count up how good users seem to be at this
        user_correctness_weight = defaultdict(lambda: CONFIG_new_user_damping)
        # NOTE(review): user_correctness_weight[0] both reads the damping default and
        # materializes key 0 in that defaultdict — presumably intentional shorthand
        # for CONFIG_new_user_damping; confirm.
        user_correctness_value = defaultdict(lambda: CONFIG_default_user_accuracy * user_correctness_weight[0])

        for key, value in records_compiled.items():
            uid, cid = key

            # if this is "ignored", I don't trust that we have a real answer, so we just skip it for training purposes
            if commentresults[cid] == "ignored":
                continue

            bad, weight = evaluate_badness_of(value["result"])

            correctness, weight = evaluate_correctness_single(bad, weight, comment_calculated_badness_admin[cid])

            user_correctness_weight[uid] += weight
            user_correctness_value[uid] += correctness * weight

        # calculate new correctnesses
        for uid in reporting_user_ids:
            target_user_correctness = user_correctness_value[uid] / user_correctness_weight[uid]

            # lerp slowly to the new values
            user_accuracy[uid] = lerp(user_accuracy[uid], target_user_correctness, CONFIG_user_correctness_lerp)

    if diagnostics:
        # debug print
        commentscores = [{
            "link": f"https://themotte.org/comment/{cid}",
            "badness": comment_calculated_badness_admin[cid],
            "badnessuser": comment_calculated_badness_user[cid],
            "badnessusercons": comment_calculated_badness_user_conservative[cid],
            "participation": comment_weight_user[cid],
            "mh": commentresults[cid]} for cid in reported_comment_ids]
        commentscores.sort(key = lambda item: item["badnessusercons"] + item["badnessuser"] / 100)
        print(tabulate(commentscores, headers = "keys"))

        results = [{
            "user": f"https://themotte.org/@{users[uid]['username']}",
            "accuracy": user_accuracy[uid],
            "submit": users_compiled[uid]["submit"],
            "nonsubmit": sum(users_compiled[uid].values()) - users_compiled[uid]["submit"],
            "admin": users[uid]["admin"] and "Admin" or "",
        } for uid in reporting_user_ids]
        results.sort(key = lambda k: k["accuracy"])
        print(tabulate(results, headers = "keys"))

        # NOTE(review): hardcoded comment id used as a spot-check during development.
        dbg_commentdump(89681, records_compiled, users, user_accuracy)
        print(calculate_final_comment_badness(comment_badness_user[89681], comment_weight_user[89681], True))

        #dbg_userdump(131, records_compiled, users, comment_calculated_badness_user)

    # Shove all this in the database, yaaay
    # Conditional needed because sqlalchemy breaks if you try passing it zero data
    if len(user_accuracy) > 0:
        db.query(User) \
            .where(User.id.in_([id for id in user_accuracy.keys()])) \
            .update({
                User.volunteer_janitor_correctness: sqlalchemy.sql.case(
                    user_accuracy,
                    value = User.id,
                )
            })
        db.commit()

    # We don't bother recalculating comment confidences here; it's a pain to do it and they shouldn't change much

    logging.info("Finished full janitor recalculation")
|
||||
|
||||
@app.cli.command('volunteer_janitor_recalc')
def volunteer_janitor_recalc_cmd():
    """CLI entry point: run the full recalculation with diagnostic tables printed."""
    volunteer_janitor_recalc(db_session(), diagnostics = True)
|
||||
|
||||
def volunteer_janitor_recalc_cron(ctx:TaskRunContext):
    """Cron entry point: run the full recalculation quietly on the task's DB session."""
    volunteer_janitor_recalc(ctx.db)
|
||||
|
||||
def volunteer_janitor_recalc_all_comments(db: Session):
    """Recompute user accuracies, then rewrite the stored badness of every reported comment.

    One-shot backfill used by the migration; hits the DB once per reported comment.
    """
    # may as well do this first
    volunteer_janitor_recalc(db)

    # I'm not sure of the details here, but there seems to be some session-related caching cruft left around
    # so let's just nuke that
    db.expire_all()

    # going through all the comments piecemeal like this is hilariously inefficient, but this entire system gets run exactly once ever, so, okay
    for comment in alive_it(db.query(Comment).join(Comment.reports)):
        update_comment_badness(db, comment.id)

    db.commit()
|
||||
|
||||
@app.cli.command('volunteer_janitor_recalc_all_comments')
def volunteer_janitor_recalc_all_comments_cmd():
    """CLI entry point for the one-shot full comment-badness rebuild."""
    volunteer_janitor_recalc_all_comments(db_session())
|
15
files/helpers/math.py
Normal file
15
files/helpers/math.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
|
||||
def remap(input: float, smin: float, smax: float, emin: float, emax: float) -> float:
    """Linearly map `input` from the source range [smin, smax] onto [emin, emax].

    Inputs outside the source range extrapolate (no clamping); a zero-width
    source range (smin == smax) raises ZeroDivisionError.
    """
    fraction = (input - smin) / (smax - smin)
    return (1 - fraction) * emin + fraction * emax
|
||||
|
||||
def clamp(input: float, min: float, max: float) -> float:
    """Constrain `input` to the inclusive range [min, max].

    Note: the parameter names shadow the builtins; they are kept
    unchanged for caller compatibility.
    """
    return min if input < min else (max if input > max else input)
|
||||
|
||||
def saturate(input: float) -> float:
    """Clamp `input` into the unit interval [0, 1]."""
    lower, upper = 0, 1
    return clamp(input, lower, upper)
|
||||
|
||||
def lerp(a: float, b: float, t: float) -> float:
    """Linearly interpolate from `a` (t=0) to `b` (t=1)."""
    weight_a = 1 - t
    weight_b = t
    return weight_a * a + weight_b * b
|
87
files/helpers/volunteer_janitor.py
Normal file
87
files/helpers/volunteer_janitor.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
|
||||
from files.classes.comment import Comment
|
||||
from files.classes.volunteer_janitor import VolunteerJanitorRecord, VolunteerJanitorResult
|
||||
from files.helpers.math import saturate, remap
|
||||
|
||||
def evaluate_badness_of(choice):
    """Map a janitor's vote to a (is_bad, confidence) pair.

    Warning/Ban count as confident "bad"; Bad and Neutral are half-weight
    votes; anything else is a confident "not bad".
    """
    # Explicit moderation outcomes are full-confidence bad votes.
    if choice in (VolunteerJanitorResult.Warning, VolunteerJanitorResult.Ban):
        return True, 1

    # treating this like a low-weight bad response
    if choice == VolunteerJanitorResult.Bad:
        return True, 0.5

    # treating this like a low-weight not-bad response
    if choice == VolunteerJanitorResult.Neutral:
        return False, 0.5

    # Everything else counts as a confident not-bad.
    return False, 1
|
||||
|
||||
def userweight_from_user_accuracy(accuracy):
    """Convert a user's accuracy score into a vote weight in [0, 1].

    Accuracy at or below 0.5 contributes nothing; the weight ramps
    linearly up to 1 at perfect accuracy.
    """
    scaled = remap(accuracy, 0.5, 1, 0, 1)
    return saturate(scaled)
|
||||
|
||||
def calculate_final_comment_badness(comment_total, comment_weight, conservative):
    """Turn accumulated badness/weight into a final badness score in (0, 1).

    The plain estimate mixes in one artificial half-badness vote so the
    result never reaches exactly 0 or 1. The conservative variant
    additionally forges a doubting vote sized by our own uncertainty,
    pulling low-weight results toward 0.5 (and never past it).
    """
    # Plain estimate with the artificial 50%-badness confident vote folded in.
    plain_estimate = (comment_total + 0.5) / (comment_weight + 1.0)
    if not conservative:
        return plain_estimate

    if comment_weight == 0:
        # INSUFFICIENT DATA FOR A MEANINGFUL ANSWER
        return 0.5

    if plain_estimate > 0.5:
        # Looks bad: forge a not-bad vote weighted by our residual doubt,
        # but never let it flip the verdict past neutral.
        doubt = 1.0 - plain_estimate
        skeptical = comment_total / (comment_weight + doubt)
        return max(skeptical, 0.5)

    # Looks not-bad: forge a bad vote weighted by our residual doubt,
    # again capped so the verdict can't invert.
    doubt = plain_estimate
    skeptical = (comment_total + doubt) / (comment_weight + doubt)
    return min(skeptical, 0.5)
|
||||
|
||||
def update_comment_badness(db, cid, diagnostics: bool = False):
    """Recompute and persist the janitor badness score for a single comment.

    Replays this comment's VolunteerJanitorRecord events in order, keeps
    each assigned user's earliest real vote, weights votes by the voter's
    stored accuracy, and writes the conservative badness estimate to
    Comment.volunteer_janitor_badness. Does not commit; callers do.
    """
    # Recalculate the comment's confidence values
    # This probably does more SQL queries than it should
    records = db.query(VolunteerJanitorRecord) \
        .where(VolunteerJanitorRecord.comment_id == cid) \
        .order_by(VolunteerJanitorRecord.recorded_utc)

    # user_has_pending: users who were actually assigned this comment;
    # earliest_submission: their first non-pending vote.
    user_has_pending = {}
    earliest_submission = {}

    for rec in records:
        if rec.result == VolunteerJanitorResult.Pending:
            user_has_pending[rec.user_id] = True
        else:
            # Only count votes from users we actually handed an assignment.
            if rec.user_id in user_has_pending:
                if rec.user_id not in earliest_submission or earliest_submission[rec.user_id].recorded_utc > rec.recorded_utc:
                    earliest_submission[rec.user_id] = rec

    badness = 0
    weight = 0

    # Accumulate accuracy-weighted votes.
    for submission in earliest_submission.values():
        userweight_user = userweight_from_user_accuracy(submission.user.volunteer_janitor_correctness);
        submission_bad, submission_weight = evaluate_badness_of(submission.result)

        additive_weight = submission_weight * userweight_user

        weight += additive_weight
        if submission_bad:
            badness += additive_weight

    # Conservative estimate: skeptical of low-weight evidence.
    comment_badness = calculate_final_comment_badness(badness, weight, True)

    db.query(Comment) \
        .where(Comment.id == cid) \
        .update({Comment.volunteer_janitor_badness: comment_badness})

    if diagnostics:
        print(f"Updated comment {cid} to {comment_badness}")
|
|
@ -5,6 +5,7 @@ from files.classes.comment import Comment
|
|||
from files.classes.flags import CommentFlag
|
||||
from files.classes.user import User
|
||||
from files.classes.volunteer_janitor import VolunteerJanitorRecord, VolunteerJanitorResult
|
||||
from files.helpers.volunteer_janitor import update_comment_badness
|
||||
from files.routes.volunteer_common import VolunteerDuty
|
||||
from flask import g
|
||||
import pprint
|
||||
|
@ -93,4 +94,7 @@ def submitted(v: User, key: str, val: str) -> None:
|
|||
record.recorded_utc = sqlalchemy.func.now()
|
||||
record.result = VolunteerJanitorResult(int(val))
|
||||
g.db.add(record)
|
||||
|
||||
update_comment_badness(g.db, key)
|
||||
|
||||
g.db.commit()
|
||||
|
|
|
@ -27,7 +27,18 @@
|
|||
{% if c.bannedfor %}
|
||||
<a role="button"><i class="fas fa-hammer-crash text-danger" data-bs-toggle="tooltip" data-bs-placement="bottom" title="User was banned for this comment{% if c.author.banned_by %} by @{{c.author.banned_by.username}}{% endif %}"></i></a>
|
||||
{% endif %}
|
||||
{% if v and c.filter_state == 'reported' and v.can_manage_reports() %}<a class="btn btn-primary" id="report-btn-{{c.id}}" style="padding:1px 5px; font-size:10px" role="button" onclick="document.getElementById('flaggers-{{c.id}}').classList.toggle('d-none')">{{c.active_flags(v)}} Reports</a>{% endif %}
|
||||
{% if v and c.filter_state == 'reported' and v.can_manage_reports() %}
|
||||
<a class="btn btn-primary" id="report-btn-{{c.id}}" style="padding:1px 5px; font-size:10px" role="button" onclick="document.getElementById('flaggers-{{c.id}}').classList.toggle('d-none')">{{c.active_flags(v)}} Reports</a>
|
||||
<span class="volunteer_janitor_result_{{c.volunteer_janitor_css()}} volunteer_janitor_result">
|
||||
{% if c.volunteer_janitor_is_unknown() %}
|
||||
Unknown
|
||||
{% elif c.volunteer_janitor_is_notbad() %}
|
||||
Not-Bad (confidence {{c.volunteer_janitor_confidence()}})
|
||||
{% elif c.volunteer_janitor_is_bad() %}
|
||||
Bad (confidence {{c.volunteer_janitor_confidence()}})
|
||||
{% endif %}
|
||||
</span>
|
||||
{% endif %}
|
||||
{% if c.over_18 %}<span class="badge badge-danger text-small-extra mr-1">+18</span>{% endif %}
|
||||
{% if v and v.admin_level >= 2 and c.author.shadowbanned %}<i class="fas fa-user-times text-admin" data-bs-toggle="tooltip" data-bs-placement="bottom" title="Shadowbanned by @{{c.author.shadowbanned}}"></i>{% endif %}
|
||||
{% if c.is_pinned %}
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
"""add persistent volunteer-janitor data fields
|
||||
|
||||
Revision ID: b2373d7b1b84
|
||||
Revises: 0cd3a7ebef3f
|
||||
Create Date: 2023-04-22 10:48:16.476497+00:00
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'b2373d7b1b84'
|
||||
down_revision = '0cd3a7ebef3f'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    # manually adjusted to introduce our intended defaults
    # Pattern: add the column as nullable, backfill every existing row,
    # then tighten it to NOT NULL.
    op.add_column('comments', sa.Column('volunteer_janitor_badness', sa.Float()))
    op.execute("UPDATE comments SET volunteer_janitor_badness = 0.5")
    op.alter_column('comments', 'volunteer_janitor_badness', nullable=False)

    op.add_column('users', sa.Column('volunteer_janitor_correctness', sa.Float()))
    op.execute("UPDATE users SET volunteer_janitor_correctness = 0")
    op.alter_column('users', 'volunteer_janitor_correctness', nullable=False)
|
||||
|
||||
|
||||
def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    # Drop the two columns added in upgrade(); data in them is discarded.
    with op.batch_alter_table('users', schema=None) as batch_op:
        batch_op.drop_column('volunteer_janitor_correctness')

    with op.batch_alter_table('comments', schema=None) as batch_op:
        batch_op.drop_column('volunteer_janitor_badness')

    # ### end Alembic commands ###
|
|
@ -0,0 +1,26 @@
|
|||
"""recalculate all volunteer janitor ephemeral data
|
||||
|
||||
Revision ID: 6403c9151c12
|
||||
Revises: b2373d7b1b84
|
||||
Create Date: 2023-04-22 11:33:50.049868+00:00
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.orm.session import Session
|
||||
|
||||
from files.commands.volunteer_janitor_recalc import volunteer_janitor_recalc_all_comments
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '6403c9151c12'
|
||||
down_revision = 'b2373d7b1b84'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    # Data-only migration: recompute all janitor-derived scores using the
    # application code, on a session bound to the migration connection.
    volunteer_janitor_recalc_all_comments(Session(bind=op.get_bind()))
|
||||
|
||||
|
||||
def downgrade():
    # Nothing to undo: upgrade() only recomputed derived data.
    pass
|
2244
poetry.lock
generated
2244
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -39,12 +39,14 @@ yattag = "*"
|
|||
webptools = "*"
|
||||
supervisor = "*"
|
||||
superlance = "*"
|
||||
alive-progress = "*"
|
||||
|
||||
[tool.poetry.group.dev]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "*"
|
||||
tabulate = "*"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue