# rDrama/files/helpers/word_censor.py

from collections import ChainMap
import re
from re import Match
from typing import Dict, Pattern

from files.helpers.const import SLURS


def first_upper(phrase: str) -> str:
    """Converts the first character of the phrase to uppercase, not messing with the others

    An empty phrase is returned unchanged instead of raising an IndexError.
    """
    # slicing (unlike indexing with [0]) simply yields "" when the phrase is empty
    return phrase[:1].upper() + phrase[1:]


def first_all_upper(phrase: str) -> str:
    """Converts the first character of each word to uppercase, not messing with the others

    Words are delimited by single spaces. Empty words (produced by leading, trailing or consecutive spaces,
    or by an empty phrase) are kept as they are, so the spacing of the phrase is preserved and no IndexError
    is raised.
    """
    # word[:1] is "" for an empty word, which makes the capitalisation safe for every piece of the split;
    # a phrase without spaces is just the single-word case of the same expression
    return " ".join(word[:1].upper() + word[1:] for word in phrase.split(" "))


def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]:
    """
    Builds the lookup table for a single slur: every handled capitalisation of the (stripped) slur is mapped to
    the replacer written with the matching capitalisation
    """
    word = slur.strip()

    # special case for the :marseymerchant: -> a replacer starting with "http" is never re-capitalised
    if replacer.startswith("http"):
        shouted = titled = capitalised = replacer
    else:
        shouted = replacer.upper()
        titled = first_all_upper(replacer)
        capitalised = first_upper(replacer)

    # the assignment order is important: when two spellings of the slur are the same string, the value
    # assigned last is the one that stays in the dict
    permutations: Dict[str, str] = {}
    permutations[word.upper()] = shouted
    permutations[first_all_upper(word)] = titled
    permutations[word.lower()] = replacer
    permutations[word] = replacer
    permutations[first_upper(word)] = capitalised

    return permutations


def create_slur_regex() -> Pattern[str]:
    """
    Builds the case-insensitive pattern that finds the slurs listed in SLURS

    Keys of SLURS that do not start with a space may carry a prefix or a suffix: they match when they are either
    preceded by whitespace / '>' or followed by whitespace / '<'. Keys that start with a space must match
    exactly: they need whitespace / '>' before them and whitespace / '<' after them.

    Every key is passed through re.escape, so a key containing regex metacharacters is matched literally
    instead of corrupting (or changing the meaning of) the pattern.
    """
    # words that can have suffixes and prefixes
    words = "|".join(re.escape(slur.lower()) for slur in SLURS if not slur.startswith(" "))

    # to understand the weird groups see: https://www.regular-expressions.info/lookaround.html
    regex = rf"(?<=\s|>)({words})|({words})(?=\s|<)"

    # words that need to match exactly
    single_words = "|".join(re.escape(slur.strip().lower()) for slur in SLURS if slur.startswith(" "))

    # the global (?i) flag has to stay at the very beginning of the pattern
    return re.compile(rf"(?i){regex}|(?<=\s|>)({single_words})(?=\s|<)")


def create_replace_map() -> Dict[str, str]:
    """Creates the map that will be used to get the matching replacer for a given spelling of a slur"""
    per_slur = [get_permutations_slur(slur, replacer) for slur, replacer in SLURS.items()]

    # merge into a single dict, going from the last slur to the first: if the same key is produced by more
    # than one slur, the value coming from the slur listed first in SLURS is the one that is kept
    merged: Dict[str, str] = {}
    for permutations in reversed(per_slur):
        merged.update(permutations)

    return merged


# Both structures are built from SLURS a single time, when this module is imported:
# SLUR_REGEX finds the slurs in a text, REPLACE_MAP gives the replacer for each matched spelling
SLUR_REGEX: Pattern[str] = create_slur_regex()
REPLACE_MAP: Dict[str, str] = create_replace_map()


def sub_matcher(match: Match) -> str:
    """Returns the text that has to take the place of the given regex match"""
    matched_text = match.group(0)

    # look for the exact capitalization first, then fall back to the all lower spelling
    for candidate in (matched_text, matched_text.lower()):
        replacement = REPLACE_MAP.get(candidate)
        if replacement:
            return replacement

    # no usable entry in the map: the matched text is left as it is
    return matched_text


def censor_slurs(body: str, logged_user) -> str:
    """
    Returns the body with the slurs replaced when there is no logged-in user or the user has the slurreplacer
    active; in every other case the body is returned untouched
    """

    # a logged-in user that turned the slurreplacer off reads the text as it was written
    if logged_user and not logged_user.slurreplacer:
        return body

    try:
        return SLUR_REGEX.sub(sub_matcher, body)
    except Exception as error:
        # best effort: a failure in the replacement is printed and the original body is given back
        print(error)
        return body