Initial working version of word censor

2021-10-16 19:04:08 +02:00 · 2021-10-16 19:04:08 +02:00 · b83e0c6665
commit b83e0c6665
parent 909600fff2
6 changed files with 189 additions and 61 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,6 @@ cache/
 __pycache__/
 disablesignups
 *rules.html
 .idea/
 **/.pytest_cache/
 venv/
--- a/files/helpers/const.py
+++ b/files/helpers/const.py
@ -1,6 +1,6 @@
 from os import environ
-site = environ.get("DOMAIN").strip()
+site = environ.get("DOMAIN", '').strip()
 SLURS = {
 		" faggot":" cute twink",
--- a/files/helpers/word_censor.py
+++ b/files/helpers/word_censor.py
@ -0,0 +1,47 @@
 from collections import ChainMap
 import re
 from re import Match
 from files.helpers.const import SLURS
 def create_replace_map(slurs=None):
    """Build the lookup table from every cased slur variant to its replacement.

    Each (slur, replacer) pair contributes four entries: as written,
    title-cased, capitalized and upper-cased, with the replacer cased the
    same way. For a single word ``title()`` and ``capitalize()`` yield the
    same key; the capitalized replacer is the one that is kept.

    Args:
        slurs: mapping of slur -> replacer. Defaults to the module-level
            SLURS, looked up at call time so patched values are honoured.

    Returns:
        A plain dict. When two slurs produce the same variant, the slur
        listed first in ``slurs`` wins.
    """
    source = SLURS if slurs is None else slurs
    replace_map = {}
    # Walk back to front: later updates overwrite earlier ones, so the
    # first slur in the table ends up with priority on colliding keys.
    for slur, replacer in reversed(list(source.items())):
        replace_map.update({
            slur: replacer,
            slur.title(): replacer.title(),
            slur.capitalize(): replacer.capitalize(),
            slur.upper(): replacer.upper(),
        })
    return replace_map
 # Lookup table from each cased slur variant to its replacement; built once
 # when the module is imported and read by sub_matcher.
 REPLACE_MAP = create_replace_map()
 def create_variations_slur_regex(slur: str):
    """Return one regex string per casing of ``slur``.

    The casings are, in order: as written, upper-cased, capitalized and,
    only when the slur contains a space, title-cased. Every pattern has two
    alternatives: the word preceded by whitespace or ``>`` (groups 1 and 2),
    or the word followed by whitespace or ``<`` (groups 3 and 4). The slur
    text is interpolated into the pattern verbatim.
    """
    casings = [slur, slur.upper(), slur.capitalize()]
    # title-casing only differs from capitalizing for multi-word slurs
    if " " in slur:
        casings += [slur.title()]

    patterns = []
    for casing in casings:
        patterns.append(rf"(\s|>)({casing})|({casing})(\s|<)")
    return patterns
 def sub_matcher(match: Match):
    """Replacement callback for ``re.sub``: swap the matched slur for its substitute.

    The matched word is taken from group 2, or from group 3 when group 2
    did not participate in the match, and looked up in REPLACE_MAP. The
    delimiter captured in group 1 and/or group 4 is kept around the
    substitute; a group that did not participate contributes nothing.

    Raises:
        KeyError: if the matched word has no entry in REPLACE_MAP.
    """
    word = match.group(2)
    if word is None:
        word = match.group(3)
    substitute = REPLACE_MAP[word]
    leading = match.group(1) or ''
    trailing = match.group(4) or ''
    return leading + substitute + trailing
 def censor_slurs(v, body):
    """Return ``body`` with every known slur variant replaced.

    Each slur in SLURS is expanded into its cased regex variations and
    substituted through sub_matcher, one variation at a time. Censoring is
    best effort: a variation that cannot be applied is skipped, never fatal.

    Args:
        v: not used by this function; kept for caller compatibility.
        body: the text to censor. Anything that is not a ``str`` is
            returned unchanged.

    Returns:
        The censored text.
    """
    # Non-text input cannot be searched; hand it back untouched instead of
    # raising (and swallowing) a TypeError for every single pattern.
    if not isinstance(body, str):
        return body

    for slur in SLURS:
        for variation in create_variations_slur_regex(slur):
            try:
                body = re.sub(variation, sub_matcher, body)
            except (re.error, KeyError):
                # re.error: the slur text is not a valid pattern.
                # KeyError: the matched text has no REPLACE_MAP entry.
                continue
    return body
--- a/requirements.txt
+++ b/requirements.txt
@ -1,3 +1,4 @@
 assertpy
 beautifulsoup4
 bleach
 Flask
@ -20,6 +21,7 @@ requests
 SQLAlchemy
 psycopg2-binary
 pusher_push_notifications
 pytest
 youtube-dl
 yattag
 webptools
--- a/test/files/helpers/test_word_censor.py
+++ b/test/files/helpers/test_word_censor.py
@ -0,0 +1,76 @@
 import re
 from unittest.mock import patch
 from assertpy import assert_that
 from files.helpers import word_censor
 from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher
 def test_create_variations_slur_regex_single_word():
    """A single-word slur yields exactly three patterns: lower, capitalized, upper."""
    patterns = create_variations_slur_regex("retard")

    assert_that(patterns).is_length(3).contains_only(
        r"(\s|>)(retard)|(retard)(\s|<)",
        r"(\s|>)(Retard)|(Retard)(\s|<)",
        r"(\s|>)(RETARD)|(RETARD)(\s|<)",
    )
 def test_create_variations_slur_regex_multiple_word():
    """A multi-word slur additionally yields the title-cased pattern (four in total)."""
    patterns = create_variations_slur_regex("kill yourself")

    assert_that(patterns).is_length(4).contains_only(
        r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
        r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
        r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
        r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)",
    )
@patch("files.helpers.word_censor.SLURS", {
    "tranny": "🚂🚃🚃",
    "kill yourself": "keep yourself safe",
    "faggot": "cute twink",
})
def test_create_replace_map():
    """The map holds every cased variant of each patched slur with a matching-case replacer."""
    assert_that(create_replace_map()).is_equal_to({
        "tranny": "🚂🚃🚃",
        "Tranny": "🚂🚃🚃",
        "TRANNY": "🚂🚃🚃",
        "kill yourself": "keep yourself safe",
        "Kill yourself": "Keep yourself safe",
        "KILL YOURSELF": "KEEP YOURSELF SAFE",
        "Kill Yourself": "Keep Yourself Safe",
        "faggot": "cute twink",
        "Faggot": "Cute twink",
        "FAGGOT": "CUTE TWINK",
    })
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur'})
def test_sub_matcher():
    """sub_matcher keeps whichever delimiter was captured next to the word."""
    pattern = r"(\s|>)(retard)|(retard)(\s|<)"
    cases = (
        # word right after ">": the leading delimiter is preserved
        ("<p>retard</p>", ">r-slur"),
        # word glued to a prefix: only the trailing "<" alternative matches
        ("<p>noretard</p>", "r-slur<"),
    )
    for html, expected in cases:
        found = re.search(pattern, html)
        assert_that(sub_matcher(found)).is_equal_to(expected)
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'})
def test_censor_slurs():
    """End-to-end check of censor_slurs against a two-entry slur table.

    REPLACE_MAP is computed when word_censor is imported, so it has to be
    rebuilt from the patched SLURS. It is swapped in with ``patch.object``
    so the module-level value is restored when the test finishes instead of
    leaking into other tests.
    """
    with patch.object(word_censor, "REPLACE_MAP", create_replace_map()):
        assert_that(censor_slurs(None, "<p>retard</p>")).is_equal_to("<p>r-slur</p>")
        assert_that(censor_slurs(None, "<p>preretard</p>")).is_equal_to("<p>prer-slur</p>")
        assert_that(censor_slurs(None, "that is Retarded like")).is_equal_to("that is R-slured like")
        assert_that(censor_slurs(None, "that is SUPERRETARD like")).is_equal_to("that is SUPERR-SLUR like")
        assert_that(censor_slurs(None, "<p>Manlets get out!</p>")).is_equal_to("<p>Little kings get out!</p>")
        # no whitespace or angle bracket next to the word, mixed casing,
        # or the word buried inside a longer one: all left untouched
        assert_that(censor_slurs(None, '... "retard" ...')).is_equal_to('... "retard" ...')
        assert_that(censor_slurs(None, '... ReTaRd ...')).is_equal_to('... ReTaRd ...')
        assert_that(censor_slurs(None, '... aretarded ...')).is_equal_to('... aretarded ...')
        assert_that(censor_slurs(None, "LLM is a manlet hehe")).is_equal_to("LLM is a little king hehe")
        assert_that(censor_slurs(None, "LLM is :marseycapitalistmanlet: hehe")) \
            .is_equal_to("LLM is :marseycapitalistmanlet: hehe")