Initial working version of word censor
This commit is contained in:
parent
909600fff2
commit
b83e0c6665
6 changed files with 189 additions and 61 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -5,3 +5,6 @@ cache/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
disablesignups
|
disablesignups
|
||||||
*rules.html
|
*rules.html
|
||||||
|
.idea/
|
||||||
|
**/.pytest_cache/
|
||||||
|
venv/
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from os import environ
|
from os import environ
|
||||||
|
|
||||||
site = environ.get("DOMAIN").strip()
|
site = environ.get("DOMAIN", '').strip()
|
||||||
|
|
||||||
SLURS = {
|
SLURS = {
|
||||||
" faggot":" cute twink",
|
" faggot":" cute twink",
|
||||||
|
|
47
files/helpers/word_censor.py
Normal file
47
files/helpers/word_censor.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
from collections import ChainMap
|
||||||
|
import re
|
||||||
|
from re import Match
|
||||||
|
|
||||||
|
from files.helpers.const import SLURS
|
||||||
|
|
||||||
|
|
||||||
|
def create_replace_map():
    """Build the lookup table from each cased slur spelling to its replacement.

    For every ``SLURS`` entry, the key as written plus its title-cased,
    capitalized and upper-cased spellings are mapped to the replacement
    transformed the same way. When title-casing and capitalizing give the
    same key (single words), the capitalized replacement is the one kept.

    Returns:
        A plain ``dict`` of spelling -> replacement. If two ``SLURS`` entries
        generate the same key, the entry listed first in ``SLURS`` wins.
    """
    replace_map = {}

    # Walk the entries back to front: a later ``update`` overwrites earlier
    # values, so the entry that comes first in SLURS has the final say.
    for slur, replacer in reversed(list(SLURS.items())):
        replace_map.update({
            slur: replacer,
            slur.title(): replacer.title(),
            slur.capitalize(): replacer.capitalize(),
            slur.upper(): replacer.upper(),
        })

    return replace_map
|
||||||
|
|
||||||
|
|
||||||
|
# Lookup table built once at import time by create_replace_map();
# sub_matcher indexes it with the matched spelling to get the replacement.
REPLACE_MAP = create_replace_map()
|
||||||
|
|
||||||
|
|
||||||
|
def create_variations_slur_regex(slur: str):
    """Return the regex patterns that locate the cased spellings of ``slur``.

    The spellings are, in order: the slur as given, upper-cased, and
    capitalized; if the slur contains a space, the title-cased spelling is
    appended as well. The slur text is inserted into the pattern verbatim.

    Each pattern has four groups: ``(\\s|>)(word)`` for a word preceded by
    whitespace or ``>``, or ``(word)(\\s|<)`` for a word followed by
    whitespace or ``<``.
    """
    def as_pattern(spelling):
        return rf"(\s|>)({spelling})|({spelling})(\s|<)"

    spellings = [slur, slur.upper(), slur.capitalize()]
    if " " in slur:
        # Multi-word phrases also get the "Every Word Capitalized" spelling.
        spellings += [slur.title()]

    return list(map(as_pattern, spellings))
|
||||||
|
|
||||||
|
|
||||||
|
def sub_matcher(match: Match):
    """Build the replacement text for one match of a slur-variation pattern.

    The match is expected to have four groups: group 2 holds the word when
    it was found after a leading delimiter (group 1), group 3 holds it when
    it was found before a trailing delimiter (group 4).

    Returns:
        The replacement looked up in ``REPLACE_MAP``, wrapped in whichever
        delimiter was captured so the surrounding text is preserved.
    """
    found = match.group(2)
    if found is None:
        # The second alternative matched: word followed by a delimiter.
        found = match.group(3)

    replacer = REPLACE_MAP[found]

    # Groups from the alternative that did not match are None; treat as empty.
    with_prefix = (match.group(1) or '') + replacer
    return with_prefix + (match.group(4) or '')
|
||||||
|
|
||||||
|
|
||||||
|
def censor_slurs(v, body):
    """Replace every known slur spelling in ``body`` with its substitute.

    Args:
        v: Not used by this function; kept so existing callers keep working.
        body: The text to censor.

    Returns:
        ``body`` after running the substitution for every pattern produced by
        ``create_variations_slur_regex`` for every key of ``SLURS``. A pattern
        whose substitution raises is skipped and ``body`` is left as it was
        before that pattern.
    """
    for slur in SLURS:
        for variation in create_variations_slur_regex(slur):
            try:
                body = re.sub(variation, sub_matcher, body)
            except Exception:
                # Censoring is best effort: a failing pattern must not break
                # the caller, so move on to the next one. Catching Exception
                # rather than using a bare ``except`` lets KeyboardInterrupt
                # and SystemExit propagate.
                continue

    return body
|
|
@ -1,3 +1,4 @@
|
||||||
|
assertpy
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
bleach
|
bleach
|
||||||
Flask
|
Flask
|
||||||
|
@ -20,6 +21,7 @@ requests
|
||||||
SQLAlchemy
|
SQLAlchemy
|
||||||
psycopg2-binary
|
psycopg2-binary
|
||||||
pusher_push_notifications
|
pusher_push_notifications
|
||||||
|
pytest
|
||||||
youtube-dl
|
youtube-dl
|
||||||
yattag
|
yattag
|
||||||
webptools
|
webptools
|
76
test/files/helpers/test_word_censor.py
Normal file
76
test/files/helpers/test_word_censor.py
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
import re
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from assertpy import assert_that
|
||||||
|
|
||||||
|
from files.helpers import word_censor
|
||||||
|
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_variations_slur_regex_single_word():
    """A single word yields exactly the plain, capitalized and upper patterns."""
    patterns = create_variations_slur_regex("retard")

    assert_that(patterns).is_length(3).contains_only(
        r"(\s|>)(retard)|(retard)(\s|<)",
        r"(\s|>)(Retard)|(Retard)(\s|<)",
        r"(\s|>)(RETARD)|(RETARD)(\s|<)",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_variations_slur_regex_multiple_word():
    """A phrase containing a space additionally yields the title-cased pattern."""
    patterns = create_variations_slur_regex("kill yourself")

    assert_that(patterns).is_length(4).contains_only(
        r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
        r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
        r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
        r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)",
    )
|
||||||
|
|
||||||
|
|
||||||
|
@patch("files.helpers.word_censor.SLURS", {
    "tranny": "🚂🚃🚃",
    "kill yourself": "keep yourself safe",
    "faggot": "cute twink",
})
def test_create_replace_map():
    """The map holds every cased spelling with a matching-cased replacement."""
    replace_map = create_replace_map()

    assert_that(replace_map).is_equal_to({
        "tranny": "🚂🚃🚃",
        "Tranny": "🚂🚃🚃",
        "TRANNY": "🚂🚃🚃",
        "kill yourself": "keep yourself safe",
        "Kill yourself": "Keep yourself safe",
        "KILL YOURSELF": "KEEP YOURSELF SAFE",
        "Kill Yourself": "Keep Yourself Safe",
        "faggot": "cute twink",
        "Faggot": "Cute twink",
        "FAGGOT": "CUTE TWINK",
    })
|
||||||
|
|
||||||
|
|
||||||
|
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur'})
def test_sub_matcher():
    """The captured delimiter is kept on whichever side of the word it matched."""
    pattern = r"(\s|>)(retard)|(retard)(\s|<)"

    # Word right after ">": the first alternative matches, delimiter in front.
    leading_hit = re.search(pattern, "<p>retard</p>")
    assert_that(sub_matcher(leading_hit)).is_equal_to(">r-slur")

    # Word glued to a prefix: only the second alternative (word then "<") fits.
    trailing_hit = re.search(pattern, "<p>noretard</p>")
    assert_that(sub_matcher(trailing_hit)).is_equal_to("r-slur<")
|
||||||
|
|
||||||
|
|
||||||
|
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'})
def test_censor_slurs():
    """censor_slurs replaces delimited, consistently cased matches only.

    Covers matches next to HTML tags and whitespace, capitalized and
    upper-cased spellings, and inputs that must stay untouched (quoted,
    mixed case, embedded with no delimiter on either side).
    """
    cases = [
        ("<p>retard</p>", "<p>r-slur</p>"),
        ("<p>preretard</p>", "<p>prer-slur</p>"),
        ("that is Retarded like", "that is R-slured like"),
        ("that is SUPERRETARD like", "that is SUPERR-SLUR like"),
        ("<p>Manlets get out!</p>", "<p>Little kings get out!</p>"),
        ('... "retard" ...', '... "retard" ...'),
        ('... ReTaRd ...', '... ReTaRd ...'),
        ('... aretarded ...', '... aretarded ...'),
        ("LLM is a manlet hehe", "LLM is a little king hehe"),
        ("LLM is :marseycapitalistmanlet: hehe", "LLM is :marseycapitalistmanlet: hehe"),
    ]

    # REPLACE_MAP is computed at import time, so it must be rebuilt from the
    # patched SLURS. patch.object restores the real map when the block exits,
    # so this test does not leak state into other tests.
    with patch.object(word_censor, "REPLACE_MAP", create_replace_map()):
        for text, expected in cases:
            assert_that(censor_slurs(None, text)).is_equal_to(expected)
|
Loading…
Add table
Add a link
Reference in a new issue