Fix bug where the censor did not censor a slur that was followed by '.' or ','

This commit is contained in:
Yo Mama 2021-10-20 00:37:23 +02:00
parent 7ece47cba3
commit 6a350e8242
2 changed files with 5 additions and 4 deletions

View file

@@ -43,7 +43,7 @@ def create_slur_regex() -> Pattern[str]:
"""Creates the regex that will find the slurs""" """Creates the regex that will find the slurs"""
single_words = "|".join([slur.strip().lower() for slur in SLURS.keys()]) single_words = "|".join([slur.strip().lower() for slur in SLURS.keys()])
return re.compile(rf"(?i)(?<=\s|>)({single_words})(?=\s|<)") return re.compile(rf"(?i)(?<=\s|>)({single_words})(?=[\s<,.])")
def create_replace_map() -> Dict[str, str]: def create_replace_map() -> Dict[str, str]:

View file

@@ -52,7 +52,7 @@ def test_get_permutations_slur_wiht_link_replacer():
"retard": "r-slur", "retard": "r-slur",
}) })
def test_create_slur_regex(): def test_create_slur_regex():
expected = r"(?i)(?<=\s|>)(kill yourself|faggot|nig|retard)(?=\s|<)" expected = r"(?i)(?<=\s|>)(kill yourself|faggot|nig|retard)(?=[\s<,.])"
assert_that(create_slur_regex()).is_equal_to(re.compile(expected)) assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
@@ -91,8 +91,7 @@ def test_create_replace_map():
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'Faggot': 'Cute twink', 'NIG': '🏀'}) @patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'Faggot': 'Cute twink', 'NIG': '🏀'})
def test_sub_matcher(): def test_sub_matcher():
regex = re.compile( regex = re.compile(r"(?i)(?<=\s|>)(kill yourself|retard|nig|faggot)(?=[\s<,.])")
r"(?i)(?<=\s|>)(kill yourself|retard)|(kill yourself|retard)(?=\s|<)|(?<=\s|>)(nig|faggot)(?=\s|<)")
match = regex.search("<p>retard</p>") match = regex.search("<p>retard</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur") assert_that(sub_matcher(match)).is_equal_to("r-slur")
@@ -121,6 +120,8 @@ def test_censor_slurs():
assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>") assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>")
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... r-slur ...') assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... r-slur ...')
assert_that(censor_slurs("<p>Manlet get out!</p>", None)).is_equal_to("<p>Little king get out!</p>") assert_that(censor_slurs("<p>Manlet get out!</p>", None)).is_equal_to("<p>Little king get out!</p>")
assert_that(censor_slurs("... retard. other", None)).is_equal_to("... r-slur. other")
assert_that(censor_slurs("... retard, other", None)).is_equal_to("... r-slur, other")
# does not work: # does not work:
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>preretard</p>") assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>preretard</p>")