-
Notifications
You must be signed in to change notification settings - Fork 4
/
offensive.py
35 lines (33 loc) · 1.53 KB
/
offensive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
"""This module defines a compiled regular expression of offensive words,
which is used to filter out potentially offensive tweets."""
import re
# The offensive-word pattern below is not my own work.
# It is used to identify potentially offensive tweets.
# Copyright (c) 2013-2017 Molly White.
# Case-insensitive pattern for words and phrases about death, violence,
# abuse and similar topics; text it matches is treated as potentially
# offensive.
#
# The leading \b anchors every alternative to the start of a word.  The
# trailing \W?\b makes the match end on a word boundary, optionally
# consuming one non-word character that is directly followed by a word
# character.
#
# Inflections are only ever added inside existing groups (or as optional
# trailing characters) so that capture-group numbering stays stable.
OFFENSIVE = re.compile(
    r"\b(deaths?|dead(ly)?|die(s|d)?|hurts?|(sex(ual(ly)?)?|"
    r"child)[ -]?(abused?|trafficking|"
    r"assault(ed|s)?)|injur(e|i?es|ed|y)|kill(ing|ed|er|s)?s?|"
    r"wound(ing|ed|s)?|fatal(ly|ity)?|"
    r"shoo?t(s|ing|er)?s?|crash(es|ed|ing)?|attack(s|ers?|ing|ed)?|"
    r"murder(s|er|ed|ing)?s?|"
    r"hostages?|(gang)?rap(e|es|ed|ist|ists|ing)|assault(s|ed)?|"
    r"pile-?ups?|massacre(s|d)?|"
    r"assassinate(d|s)?|sla(y|in|yed|ys|ying|yings)|victims?|"
    r"tortur(e|ed|ing|es)|"
    # "ing" added so "executing" is caught like the other -ing verb forms.
    r"execut(e|ion|ed|ioner|ing)s?|gun(man|men|ned)|suicid(e|al|es)|"
    r"bomb(s|ed|ing|ings|er|ers)?|"
    r"mass[- ]?graves?|bloodshed|state[- ]?of[- ]?emergency|al[- ]?Qaeda|"
    r"blasts?|violen(t|ce)|"
    r"lethal|cancer(ous)?|stab(bed|bing|ber)?s?|casualt(y|ies)|"
    r"sla(y|ying|yer|in)|"
    r"drown(s|ing|ed|ings)?|bod(y|ies)|kidnap(s|ped|per|pers|ping|pings)?|"
    r"rampage|beat(ings?|en)|"
    # "ing" added so "abducting" is caught as well.
    r"terminal(ly)?|abduct(s|ed|ion|ing)?s?|missing|behead(s|ed|ings?)?|"
    r"homicid(e|es|al)|"
    # Extra trailing "s?" lets the plural "jihadists" match; without it the
    # closing \b fails between "jihadist" and the final "s".
    r"burn(s|ed|ing)? alive|decapitated?s?|jihadi?s?t?s?|hang(ed|ing|s)?|"
    r"funerals?|traged(y|ies)|"
    r"autops(y|ies)|child sex|sob(s|bing|bed)?|pa?edophil(e|es|ia)|"
    r"9(/|-)11|Sept(ember|\.)? 11|"
    r"genocide)\W?\b",
    flags=re.IGNORECASE)