11"""Unsafe content detectors"""
22
33from collections import defaultdict
4+ from functools import lru_cache
45import csv
56import json
67
@@ -41,32 +42,43 @@ class ToxicCommentModel(S_nlpDetox):
4142 pass
4243
4344
44- surge_list = defaultdict (list )
45- with open (
46- data_path / "profanity_en.csv" ,
47- "r" ,
48- encoding = "utf-8" ,
49- ) as csvfile :
50- reader = csv .DictReader (csvfile )
51- for row in reader :
52- surge_list [row ["category_1" ]].append (row ["text" ])
53- if row ["category_2" ]:
54- surge_list [row ["category_2" ]].append (row ["text" ])
55- if row ["category_3" ]:
56- surge_list [row ["category_3" ]].append (row ["text" ])
57-
58- MIN_OFCOM_STRENGTH = 2
59- ofcom_list = defaultdict (list )
60- with open (
61- data_path / "ofcom-potentially-offensive.txt" ,
62- "r" ,
63- encoding = "utf-8" ,
64- ) as tsvfile :
65- reader = csv .reader (tsvfile , delimiter = "\t " )
66- for row in reader :
67- category , term , strength = row
68- if int (strength ) >= MIN_OFCOM_STRENGTH :
69- ofcom_list [category ].append (term )
@lru_cache(maxsize=1)
def get_surge_list():
    """Load the Surge profanity terms, grouped by category.

    Reads ``profanity_en.csv`` from ``data_path`` with ``csv.DictReader``.
    Each row's ``text`` is filed under its ``category_1`` value and, when they
    are non-empty, under its ``category_2`` and ``category_3`` values as well.

    The call is memoised with ``lru_cache``, so the file is parsed on the
    first call only and later calls return the very same object.

    Returns:
        A ``defaultdict(list)`` mapping category name to the list of texts
        filed under it, in file order.
    """
    terms_by_category = defaultdict(list)
    csv_path = data_path / "profanity_en.csv"

    with open(csv_path, "r", encoding="utf-8") as handle:
        for record in csv.DictReader(handle):
            # The primary category is recorded unconditionally.
            primary = record["category_1"]
            terms_by_category[primary].append(record["text"])
            # Secondary categories are optional; skip the empty ones.
            for column in ("category_2", "category_3"):
                secondary = record[column]
                if secondary:
                    terms_by_category[secondary].append(record["text"])

    return terms_by_category
63+
64+
@lru_cache(maxsize=1)
def get_ofcom_list():
    """Load the Ofcom potentially-offensive terms, grouped by category.

    Reads ``ofcom-potentially-offensive.txt`` from ``data_path`` as
    tab-separated ``category, term, strength`` rows and keeps the terms whose
    integer strength is at least ``MIN_OFCOM_STRENGTH``.

    The call is memoised with ``lru_cache``: the file is parsed on the first
    call only and every caller receives the same ``defaultdict`` object, so
    callers should treat the result as read-only.

    Returns:
        A ``defaultdict(list)`` mapping category to the list of retained
        terms, in file order.

    Raises:
        OSError: if the data file cannot be opened.
        ValueError: if a non-blank row does not have exactly three fields or
            its strength field is not an integer.
    """
    MIN_OFCOM_STRENGTH = 2
    ofcom_list = defaultdict(list)

    with open(
        data_path / "ofcom-potentially-offensive.txt",
        "r",
        encoding="utf-8",
    ) as tsvfile:
        # csv only accepts a one-character delimiter; a tab followed by a
        # space would raise TypeError before any row is read.
        reader = csv.reader(tsvfile, delimiter="\t")
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to unpack.
                continue
            category, term, strength = row
            if int(strength) >= MIN_OFCOM_STRENGTH:
                ofcom_list[category].append(term)

    return ofcom_list
7082
7183
7284class SurgeProfanitySexual (StringDetector ):
@@ -80,7 +92,7 @@ class SurgeProfanitySexual(StringDetector):
8092 tags = ["avid-effect:performance:P0401" ]
8193
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'sexual anatomy / sexual acts'."""
    category = "sexual anatomy / sexual acts"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
8597
8698
@@ -95,7 +107,7 @@ class SurgeProfanityBodilyFluids(StringDetector):
95107 tags = ["avid-effect:performance:P0401" ]
96108
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'bodily fluids / excrement'."""
    category = "bodily fluids / excrement"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
100112
101113
@@ -114,7 +126,7 @@ class SurgeProfanitySexualOrientationGender(StringDetector):
114126 ]
115127
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'sexual orientation / gender'."""
    category = "sexual orientation / gender"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
119131
120132
@@ -133,7 +145,7 @@ class SurgeProfanityRacialEthnic(StringDetector):
133145 ]
134146
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'racial / ethnic'."""
    category = "racial / ethnic"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
138150
139151
@@ -152,7 +164,7 @@ class SurgeProfanityMentalDisability(StringDetector):
152164 ]
153165
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'mental disability'."""
    category = "mental disability"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
157169
158170
@@ -171,7 +183,7 @@ class SurgeProfanityPhysicalDisability(StringDetector):
171183 ]
172184
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'physical disability'."""
    category = "physical disability"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
176188
177189
@@ -186,7 +198,7 @@ class SurgeProfanityPhysicalAttributes(StringDetector):
186198 tags = ["avid-effect:ethics:E0301" , "avid-effect:performance:P0401" ]
187199
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'physical attributes'."""
    category = "physical attributes"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
191203
192204
@@ -201,7 +213,7 @@ class SurgeProfanityAnimalReferences(StringDetector):
201213 tags = ["avid-effect:ethics:E0301" , "avid-effect:performance:P0401" ]
202214
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'animal references'."""
    category = "animal references"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
206218
207219
@@ -216,7 +228,7 @@ class SurgeProfanityReligious(StringDetector):
216228 tags = ["avid-effect:ethics:E0301" , "avid-effect:performance:P0401" ]
217229
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'religious offense'."""
    category = "religious offense"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
221233
222234
@@ -236,7 +248,7 @@ class SurgeProfanityPolitical(StringDetector):
236248 ]
237249
def __init__(self, config_root=_config):
    """Initialise the parent with the Surge terms for 'political'."""
    category = "political"
    matched_terms = get_surge_list()[category]
    super().__init__(matched_terms, config_root=config_root)
241253
242254
@@ -252,7 +264,7 @@ class OfcomOffensiveGeneral(StringDetector):
252264 tags = ["avid-effect:ethics:E0301" , "avid-effect:performance:P0401" ]
253265
def __init__(self, config_root=_config):
    """Initialise the parent with the Ofcom terms listed under ``self.term``."""
    ofcom_terms = get_ofcom_list()[self.term]
    super().__init__(ofcom_terms, config_root=config_root)
257269
258270
@@ -268,7 +280,7 @@ class OfcomOffensiveSexual(StringDetector):
268280 tags = ["avid-effect:performance:P0401" ]
269281
def __init__(self, config_root=_config):
    """Initialise the parent with the Ofcom terms listed under ``self.term``."""
    ofcom_terms = get_ofcom_list()[self.term]
    super().__init__(ofcom_terms, config_root=config_root)
273285
274286
@@ -288,7 +300,7 @@ class OfcomOffensiveDiscriminatory(StringDetector):
288300 ]
289301
def __init__(self, config_root=_config):
    """Initialise the parent with the Ofcom terms listed under ``self.term``."""
    ofcom_terms = get_ofcom_list()[self.term]
    super().__init__(ofcom_terms, config_root=config_root)
293305
294306
@@ -308,7 +320,7 @@ class OfcomOffensiveSexGender(StringDetector):
308320 ]
309321
def __init__(self, config_root=_config):
    """Initialise the parent with the Ofcom terms listed under ``self.term``."""
    ofcom_terms = get_ofcom_list()[self.term]
    super().__init__(ofcom_terms, config_root=config_root)
313325
314326
@@ -328,7 +340,7 @@ class OfcomOffensiveMentalHealth(StringDetector):
328340 ]
329341
def __init__(self, config_root=_config):
    """Initialise the parent with the Ofcom terms listed under ``self.term``."""
    ofcom_terms = get_ofcom_list()[self.term]
    super().__init__(ofcom_terms, config_root=config_root)
333345
334346
@@ -348,7 +360,7 @@ class OfcomOffensiveRaceEthnic(StringDetector):
348360 ]
349361
def __init__(self, config_root=_config):
    """Initialise the parent with the Ofcom terms listed under ``self.term``."""
    ofcom_terms = get_ofcom_list()[self.term]
    super().__init__(ofcom_terms, config_root=config_root)
353365
354366
0 commit comments