8
8
9
9
class Plugin (Source ):
10
10
11
def __init__(self, name, url, include=None, exclude=None, path=None):
    """Store the plugin configuration on the instance.

    Nothing is validated or fetched here; the values are only saved for
    later use by ``run``.

    Args:
        name: Name of this source; stored unchanged.
        url: URL for this source; stored unchanged.
            NOTE(review): presumably the sitemap location -- confirm
            against ``run``.
        include: Optional ``|``-separated keyword pattern supplied via
            config.yml (for example ``security|threat|malware``).
            ``run`` processes a location when a keyword occurs in it.
        exclude: Optional pattern in the same ``|``-separated form.
            ``run`` tests each keyword with ``not in`` against the
            location.
        path: Optional string that ``run`` tests with ``in`` against
            each location (for example ``/path/name/``).
    """
    self.name = name
    self.url = url
    # The former single ``filter`` option is split into two independent
    # keyword patterns.
    self.include = include
    self.exclude = exclude
    self.path = path
16
17
17
18
def run (self , saved_state ):
@@ -47,13 +48,13 @@ def run(self, saved_state):
47
48
[x .unwrap () for x in soup .find_all ('i' )]
48
49
soup = BeautifulSoup (soup .decode (), 'html.parser' )
49
50
50
- if self .filter is not None :
51
+ if self .exclude is not None :
51
52
# Regex input via config.yml
52
53
# Example: security|threat|malware
53
- xml_query = re .compile (r"{0}" .format (self .filter )).findall (str (self .filter .split ('|' )))
54
+ xml_exclude = re .compile (r"{0}" .format (self .exclude )).findall (str (self .exclude .split ('|' )))
54
55
55
56
# Iterates over the regex output to locate all provided keywords
56
- for x in xml_query :
57
+ for xe in xml_exclude :
57
58
# Uses a path instead of a keyword
58
59
if self .path is not None :
59
60
if self .path in loc :
@@ -62,19 +63,38 @@ def run(self, saved_state):
62
63
63
64
# Only filters using a keyword
64
65
if self .path is None :
65
- if x in loc :
66
+ if xe not in loc :
66
67
text = soup .get_text (separator = ' ' ).split ('Indicators of Compromise' )[- 1 ]
67
68
artifacts += self .process_element (content = text , reference_link = str (loc ), include_nonobfuscated = True )
68
69
69
- elif self .filter is None and self .path is not None :
70
- # Filters only by path in XML loc, no set filter
70
+ if self .include is not None :
71
+ # Regex input via config.yml
72
+ # Example: security|threat|malware
73
+ xml_include = re .compile (r"{0}" .format (self .include )).findall (str (self .include .split ('|' )))
74
+
75
+ # Iterates over the regex output to locate all provided keywords
76
+ for xi in xml_include :
77
+ # Uses a path instead of a keyword
78
+ if self .path is not None :
79
+ if self .path in loc :
80
+ text = soup .get_text (separator = ' ' ).split ('Indicators of Compromise' )[- 1 ]
81
+ artifacts += self .process_element (content = text , reference_link = str (loc ), include_nonobfuscated = True )
82
+
83
+ # Only filters using a keyword
84
+ if self .path is None :
85
+ if xi in loc :
86
+ text = soup .get_text (separator = ' ' ).split ('Indicators of Compromise' )[- 1 ]
87
+ artifacts += self .process_element (content = text , reference_link = str (loc ), include_nonobfuscated = True )
88
+
89
+ if self .include is None or self .exclude is None and self .path is not None :
90
+ # Filters only by path in XML loc, no set include
71
91
# Default: /path/name/*
72
92
73
93
if self .path in loc :
74
94
text = soup .get_text (separator = ' ' ).split ('Indicators of Compromise' )[- 1 ]
75
95
artifacts += self .process_element (content = text , reference_link = str (loc ), include_nonobfuscated = True )
76
96
77
- else :
97
+ if self . include is None and self . path is None and self . exclude is None :
78
98
# Locates all blog links within the sitemap
79
99
if "blog" in loc :
80
100
text = soup .get_text (separator = ' ' ).split ('Indicators of Compromise' )[- 1 ]
0 commit comments