import xlsxwriter
import simplejson
from copy import deepcopy
from datetime import datetime
from itertools import chain
from generic import *
from file_output import *
from navigator_layer import *
# pandas and plotly are imported inside the functions that use them, for performance reasons (to avoid the import cost when those functions are not called).
def _count_applicable_data_sources(technique, applicable_data_sources, applicable_dettect_data_sources):
"""
    Get the count of applicable (DeTT&CT) data sources for the provided technique.
    This takes into account which data sources are applicable to the platform(s).
:param technique: ATT&CK CTI technique object
:param applicable_data_sources: a list of applicable ATT&CK data sources
:param applicable_dettect_data_sources: a list of applicable DeTT&CT data sources
:return: a count of the applicable data sources for this technique
"""
applicable_ds_count = 0
for ds in technique['data_components']:
if ds in applicable_data_sources:
applicable_ds_count += 1
for ds in technique['dettect_data_sources']:
if ds in applicable_dettect_data_sources:
applicable_ds_count += 1
return applicable_ds_count
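
# Example: a minimal sketch of the counting above, with hypothetical values (the dict keys
# match the CTI technique objects used throughout this module):
#   technique = {'data_components': ['Process Creation', 'Command Execution'],
#                'dettect_data_sources': ['Internal DNS']}
#   _count_applicable_data_sources(technique, ['Process Creation'], ['Internal DNS'])  # -> 2
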
def _system_in_data_source_details_object(data_source, system):
"""
Checks if the provided system is present within the provided YAML global data source object
:param data_source: YAML data source object
:param system: YAML system object
:return: True if present otherwise False
"""
for ds in data_source['data_source']:
if system['applicable_to'].lower() in (app_to.lower() for app_to in ds['applicable_to']):
return True
return False
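
# Example: hypothetical snippets shaped like the data source administration YAML. The match on
# the system's applicable_to value is case-insensitive:
#   data_source = {'data_source': [{'applicable_to': ['Windows endpoints']}]}
#   system = {'applicable_to': 'windows endpoints', 'platform': ['Windows']}
#   _system_in_data_source_details_object(data_source, system)  # -> True
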
def _map_and_colorize_techniques(my_ds, systems, exceptions, domain, layer_settings):
"""
    Determine the color of each technique based on how many of its data sources are available. This function also
    creates most of the content for the Navigator layer.
:param my_ds: the configured data sources
:param systems: the systems YAML object from the data source file
    :param exceptions: the list of ATT&CK technique exceptions within the data source YAML file
:param domain: the specified domain
:param layer_settings: settings for the Navigator layer
:return: a dictionary with techniques that can be used in the layer's output file
"""
techniques = load_attack_data(DATA_TYPE_STIX_ALL_TECH_ENTERPRISE if domain ==
'enterprise-attack' else DATA_TYPE_STIX_ALL_TECH_ICS if domain == 'ics-attack' else DATA_TYPE_STIX_ALL_TECH_MOBILE)
output_techniques = []
for t in techniques:
tech_id = t['technique_id']
tactics = []
if 'includeTactic' in layer_settings.keys() and layer_settings['includeTactic'] == 'True':
for kill_chain_phase in t['kill_chain_phases']:
if kill_chain_phase['kill_chain_name'] == 'mitre-attack':
tactics.append(kill_chain_phase['phase_name'])
else:
tactics.append(None)
if tech_id not in list(map(lambda x: x.upper(), exceptions)):
scores_idx = 0
ds_scores = []
system_available_data_sources = {}
# calculate visibility score per system
for system in systems:
# the system is relevant for this technique due to a match in ATT&CK platform
if len(set(system['platform']).intersection(set(t['x_mitre_platforms']))) > 0:
applicable_data_sources = get_applicable_data_sources_platform(system['platform'], domain)
applicable_dettect_data_sources = get_applicable_dettect_data_sources_platform(system['platform'], domain)
total_ds_count = _count_applicable_data_sources(t, applicable_data_sources, applicable_dettect_data_sources)
if total_ds_count > 0: # the system's platform has a data source applicable to this technique
ds_count = 0
for ds in t['data_components']:
# the ATT&CK data source is applicable to this system and available
if ds in applicable_data_sources and ds in my_ds.keys() and _system_in_data_source_details_object(my_ds[ds], system):
if ds_count == 0:
system_available_data_sources[scores_idx] = [ds]
else:
system_available_data_sources[scores_idx].append(ds)
ds_count += 1
for cdc in t['dettect_data_sources']:
if cdc in applicable_dettect_data_sources and cdc in my_ds.keys() and _system_in_data_source_details_object(my_ds[cdc], system):
if ds_count == 0:
system_available_data_sources[scores_idx] = [cdc]
else:
system_available_data_sources[scores_idx].append(cdc)
ds_count += 1
if ds_count > 0:
ds_scores.append((float(ds_count) / float(total_ds_count)) * 100)
else:
ds_scores.append(0) # none of the applicable data sources are available for this system
else:
# the technique is applicable to this system (and thus its platform(s)),
                        # but none of the technique's listed data sources are applicable to its platform(s)
ds_scores.append(0)
scores_idx += 1
            # determine the color from the average score over all systems, and populate the layer entry and its metadata
avg_ds_score = 0
if not all(s == 0 for s in ds_scores):
avg_ds_score = float(sum(ds_scores)) / float(len(ds_scores))
color = COLOR_DS_25p if avg_ds_score <= 25 else COLOR_DS_50p if avg_ds_score <= 50 else COLOR_DS_75p \
if avg_ds_score <= 75 else COLOR_DS_99p if avg_ds_score <= 99 else COLOR_DS_100p
d = dict()
d['techniqueID'] = tech_id
if avg_ds_score > 0:
d['color'] = color
d['comment'] = ''
d['enabled'] = True
d['metadata'] = []
if 'showMetadata' not in layer_settings.keys() or ('showMetadata' in layer_settings.keys() and str(layer_settings['showMetadata']) == 'True'):
scores_idx = 0
divider = 0
for system in systems:
# the system is relevant for this technique due to a match in ATT&CK platform
if len(set(system['platform']).intersection(set(t['x_mitre_platforms']))) > 0:
score = ds_scores[scores_idx]
if divider != 0:
d['metadata'].append({'divider': True})
divider += 1
d['metadata'].append({'name': 'Applicable to', 'value': system['applicable_to']})
app_data_sources = get_applicable_data_sources_technique(
t['data_components'], get_applicable_data_sources_platform(system['platform'], domain))
app_dettect_data_sources = get_applicable_dettect_data_sources_technique(
t['dettect_data_sources'], get_applicable_dettect_data_sources_platform(system['platform'], domain))
if score > 0:
d['metadata'].append({'name': 'Available data sources', 'value': ', '.join(
system_available_data_sources[scores_idx])})
else:
d['metadata'].append({'name': 'Available data sources', 'value': ''})
d['metadata'].append({'name': 'ATT&CK data sources', 'value': ', '.join(app_data_sources)})
d['metadata'].append({'name': 'DeTT&CT data sources', 'value': ', '.join(app_dettect_data_sources)})
d['metadata'].append({'name': 'Score', 'value': str(int(score)) + '%'})
scores_idx += 1
d['metadata'] = make_layer_metadata_compliant(d['metadata'])
for tactic in tactics:
if tactic is not None:
d['tactic'] = tactic
output_techniques.append(deepcopy(d))
determine_and_set_show_sub_techniques(output_techniques, techniques, layer_settings)
return output_techniques
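
# Color mapping example for the function above: a technique applicable to three systems with
# ds_scores of [100, 50, 0] gets an average score of 50 and is therefore colored COLOR_DS_50p.
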
def _indent_comment(comment, indent):
"""
    Indent a multiline (general, visibility or detection) comment by the given number of spaces
:param comment: The comment to indent
:param indent: The number of spaces to use in the indent
:return: indented comment or the original
"""
if '\n' in comment:
new_comment = comment.replace('\n', '\n' + ' ' * indent)
return new_comment
else:
return comment
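
# Example: _indent_comment('line 1\nline 2', 4) returns 'line 1\n    line 2', which keeps
# multiline comments aligned when they are printed behind a label.
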
def _get_technique_yaml_obj(techniques, tech_id):
"""
    Get a technique YAML object, from the provided list of technique YAML objects, that has the provided technique ID
:param techniques: list of technique YAML objects
:param tech_id: ATT&CK ID
:return: technique YAML obj
"""
for tech in techniques:
if tech['technique_id'] == tech_id:
return tech
def generate_data_sources_layer(filename, output_filename, layer_name, layer_settings):
"""
Generates a generic layer for data sources.
:param filename: the filename of the YAML file containing the data sources administration
:param output_filename: the output filename defined by the user
:param layer_name: the name of the Navigator layer
:param layer_settings: settings for the Navigator layer
:return:
"""
my_data_sources, name, systems, exceptions, domain = load_data_sources(filename)
# Do the mapping between my data sources and MITRE data sources:
my_techniques = _map_and_colorize_techniques(my_data_sources, systems, exceptions, domain, layer_settings)
if not layer_name:
layer_name = 'Data sources ' + name
platforms = list(set(chain.from_iterable(map(lambda k: k['platform'], systems))))
layer = get_layer_template_data_sources(layer_name, 'description', platforms, domain, layer_settings)
layer['techniques'] = my_techniques
json_string = simplejson.dumps(layer).replace('}, ', '},\n')
if not output_filename:
output_filename = create_output_filename('data_sources', name)
write_file(output_filename, json_string)
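
# Usage sketch for the function above (the file name is hypothetical):
#   generate_data_sources_layer('data-sources.yaml', None, None, {})
# With output_filename and layer_name set to None, the layer name defaults to
# 'Data sources <name>' and the layer is written to a generated output filename.
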
def plot_data_sources_graph(filename, output_filename):
"""
Generates a line graph which shows the improvements on numbers of data sources through time.
:param filename: the filename of the YAML file containing the data sources administration
:param output_filename: the output filename defined by the user
:return:
"""
my_data_sources, name, _, _, _ = load_data_sources(filename)
graph_values = []
for ds_global, ds_detail in my_data_sources.items():
for ds in ds_detail['data_source']:
if ds['date_connected']:
yyyymmdd = ds['date_connected'].strftime('%Y-%m-%d')
graph_values.append({'date': yyyymmdd, 'count': 1})
import pandas as pd
df = pd.DataFrame(graph_values).groupby('date', as_index=False)[['count']].sum()
df['cumcount'] = df['count'].cumsum()
if not output_filename:
output_filename = 'graph_data_sources'
elif output_filename.endswith('.html'):
output_filename = output_filename.replace('.html', '')
output_filename = get_non_existing_filename('output/' + output_filename, 'html')
import plotly.graph_objs as go
import plotly.offline as offline
offline.plot(
{'data': [go.Scatter(x=df['date'], y=df['cumcount'])],
'layout': go.Layout(title="# of data sources for " + name)},
filename=output_filename, auto_open=False
)
print("File written: " + output_filename)
def export_data_source_list_to_excel(filename, output_filename, eql_search=False):
"""
Makes an overview of all MITRE ATT&CK data sources (via techniques) and lists which data sources are present
    in the YAML administration, including all properties and the data quality score.
:param filename: the filename of the YAML file containing the data sources administration
:param output_filename: the output filename defined by the user
:param eql_search: specify if an EQL search was performed which may have resulted in missing ATT&CK data sources
:return:
"""
# pylint: disable=unused-variable
my_data_sources, name, systems, _, domain = load_data_sources(filename, filter_empty_scores=False)
my_data_sources = dict(sorted(my_data_sources.items(), key=lambda kv: kv[0], reverse=False))
if not output_filename:
output_filename = 'data_sources'
elif output_filename.endswith('.xlsx'):
output_filename = output_filename.replace('.xlsx', '')
excel_filename = get_non_existing_filename('output/' + output_filename, 'xlsx')
workbook = xlsxwriter.Workbook(excel_filename)
worksheet = workbook.add_worksheet('Data sources')
# Formatting:
format_bold_left = workbook.add_format({'align': 'left', 'bold': True})
format_title = workbook.add_format({'align': 'left', 'bold': True, 'font_size': '14'})
format_center_valign_top = workbook.add_format({'align': 'center', 'valign': 'top'})
wrap_text = workbook.add_format({'text_wrap': True, 'valign': 'top'})
valign_top = workbook.add_format({'valign': 'top'})
no_score = workbook.add_format({'valign': 'top', 'align': 'center'})
dq_score_0 = workbook.add_format({'valign': 'top', 'align': 'center'})
dq_score_1 = workbook.add_format({'valign': 'top', 'align': 'center', 'bg_color': COLOR_DS_25p})
dq_score_2 = workbook.add_format({'valign': 'top', 'align': 'center', 'bg_color': COLOR_DS_50p})
dq_score_3 = workbook.add_format({'valign': 'top', 'align': 'center', 'bg_color': COLOR_DS_75p, 'font_color': '#ffffff'})
dq_score_4 = workbook.add_format({'valign': 'top', 'align': 'center', 'bg_color': COLOR_DS_99p, 'font_color': '#ffffff'})
dq_score_5 = workbook.add_format({'valign': 'top', 'align': 'center', 'bg_color': COLOR_DS_100p, 'font_color': '#ffffff'})
# Title
worksheet.write(0, 0, 'Data sources for: ' + name, format_title)
worksheet.write(1, 0, 'Domain: ' + domain)
worksheet.write(2, 0, 'Systems: ')
y = 3
for system in systems:
worksheet.write(y, 0, '- %s: %s' % (system['applicable_to'], ', '.join(system['platform'])))
y += 1
# Header columns
y += 1
worksheet.write(y, 0, 'Data source name', format_bold_left)
worksheet.write(y, 1, 'Applicable to', format_bold_left)
worksheet.write(y, 2, 'Date registered', format_bold_left)
worksheet.write(y, 3, 'Date connected', format_bold_left)
worksheet.write(y, 4, 'Products', format_bold_left)
worksheet.write(y, 5, 'Comment', format_bold_left)
worksheet.write(y, 6, 'Available for data analytics', format_bold_left)
worksheet.write(y, 7, 'DQ: device completeness', format_bold_left)
worksheet.write(y, 8, 'DQ: data field completeness', format_bold_left)
worksheet.write(y, 9, 'DQ: timeliness', format_bold_left)
worksheet.write(y, 10, 'DQ: consistency', format_bold_left)
worksheet.write(y, 11, 'DQ: retention', format_bold_left)
worksheet.write(y, 12, 'DQ: score', format_bold_left)
worksheet.autofilter(y, 0, y, 12)
worksheet.freeze_panes(y + 1, 0)
worksheet.set_column(0, 0, 35)
worksheet.set_column(1, 1, 18)
worksheet.set_column(2, 3, 15)
worksheet.set_column(4, 4, 35)
worksheet.set_column(5, 5, 50)
worksheet.set_column(6, 6, 24)
worksheet.set_column(7, 8, 25)
worksheet.set_column(9, 11, 15)
worksheet.set_column(12, 12, 10)
    # Write the data source rows:
y += 1
for ds_global, ds_detail in my_data_sources.items():
for ds in ds_detail['data_source']:
worksheet.write(y, 0, ds_global, valign_top)
date_registered = ds['date_registered'].strftime('%Y-%m-%d') if isinstance(ds['date_registered'], datetime) else ds['date_registered']
date_connected = ds['date_connected'].strftime('%Y-%m-%d') if isinstance(ds['date_connected'], datetime) else ds['date_connected']
worksheet.write(y, 1, ', '.join(ds['applicable_to']), wrap_text)
worksheet.write(y, 2, str(date_registered).replace('None', ''), valign_top)
worksheet.write(y, 3, str(date_connected).replace('None', ''), valign_top)
worksheet.write(y, 4, ', '.join(ds['products']).replace('None', ''), valign_top)
worksheet.write(y, 5, ds['comment'][:-1] if ds['comment'].endswith('\n') else ds['comment'], wrap_text)
worksheet.write(y, 6, str(ds['available_for_data_analytics']), valign_top)
worksheet.write(y, 7, ds['data_quality']['device_completeness'], format_center_valign_top)
worksheet.write(y, 8, ds['data_quality']['data_field_completeness'], format_center_valign_top)
worksheet.write(y, 9, ds['data_quality']['timeliness'], format_center_valign_top)
worksheet.write(y, 10, ds['data_quality']['consistency'], format_center_valign_top)
worksheet.write(y, 11, ds['data_quality']['retention'], format_center_valign_top)
score = 0
score_count = 0
for k, v in ds['data_quality'].items():
# the below DQ dimensions are given more weight in the calculation of the DQ score.
if k in ['device_completeness', 'data_field_completeness', 'retention']:
score += (v * 2)
score_count += 2
else:
score += v
score_count += 1
if score > 0:
score = score / score_count
worksheet.write(y, 12, score, dq_score_0 if score == 0 else dq_score_1 if score < 2 else dq_score_2 if score < 3 else dq_score_3 if score < 4 else dq_score_4 if score < 5 else dq_score_5 if score < 6 else no_score) # noqa
y += 1
try:
workbook.close()
print("File written: " + excel_filename)
except Exception as e:
print('[!] Error while writing Excel file: %s' % str(e))
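
# DQ score example (the weighted average computed above): with device_completeness=3,
# data_field_completeness=3 and retention=3 (weight 2 each), plus timeliness=2 and
# consistency=2 (weight 1 each), the DQ score is (3*2 + 3*2 + 3*2 + 2 + 2) / 8 = 2.75.
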
def _print_ds_systems(systems):
"""
Print the data source systems key-value pair to stdout
:param systems: systems key value pair
:return:
"""
print(' ' * 4 + 'Data source administration \'systems\' key-value pair:')
for s in systems:
print(' ' * 4 + ' * applicable_to: ' + s['applicable_to'])
for p in s['platform']:
print(' ' * 4 + ' - ' + p)
def _print_tech_visibility_object_diff(old_tech, new_tech, tech_id, tech_name):
"""
    Print the 'diff' of the old and the new visibility object(s) as part of a technique
    :param old_tech: old technique object
    :param new_tech: new technique object
:param tech_id: technique ID of the visibility object
:param tech_name: technique name
:return:
"""
print('\n')
print('Technique: ' + tech_id + ' / ' + tech_name)
print('')
print('OLD visibility object(s):')
for old_vis_obj in old_tech['visibility']:
old_score_date = get_latest_date(old_vis_obj)
old_score_date = old_score_date.strftime('%Y-%m-%d') if old_score_date is not None else ''
print(' - Applicable to: ' + ', '.join(old_vis_obj['applicable_to']))
print(' * Date: ' + old_score_date)
print(' * Score: ' + str(get_latest_score(old_vis_obj)))
print(' * Visibility score comment: ' + _indent_comment(get_latest_comment(old_vis_obj), 31))
print(' * Auto generated: ' + str(get_latest_score_obj(old_vis_obj).get('auto_generated', 'False')))
print('NEW visibility object(s):')
for new_vis_obj in new_tech['visibility']:
new_score_date = new_vis_obj['score_logbook'][0]['date'].strftime('%Y-%m-%d')
print(' - Applicable to: ' + ', '.join(new_vis_obj['applicable_to']))
print(' * Date: ' + new_score_date)
print(' * Score: ' + str(new_vis_obj['score_logbook'][0]['score']))
print(' * Visibility score comment: ' + _indent_comment(new_vis_obj['score_logbook'][0]['comment'], 31))
print(' * Auto generated: True')
print('\n')
def _print_visibility_object_diff(old_vis_obj, new_vis_obj, tech_id, tech_name):
"""
    Print the 'diff' of the old and the new visibility object
:param old_vis_obj: old visibility object
:param new_vis_obj: new visibility object
:param tech_id: technique ID of the visibility object
:param tech_name: technique name
:return:
"""
print('\n')
print('Visibility object:')
print(' - ATT&CK ID/name ' + tech_id + ' / ' + tech_name)
print(' - Applicable to: ' + ', '.join(old_vis_obj['applicable_to']))
print(' - Visibility comment: ' + _indent_comment(old_vis_obj['comment'], 29))
print('')
print('OLD score object:')
old_score_date = get_latest_date(old_vis_obj)
old_score_date = old_score_date.strftime('%Y-%m-%d') if old_score_date is not None else ''
new_score_date = new_vis_obj['score_logbook'][0]['date'].strftime('%Y-%m-%d')
print(' - Date: ' + old_score_date)
print(' - Score: ' + str(get_latest_score(old_vis_obj)))
print(' - Visibility score comment: ' + _indent_comment(get_latest_comment(old_vis_obj), 29))
print(' - Auto generated: ' + str(get_latest_score_obj(old_vis_obj).get('auto_generated', 'False')))
print('NEW score object:')
print(' - Date: ' + new_score_date)
print(' - Score: ' + str(new_vis_obj['score_logbook'][0]['score']))
print(' - Visibility score comment: ' + _indent_comment(new_vis_obj['score_logbook'][0]['comment'], 29))
print(' - Auto generated: True')
print('\n')
def _print_progress_visibility_update(count, total):
"""
Print the progress of the visibility update to stdout
    :param count: counter / how far along are we in the progress?
:param total: total techniques to process
:return:
"""
print(' \n' + '-' * 80)
percentage = round((100 * count) / total, 0)
tmp_txt1 = 'Progress: ' + str(percentage) + '% '
tmp_txt2 = '[techniques remaining to be checked ' + str(total - count) + ']'
print(tmp_txt1 + ' ' * (80 - len(tmp_txt1 + tmp_txt2)) + tmp_txt2)
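
# Example: _print_progress_visibility_update(25, 100) prints a divider line followed by
# 'Progress: 25.0%', padded so that '[techniques remaining to be checked 75]' ends at column 80.
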
def _add_visibility_object_to_dict(dict_vis_objects, tech_id, vis_obj):
"""
Add visibility object(s) to a dict with the structure {tech_id: [visibility_obj]}
:param dict_vis_objects: the dictionary to add the visibility object(s) to
:param tech_id: the technique ID to which the visibility object(s) needs to be added
:param vis_obj: the visibility object(s) to add to the dictionary
    :return: updated dict_vis_objects
"""
if tech_id not in dict_vis_objects:
dict_vis_objects[tech_id] = []
if isinstance(vis_obj, list):
dict_vis_objects[tech_id].extend(deepcopy(vis_obj))
else:
dict_vis_objects[tech_id].append(deepcopy(vis_obj))
return dict_vis_objects
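
# Example with hypothetical visibility objects:
#   d = _add_visibility_object_to_dict({}, 'T1003', {'applicable_to': ['all']})
#   d = _add_visibility_object_to_dict(d, 'T1003', [{'applicable_to': ['servers']}])
#   # d == {'T1003': [{'applicable_to': ['all']}, {'applicable_to': ['servers']}]}
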
def update_technique_administration_file(file_data_sources, file_tech_admin):
"""
Update the visibility scores in the provided technique administration file
:param file_data_sources: file location of the data source admin. file
:param file_tech_admin: file location of the tech. admin. file
:return:
"""
file_updated = False
# first we generate the new visibility scores contained within a temporary tech. admin YAML 'file'
new_visibility_scores = generate_technique_administration_file(file_data_sources, None, write_file=False, all_techniques=True)
    # we get the date to remove the single quotes from the date at the end of this function's code
today = new_visibility_scores['techniques'][0]['visibility'][0]['score_logbook'][0]['date']
# next, we load the current visibility scores from the tech. admin file
cur_visibility_scores, _, platform_tech_admin, domain_tech_admin = load_techniques(file_tech_admin)
# last, we get the systems kv-pair from the data source file
_, _, systems, _, domain = load_data_sources(file_data_sources)
# if the tech admin. file has a platform not present in the DS admin. file we return
if len(set(platform_tech_admin).difference(set(new_visibility_scores['platform']))) > 0:
print('[!] The technique administration file\'s key-value pair \'platform\' has ATT&CK platform(s) that are not '
'part of the data source administration \'systems\' key-value pair. This should be fixed before the '
'visibility update can continue.')
print('\n Technique administration \'platform\' key-value pair:')
for p in platform_tech_admin:
print(' - ' + p)
print('')
_print_ds_systems(systems)
print('\nVisibility update canceled.')
return
# if the tech admin. file has an applicable_to value not present in the DS admin. file we return
app_ds = set([s['applicable_to'].lower() for s in systems])
    app_tech = {}  # applicable_to: {app_to: ..., tech_id: ...} - we keep app_to in here to preserve the casing when printing
for tech_id, v in cur_visibility_scores.items():
for vis in v['visibility']:
for a in vis['applicable_to']:
a_low = a.lower()
if a_low != 'all':
if a_low not in app_tech:
app_tech[a_low] = {}
app_tech[a_low]['app_to'] = a
app_tech[a_low]['tech_id'] = []
app_tech[a_low]['tech_id'].append(tech_id)
# if the tech admin. file has another domain than the DS admin file has we return
if domain != domain_tech_admin:
print('[!] The technique administration file has another value for \'domain\' than the value for \'domain\' in '
'the data source administration file. This should be fixed before the visibility update can continue.')
print('\nVisibility update canceled.')
return
if len(set(app_tech).difference(app_ds)) > 0:
print('[!] The technique administration file has visibility objects with \'applicable_to\' values that are not '
'present in the data source administration \'systems\' key-value pair. This should be fixed before the '
'visibility update can continue.')
print('\n Technique administration \'applicable_to\' values used within visibility objects:')
for k, v in app_tech.items():
print(' * applicable_to: ' + v['app_to'])
print(' Used in technique(s): ' + ', '.join(v['tech_id']) + '\n')
print('')
_print_ds_systems(systems)
print('\nVisibility update canceled.')
return
# we did not return, so init and start the upgrade :-)
_yaml = init_yaml()
with open(file_tech_admin) as fd:
yaml_file_tech_admin_updated = _yaml.load(fd)
# set the comment
comment = ''
if ask_yes_no('\nDo you want to fill in the visibility comment for the added and/or updated scores?'):
comment = input(' >> Comment: ')
print('')
    # Set the comment for all new visibility scores. We will also need this comment later in the code to update
    # the scores of techniques that are already present. Therefore, we add it to every visibility object right away.
if comment != '':
x = 0
for new_tech in new_visibility_scores['techniques']:
for visibility_obj in new_tech['visibility']:
visibility_obj['score_logbook'][0]['comment'] = comment
x += 1
# check if the DS admin. file has an ATT&CK platform (part of systems) not part of the tech admin. file.
    # If yes, add this platform to the tech admin. file's 'platform' kv pair
ds_platforms_not_in_tech = set(new_visibility_scores['platform']).difference(set(platform_tech_admin))
if len(ds_platforms_not_in_tech) > 0:
print('As part of the \'systems\' key-value pair, the data source administration file has ATT&CK platform(s) '
'that are not part of the technique administration file. Therefore, the following platform(s) will be added '
              'to the \'platform\' key-value pair as part of the technique administration file:')
for p in ds_platforms_not_in_tech:
print(' - ' + p)
yaml_file_tech_admin_updated['platform'].extend(ds_platforms_not_in_tech)
file_updated = True
input('\n' + TXT_ANY_KEY_TO_CONTINUE)
print('\n')
# check if we have tech IDs for which we now have visibility, but which were not yet part of the tech. admin file
cur_tech_ids = set(cur_visibility_scores.keys())
new_tech_ids = set()
    del_unnecessary_all_tech_ids = set()  # resulted from 'all_techniques=True', which we do need to call in this way
    # because we also want to update visibility scores for which the score has become 0 (e.g. due to a removal of a data source)
tech_idx = 0
for tech in new_visibility_scores['techniques']:
tech_id = tech['technique_id']
score = False
for vis_obj in tech['visibility']:
if vis_obj['score_logbook'][0]['score'] > 0:
score = True
break
elif tech_id not in cur_tech_ids:
                del_unnecessary_all_tech_ids.add(tech_idx)
if score:
new_tech_ids.add(tech_id)
tech_idx += 1
tech_ids_new = new_tech_ids.difference(cur_tech_ids)
    # remove techniques which came from 'all_techniques=True', but that are not present as a technique in the current/outdated tech file
    for idx in sorted(del_unnecessary_all_tech_ids, reverse=True):
del new_visibility_scores['techniques'][idx]
# Add the new tech. to the ruamel instance: 'yaml_file_tech_admin'
if len(tech_ids_new) > 0:
file_updated = True
x = 0
for new_tech in new_visibility_scores['techniques']:
if new_tech['technique_id'] in tech_ids_new:
yaml_file_tech_admin_updated['techniques'].append(new_tech)
x += 1
print('The following new technique IDs will be added to the technique administration file with a visibility '
'score derived from the nr. of available data sources:')
print_tech_ids_list = [' ']
x = 0
for tech_id in sorted(tech_ids_new):
if not len(print_tech_ids_list[x]) + len(tech_id) + 2 <= 80:
x += 1
print_tech_ids_list.append(' ')
print_tech_ids_list[x] += tech_id + ", "
print_tech_ids_list[x] = print_tech_ids_list[x][:-2]
print('\n'.join(print_tech_ids_list))
input('\n' + TXT_ANY_KEY_TO_CONTINUE)
print('\n')
# Remove techniques which we no longer need
new_visibility_scores['techniques'] = [tech for tech in new_visibility_scores['techniques']
if tech['technique_id'] not in tech_ids_new]
# Update visibility objects for which we have
# - A match on the applicable_to value(s) between the old and new visibility object
# - A different visibility score (otherwise there is no need to update)
# (update = adding a new score logbook entry)
print('We will now start with updating techniques\' visibility scores for which we have an EXACT match on \'applicable_to\' values.')
input('\n' + TXT_ANY_KEY_TO_CONTINUE)
print('\n')
new_vis_objects = {} # {tech_id: [visibility_obj]}
new_visibility_scores_updated = deepcopy(new_visibility_scores)
cur_visibility_scores_updated = deepcopy(cur_visibility_scores)
answer_yes_to_all_auto_gen_false = False
answer_yes_to_all_auto_gen_true = False
answer_no_to_all_auto_gen_false = False
answer_no_to_all_auto_gen_true = False
we_have_updated_scores = False
total_tech_ids = len(new_visibility_scores['techniques'])
tech_ids_to_delete = set()
tech_idxs_to_delete = set()
idx_tech_id = 0
for new_tech in new_visibility_scores['techniques']:
tech_id = new_tech['technique_id']
tech_name = new_tech['technique_name']
if tech_id in cur_visibility_scores:
idx_new_vis_obj = 0
set_new_vis_obj_del = set()
set_old_vis_obj_del = set()
for new_vis_obj in new_tech['visibility']:
idx_old_vis_obj = 0
for old_vis_obj in cur_visibility_scores[tech_id]['visibility']:
# we have a MATCH on the applicable_to value between the old and new visibility object
if set(new_vis_obj['applicable_to']) == set(old_vis_obj['applicable_to']):
# we can ignore the update if the score stays the same
if new_vis_obj['score_logbook'][0]['score'] != get_latest_score(old_vis_obj):
answer = -1
old_score_auto_generated = get_latest_auto_generated(old_vis_obj)
# based on the answer provided by the user we can skip asking for user input, and hence printing the diff
if (not (old_score_auto_generated) and not (answer_yes_to_all_auto_gen_false) and not (answer_no_to_all_auto_gen_false)) \
or (not (answer_yes_to_all_auto_gen_true) and not (answer_no_to_all_auto_gen_true)):
_print_progress_visibility_update(idx_tech_id + 1, total_tech_ids)
_print_visibility_object_diff(old_vis_obj, new_vis_obj, tech_id, tech_name)
if not (old_score_auto_generated) and not (answer_yes_to_all_auto_gen_false) and not (answer_no_to_all_auto_gen_false):
                                    print('[!] The OLD score was set manually (auto_generated = false), but the NEW score '
                                          'is derived from the nr. of available data sources.\n')
answer = ask_multiple_choice('Update the score?', ['Yes', 'No',
'Yes to ALL (where OLD score has auto_generated = false)',
'No to ALL (where OLD score has auto_generated = false)'])
answer_yes_to_all_auto_gen_false = True if answer == 3 else False
answer_no_to_all_auto_gen_false = True if answer == 4 else False
elif not (answer_yes_to_all_auto_gen_true) and not (answer_no_to_all_auto_gen_true):
print('Both the OLD and NEW scores were derived from the nr. of available data sources '
'(auto_generated = true).\n')
answer = ask_multiple_choice('Update the score?', ['Yes', 'No',
'Yes to ALL (where OLD score has auto_generated = true)',
'No to ALL (where OLD score has auto_generated = true)'])
answer_yes_to_all_auto_gen_true = True if answer == 3 else False
answer_no_to_all_auto_gen_true = True if answer == 4 else False
# update the score / add a new score logbook entry
if (old_score_auto_generated and answer_yes_to_all_auto_gen_true) or \
(not (old_score_auto_generated) and answer_yes_to_all_auto_gen_false) or answer == 1:
file_updated = True
we_have_updated_scores = True
old_vis_obj['score_logbook'].insert(0, new_vis_obj['score_logbook'][0])
upd_str = ' - Updated a visibility score in technique: {0:<10} (applicable to: {1})'
print(upd_str.format(tech_id, ', '.join(old_vis_obj['applicable_to'])))
else:
not_upd_str = ' - A visibility score in this technique was NOT updated: {0:<10} (applicable to: {1})'
print(not_upd_str.format(tech_id, ', '.join(old_vis_obj['applicable_to'])))
# add the updated score, or keep the old score
new_vis_objects = _add_visibility_object_to_dict(new_vis_objects, tech_id, old_vis_obj)
set_new_vis_obj_del.add(idx_new_vis_obj)
set_old_vis_obj_del.add(idx_old_vis_obj)
idx_old_vis_obj += 1
idx_new_vis_obj += 1
# delete visibility objects (old and new) which we processed (possibly including the technique itself)
for idx in sorted(set_new_vis_obj_del, reverse=True):
del new_visibility_scores_updated['techniques'][idx_tech_id]['visibility'][idx]
if len(new_visibility_scores_updated['techniques'][idx_tech_id]['visibility']) == 0:
tech_idxs_to_delete.add(idx_tech_id)
for idx in sorted(set_old_vis_obj_del, reverse=True):
del cur_visibility_scores_updated[tech_id]['visibility'][idx]
if len(cur_visibility_scores_updated[tech_id]['visibility']) == 0:
tech_ids_to_delete.add(tech_id)
idx_tech_id += 1
# delete techniques which no longer have any visibility objects
for idx in sorted(tech_idxs_to_delete, reverse=True):
del new_visibility_scores_updated['techniques'][idx]
for tech_id in tech_ids_to_delete:
del cur_visibility_scores_updated[tech_id]
if not (we_have_updated_scores):
print(' - No visibility scores were found eligible for an update, or you rejected all eligible updates.')
# Update visibility objects for which we have
# - NO match on the applicable_to value(s) between the old and new visibility object
# (update = adding new or replacing existing objects)
print('\nWe will now start with updating techniques\' visibility scores for which we have NO match on \'applicable_to\' values.')
input('\n' + TXT_ANY_KEY_TO_CONTINUE)
print('\n')
answer_yes_to_all_auto_gen_false = False
answer_yes_to_all_auto_gen_true = False
answer_no_to_all_auto_gen_false = False
answer_no_to_all_auto_gen_true = False
we_have_updated_scores = False
total_tech_ids = len(new_visibility_scores_updated['techniques'])
idx_tech_id = 0
for new_tech in new_visibility_scores_updated['techniques']:
tech_id = new_tech['technique_id']
tech_name = new_tech['technique_name']
if tech_id not in cur_visibility_scores_updated:
# We can add this visibility object without asking the user, because it (and thus its applicable_to value)
# was never part of the cur/old technique administration file. We are sure of that because visibility objects
# for which we had an EXACT match were removed. In this particular case that resulted in the deletion of the
            # technique itself (as it had zero visibility objects remaining)
file_updated = True
we_have_updated_scores = True
new_vis_objects = _add_visibility_object_to_dict(new_vis_objects, tech_id, new_tech['visibility'])
applicable_to = list(set(chain.from_iterable(map(lambda k: k['applicable_to'], new_tech['visibility']))))
not_upd_str = ' - A new visibility object was added to technique: {0:<10} (applicable to: {1})'
print(not_upd_str.format(tech_id, ', '.join(applicable_to)))
else:
answer = -1
list_old_score_auto_generated = [get_latest_auto_generated(old_vis_obj)
for old_vis_obj in cur_visibility_scores_updated[tech_id]['visibility']]
old_score_auto_generated = True if True in list_old_score_auto_generated else False
# based on the answer provided by the user we can skip asking for user input, and hence printing the diff
if (not (old_score_auto_generated) and not (answer_yes_to_all_auto_gen_false) and not (answer_no_to_all_auto_gen_false)) \
or (not (answer_yes_to_all_auto_gen_true) and not (answer_no_to_all_auto_gen_true)):
_print_progress_visibility_update(idx_tech_id + 1, total_tech_ids)
_print_tech_visibility_object_diff(cur_visibility_scores_updated[tech_id], new_tech, tech_id, tech_name)
if not (old_score_auto_generated) and not (answer_yes_to_all_auto_gen_false) and not (answer_no_to_all_auto_gen_false):
                    print('[!] At least one OLD score was set manually (auto_generated = false), '
                          'but the NEW score(s) are derived from the nr. of available data sources.\n')
                    answer = ask_multiple_choice('Replace the OLD visibility object(s)?', ['Yes', 'No',
'Yes to ALL (where at least one OLD score has auto_generated = false)',
'No to ALL (where at least one OLD score has auto_generated = false)'])
answer_yes_to_all_auto_gen_false = True if answer == 3 else False
answer_no_to_all_auto_gen_false = True if answer == 4 else False
elif not (answer_yes_to_all_auto_gen_true) and not (answer_no_to_all_auto_gen_true):
print('Both the OLD and NEW scores were derived from the nr. of available data sources '
'(auto_generated = true).\n')
answer = ask_multiple_choice('Replace the OLD visibility object(s)?', ['Yes', 'No',
'Yes to ALL (where OLD score has auto_generated = true)',
'No to ALL (where OLD score has auto_generated = true)'])
answer_yes_to_all_auto_gen_true = True if answer == 3 else False
answer_no_to_all_auto_gen_true = True if answer == 4 else False
# replace the visibility objects or keep the existing ones
if (old_score_auto_generated and answer_yes_to_all_auto_gen_true) or \
(not (old_score_auto_generated) and answer_yes_to_all_auto_gen_false) or answer == 1:
file_updated = True
we_have_updated_scores = True
new_vis_objects = _add_visibility_object_to_dict(new_vis_objects, tech_id, new_tech['visibility'])
applicable_to = list(set(chain.from_iterable(map(lambda k: k['applicable_to'], new_tech['visibility']))))
upd_str = ' - Replaced a visibility score in technique: {0:<10} (applicable to: {1})'
print(upd_str.format(tech_id, ', '.join(applicable_to)))
else:
new_vis_objects = _add_visibility_object_to_dict(new_vis_objects, tech_id, cur_visibility_scores_updated[tech_id]['visibility'])
applicable_to = list(set(chain.from_iterable(
map(lambda k: k['applicable_to'], cur_visibility_scores_updated[tech_id]['visibility']))))
not_upd_str = ' - A visibility score in this technique was NOT updated: {0:<10} (applicable to: {1})'
print(not_upd_str.format(tech_id, ', '.join(applicable_to)))
idx_tech_id += 1
# Update visibility objects in the technique administration file that will be written to disk
idx_tech = 0
for tech in yaml_file_tech_admin_updated['techniques']:
tech_id = tech['technique_id']
if tech_id not in tech_ids_new and tech_id in new_vis_objects:
yaml_file_tech_admin_updated['techniques'][idx_tech]['visibility'] = new_vis_objects[tech_id]
idx_tech += 1
# create backup of the current tech. admin YAML file
if file_updated:
print('')
backup_file(file_tech_admin)
yaml_file_tech_admin_updated = fix_date_and_remove_null(yaml_file_tech_admin_updated, today, input_type='ruamel')
with open(file_tech_admin, 'w') as fd:
fd.writelines(yaml_file_tech_admin_updated)
print('File written: ' + file_tech_admin)
else:
print('No visibility scores have been updated.')
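
# Usage sketch for the function above (the file names are hypothetical):
#   update_technique_administration_file('data-sources.yaml', 'techniques-administration.yaml')
# This interactively walks through the differences between the auto-generated visibility scores
# and the scores in the technique administration file, and, after creating a backup, writes the
# accepted updates back to the technique administration file.
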
# pylint: disable=redefined-outer-name
def generate_technique_administration_file(filename, output_filename, write_file=True, all_techniques=False):
"""
Generate a technique administration file based on the data source administration YAML file
:param filename: the filename of the YAML file containing the data sources administration
:param output_filename: the output filename defined by the user
:param write_file: by default the file is written to disk
:param all_techniques: include all ATT&CK techniques in the generated YAML file that are applicable to the
platform(s) specified in the data source YAML file
:return:
"""
my_ds, name, systems, exceptions, domain = load_data_sources(filename)
techniques = load_attack_data(DATA_TYPE_STIX_ALL_TECH_ENTERPRISE if domain ==
'enterprise-attack' else DATA_TYPE_STIX_ALL_TECH_ICS if domain == 'ics-attack' else DATA_TYPE_STIX_ALL_TECH_MOBILE)
yaml_platform = list(set(chain.from_iterable(map(lambda k: k['platform'], systems))))
all_applicable_to_values = set([s['applicable_to'] for s in systems])
yaml_file = dict()
yaml_file['version'] = FILE_TYPE_TECHNIQUE_ADMINISTRATION_VERSION
yaml_file['file_type'] = FILE_TYPE_TECHNIQUE_ADMINISTRATION
yaml_file['name'] = name
yaml_file['domain'] = domain
yaml_file['platform'] = yaml_platform
yaml_file['techniques'] = []
today = dt.now()
# Score visibility based on the number of available data sources and the exceptions
for t in techniques:
mitre_platforms = t.get('x_mitre_platforms', [])
tech_id = t['technique_id']
tech = None
visibility_obj_count = 0
if tech_id not in list(map(lambda x: x.upper(), exceptions)):
# calculate visibility score per system
for system in systems:
ds_score = -1
platform_match = False
# the system is relevant for this technique due to a match in ATT&CK platform
if len(set(system['platform']).intersection(set(mitre_platforms))) > 0:
platform_match = True
applicable_data_sources = get_applicable_data_sources_platform(system['platform'], domain)
applicable_dettect_data_sources = get_applicable_dettect_data_sources_platform(system['platform'], domain)
total_ds_count = _count_applicable_data_sources(t, applicable_data_sources, applicable_dettect_data_sources)
                    if total_ds_count > 0:  # the system's platform has a data source applicable to this technique
ds_count = 0
for ds in t['data_components']:
# the ATT&CK data source is applicable to this system and available
if ds in applicable_data_sources and ds in my_ds.keys() and _system_in_data_source_details_object(my_ds[ds], system):
ds_count += 1
for cdc in t['dettect_data_sources']:
if cdc in applicable_dettect_data_sources and cdc in my_ds.keys() and _system_in_data_source_details_object(my_ds[cdc], system):
ds_count += 1
if ds_count > 0:
result = (float(ds_count) / float(total_ds_count)) * 100
ds_score = 1 if result <= 49 else 2 if result <= 74 else 3 if result <= 99 else 4
else:
ds_score = 0 # none of the applicable data sources are available for this system
else:
# the technique is applicable to this system (and thus its platform(s)),
                    # but none of the technique's listed data sources are applicable to its platform(s), or the technique has no data sources
ds_score = -1
                # Only add the technique if its score > 0, or if the user wants every technique to be added
if ds_score > 0 or (all_techniques and platform_match):
# the ATT&CK technique is not yet part of the YAML file
if visibility_obj_count == 0:
tech = deepcopy(YAML_OBJ_TECHNIQUE)
tech['technique_id'] = tech_id
tech['technique_name'] = t['name']
# score can be -1 due to all_techniques
ds_score = 0 if ds_score == -1 else ds_score
                    # check if we already have a visibility object with this exact same score
same_score = False
if visibility_obj_count > 0:
for vis_obj in tech['visibility']:
if vis_obj['score_logbook'][0]['score'] == ds_score:
vis_obj['applicable_to'].append(system['applicable_to'])
same_score = True
break
if not same_score:
tech['visibility'].append(deepcopy(YAML_OBJ_VISIBILITY))
tech['visibility'][visibility_obj_count]['score_logbook'][0]['score'] = ds_score
tech['visibility'][visibility_obj_count]['score_logbook'][0]['date'] = today
tech['visibility'][visibility_obj_count]['applicable_to'] = [system['applicable_to']]
visibility_obj_count += 1
if tech:
            # check if we have applicable_to values that can be replaced by the value 'all'
for vis_obj in tech['visibility']:
if all_applicable_to_values == set(vis_obj['applicable_to']) and not len(all_applicable_to_values) == 1:
vis_obj['applicable_to'] = ['all']
yaml_file['techniques'].append(tech)
yaml_file['techniques'] = sorted(yaml_file['techniques'], key=lambda k: k['technique_id'])
if write_file:
# remove the single quotes around the date key-value pair
_yaml = init_yaml()
file = StringIO()
# create the file lines by writing it to memory
_yaml.dump(yaml_file, file)
file.seek(0)
file_lines = file.readlines()
# remove the single quotes from the date
yaml_file_lines = fix_date_and_remove_null(file_lines, today, input_type='list')
if not output_filename:
output_filename = 'techniques-administration-' + normalize_name_to_filename(name)
elif output_filename.endswith('.yaml'):
output_filename = output_filename.replace('.yaml', '')
output_filename = get_non_existing_filename('output/' + output_filename, 'yaml')
with open(output_filename, 'w') as f:
f.writelines(yaml_file_lines)
print("File written: " + output_filename)
else:
return yaml_file
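
# Usage sketch for the function above (the file name is hypothetical):
#   generate_technique_administration_file('data-sources.yaml', None)
# This writes 'output/techniques-administration-<name>.yaml' (or a non-existing variant thereof)
# with visibility scores of 1 (result <= 49%), 2 (<= 74%), 3 (<= 99%) or 4 (100%), derived from
# the percentage of available data sources per system.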