Skip to content
This repository was archived by the owner on May 19, 2023. It is now read-only.

Commit 53a1fa9

Browse files
committed
remove data from repo and update to download from s3
1 parent 5f81cc2 commit 53a1fa9

6 files changed

+85
-2062
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ dask-worker-space/
3434
# Test output
3535
clx/tests/output
3636
clx/tests/.config
37+
rnn_classifier_2020-06-08_20_48_03.pth
3738

3839
# Jupyter
3940
.ipynb_checkpoints/

notebooks/cybert/cybert_example_training.ipynb

+72-54
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@
4141
"from tqdm import tqdm,trange\n",
4242
"\n",
4343
"import pandas as pd\n",
44-
"import numpy as np"
44+
"import numpy as np\n",
45+
"import s3fs\n",
46+
"from os import path"
4547
]
4648
},
4749
{
@@ -59,13 +61,29 @@
5961
"metadata": {},
6062
"outputs": [],
6163
"source": [
62-
"logs_df = pd.read_csv('./data/winevt_sample.csv')"
64+
"# download log data\n",
65+
"WINEVT_CSV = \"winevt_sample.csv\"\n",
66+
"S3_BASE_PATH = \"rapidsai-data/cyber/clx\"\n",
67+
"\n",
68+
"if not path.exists(WINEVT_CSV):\n",
69+
" fs = s3fs.S3FileSystem(anon=True)\n",
70+
" fs.get(S3_BASE_PATH + \"/\" + WINEVT_CSV, WINEVT_CSV)"
6371
]
6472
},
6573
{
6674
"cell_type": "code",
6775
"execution_count": 3,
6876
"metadata": {},
77+
"outputs": [],
78+
"source": [
79+
"# logs_df = pd.read_csv('./data/winevt_sample.csv')\n",
80+
"logs_df = pd.read_csv(WINEVT_CSV)"
81+
]
82+
},
83+
{
84+
"cell_type": "code",
85+
"execution_count": 4,
86+
"metadata": {},
6987
"outputs": [
7088
{
7189
"data": {
@@ -94,7 +112,7 @@
94112
"Name: 0, dtype: object"
95113
]
96114
},
97-
"execution_count": 3,
115+
"execution_count": 4,
98116
"metadata": {},
99117
"output_type": "execute_result"
100118
}
@@ -106,7 +124,7 @@
106124
},
107125
{
108126
"cell_type": "code",
109-
"execution_count": 4,
127+
"execution_count": 5,
110128
"metadata": {},
111129
"outputs": [
112130
{
@@ -115,7 +133,7 @@
115133
"'02/28/2019 12:49:04 AM LogName= Security SourceName= Microsoft Windows security auditing. EventCode= 4624 EventType= 0 Type= Information ComputerName= lt-95.melton.com TaskCategory= Logon OpCode= Info RecordNumber= 474033423 Keywords= Audit Success Message= An account was successfully logged on. Subject: Account Name: gonzalespeter Account Domain: taylor.com New Logon: Account Name: [email protected] Account Domain: blair.com Network Information: Workstation Name: desktop-gonzalespeter Network Address: 192.175.54.118'"
116134
]
117135
},
118-
"execution_count": 4,
136+
"execution_count": 5,
119137
"metadata": {},
120138
"output_type": "execute_result"
121139
}
@@ -127,7 +145,7 @@
127145
},
128146
{
129147
"cell_type": "code",
130-
"execution_count": 5,
148+
"execution_count": 6,
131149
"metadata": {},
132150
"outputs": [],
133151
"source": [
@@ -153,7 +171,7 @@
153171
},
154172
{
155173
"cell_type": "code",
156-
"execution_count": 6,
174+
"execution_count": 7,
157175
"metadata": {},
158176
"outputs": [],
159177
"source": [
@@ -162,7 +180,7 @@
162180
},
163181
{
164182
"cell_type": "code",
165-
"execution_count": 7,
183+
"execution_count": 8,
166184
"metadata": {},
167185
"outputs": [
168186
{
@@ -179,7 +197,7 @@
179197
},
180198
{
181199
"cell_type": "code",
182-
"execution_count": 8,
200+
"execution_count": 9,
183201
"metadata": {},
184202
"outputs": [],
185203
"source": [
@@ -188,7 +206,7 @@
188206
},
189207
{
190208
"cell_type": "code",
191-
"execution_count": 9,
209+
"execution_count": 10,
192210
"metadata": {},
193211
"outputs": [],
194212
"source": [
@@ -212,7 +230,7 @@
212230
},
213231
{
214232
"cell_type": "code",
215-
"execution_count": 10,
233+
"execution_count": 11,
216234
"metadata": {},
217235
"outputs": [],
218236
"source": [
@@ -237,7 +255,7 @@
237255
},
238256
{
239257
"cell_type": "code",
240-
"execution_count": 11,
258+
"execution_count": 12,
241259
"metadata": {},
242260
"outputs": [],
243261
"source": [
@@ -269,7 +287,7 @@
269287
},
270288
{
271289
"cell_type": "code",
272-
"execution_count": 12,
290+
"execution_count": 13,
273291
"metadata": {},
274292
"outputs": [],
275293
"source": [
@@ -303,7 +321,7 @@
303321
},
304322
{
305323
"cell_type": "code",
306-
"execution_count": 13,
324+
"execution_count": 14,
307325
"metadata": {},
308326
"outputs": [],
309327
"source": [
@@ -319,7 +337,7 @@
319337
},
320338
{
321339
"cell_type": "code",
322-
"execution_count": 14,
340+
"execution_count": 15,
323341
"metadata": {},
324342
"outputs": [],
325343
"source": [
@@ -328,7 +346,7 @@
328346
},
329347
{
330348
"cell_type": "code",
331-
"execution_count": 15,
349+
"execution_count": 16,
332350
"metadata": {},
333351
"outputs": [],
334352
"source": [
@@ -349,7 +367,7 @@
349367
},
350368
{
351369
"cell_type": "code",
352-
"execution_count": 16,
370+
"execution_count": 17,
353371
"metadata": {},
354372
"outputs": [],
355373
"source": [
@@ -372,7 +390,7 @@
372390
},
373391
{
374392
"cell_type": "code",
375-
"execution_count": 17,
393+
"execution_count": 18,
376394
"metadata": {},
377395
"outputs": [],
378396
"source": [
@@ -390,7 +408,7 @@
390408
},
391409
{
392410
"cell_type": "code",
393-
"execution_count": 18,
411+
"execution_count": 19,
394412
"metadata": {},
395413
"outputs": [],
396414
"source": [
@@ -421,7 +439,7 @@
421439
},
422440
{
423441
"cell_type": "code",
424-
"execution_count": 19,
442+
"execution_count": 20,
425443
"metadata": {},
426444
"outputs": [],
427445
"source": [
@@ -433,7 +451,7 @@
433451
},
434452
{
435453
"cell_type": "code",
436-
"execution_count": 20,
454+
"execution_count": 21,
437455
"metadata": {},
438456
"outputs": [
439457
{
@@ -447,40 +465,40 @@
447465
"name": "stdout",
448466
"output_type": "stream",
449467
"text": [
450-
"Train loss: 1.1063271555407295\n"
468+
"Train loss: 1.1561125671041423\n"
451469
]
452470
},
453471
{
454472
"name": "stderr",
455473
"output_type": "stream",
456474
"text": [
457-
"Epoch: 50%|█████ | 1/2 [00:15<00:15, 15.51s/it]"
475+
"Epoch: 50%|█████ | 1/2 [00:14<00:14, 14.94s/it]"
458476
]
459477
},
460478
{
461479
"name": "stdout",
462480
"output_type": "stream",
463481
"text": [
464-
"Validation loss: 0.2913314178586006\n",
465-
"Validation Accuracy: 0.550140380859375\n",
466-
"F1-Score: 0.8453539528062924\n",
467-
"Train loss: 0.14663322655291394\n"
482+
"Validation loss: 0.2560569792985916\n",
483+
"Validation Accuracy: 0.547088623046875\n",
484+
"F1-Score: 0.7919258952025439\n",
485+
"Train loss: 0.12098206990751727\n"
468486
]
469487
},
470488
{
471489
"name": "stderr",
472490
"output_type": "stream",
473491
"text": [
474-
"Epoch: 100%|██████████| 2/2 [00:30<00:00, 15.36s/it]"
492+
"Epoch: 100%|██████████| 2/2 [00:29<00:00, 14.75s/it]"
475493
]
476494
},
477495
{
478496
"name": "stdout",
479497
"output_type": "stream",
480498
"text": [
481-
"Validation loss: 0.026326983235776424\n",
482-
"Validation Accuracy: 0.58355712890625\n",
483-
"F1-Score: 0.9789872096058471\n"
499+
"Validation loss: 0.015797887230291963\n",
500+
"Validation Accuracy: 0.583953857421875\n",
501+
"F1-Score: 0.9797702949621508\n"
484502
]
485503
},
486504
{
@@ -566,7 +584,7 @@
566584
},
567585
{
568586
"cell_type": "code",
569-
"execution_count": 21,
587+
"execution_count": 22,
570588
"metadata": {},
571589
"outputs": [],
572590
"source": [
@@ -575,7 +593,7 @@
575593
},
576594
{
577595
"cell_type": "code",
578-
"execution_count": 22,
596+
"execution_count": 23,
579597
"metadata": {},
580598
"outputs": [],
581599
"source": [
@@ -585,39 +603,39 @@
585603
},
586604
{
587605
"cell_type": "code",
588-
"execution_count": 23,
606+
"execution_count": 24,
589607
"metadata": {},
590608
"outputs": [
591609
{
592610
"name": "stdout",
593611
"output_type": "stream",
594612
"text": [
595-
"f1 score: 0.996850\n",
596-
"Accuracy score: 0.998094\n",
613+
"f1 score: 0.998568\n",
614+
"Accuracy score: 0.999134\n",
597615
" precision recall f1-score support\n",
598616
"\n",
599-
" other 1.0000 1.0000 1.0000 1696\n",
600-
" subject_account_domain 1.0000 0.9400 0.9691 100\n",
601-
" opcode 1.0000 1.0000 1.0000 100\n",
602-
" recordnumber 1.0000 1.0000 1.0000 100\n",
603-
" eventcode 1.0000 1.0000 1.0000 100\n",
604-
" new_logon_account_name 1.0000 1.0000 1.0000 100\n",
605-
" computername 1.0000 1.0000 1.0000 100\n",
617+
" keywords 1.0000 1.0000 1.0000 96\n",
606618
" taskcategory 1.0000 1.0000 1.0000 100\n",
607-
"network_information_source_network_address 1.0000 1.0000 1.0000 100\n",
608-
" logname 0.9524 1.0000 0.9756 100\n",
619+
" eventcode 1.0000 1.0000 1.0000 100\n",
620+
" subject_account_name 1.0000 0.9900 0.9950 100\n",
621+
" other 1.0000 1.0000 1.0000 1696\n",
609622
" message 1.0000 1.0000 1.0000 100\n",
610-
" insert_time 1.0000 1.0000 1.0000 100\n",
611-
" network_information_workstation_name 1.0000 0.9500 0.9744 100\n",
612-
" sourcename 1.0000 1.0000 1.0000 100\n",
613-
" keywords 1.0000 1.0000 1.0000 96\n",
614-
" new_logon_account_domain 0.9615 1.0000 0.9804 100\n",
615-
" subject_account_name 0.9804 1.0000 0.9901 100\n",
623+
"network_information_source_network_address 1.0000 1.0000 1.0000 100\n",
624+
" logname 1.0000 1.0000 1.0000 100\n",
616625
" type 1.0000 1.0000 1.0000 100\n",
617626
" eventtype 1.0000 1.0000 1.0000 100\n",
627+
" network_information_workstation_name 1.0000 1.0000 1.0000 100\n",
628+
" new_logon_account_domain 0.9615 1.0000 0.9804 100\n",
629+
" computername 1.0000 1.0000 1.0000 100\n",
630+
" recordnumber 1.0000 1.0000 1.0000 100\n",
631+
" insert_time 1.0000 1.0000 1.0000 100\n",
632+
" new_logon_account_name 0.9901 1.0000 0.9950 100\n",
633+
" sourcename 1.0000 1.0000 1.0000 100\n",
634+
" subject_account_domain 1.0000 0.9600 0.9796 100\n",
635+
" opcode 1.0000 1.0000 1.0000 100\n",
618636
"\n",
619-
" micro avg 0.9968 0.9968 0.9968 3492\n",
620-
" macro avg 0.9970 0.9968 0.9968 3492\n",
637+
" micro avg 0.9986 0.9986 0.9986 3492\n",
638+
" macro avg 0.9986 0.9986 0.9986 3492\n",
621639
"\n"
622640
]
623641
}
@@ -712,7 +730,7 @@
712730
"name": "python",
713731
"nbconvert_exporter": "python",
714732
"pygments_lexer": "ipython3",
715-
"version": "3.7.6"
733+
"version": "3.7.8"
716734
}
717735
},
718736
"nbformat": 4,

0 commit comments

Comments
 (0)