|
41 | 41 | "from tqdm import tqdm,trange\n",
|
42 | 42 | "\n",
|
43 | 43 | "import pandas as pd\n",
|
44 |
| - "import numpy as np" |
| 44 | + "import numpy as np\n", |
| 45 | + "import s3fs\n", |
| 46 | + "from os import path" |
45 | 47 | ]
|
46 | 48 | },
|
47 | 49 | {
|
|
59 | 61 | "metadata": {},
|
60 | 62 | "outputs": [],
|
61 | 63 | "source": [
|
62 |
| - "logs_df = pd.read_csv('./data/winevt_sample.csv')" |
| 64 | + "# download log data\n", |
| 65 | + "WINEVT_CSV = \"winevt_sample.csv\"\n", |
| 66 | + "S3_BASE_PATH = \"rapidsai-data/cyber/clx\"\n", |
| 67 | + "\n", |
| 68 | + "if not path.exists(WINEVT_CSV):\n", |
| 69 | + " fs = s3fs.S3FileSystem(anon=True)\n", |
| 70 | + " fs.get(S3_BASE_PATH + \"/\" + WINEVT_CSV, WINEVT_CSV)" |
63 | 71 | ]
|
64 | 72 | },
|
65 | 73 | {
|
66 | 74 | "cell_type": "code",
|
67 | 75 | "execution_count": 3,
|
68 | 76 | "metadata": {},
|
| 77 | + "outputs": [], |
| 78 | + "source": [ |
| 79 | + "# logs_df = pd.read_csv('./data/winevt_sample.csv')\n", |
| 80 | + "logs_df = pd.read_csv(WINEVT_CSV)" |
| 81 | + ] |
| 82 | + }, |
| 83 | + { |
| 84 | + "cell_type": "code", |
| 85 | + "execution_count": 4, |
| 86 | + "metadata": {}, |
69 | 87 | "outputs": [
|
70 | 88 | {
|
71 | 89 | "data": {
|
|
94 | 112 | "Name: 0, dtype: object"
|
95 | 113 | ]
|
96 | 114 | },
|
97 |
| - "execution_count": 3, |
| 115 | + "execution_count": 4, |
98 | 116 | "metadata": {},
|
99 | 117 | "output_type": "execute_result"
|
100 | 118 | }
|
|
106 | 124 | },
|
107 | 125 | {
|
108 | 126 | "cell_type": "code",
|
109 |
| - "execution_count": 4, |
| 127 | + "execution_count": 5, |
110 | 128 | "metadata": {},
|
111 | 129 | "outputs": [
|
112 | 130 | {
|
|
115 | 133 | "'02/28/2019 12:49:04 AM LogName= Security SourceName= Microsoft Windows security auditing. EventCode= 4624 EventType= 0 Type= Information ComputerName= lt-95.melton.com TaskCategory= Logon OpCode= Info RecordNumber= 474033423 Keywords= Audit Success Message= An account was successfully logged on. Subject: Account Name: gonzalespeter Account Domain: taylor.com New Logon: Account Name: [email protected] Account Domain: blair.com Network Information: Workstation Name: desktop-gonzalespeter Network Address: 192.175.54.118'"
|
116 | 134 | ]
|
117 | 135 | },
|
118 |
| - "execution_count": 4, |
| 136 | + "execution_count": 5, |
119 | 137 | "metadata": {},
|
120 | 138 | "output_type": "execute_result"
|
121 | 139 | }
|
|
127 | 145 | },
|
128 | 146 | {
|
129 | 147 | "cell_type": "code",
|
130 |
| - "execution_count": 5, |
| 148 | + "execution_count": 6, |
131 | 149 | "metadata": {},
|
132 | 150 | "outputs": [],
|
133 | 151 | "source": [
|
|
153 | 171 | },
|
154 | 172 | {
|
155 | 173 | "cell_type": "code",
|
156 |
| - "execution_count": 6, |
| 174 | + "execution_count": 7, |
157 | 175 | "metadata": {},
|
158 | 176 | "outputs": [],
|
159 | 177 | "source": [
|
|
162 | 180 | },
|
163 | 181 | {
|
164 | 182 | "cell_type": "code",
|
165 |
| - "execution_count": 7, |
| 183 | + "execution_count": 8, |
166 | 184 | "metadata": {},
|
167 | 185 | "outputs": [
|
168 | 186 | {
|
|
179 | 197 | },
|
180 | 198 | {
|
181 | 199 | "cell_type": "code",
|
182 |
| - "execution_count": 8, |
| 200 | + "execution_count": 9, |
183 | 201 | "metadata": {},
|
184 | 202 | "outputs": [],
|
185 | 203 | "source": [
|
|
188 | 206 | },
|
189 | 207 | {
|
190 | 208 | "cell_type": "code",
|
191 |
| - "execution_count": 9, |
| 209 | + "execution_count": 10, |
192 | 210 | "metadata": {},
|
193 | 211 | "outputs": [],
|
194 | 212 | "source": [
|
|
212 | 230 | },
|
213 | 231 | {
|
214 | 232 | "cell_type": "code",
|
215 |
| - "execution_count": 10, |
| 233 | + "execution_count": 11, |
216 | 234 | "metadata": {},
|
217 | 235 | "outputs": [],
|
218 | 236 | "source": [
|
|
237 | 255 | },
|
238 | 256 | {
|
239 | 257 | "cell_type": "code",
|
240 |
| - "execution_count": 11, |
| 258 | + "execution_count": 12, |
241 | 259 | "metadata": {},
|
242 | 260 | "outputs": [],
|
243 | 261 | "source": [
|
|
269 | 287 | },
|
270 | 288 | {
|
271 | 289 | "cell_type": "code",
|
272 |
| - "execution_count": 12, |
| 290 | + "execution_count": 13, |
273 | 291 | "metadata": {},
|
274 | 292 | "outputs": [],
|
275 | 293 | "source": [
|
|
303 | 321 | },
|
304 | 322 | {
|
305 | 323 | "cell_type": "code",
|
306 |
| - "execution_count": 13, |
| 324 | + "execution_count": 14, |
307 | 325 | "metadata": {},
|
308 | 326 | "outputs": [],
|
309 | 327 | "source": [
|
|
319 | 337 | },
|
320 | 338 | {
|
321 | 339 | "cell_type": "code",
|
322 |
| - "execution_count": 14, |
| 340 | + "execution_count": 15, |
323 | 341 | "metadata": {},
|
324 | 342 | "outputs": [],
|
325 | 343 | "source": [
|
|
328 | 346 | },
|
329 | 347 | {
|
330 | 348 | "cell_type": "code",
|
331 |
| - "execution_count": 15, |
| 349 | + "execution_count": 16, |
332 | 350 | "metadata": {},
|
333 | 351 | "outputs": [],
|
334 | 352 | "source": [
|
|
349 | 367 | },
|
350 | 368 | {
|
351 | 369 | "cell_type": "code",
|
352 |
| - "execution_count": 16, |
| 370 | + "execution_count": 17, |
353 | 371 | "metadata": {},
|
354 | 372 | "outputs": [],
|
355 | 373 | "source": [
|
|
372 | 390 | },
|
373 | 391 | {
|
374 | 392 | "cell_type": "code",
|
375 |
| - "execution_count": 17, |
| 393 | + "execution_count": 18, |
376 | 394 | "metadata": {},
|
377 | 395 | "outputs": [],
|
378 | 396 | "source": [
|
|
390 | 408 | },
|
391 | 409 | {
|
392 | 410 | "cell_type": "code",
|
393 |
| - "execution_count": 18, |
| 411 | + "execution_count": 19, |
394 | 412 | "metadata": {},
|
395 | 413 | "outputs": [],
|
396 | 414 | "source": [
|
|
421 | 439 | },
|
422 | 440 | {
|
423 | 441 | "cell_type": "code",
|
424 |
| - "execution_count": 19, |
| 442 | + "execution_count": 20, |
425 | 443 | "metadata": {},
|
426 | 444 | "outputs": [],
|
427 | 445 | "source": [
|
|
433 | 451 | },
|
434 | 452 | {
|
435 | 453 | "cell_type": "code",
|
436 |
| - "execution_count": 20, |
| 454 | + "execution_count": 21, |
437 | 455 | "metadata": {},
|
438 | 456 | "outputs": [
|
439 | 457 | {
|
|
447 | 465 | "name": "stdout",
|
448 | 466 | "output_type": "stream",
|
449 | 467 | "text": [
|
450 |
| - "Train loss: 1.1063271555407295\n" |
| 468 | + "Train loss: 1.1561125671041423\n" |
451 | 469 | ]
|
452 | 470 | },
|
453 | 471 | {
|
454 | 472 | "name": "stderr",
|
455 | 473 | "output_type": "stream",
|
456 | 474 | "text": [
|
457 |
| - "Epoch: 50%|█████ | 1/2 [00:15<00:15, 15.51s/it]" |
| 475 | + "Epoch: 50%|█████ | 1/2 [00:14<00:14, 14.94s/it]" |
458 | 476 | ]
|
459 | 477 | },
|
460 | 478 | {
|
461 | 479 | "name": "stdout",
|
462 | 480 | "output_type": "stream",
|
463 | 481 | "text": [
|
464 |
| - "Validation loss: 0.2913314178586006\n", |
465 |
| - "Validation Accuracy: 0.550140380859375\n", |
466 |
| - "F1-Score: 0.8453539528062924\n", |
467 |
| - "Train loss: 0.14663322655291394\n" |
| 482 | + "Validation loss: 0.2560569792985916\n", |
| 483 | + "Validation Accuracy: 0.547088623046875\n", |
| 484 | + "F1-Score: 0.7919258952025439\n", |
| 485 | + "Train loss: 0.12098206990751727\n" |
468 | 486 | ]
|
469 | 487 | },
|
470 | 488 | {
|
471 | 489 | "name": "stderr",
|
472 | 490 | "output_type": "stream",
|
473 | 491 | "text": [
|
474 |
| - "Epoch: 100%|██████████| 2/2 [00:30<00:00, 15.36s/it]" |
| 492 | + "Epoch: 100%|██████████| 2/2 [00:29<00:00, 14.75s/it]" |
475 | 493 | ]
|
476 | 494 | },
|
477 | 495 | {
|
478 | 496 | "name": "stdout",
|
479 | 497 | "output_type": "stream",
|
480 | 498 | "text": [
|
481 |
| - "Validation loss: 0.026326983235776424\n", |
482 |
| - "Validation Accuracy: 0.58355712890625\n", |
483 |
| - "F1-Score: 0.9789872096058471\n" |
| 499 | + "Validation loss: 0.015797887230291963\n", |
| 500 | + "Validation Accuracy: 0.583953857421875\n", |
| 501 | + "F1-Score: 0.9797702949621508\n" |
484 | 502 | ]
|
485 | 503 | },
|
486 | 504 | {
|
|
566 | 584 | },
|
567 | 585 | {
|
568 | 586 | "cell_type": "code",
|
569 |
| - "execution_count": 21, |
| 587 | + "execution_count": 22, |
570 | 588 | "metadata": {},
|
571 | 589 | "outputs": [],
|
572 | 590 | "source": [
|
|
575 | 593 | },
|
576 | 594 | {
|
577 | 595 | "cell_type": "code",
|
578 |
| - "execution_count": 22, |
| 596 | + "execution_count": 23, |
579 | 597 | "metadata": {},
|
580 | 598 | "outputs": [],
|
581 | 599 | "source": [
|
|
585 | 603 | },
|
586 | 604 | {
|
587 | 605 | "cell_type": "code",
|
588 |
| - "execution_count": 23, |
| 606 | + "execution_count": 24, |
589 | 607 | "metadata": {},
|
590 | 608 | "outputs": [
|
591 | 609 | {
|
592 | 610 | "name": "stdout",
|
593 | 611 | "output_type": "stream",
|
594 | 612 | "text": [
|
595 |
| - "f1 score: 0.996850\n", |
596 |
| - "Accuracy score: 0.998094\n", |
| 613 | + "f1 score: 0.998568\n", |
| 614 | + "Accuracy score: 0.999134\n", |
597 | 615 | " precision recall f1-score support\n",
|
598 | 616 | "\n",
|
599 |
| - " other 1.0000 1.0000 1.0000 1696\n", |
600 |
| - " subject_account_domain 1.0000 0.9400 0.9691 100\n", |
601 |
| - " opcode 1.0000 1.0000 1.0000 100\n", |
602 |
| - " recordnumber 1.0000 1.0000 1.0000 100\n", |
603 |
| - " eventcode 1.0000 1.0000 1.0000 100\n", |
604 |
| - " new_logon_account_name 1.0000 1.0000 1.0000 100\n", |
605 |
| - " computername 1.0000 1.0000 1.0000 100\n", |
| 617 | + " keywords 1.0000 1.0000 1.0000 96\n", |
606 | 618 | " taskcategory 1.0000 1.0000 1.0000 100\n",
|
607 |
| - "network_information_source_network_address 1.0000 1.0000 1.0000 100\n", |
608 |
| - " logname 0.9524 1.0000 0.9756 100\n", |
| 619 | + " eventcode 1.0000 1.0000 1.0000 100\n", |
| 620 | + " subject_account_name 1.0000 0.9900 0.9950 100\n", |
| 621 | + " other 1.0000 1.0000 1.0000 1696\n", |
609 | 622 | " message 1.0000 1.0000 1.0000 100\n",
|
610 |
| - " insert_time 1.0000 1.0000 1.0000 100\n", |
611 |
| - " network_information_workstation_name 1.0000 0.9500 0.9744 100\n", |
612 |
| - " sourcename 1.0000 1.0000 1.0000 100\n", |
613 |
| - " keywords 1.0000 1.0000 1.0000 96\n", |
614 |
| - " new_logon_account_domain 0.9615 1.0000 0.9804 100\n", |
615 |
| - " subject_account_name 0.9804 1.0000 0.9901 100\n", |
| 623 | + "network_information_source_network_address 1.0000 1.0000 1.0000 100\n", |
| 624 | + " logname 1.0000 1.0000 1.0000 100\n", |
616 | 625 | " type 1.0000 1.0000 1.0000 100\n",
|
617 | 626 | " eventtype 1.0000 1.0000 1.0000 100\n",
|
| 627 | + " network_information_workstation_name 1.0000 1.0000 1.0000 100\n", |
| 628 | + " new_logon_account_domain 0.9615 1.0000 0.9804 100\n", |
| 629 | + " computername 1.0000 1.0000 1.0000 100\n", |
| 630 | + " recordnumber 1.0000 1.0000 1.0000 100\n", |
| 631 | + " insert_time 1.0000 1.0000 1.0000 100\n", |
| 632 | + " new_logon_account_name 0.9901 1.0000 0.9950 100\n", |
| 633 | + " sourcename 1.0000 1.0000 1.0000 100\n", |
| 634 | + " subject_account_domain 1.0000 0.9600 0.9796 100\n", |
| 635 | + " opcode 1.0000 1.0000 1.0000 100\n", |
618 | 636 | "\n",
|
619 |
| - " micro avg 0.9968 0.9968 0.9968 3492\n", |
620 |
| - " macro avg 0.9970 0.9968 0.9968 3492\n", |
| 637 | + " micro avg 0.9986 0.9986 0.9986 3492\n", |
| 638 | + " macro avg 0.9986 0.9986 0.9986 3492\n", |
621 | 639 | "\n"
|
622 | 640 | ]
|
623 | 641 | }
|
|
712 | 730 | "name": "python",
|
713 | 731 | "nbconvert_exporter": "python",
|
714 | 732 | "pygments_lexer": "ipython3",
|
715 |
| - "version": "3.7.6" |
| 733 | + "version": "3.7.8" |
716 | 734 | }
|
717 | 735 | },
|
718 | 736 | "nbformat": 4,
|
|
0 commit comments