-
Notifications
You must be signed in to change notification settings - Fork 0
/
malware_detection.py
85 lines (71 loc) · 3.21 KB
/
malware_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# -*- coding: UTF-8 -*-
import tensorflow as tf
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
from sklearn.model_selection import KFold
# Read the two comma-separated text files. X is what the script later feeds
# to the model as inputs and Y is what it uses as targets; the longer
# names are kept as aliases of the same arrays.
X = inputDataset = loadtxt('input_values.txt', delimiter=',')
Y = outputDataset = loadtxt('output_values.txt', delimiter=',')
# Function to create the model
def create_model():
    """Build and compile the feed-forward binary classifier.

    The network accepts 100 input features, passes them through four
    ReLU hidden layers (70, 50, 30 and 20 units) and ends in a single
    sigmoid unit. It is compiled with binary cross-entropy loss, the
    Adam optimizer and accuracy as the reported metric.

    Returns:
        A compiled, untrained ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(70, input_shape=(100,), activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # An SGD optimizer object used to be constructed here but was never
    # handed to compile(); it was dead code, and its `lr=` keyword is
    # rejected by current Keras releases, so it has been removed. The
    # model is (and always was) trained with Adam.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# Cross-validation setup
n_split = 12
kf = KFold(n_splits=n_split, shuffle=True)
# One accuracy value per fold; collected in a list instead of a manual
# counter so the builtin `iter` is no longer shadowed.
fold_accuracies = []

# Cross-validation loop: every fold trains a brand-new model on the
# training rows and scores it on the held-out rows.
for train_index, test_index in kf.split(X):
    x_train, x_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    # Create and train the model
    model = create_model()
    model.fit(x_train, y_train, epochs=20, batch_size=50)

    # Evaluate the model; score is [loss, accuracy] as configured in compile()
    score = model.evaluate(x_test, y_test, verbose=0)
    print(f'Test loss: {score[0]}')
    print(f'Test accuracy: {score[1]}')
    fold_accuracies.append(score[1])

# Print overall accuracy (mean over all folds, as a percentage)
totalAccuracy = sum(fold_accuracies)
print(f'CALCULATED ACCURACY: {totalAccuracy * 100 / len(fold_accuracies):.2f}%')
###################################################
"""
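# Optional feature-importance analysis: uncomment and adapt the section below before use.
# NOTE: keras.wrappers.scikit_learn has been removed from recent Keras/TensorFlow releases
# (the SciKeras package is its replacement), and X is a NumPy array loaded with loadtxt,
# so X.columns is not available -- pass the feature names explicitly instead.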
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
import eli5
from eli5.sklearn import PermutationImportance
# Example setup for permutation importance
# perm = PermutationImportance(model, scoring="accuracy", random_state=1).fit(X, Y)
# eli5.show_weights(perm, feature_names=X.columns.tolist())
"""
###################################################
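# Feature list, kept for reference only: it sits inside a string literal and is never used by the script.
# NOTE(review): 64 names are listed below, while create_model() declares 100 input features -- confirm whether this list is complete.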
"""
['AddAtomW', 'closesocket', 'CloseWindow', 'CreateDCA', 'CreateWaitableTimerA',
'CreateWaitableTimerW', 'DeleteAtom', 'EnableWindow', 'EnumDisplaySettingsA',
'FindAtomW', 'FlushFileBuffers', 'GetCaretPos', 'GetCursor', 'GetDIBits',
'GetEnvironmentStrings', 'GetInputState', 'GetKeyState', 'GetLastActivePopup',
'GetMenu', 'GetPixel', 'GetQueuedCompletionStatus', 'GetTextCharset', 'HeapLock',
'HeapUnlock', 'ImpersonateDdeClientWindow', 'InSendMessage', 'IsBadStringPtrA',
'IsCharLowerA', 'IsMenu', 'IsWindowUnicode', 'LoadAccelerators', 'LoadBitmapA',
'LocalLock', 'LocalUnlock', 'MoveFileExA', 'OleFlushClipboard', 'PostQueuedCompletionStatus',
'PulseEvent', 'SetActiveWindow', 'SetFocus', 'StretchBlt', 'TerminateThread',
'TranslateAcceleratorW', 'UpdateWindow', 'LoadLibraryExW', 'LocalAlloc', 'GetProcAddress',
'GetModuleHandleW', 'CreateFileW', 'GetSystemMetrics', 'MapViewOfFileEx', 'CloseHandle',
'LocalFree', 'GetThreadLocale', 'GetModuleFileNameW', 'LoadLibraryW', 'lstrlenW',
'FreeLibrary', 'lstrlenA', 'GetCurrentThreadId', 'GetOutlineTextMetricsA',
'GetTextFaceA', 'SendDlgItemMessageA', 'SetScrollRange']
"""