-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathFeatureMatrix_AWW.py
148 lines (130 loc) · 7.16 KB
/
FeatureMatrix_AWW.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 10 14:36:35 2017
@author: yzhang17
"""
import numpy as np
from statistics import mode
from scipy import stats
import pywt
# Folder that holds the raw AlanWalksWales recordings and their epoch label files.
dataPath_AWW = 'AlanWalksWales/Raw/'

# Base names (without extension) of the recordings. Judging by the file names,
# the first five are walking sessions and the last six are eating / drinking /
# chatting sessions.
_recordingNames_AWW = (
    '2013_04_24_1hour_walk&chat&drink',
    '2013_05_06 - 1hour_walking',
    '2013_05_28_1hour_walking',
    '2013_06_04_1hour_walking',
    '2013_06_10_1hour_walking',
    '2013_04_24_1hour_chat&drink_Pub',
    '2013_05_14_40mins_eating',
    '2013_05_19_40mins_eat&drink',
    '2013_06_03_1hour_lunch',
    '2013_06_11_40mins_eat&readnewspaper',
    '2013_07_10_1hour_Dinner',
)

# Signal file of every recording: <path><name>.csv
dataFiles = tuple(
    dataPath_AWW + name + '.csv' for name in _recordingNames_AWW
)
# Matching label file of every recording: <path><name>_Epochs.csv
dataFiles_labels = tuple(
    dataPath_AWW + name + '_Epochs.csv' for name in _recordingNames_AWW
)

# Number of recordings (time segments) listed above.
nSubjects_AWW = len(dataFiles)
def statistics(data):  # The function for the 4 statistics
    """Return the mean, standard deviation, maximum and minimum of ``data``.

    Parameters
    ----------
    data : array_like
        Numeric samples. Anything ``np.asarray`` accepts; all elements are
        reduced together regardless of shape.

    Returns
    -------
    tuple
        ``(avg, sd, maxm, minm)``. ``sd`` is the population standard
        deviation (NumPy's default ``ddof=0``).

    Raises
    ------
    ValueError
        If ``data`` is empty (raised by the max/min reduction).
    """
    values = np.asarray(data)
    avg = np.mean(values)   # mean
    sd = np.std(values)     # standard deviation
    # NumPy reductions instead of the builtin max()/min(): they run in native
    # code and propagate NaN exactly like mean/std do, whereas the builtins
    # return an order-dependent value when a NaN is present.
    maxm = np.max(values)   # maximum
    minm = np.min(values)   # minimum
    return avg, sd, maxm, minm
def Derivatives(data):
    """Return discrete first and second derivatives of a 1-D signal.

    ``data`` must support element-wise arithmetic on slices (e.g. a NumPy
    array). Both results are evaluated at the interior samples only, so each
    has ``len(data) - 2`` elements.

    The first derivative is the difference between the midpoint towards the
    next sample and the midpoint towards the previous sample; the second
    derivative is the three-point stencil ``next - 2*current + previous``.
    """
    previous = data[:-2]
    current = data[1:-1]
    following = data[2:]

    # Midpoints on either side of every interior sample.
    midpoint_ahead = (current + following) / 2.
    midpoint_behind = (current + previous) / 2.

    deriv = midpoint_ahead - midpoint_behind
    secondDeriv = following - 2 * current + previous
    return deriv, secondDeriv
def _time_domain_features(window):
    """Return the 12 statistics of a window and its first/second derivatives."""
    deriv, secondDeriv = Derivatives(window)
    return np.hstack([statistics(window), statistics(deriv), statistics(secondDeriv)])


def _wavelet_features(window):
    """Return the 12 statistics of the level-3 Haar detail coefficients."""
    # wavedec returns [cA3, cD3, cD2, cD1]; the approximation cA3 is unused.
    # Original author's note on the detail levels: 3 = 1Hz, 2 = 2Hz, 1 = 4Hz.
    coeffs = pywt.wavedec(window, 'Haar', level=3)
    return np.hstack([statistics(detail) for detail in coeffs[1:]])


def featureMatrix(data, labels_all, window_size=40):  # Construct the feature matrix
    """Build the z-scored feature matrix and the labels of one recording.

    Parameters
    ----------
    data : 2-D numpy array
        Raw samples, one row per sample. Columns 0, 1 and 2 are read as the
        accelerometer x, y and z axes and column 5 as the EDA signal.
    labels_all : 2-D numpy array
        One row per time window; the columns hold the individual labels of
        that window, which are merged by majority vote.
    window_size : int, optional
        Samples per time window. The default of 40 corresponds to the 5 s
        windows at the 8 Hz sampling rate stated by the original author.

    Returns
    -------
    featuresAll : array of shape (n_windows, 120)
        All features, z-scored per column across the windows.
    featuresAcc : array of shape (n_windows, 96)
        The accelerometer features (columns 24:120 of ``featuresAll``).
    featuresEda : array of shape (n_windows, 24)
        The EDA features (columns 0:24 of ``featuresAll``).
    labels : array of shape (n_windows,)
        Majority-vote label of every window.

    Raises
    ------
    ValueError
        If ``data`` has fewer than ``n_windows * window_size`` rows.

    Notes
    -----
    Column layout of ``featuresAll`` (4 statistics per group: mean, standard
    deviation, max, min):

    * 0:12    EDA signal, first derivative, second derivative
    * 12:24   EDA wavelet details (cD3, cD2, cD1)
    * 24:72   signal / derivative / second derivative of the acceleration
              magnitude, x, y and z (12 columns each, in that order)
    * 72:120  wavelet details of the acceleration magnitude, x, y and z
              (12 columns each, in that order)
    """
    length = len(labels_all)

    # Create the one label set by the majority vote
    labels = np.array([mode(labels_all[j, :]) for j in range(length)], dtype=float)

    n_samples = length * window_size
    if len(data) < n_samples:
        raise ValueError(
            'data has %d samples but %d windows of %d samples need %d'
            % (len(data), length, window_size, n_samples)
        )

    def to_windows(signal):
        # Divide a 1-D signal into consecutive, non-overlapping time windows.
        return signal[0:n_samples].reshape(length, window_size)

    acc_x = data[0:n_samples, 0]
    acc_y = data[0:n_samples, 1]
    acc_z = data[0:n_samples, 2]

    EDA = to_windows(data[:, 5])
    ACCx = to_windows(acc_x)
    ACCy = to_windows(acc_y)
    ACCz = to_windows(acc_z)
    # ACC magnitude: Euclidean norm of the three axes, sample by sample
    ACC = to_windows(np.sqrt(np.square(acc_x) + np.square(acc_y) + np.square(acc_z)))

    # Order matters: it fixes the column layout documented above.
    acc_signals = (ACC, ACCx, ACCy, ACCz)

    # Construct the feature matrix, 24 EDA features, 96 ACC features, and 120 features in total.
    features = np.zeros((length, 120))
    for i in range(length):
        ### EDA features
        features[i, 0:12] = _time_domain_features(EDA[i, :])
        features[i, 12:24] = _wavelet_features(EDA[i, :])
        ### ACC features
        for k, signal in enumerate(acc_signals):
            window = signal[i, :]
            stat_start = 24 + 12 * k   # statistical block of this signal
            wave_start = 72 + 12 * k   # wavelet block of this signal
            features[i, stat_start:stat_start + 12] = _time_domain_features(window)
            features[i, wave_start:wave_start + 12] = _wavelet_features(window)

    # Normalize the data using z-score (per column, across windows).
    # NOTE: a feature that is constant across all windows has zero standard
    # deviation and therefore comes out as NaN.
    featuresAll = stats.zscore(features)
    featuresAcc = featuresAll[:, 24:120]  # 96 ACC features
    featuresEda = featuresAll[:, 0:24]    # 24 EDA features
    return featuresAll, featuresAcc, featuresEda, labels
# Load every recording, clean up its labels and construct its feature matrices.
# Each dictionary is keyed by the recording index 0..nSubjects_AWW-1.
data_AWW, labels_AWW = {}, {}
awwAll, awwAcc, awwEda = {}, {}, {}
awwLabels, awwGroups = {}, {}

for i in range(nSubjects_AWW):
    # Signal file: the first 8 lines are skipped.
    data_AWW[i] = np.loadtxt(dataFiles[i], delimiter=',', skiprows=8)

    # Label file: one header line is skipped, columns 3-5 hold the labels.
    epochs = np.loadtxt(dataFiles_labels[i], delimiter=',', skiprows=1, usecols=(3, 4, 5))
    epochs[epochs == 0] = 1   # Assume the unlabeled time windows as clean
    epochs = epochs - 1       # Make the labels include only 0s and 1s
    labels_AWW[i] = epochs

    # The group number for the leave one group out cross-validation
    awwGroups[i] = np.full(len(epochs), i, dtype=float)

    awwAll[i], awwAcc[i], awwEda[i], awwLabels[i] = featureMatrix(data_AWW[i], epochs)

# Convert the dictionaries to arrays: recordings 0-4 form the "walk" set and
# recordings 5-10 the "rest" set.
_walkIds_AWW = range(5)
_restIds_AWW = range(5, 11)


def _stackRecordings(perRecording, ids):
    """Concatenate the per-recording arrays of ``ids`` along the first axis."""
    return np.concatenate([perRecording[x] for x in ids], 0)


awwAll_walk = _stackRecordings(awwAll, _walkIds_AWW)
awwAll_rest = _stackRecordings(awwAll, _restIds_AWW)
awwAcc_walk = _stackRecordings(awwAcc, _walkIds_AWW)
awwAcc_rest = _stackRecordings(awwAcc, _restIds_AWW)
awwEda_walk = _stackRecordings(awwEda, _walkIds_AWW)
awwEda_rest = _stackRecordings(awwEda, _restIds_AWW)
awwLabels_walk = _stackRecordings(awwLabels, _walkIds_AWW)
awwLabels_rest = _stackRecordings(awwLabels, _restIds_AWW)
awwGroups_walk = _stackRecordings(awwGroups, _walkIds_AWW)
awwGroups_rest = _stackRecordings(awwGroups, _restIds_AWW)