-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
50 lines (34 loc) · 2.01 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# run_analysis.R
#
# Builds a summary data set from the raw files under `rawDataDirName`:
#   1. reads the feature names and keeps the columns whose name contains
#      "mean" or "std",
#   2. reads the test and train partitions (measurements, activity ids,
#      subject ids) and stacks them into one data frame,
#   3. replaces the numeric activity ids with the labels from
#      activity_labels.txt,
#   4. averages every kept measurement per Activity + Subject, and
#   5. writes the result to ActivitySubjectMeans.txt in `outputDataDirName`.
#
# All files are addressed through file.path() on the two directory names
# below, so the script neither depends on nor changes the working directory
# and does not hard-code a path separator for the sub-directories.
rawDataDirName <- "C:\\projects\\CleanDataAssignment\\DownloadedData"
outputDataDirName <- "C:\\projects\\CleanDataAssignment"

# Read the features.txt file, which lists the variables in the X dataset.
# The first column is just the row (feature) number, so it is skipped with a
# "NULL" column class; the names end up in features$V2.
features <- read.table(
  file.path(rawDataDirName, "features.txt"),
  header = FALSE,
  colClasses = c("NULL", "character")
)

# Extract only measurements on the mean and standard deviation.
# The match is case-sensitive: any name containing "mean" or "std" is kept.
keep <- grepl("mean", features$V2) | grepl("std", features$V2)

# Read the test set. The measurement columns are labelled with the feature
# names; read.table() turns them into syntactically valid R names.
testX <- read.table(
  file.path(rawDataDirName, "test", "X_test.txt"),
  header = FALSE,
  col.names = features$V2
)
testY <- read.table(
  file.path(rawDataDirName, "test", "y_test.txt"),
  header = FALSE,
  col.names = c("Activity")
)
testS <- read.table(
  file.path(rawDataDirName, "test", "subject_test.txt"),
  header = FALSE,
  col.names = c("Subject")
)
testXKeep <- testX[, keep]
test <- cbind(testY, testS, testXKeep)

# Read the training set the same way.
trainX <- read.table(
  file.path(rawDataDirName, "train", "X_train.txt"),
  header = FALSE,
  col.names = features$V2
)
trainY <- read.table(
  file.path(rawDataDirName, "train", "y_train.txt"),
  header = FALSE,
  col.names = c("Activity")
)
trainS <- read.table(
  file.path(rawDataDirName, "train", "subject_train.txt"),
  header = FALSE,
  col.names = c("Subject")
)
trainXKeep <- trainX[, keep]
train <- cbind(trainY, trainS, trainXKeep)

# Merge the training and test data sets to create one data set.
allData <- rbind(train, test)

# Use descriptive names for the activities in the data set.
# Only the label column of activity_labels.txt is read; the numeric activity
# id is used as a row index into it, which assumes the file lists the labels
# in id order starting at 1 -- TODO confirm against the data set.
activities <- read.table(
  file.path(rawDataDirName, "activity_labels.txt"),
  header = FALSE,
  colClasses = c("NULL", "character"),
  col.names = c(NA, "Activity")
)
allData$Activity <- activities$Activity[allData$Activity]

# Create a second, independent tidy data set with the average of each
# variable for each activity and subject.
means <- aggregate(. ~ Activity + Subject, allData, mean)

# Prefix every averaged column with "AVERAGE". The first two columns are the
# grouping keys (Activity, Subject) and keep their names. A logical mask is
# used instead of 3:n so that nothing is renamed when there are no
# measurement columns.
meansNames <- names(means)
isMeasurement <- seq_along(meansNames) > 2L
meansNames[isMeasurement] <- paste0("AVERAGE", meansNames[isMeasurement])
names(means) <- meansNames

# Save the summary means data set in the output directory.
write.table(
  means,
  file = file.path(outputDataDirName, "ActivitySubjectMeans.txt"),
  row.names = FALSE
)