-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
97 lines (68 loc) · 3.99 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
## Script to Process the UCI HAR DataSet. This work is part of the course: Getting and Cleaning Data
## Source of the DataSet: https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip
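## Reads and merges the training and the test sets, keeps the mean/std measurements, and writes
## one independent tidy data set (finalAveragesDataSet.txt) holding the average of each kept
## variable for each activity and each subject.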
####################################
### Lookup tables: labels/names ####
####################################
# Activity lookup table: each row pairs an activity id with the activity label.
activities <- setNames(
  read.table("./UCI HAR Dataset/activity_labels.txt"),
  c("ID_ACTIVITY", "ACTIVITY")
)
# Feature lookup table: each row pairs a feature id with the feature name.
# The FEATURE column is reused later as the column names of the measurement tables.
features <- setNames(
  read.table("UCI HAR Dataset/features.txt"),
  c("ID_FEATURE", "FEATURE")
)
####################################
########## Training set ############
####################################
## Subject ids of the training observations (single column)
train_Subject <- setNames(
  read.table("UCI HAR Dataset/train/subject_train.txt"),
  "ID_SUBJECT"
)
## Activity ids of the training observations (single column)
train_Y <- setNames(
  read.table("UCI HAR Dataset/train/y_train.txt"),
  "ID_ACTIVITY"
)
## Training measurements; columns are labelled with the feature names
## taken from the `features` table read earlier in this script
train_X <- setNames(
  read.table("UCI HAR Dataset/train/X_train.txt"),
  features$FEATURE
)
## Assemble the training table column-wise:
## "ID_SUBJECT", "ID_ACTIVITY", then one column per feature
train_set <- cbind(train_Subject, train_Y, train_X)
####################################
############ Test set ##############
####################################
## Subject ids of the test observations (single column)
test_Subject <- setNames(
  read.table("UCI HAR Dataset/test/subject_test.txt"),
  "ID_SUBJECT"
)
## Activity ids of the test observations (single column)
test_Y <- setNames(
  read.table("UCI HAR Dataset/test/y_test.txt"),
  "ID_ACTIVITY"
)
## Test measurements; columns are labelled with the feature names
## taken from the `features` table read earlier in this script
test_X <- setNames(
  read.table("UCI HAR Dataset/test/X_test.txt"),
  features$FEATURE
)
## Assemble the test table column-wise:
## "ID_SUBJECT", "ID_ACTIVITY", then one column per feature
test_set <- cbind(test_Subject, test_Y, test_X)
####################################
#### Combine train and test sets ###
####################################
### Stack both tables row-wise into a single table (training rows first)
trainTestSet <- rbind(train_set, test_set)
### Keep only the subject id, the activity id and the mean/std measurements.
# The regular expression ID_SUBJECT|ID_ACTIVITY|mean|std flags every column whose
# name contains at least one of the four character sequences
# ID_SUBJECT ; ID_ACTIVITY ; mean ; std
# NOTE: this is a case-sensitive substring match, so any column name that merely
# contains "mean" or "std" is kept as well.
trainTestSet <- trainTestSet[
  ,
  grepl("ID_SUBJECT|ID_ACTIVITY|mean|std", names(trainTestSet))
]
### Swap the activity ids for the corresponding activity labels
### @See http://stackoverflow.com/questions/14417612/r-replace-an-id-value-with-a-name
# The second column (the activity id) is renamed first, then its ids are looked
# up in the activities table and overwritten with the matching labels.
colnames(trainTestSet)[2L] <- "ACTIVITY"
trainTestSet$ACTIVITY <- activities[["ACTIVITY"]][
  match(trainTestSet[["ACTIVITY"]], activities[["ID_ACTIVITY"]])
]
### Build the summary: average of every kept variable per subject and per activity.
### Two steps, no explicit loops:
###   1. melt  -> long format: one row per (ID_SUBJECT, ACTIVITY, feature, measurement)
###   2. dcast -> back to wide format, aggregating the measurements with mean
### @See http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/#problem
library(reshape2)
### Step 1: the former feature columns become the FEATURES factor, their values
### go to FEATURES_MEASUREMENTS
melted_features <- melt(
  trainTestSet,
  id.vars = c("ID_SUBJECT", "ACTIVITY"),
  variable.name = "FEATURES",
  value.name = "FEATURES_MEASUREMENTS"
)
### Step 2: one row per ID_SUBJECT/ACTIVITY pair, one column per feature,
### each cell holding the mean of the corresponding measurements
finalAveragesDataSet <- dcast(
  melted_features,
  ID_SUBJECT + ACTIVITY ~ FEATURES,
  fun.aggregate = mean,
  value.var = "FEATURES_MEASUREMENTS"
)
### Save the tidy data set as text (no row names): finalAveragesDataSet.txt
write.table(
  finalAveragesDataSet,
  file = "./finalAveragesDataSet.txt",
  row.names = FALSE
)