-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
125 lines (75 loc) · 3.6 KB
/
run_analysis.R
File metadata and controls
125 lines (75 loc) · 3.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
library(dplyr)
#----------- Get some names -----------#
#activity lookup table: activity id -> activity name
activity_names <- read.table("./UCI HAR Dataset/activity_labels.txt")
colnames(activity_names) <- c("activity_id", "activity_name")
#feature list; its second column supplies the measurement column names
feature_names <- read.table("./UCI HAR Dataset/features.txt")
#subject identifiers for the rows of the test set
subject_test <- read.table("./UCI HAR Dataset/test/subject_test.txt")
#subject identifiers for the rows of the training set
subject_train <- read.table("./UCI HAR Dataset/train/subject_train.txt")
#----------- Get the training set -----------#
#read the training measurements
X_train <- read.table("./UCI HAR Dataset/train/X_train.txt")
#label the columns with the feature names (second column of the feature list);
#the table is called feature_names - "features_names" does not exist
names(X_train) <- feature_names[, 2]
#read the activity label of every training row
y_train <- read.table("./UCI HAR Dataset/train/y_train.txt")
names(y_train) <- "label_id"
#attach activity and subject ids by position BEFORE merging: merge() sorts the
#rows by the key, so anything attached by position afterwards would be misaligned
X_train$activity_id <- y_train$label_id
X_train$subject_id <- subject_train[, 1]
#translate every activity id into its activity name
X_train <- merge(X_train, activity_names, by = "activity_id", all = TRUE)
#----------- Get test set -----------#
#read the test measurements
X_test <- read.table("./UCI HAR Dataset/test/X_test.txt")
#label the columns with the feature names, same as for the training set
names(X_test) <- feature_names[, 2]
#read the activity label of every test row
y_test <- read.table("./UCI HAR Dataset/test/y_test.txt")
names(y_test) <- "label_id"
#attach activity and subject ids by position before merge() reorders the rows
X_test$activity_id <- y_test$label_id
X_test$subject_id <- subject_test[, 1]
#translate every activity id into its activity name
X_test <- merge(X_test, activity_names, by = "activity_id", all = TRUE)
#----------- Merge sets -----------#
#stack the training rows on top of the test rows
data_set <- rbind(X_train, X_test)
#----------- Look for the measurements on mean -----------#
#positions of the columns whose name contains "mean()"
mean_idx <- grep(pattern = "mean\\(\\)", x = names(data_set))
#original names of those columns
old_names_mean <- names(data_set[, mean_idx])
#turn "-mean()" into "Mean", then drop every remaining dash
#(for instance the one in front of an X/Y/Z suffix)
new_names_mean <- gsub(
  pattern = "-", replacement = "",
  x = gsub(pattern = "-mean\\(\\)", replacement = "Mean", x = old_names_mean)
)
#----------- Look for the measurements on standard deviation -----------#
#positions of the columns whose name contains "std()"
std_idx <- grep(pattern = "std\\(\\)", x = names(data_set))
#original names of those columns
old_names_std <- names(data_set[, std_idx])
#turn "-std()" into "Std", then drop every remaining dash
new_names_std <- gsub(
  pattern = "-", replacement = "",
  x = gsub(pattern = "-std\\(\\)", replacement = "Std", x = old_names_std)
)
#----------- Cleaning the data -----------#
#install the cleaned names on the mean columns, then on the std columns
names(data_set)[mean_idx] <- new_names_mean
names(data_set)[std_idx] <- new_names_std
#locate the two identifier columns
subject_idx <- match("subject_id", names(data_set))
activity_idx <- match("activity_name", names(data_set))
#keep the subject id, the mean() and std() measurements and the activity name,
#in that order; every other column is dropped
keep_idx <- c(subject_idx, mean_idx, std_idx, activity_idx)
data_set <- data_set[, keep_idx]
#----------- Getting the tidy data -----------#
#average every measurement column for each (subject, activity) pair
#the identifier columns are picked by name, so the result does not depend on
#where they sit in data_set
id_cols <- c("subject_id", "activity_name")
measure_cols <- setdiff(names(data_set), id_cols)
tidy_data <- aggregate(data_set[measure_cols], by = data_set[id_cols], FUN = mean)
#prefix the averaged columns so their names say what they now hold
#the names come from tidy_data itself ("cdata" was never defined), and the
#logical mask avoids a 3:ncol range that would count backwards when there
#are no measurement columns
is_measure <- !(names(tidy_data) %in% id_cols)
names(tidy_data)[is_measure] <- paste("Mean of", names(tidy_data)[is_measure])