-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_Analysis.R
87 lines (65 loc) · 3.23 KB
/
run_Analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
library(dplyr)
# Fetch the zipped data set once; an archive that is already present in the
# working directory is reused instead of being downloaded again.
zipUrl <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
zipFile <- "UCI HAR Dataset.zip"
if (!file.exists(zipFile)) {
  # mode = "wb" requests a binary transfer so the archive is not altered
  download.file(url = zipUrl, destfile = zipFile, mode = "wb")
}

# Extract the archive only when the data directory is missing.
# dir.exists() (rather than file.exists()) is used so that a stray regular
# file carrying the directory's name is not mistaken for extracted data.
# NOTE(review): assumes the archive unpacks into a top-level folder named
# exactly like dataPath - confirm against the archive contents.
dataPath <- "UCI HAR Dataset"
if (!dir.exists(dataPath)) {
  unzip(zipfile = zipFile)
}
# Locations of the two partitions inside the extracted data directory.
trainDir <- file.path(dataPath, "train")
testDir <- file.path(dataPath, "test")

# Training partition: subject ids, feature values, activity ids.
trainingSubjects <- read.table(file.path(trainDir, "subject_train.txt"))
trainingValues <- read.table(file.path(trainDir, "X_train.txt"))
trainingActivity <- read.table(file.path(trainDir, "y_train.txt"))

# Test partition, read from the matching "*_test.txt" files.
testSubjects <- read.table(file.path(testDir, "subject_test.txt"))
testValues <- read.table(file.path(testDir, "X_test.txt"))
testActivity <- read.table(file.path(testDir, "y_test.txt"))

# Feature list; as.is = TRUE keeps the text labels as character strings
# instead of converting them to factors.
# Caveat: the names in the second column are not unique,
# e.g. fBodyAcc-bandsEnergy()-1,8
features <- read.table(file.path(dataPath, "features.txt"), as.is = TRUE)

# Activity lookup table, with its two columns given descriptive names.
activities <- read.table(file.path(dataPath, "activity_labels.txt"))
names(activities) <- c("activityId", "activityLabel")
# Assemble each partition column-wise (subject, feature values, activity),
# then stack the training rows on top of the test rows.
trainingData <- cbind(trainingSubjects, trainingValues, trainingActivity)
testData <- cbind(testSubjects, testValues, testActivity)
humanActivity <- rbind(trainingData, testData)

# Name the columns: "subject", one name per feature, then "activity".
names(humanActivity) <- c("subject", features[[2]], "activity")

# Retain only the identifier columns and those whose name contains
# "mean" or "std"; the match is a case-sensitive regular expression.
columnsToKeep <- grepl("subject|activity|mean|std", names(humanActivity))
humanActivity <- humanActivity[, columnsToKeep]

# Turn the activity codes into a factor: the first column of the lookup
# table supplies the levels, the second column the labels.
humanActivity$activity <- factor(
  humanActivity$activity,
  levels = activities[[1]],
  labels = activities[[2]]
)
# Build descriptive column names by applying an ordered list of
# pattern -> replacement rules to the current names. The rules run top to
# bottom, so their order matters.
nameRules <- rbind(
  c("[\\(\\)-]", ""),                # strip parentheses and hyphens
  c("^f", "frequencyDomain"),        # expand leading domain prefixes
  c("^t", "timeDomain"),
  c("Acc", "Accelerometer"),         # expand abbreviations
  c("Gyro", "Gyroscope"),
  c("Mag", "Magnitude"),
  c("Freq", "Frequency"),
  c("mean", "Mean"),
  c("std", "StandardDeviation"),
  c("BodyBody", "Body")              # collapse the doubled "Body" typo
)

humanActivityCols <- colnames(humanActivity)
for (ruleIndex in seq_len(nrow(nameRules))) {
  humanActivityCols <- gsub(
    nameRules[ruleIndex, 1],
    nameRules[ruleIndex, 2],
    humanActivityCols
  )
}

# Install the cleaned-up labels as the column names.
colnames(humanActivity) <- humanActivityCols
# Average every measurement column for each (subject, activity) pair.
# across(everything(), mean) replaces summarise_all(funs(mean)): funs() has
# been deprecated since dplyr 0.8.0 and summarise_all() is superseded.
# Inside a grouped summarise(), everything() selects only the non-grouping
# columns, so subject and activity are kept as keys rather than averaged.
# .groups = "drop" returns an ungrouped result; across() needs dplyr >= 1.0.0.
humanActivityMeans <- humanActivity %>%
  group_by(subject, activity) %>%
  summarise(across(everything(), mean), .groups = "drop")

# Write the summary to "tidy_data.txt" without row names or quoting.
write.table(
  humanActivityMeans,
  "tidy_data.txt",
  row.names = FALSE,
  quote = FALSE
)