# Download the UCI HAR Dataset archive into the working directory.
# setInternet2() is intentionally not called: the function exists only in
# Windows builds of R, so calling it stops the script on every other platform.
archivo <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
dataZip <- "project_data.zip"
# mode = "wb" requests a binary transfer; a zip archive fetched in text mode
# can be corrupted on Windows.
download.file(archivo, destfile = file.path(".", dataZip), mode = "wb")
# Read one header-less table stored inside a zip archive.
#   archive  - path of the zip file
#   member   - path of the file inside the archive
#   colClass - class applied to the columns that are read
readArchiveTable <- function(archive, member, colClass) {
  read.table(unz(archive, member), header = FALSE, colClasses = colClass)
}

# Test partition: measurements, activity codes and subject ids.
testX <- readArchiveTable(
  dataZip, "UCI HAR Dataset/test/X_test.txt", "numeric"
)
testY <- readArchiveTable(
  dataZip, "UCI HAR Dataset/test/y_test.txt", "integer"
)
testSubject <- readArchiveTable(
  dataZip, "UCI HAR Dataset/test/subject_test.txt", "integer"
)

# Training partition: same three files.
trainX <- readArchiveTable(
  dataZip, "UCI HAR Dataset/train/X_train.txt", "numeric"
)
trainY <- readArchiveTable(
  dataZip, "UCI HAR Dataset/train/y_train.txt", "integer"
)
trainSubject <- readArchiveTable(
  dataZip, "UCI HAR Dataset/train/subject_train.txt", "integer"
)

# Stack the partitions, training rows first, so the three tables stay
# row-aligned with each other.
dataDT <- rbind(trainX, testX)
labelDT <- rbind(trainY, testY)
subjectDT <- rbind(trainSubject, testSubject)
# The second column of features.txt supplies the measurement column names.
features <- read.table(
  unz(dataZip, "UCI HAR Dataset/features.txt"),
  sep = " "
)
featureNames <- as.character(features[[2]])

# Name every column, then place measurements, subject id and activity code
# side by side in a single data frame.
names(dataDT) <- featureNames
names(subjectDT) <- "Subject"
names(labelDT) <- "Activity"
dataTotal <- cbind(dataDT, subjectDT, labelDT)
# Keep the columns whose names contain the literal text "mean()" or "std()",
# plus the Subject and Activity columns.
# Each parenthesis is written as \\( / \\) so the string literal is valid R
# and the regex treats the parentheses as literal characters.
keepColumns <- grep(
  "mean\\(\\)|std\\(\\)|Subject|Activity",
  names(dataTotal)
)
# drop = FALSE keeps the result a data frame however many columns match.
meanStdDataDT <- dataTotal[, keepColumns, drop = FALSE]
# Replace the integer activity codes with a factor: the first column of
# activity_labels.txt gives the levels, the second column the labels.
activityLabels <- read.table(
  unz(dataZip, "UCI HAR Dataset/activity_labels.txt"),
  header = FALSE,
  stringsAsFactors = FALSE
)
meanStdDataDT[["Activity"]] <- factor(
  meanStdDataDT[["Activity"]],
  levels = activityLabels[[1]],
  labels = activityLabels[[2]]
)
# 4. Appropriately label the data set with descriptive variable names.
# Expand the abbreviated column names into descriptive ones.
# The rules are applied in the order listed; the two anchored rules (^f, ^t)
# only touch a leading prefix. The final rule escapes the parentheses as
# \\( and \\) so the string literal is valid R and "()" is matched literally.
renameRules <- c(
  "^f"     = "Frequency",
  "^t"     = "Time",
  "Acc"    = "Acceleration",
  "Gyro"   = "Gyroscope",
  "mean"   = "Mean",
  "std"    = "Std",
  "-"      = "",
  "\\(\\)" = ""
)
descriptiveNames <- names(meanStdDataDT)
for (pattern in names(renameRules)) {
  descriptiveNames <- gsub(pattern, renameRules[[pattern]], descriptiveNames)
}
names(meanStdDataDT) <- descriptiveNames
# 5. Create a second, independent tidy data set with the average of each variable for each activity and each subject.
library(reshape)

# Measurement columns are those whose names contain "Mean" or "Std".
columnNames <- names(meanStdDataDT)
onlyMeanStd <- columnNames[grepl("Mean|Std", columnNames)]

# Go to long form (one row per Subject / Activity / variable value), then
# back to wide form taking the mean of each variable within each
# Subject-Activity pair.
moltenData <- melt(
  meanStdDataDT,
  id.vars = c("Subject", "Activity"),
  measure.vars = onlyMeanStd
)
tidyData <- cast(
  moltenData,
  Subject + Activity ~ variable,
  fun.aggregate = mean
)

# Write the averaged table to the working directory without row names.
write.csv(tidyData, file = "tidyData.csv", row.names = FALSE)