This assignment demonstrates the use of R Markdown in conjunction with the knitr package to produce literate code. Forked from rdpeng/RepData_PeerAssessment1.
-
Load packages plyr and ggplot2
# Attach the packages used below: plyr for ddply(), ggplot2 for plotting.
# One call per line; two calls on one line without a separator do not parse.
library(plyr)
library(ggplot2)
-
Load data from file activity.csv
# Read activity.csv from the ./data directory into a data frame.
dat <- read.csv(file = "./data/activity.csv")
-
Transform the 'date' column from factor into date
# Convert the 'date' column to Date values, parsing it as "YYYY-MM-DD".
dat <- transform(dat, date = as.Date(date, format = "%Y-%m-%d"))
# Print the structure of the converted data frame.
str(dat)
## 'data.frame': 17568 obs. of 3 variables:
## $ steps : int NA NA NA NA NA NA NA NA NA NA ...
## $ date : Date, format: "2012-10-01" "2012-10-01" ...
## $ interval: int 0 5 10 15 20 25 30 35 40 45 ...
-
Remove NAs for first analyses
# Keep only the rows whose 'steps' value is present.
datNoNA <- dat[complete.cases(dat$steps), ]
-
Summarise steps by date (excluding NAs)
# Total steps per date over the NA-free rows: one output row per date.
stepsDate <- ddply(
  .data = datNoNA,
  .variables = "date",
  .fun = summarise,
  steps = sum(steps)
)
-
Chart steps by date as a bar plot with mean and median lines
# Bar chart of steps by date with labelled mean and median reference lines.
#
# Args:
#   data:       data frame with a 'date' column and a numeric 'steps' column.
#   posYMean:   y position of the "Mean = ..." label (default 20000).
#   posYMedian: y position of the "Median = ..." label (default 19000).
#
# Returns:
#   A ggplot object; further layers and labels can be added with `+`.
histStepsDate <- function(data, posYMean = 20000, posYMedian = 19000) {
  # Bars show the 'steps' values as given (stat = "identity"). Built with
  # ggplot() + geom_bar() rather than qplot(), because qplot() deprecated its
  # `stat` argument and would fall back to counting rows.
  p <- ggplot(data, aes(x = date, y = steps)) +
    geom_bar(stat = "identity")

  # Horizontal line at the mean
  pMean <- mean(data$steps)
  p <- p + geom_hline(yintercept = pMean, color = "red", size = 2)

  # Both labels are anchored at the date in the middle of the data
  posX <- data$date[ceiling(nrow(data) / 2)]

  # Label the mean line
  labelMean <- paste("Mean =", format(pMean, nsmall = 1))
  p <- p + annotate("text", x = posX, y = posYMean, label = labelMean,
                    color = "red")

  # Horizontal (dashed) line at the median
  pMedian <- median(data$steps)
  p <- p + geom_hline(yintercept = pMedian, color = "blue", linetype = 2,
                      size = 1)

  # Label the median line
  labelMedian <- paste("Median =", format(pMedian, nsmall = 1))
  p <- p + annotate("text", x = posX, y = posYMedian, label = labelMedian,
                    color = "blue")

  # Return plot
  p
}

histStepsDate(stepsDate) + labs(title = "Total Steps by Date (Remove NAs)")
-
Summarise steps by interval (excluding NAs)
# Mean steps per interval over the NA-free rows: one output row per interval.
stepsInterval <- ddply(
  .data = datNoNA,
  .variables = "interval",
  .fun = summarise,
  steps = mean(steps)
)
-
Chart steps by interval as a line with max
# Line chart of steps by interval with the maximum highlighted and labelled.
#
# Args:
#   data: data frame with numeric 'interval' and 'steps' columns.
#
# Returns:
#   A ggplot object; further layers and labels can be added with `+`.
lineStepsInterval <- function(data) {
  # Line plot of steps against interval
  p <- qplot(interval, steps, data = data, geom = "line")

  # Locate the maximum and the interval(s) where it occurs
  stepsMax <- max(data$steps)
  intvMax <- data$interval[which(data$steps == stepsMax)]

  # The text is shifted right of the marker by a fixed 825 interval units
  posX <- intvMax + 825L
  posY <- stepsMax
  label <- paste("Interval =", intvMax, "| Max Avg Steps = ", format(stepsMax))

  # Mark the maximum with a red point and attach the label
  p <- p + annotate("point", x = intvMax, y = stepsMax, color = "red",
                    size = 5)
  p <- p + annotate("text", x = posX, y = posY, color = "red", label = label)

  # Return plot
  p
}

lineStepsInterval(stepsInterval) +
  labs(title = "Mean Steps by Interval (Remove NAs)")
-
Replace NAs with mean for interval
# Replace missing step counts with the value recorded for the same interval
# in a lookup table.
#
# Args:
#   sSteps:    vector of step counts, possibly containing NA.
#   iInterval: vector of interval identifiers, parallel to sSteps.
#   data:      lookup data frame with 'interval' and 'steps' columns
#              (defaults to the global stepsInterval).
#
# Returns:
#   A vector the length of sSteps in which each NA is replaced by the lookup
#   value for its interval; an interval absent from `data` stays NA.
replaceNA <- function(sSteps, iInterval, data = stepsInterval) {
  # match() aligns every element with its lookup row, so the function is
  # correct for whole columns as well as for single values.
  fill <- data$steps[match(iInterval, data$interval)]
  ifelse(is.na(sSteps), fill, sSteps)
}

# Because replaceNA() is vectorised, one transform() over the full data frame
# replaces the per-row ddply() grouping by date and interval. Rows keep the
# order they have in dat.
datReplaceNA <- transform(dat, steps = replaceNA(steps, interval))
-
Summarize steps by date and chart as a bar plot with mean and median lines
# Total steps per date after NA replacement.
datTemp <- ddply(datReplaceNA, "date", summarise, steps = sum(steps))
# Chart the totals as a bar plot with mean and median lines.
histStepsDate(datTemp) + labs(title = "Total Steps by Date (Replace NAs)")
# Drop the temporary summary.
rm(datTemp)
-
Summarize steps by interval and chart as a line with max
# Mean steps per interval after NA replacement.
datTemp <- ddply(datReplaceNA, "interval", summarise, steps = mean(steps))
# Chart the means as a line plot with the maximum highlighted.
lineStepsInterval(datTemp) +
  labs(title = "Mean Steps by Interval (Replace NAs)")
# Drop the temporary summary.
rm(datTemp)
-
Add weekday character column to data
# Classify dates as "weekday" or "weekend".
#
# Args:
#   date: vector of dates accepted by format().
#
# Returns:
#   A character vector the length of `date`: "weekday" where the numeric
#   day-of-week code ("%w", 0 = Sunday) is 1 to 5, "weekend" otherwise.
weekDayOrEnd <- function(date) {
  # "%w" yields a digit, so the result does not depend on locale day names.
  dayCode <- format(date, format = "%w")
  ifelse(dayCode %in% 1:5, "weekday", "weekend")
}

# Add the classification as a new 'weekday' column.
datReplaceNA <- transform(datReplaceNA, weekday = weekDayOrEnd(date))
-
Summarise steps by interval categorized by weekday/weekend
# Mean steps for every interval / weekday-category combination.
stepsSplit <- ddply(
  .data = datReplaceNA,
  .variables = c("interval", "weekday"),
  .fun = summarise,
  steps = mean(steps)
)
-
Chart steps by interval as a line with facets by weekday/weekend
# Line plot of mean steps by interval with a smoother overlaid, drawn in one
# panel per 'weekday' value.
p <- qplot(interval, steps, data = stepsSplit,
           geom = c("line", "smooth"), facets = . ~ weekday)
# Add the title and print the plot.
p + labs(title = "Mean Steps by Interval (Replace NAs)")
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
# Remove the plot object from the workspace.
rm(list = "p")