Skip to content

Day 5 recap

Meghan Balk edited this page Apr 6, 2018 · 1 revision

Day 5 exercises

# Toy 0/1 table: one row per species (a-d), one column per locality
# (presumably 1 = recorded at that locality, 0 = not recorded).
species <- letters[1:4]
local.1 <- rep(1, 4)
local.2 <- c(0, 0, 1, 0)
local.3 <- c(0, 0, 1, 1)
local.4 <- c(0, 1, 1, 0)
occ <- data.frame(local.1, local.2, local.3, local.4)
rownames(occ) <- species

# Column totals, written as an explicit loop.
# The result vector is allocated once up front instead of being regrown with
# c() on every pass, and seq_along() is used so that a table with zero columns
# simply skips the loop (1:length(occ) would iterate over c(1, 0)).
occSums <- numeric(length(occ))

for(i in seq_along(occ)){ # for each column...
  occSums[i] <- sum(occ[, i])
}

occSums

occ


# The same totals in one call: MARGIN = 2 applies FUN to each column.
# Unlike the loop result, this vector is named after the columns.
occSums <- apply(occ, MARGIN = 2, FUN = sum)
occSums


# Two small tables that differ only in the third first name
# ("Brian" in people1, "Chris" in people2).
people1 <- data.frame(
  first = c("Amy", "Brian", "Brian"),
  last  = c("Douglas", "Douglas", "Farmer"),
  age   = c(29, 29, 23)
)

people2 <- data.frame(
  first = c("Amy", "Brian", "Chris"),
  last  = c("Douglas", "Douglas", "Farmer"),
  age   = c(29, 29, 23)
)

people1
people2


# apply() converts the data frame to a matrix first, so the ages come back as
# character strings. Every column of people1 has exactly two unique values,
# so the per-column results are bound into a matrix.
apply(people1, MARGIN = 2, FUN = unique)

# the same thing column by column (types are preserved here)
unique(people1[[1]])
unique(people1[[2]])
unique(people1[[3]])


# In people2 the first column has three unique values and the others two, so
# the results cannot be bound into a matrix and apply() returns a list.
apply(people2, MARGIN = 2, FUN = unique)

unique(people2[[1]])
unique(people2[[2]])
unique(people2[[3]])

 



customFun <- function(vector){
  # Summarise a numeric vector as a named pair: its total and its average.
  total <- sum(vector)
  average <- mean(vector)
  c(sum = total, mean = average)
}

# test that it works
customFun(c(2,4,5,1,1,11))

# Run customFun on every column of occ (MARGIN = 2). Each call returns two
# named values, so the result is a 2-row matrix with one column per locality.
apply(occ, MARGIN = 2, FUN = customFun)

# equivalent to this for loop:
# The result matrix is allocated once (rows "sum" and "mean", one column per
# column of occ) and filled in place, rather than regrown with cbind() on each
# pass; seq_len() keeps the loop from running when occ has no columns.
output <- matrix(NA_real_, nrow = 2, ncol = ncol(occ),
                 dimnames = list(c("sum", "mean"), NULL))
for(i in seq_len(ncol(occ))){
  temp.col <- occ[, i]
  output[, i] <- c(sum = sum(temp.col), mean = mean(temp.col))
}
output

# the loop does not carry the column names over, so copy them from occ
colnames(output) <- colnames(occ)
output



# A named list mixing an integer vector, a single string and a data frame.
stuff.named <- list(
  numbers = 66:101,
  word = "second",
  dat = mtcars
)
stuff.named

# length() of each element; for the data frame this is its number of columns
lapply(stuff.named, length)


# Two ways of turning the row names of mtcars into a regular column.
# 1) build a new data frame with the extra column in one call
mtcars.w.names <- data.frame(mtcars, cars = rownames(mtcars))

# 2) copy the data frame, then add the column by assignment
mtcars.w.names2 <- mtcars
mtcars.w.names2[["cars"]] <- rownames(mtcars.w.names2)
mtcars.w.names2[["cars"]]





spp <- levels(iris$Species)

# One list slot per species. Each slot receives a one-row data frame, and the
# rows are bound together once after the loop. Building each row with c()
# would mix the species name with the numbers and silently turn every
# statistic into a character string; a data frame keeps the columns numeric.
spp.rows <- vector("list", length(spp))

for(i in seq_along(spp)){
  # subset by species name and isolate Petal.Width
  species.petal.w <- subset(iris, Species == spp[i])$Petal.Width

  # one row holding all the sought after values for this species
  spp.rows[[i]] <- data.frame(Species = spp[i],
                              min = min(species.petal.w),
                              max = max(species.petal.w),
                              range = diff(range(species.petal.w)),
                              median = median(species.petal.w))

  # move on to next species
}

# stack the per-species rows into a single data frame
spp.vals <- do.call(rbind, spp.rows)
spp.vals





# group_by(), summarise() and the later %>% / select() / filter() / do() calls
# come from dplyr, which is not attached anywhere earlier in this script, so
# load it here at its first use.
library(dplyr)

# mark the rows of iris as belonging to one group per species
iris.grouped <- group_by(iris, Species)

# one output row per species, with the Petal.Width statistics as columns
spp.vals <- summarise(iris.grouped,
                      min = min(Petal.Width),
                      max = max(Petal.Width),
                      range = diff(range(Petal.Width)),
                      median = median(Petal.Width))
spp.vals
# summarise() returns a tibble; convert it to a plain data frame
as.data.frame(spp.vals)



summariseIris <- function(df){
  # Summarise the Petal.Width column of df.
  # Returns a one-row data frame with columns min, max, range (max - min)
  # and median.
  petal.w <- df$Petal.Width
  limits <- range(petal.w)
  data.frame(min = min(petal.w),
             max = max(petal.w),
             range = diff(limits),
             median = median(petal.w))
}

# summary over the whole data set
summariseIris(iris)

# summary for a single species. The row filter has to be a comparison (`==`):
# written as `Species = "setosa"` it is passed to subset() as an unused named
# argument, no filtering happens, and the result equals the whole-data summary.
summariseIris(subset(iris, Species == "setosa"))


# do() runs summariseIris once per group of iris.grouped; `.` stands for the
# rows of the current group, and the per-group results are combined.
# NOTE(review): do() is superseded in current dplyr releases; kept as-is here.
spp.vals <- do(iris.grouped, summariseIris(.))

# equivalent to sticking all of these together into one object
# (one call per level of iris$Species; a misspelled species name would match
# no rows and give Inf / -Inf with warnings instead of a real summary)
summariseIris(subset(iris, Species == "setosa"))
summariseIris(subset(iris, Species == "versicolor"))
summariseIris(subset(iris, Species == "virginica"))




# Download an occurrence list from the Paleobiology Database. Per the query
# string: base_name = Cetacea, interval = Miocene, with the extra "loc" and
# "class" field blocks requested.
whales <- read.csv("https://paleobiodb.org/data1.2/occs/list.txt?base_name=Cetacea&interval=Miocene&show=loc,class")

whales

names(whales)

# define groups: one group per value of the county column
whales.county <- group_by(whales, county)
whales.county

# the distinct county values that make up those groups
unique(whales$county)

# try it on a test county: number of distinct genera recorded for "Kern"
test.county <- subset(whales, county == "Kern")
n_distinct(test.county$genus)

# run the actions over each one of our groups, then drop the tibble class
county.diversity <- as.data.frame(
  summarise(whales.county, county_div = n_distinct(genus))
)





# join the per-county genus count back onto the occurrence rows
merge(whales, county.diversity, by = "county")




# an ordinary function call ...
sqrt(c(4, 16))

# ... and the same call written with the pipe: the left-hand side becomes the
# first argument of the function on the right
c(4, 16) %>% sqrt()





# nested calls are read inside-out ...
sum(sqrt(c(4, 16)))

# ... while the piped version reads top to bottom, in the order of execution
c(4, 16) %>%
  sqrt() %>%
  sum()




mtcars

# Step-by-step version: every verb stores its result in a new object.
# 1) group by number of cylinders and transmission type
mtcars1 <- group_by(mtcars, cyl, am)
# 2) keep only the columns needed below
mtcars2 <- select(mtcars1, mpg, cyl, wt, am)
# 3) one row per group with the average mpg and average weight
mtcars3 <- summarise(
  mtcars2,
  avgmpg = mean(mpg),
  avgwt = mean(wt)
)
# 4) keep the groups whose average mpg exceeds 20
mtcars4 <- filter(mtcars3, avgmpg > 20)
# 5) convert the result to a plain data frame
mtcars.final <- as.data.frame(mtcars4)

mtcars.final



# The same five steps as a single pipeline, with no intermediate objects.
mtcars.final <- mtcars %>%
  group_by(cyl, am) %>%
  select(mpg, cyl, wt, am) %>%
  summarise(
    avgmpg = mean(mpg),
    avgwt = mean(wt)
  ) %>%
  filter(avgmpg > 20) %>%
  as.data.frame()

mtcars.final


# recap of the whale example
# define groups
whales.county <- group_by(whales, county)

# run the actions over each one of our groups:
# count the distinct genera per county, then convert to a plain data frame
county.diversity <- as.data.frame(
  summarise(whales.county, county_div = n_distinct(genus))
)


# chain the above together so it starts from whales and ends with just the version of county.diversity you want
county.diversity <- whales %>%
  group_by(county) %>%
  summarise(county_div = n_distinct(genus)) %>%
  as.data.frame()
county.diversity


sepal.model <- function(sp){
  # Fit a linear regression of Sepal.Length on Sepal.Width using the rows of
  # sp, and return the fitted lm object.
  lm(formula = Sepal.Length ~ Sepal.Width, data = sp)
}
# one model fitted to the whole data set
sepal.model(iris)

# Split iris by Species, fit sepal.model to each piece, and collect the fits
# in a list. dlply() and .() come from plyr, which this script does not
# attach; they are called through the namespace because attaching plyr after
# dplyr would mask dplyr verbs such as summarise().
model.per.sp <- plyr::dlply(iris, plyr::.(Species), sepal.model)

model.per.sp


library(ggplot2)

# uptake against concentration, one colour per plant
ggplot(CO2, aes(x = conc, y = uptake)) +
  geom_point(aes(colour = Plant))

# additionally map Type to point shape, and draw one smoothed trend per Type
# (distinguished by line type)
ggplot(CO2, aes(x = conc, y = uptake)) +
  geom_point(aes(colour = Plant, shape = Type)) +
  geom_smooth(aes(linetype = Type))



# the first plot again, split into one panel per Type
ggplot(CO2, aes(x = conc, y = uptake)) +
  geom_point(aes(colour = Plant)) +
  facet_wrap(~ Type)