Skip to content
This repository has been archived by the owner on Jun 1, 2023. It is now read-only.

Add delaware SNTemp/PGDL/obs time series plots to 8_viz #61

Open
jordansread opened this issue Aug 13, 2020 · 2 comments
Open

Add delaware SNTemp/PGDL/obs time series plots to 8_viz #61

jordansread opened this issue Aug 13, 2020 · 2 comments

Comments

@jordansread
Copy link
Member

I don't have this repo up to date and I think there is some work I need to do to make sure I don't have conflicts w/ return lines and task makefiles, so creating this issue right now as a placeholder.

Turn this into a visualization output that ties into the pipeline:

# assumes you have `compare` from Sam's script, the PGDL outputs, SNTemp outputs, and obs


viz_time_performance <- function(compare, start = "2011-01-20", end = "2012-07-27", reach, fileout){
  png(fileout, width = 12.9, height = 7.5, units = 'in', res = 250) # not quite to the edge
  
  #layout(matrix(c(1,1,1,2),byrow = TRUE))
         
  par(omi = c(0.1,0,0.3,0), mai = c(0.6,1,0,0), las = 1, mgp = c(3.3,0.8,0))
  
  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(-0.5,30), xlab = "",
       ylab = 'Water temperature (°C)', axes = FALSE, xaxs = 'i', yaxs = 'i', cex.lab = 2)
  
  axis(2, at = seq(-50,30, by = 5), las = 1, tck = -0.01, cex.axis = 1.5, lwd = 1.5)
  
  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'month'), to = lubridate::ceiling_date(as.Date(end), unit = 'month'), 'month')
  
  
  axis(1, date_ticks, labels = NA, tck = -0.01, lwd = 1.5)
  par(mgp = c(3.3,0.2,0))
  axis(1, date_ticks+15, labels = format(date_ticks, '%b'), tck = NA, lwd = NA, cex.axis = 1.1)
  
  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'year'), to = lubridate::ceiling_date(as.Date(end), unit = 'year'), 'year')
  
  axis(1, date_ticks, labels = NA, tck = -0.05, lwd = 1.5)
  par(mgp = c(3.3,2,0))
  axis(1, date_ticks+365/2, labels = format(date_ticks, '%Y'), cex.axis = 2, tck = NA, lwd = NA)
  

  x_mod <- compare %>% filter(seg_id_nat == reach) %>% pull(date)
  y_SN <- compare %>% filter(seg_id_nat == reach) %>% pull(sntemp)
  y_PG <- compare %>% filter(seg_id_nat == reach) %>% pull(pgrnn_all)
  
  x_obs <- obs %>% filter(seg_id_nat == reach) %>% pull(date)
  y_obs <- obs %>% filter(seg_id_nat == reach) %>% pull(temp_c)
  
  
  
  points(x_mod, y_SN, pch = 20, col = '#1b9e7766', type = "l", lwd = 3, cex = 0.3) # 40% alpha 66
  
  points(x_mod, y_PG, pch = 20, col = '#7570b3', type = "l", lwd = 3, cex = 0.3)
  
  points(x_obs, y_obs, pch = 20, col = 'black')

  dev.off()
}

viz_sample_time <- function(compare, fileout){
  # sort obs in order of samples
  
  png("8_visualize/out/DRB_samples.png", width = 13.33, height = 7.5, units = 'in', res = 250)
  
  names <- c(LETTERS, paste(LETTERS, LETTERS, sep = ''))
  
  #layout(matrix(c(1,1,1,2),byrow = TRUE))
  model_obs <- obs %>% filter(date <= as.Date('2016-09-30'), date >= as.Date('1980-10-01'))
  par(omi = c(0,0,0,0), mai = c(0.3,0,0,0), las = 1, mgp = c(3.3,-0.2,0))
  rank <- model_obs %>% group_by(seg_id_nat) %>% tally %>% arrange(desc(n))
  start <- '1980-10-01'
  end <- '2017-12-30'
  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(0,1), xlab = "",
       ylab = '', axes = FALSE, xaxs = 'i', yaxs = 'i')
  
  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'year'), to = lubridate::floor_date(as.Date(end), unit = 'year'), 'year') 
  axis(1, date_ticks, labels = NA, tck = -0.01, lwd = 1.5)
  tcks <- date_ticks[seq(1, to = length(date_ticks), by = 2)]
  axis(1, tcks+365/2, labels = format(tcks, '%Y'), cex.axis = 0.7, tck = NA, lwd = NA)
  tcks <- date_ticks[seq(2, to = length(date_ticks), by = 2)] %>% head(-1) # hack to get rid of 2017
  axis(1, tcks+365/2, labels = format(tcks, '%Y'), cex.axis = 0.7, tck = NA, lwd = NA)
  
  top <- 1
  height <- 1/(length(rank$seg_id_nat))

  for (site in rank$seg_id_nat){
    this_site <- filter(model_obs, seg_id_nat == site)
    for (date in this_site$date){
      rect(date - 0.75, ybottom = top-height, ytop = top, xright = date+0.75, 
           col = 'black',  border = NA) #scales::alpha('lightblue', 0.5),
    }
    
    # labels <- filter(model_obs, seg_id_nat == site) %>% pull(subseg_id) %>% head(1) %>% 
    #   stringr::str_remove('_1') %>% paste0('s', .)
    text(x = as.Date(end)-200, y = top+height/6, labels = names[1], pos = 1, col = 'black', cex = 1.2)
    top <- top - height
    names <- tail(names, -1)
  }
  
  dev.off()
}


viz_sample_time <- function(compare, fileout){
  # sort obs in order of samples
  
  png("8_visualize/out/DRB_samples_599_1-2033.png", width = 13.33, height = 7.5, units = 'in', res = 250)
  
  names <- c(LETTERS, paste(LETTERS, LETTERS, sep = ''))
  
  #layout(matrix(c(1,1,1,2),byrow = TRUE))
  model_obs <- obs %>% filter(date <= as.Date('2016-09-30'), date >= as.Date('1980-10-01'))
  par(omi = c(0,0,0,0), mai = c(0.3,0,0,0), las = 1, mgp = c(3.3,-0.2,0))
  rank <- model_obs %>% group_by(seg_id_nat) %>% tally %>% arrange(desc(n))
  start <- '1980-10-01'
  end <- '2017-12-30'
  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(0,1), xlab = "",
       ylab = '', axes = FALSE, xaxs = 'i', yaxs = 'i')
  
  #axis(2, at = seq(-50,30, by = 5), las = 1, tck = -0.01, cex.axis = 1.5, lwd = 1.5)
  
  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'year'), to = lubridate::floor_date(as.Date(end), unit = 'year'), 'year') 
  axis(1, date_ticks, labels = NA, tck = -0.01, lwd = 1.5)
  tcks <- date_ticks[seq(1, to = length(date_ticks), by = 2)]
  axis(1, tcks+365/2, labels = format(tcks, '%Y'), cex.axis = 0.7, tck = NA, lwd = NA)
  tcks <- date_ticks[seq(2, to = length(date_ticks), by = 2)] %>% head(-1) # hack to get rid of 2017
  axis(1, tcks+365/2, labels = format(tcks, '%Y'), cex.axis = 0.7, tck = NA, lwd = NA)
  
  top <- 1
  height <- 1/(length(rank$seg_id_nat))
  
  for (site in rank$seg_id_nat){
    this_site <- filter(model_obs, seg_id_nat == site)
    if (site == '2033'){
      for (date in this_site$date){
        rect(date - 0.75, ybottom = top-height, ytop = top, xright = date+0.75, 
             col = 'black',  border = NA) #scales::alpha('lightblue', 0.5),
      }
      
      # labels <- filter(model_obs, seg_id_nat == site) %>% pull(subseg_id) %>% head(1) %>% 
      #   stringr::str_remove('_1') %>% paste0('s', .)
      # 
      text(x = as.Date(end)-200, y = top+height/6, labels = names[1], pos = 1, cex = 1.2)
    } 
    top <- top - height
    names <- tail(names, -1)
  }
  
  dev.off()
}


viz_sample_time <- function(compare, fileout){
  # sort obs in order of samples
  
  png("8_visualize/out/DRB_samples_only_border.png", width = 13.33, height = 7.5, units = 'in', res = 250)
  
  names <- c(LETTERS, paste(LETTERS, LETTERS, sep = ''))
  
  #layout(matrix(c(1,1,1,2),byrow = TRUE))
  model_obs <- obs %>% filter(date <= as.Date('2016-09-30'), date >= as.Date('1980-10-01'))
  par(omi = c(0,0,0,0), mai = c(0.3,0,0,0), las = 1, mgp = c(3.3,-0.2,0))
  rank <- model_obs %>% group_by(seg_id_nat) %>% tally %>% arrange(desc(n))
  start <- '1980-10-01'
  end <- '2017-12-30'
  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(0,1), xlab = "",
       ylab = '', axes = FALSE, xaxs = 'i', yaxs = 'i')
  
  #axis(2, at = seq(-50,30, by = 5), las = 1, tck = -0.01, cex.axis = 1.5, lwd = 1.5)
  
  top <- 1
  height <- 1/(length(rank$seg_id_nat))
  
  for (site in rank$seg_id_nat){
    text(x = as.Date(end)-200, y = top+height/6, labels = names[1], pos = 1, col = 'black', cex = 1.2)
    top <- top - height
    names <- tail(names, -1)
  }
  
  dev.off()
}


# need test, train, predict data

get_WRR_data <- function(depths = c(0, 10, 18), exper_n = 2){
  
  library(dplyr)
  library(readr)
  library(stringr)
  library(tidyr)
  library(sbtools) # see https://github.com/USGS-R/sbtools
  
  test_data <- item_file_download('5d925066e4b0c4f70d0d0599', names = 'me_test.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv(col_types = 'Ddddc') %>% filter(exper_type == 'season') %>% 
    group_by(date, depth) %>% summarize(temp = mean(temp)) %>% mutate(test = TRUE)
  
  train_data <- item_file_download('5d8a837fe4b0c4f70d0ae8ac', names = 'me_season_training.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv(col_types = 'Ddddc') %>% 
    group_by(date, depth) %>% summarize(temp = mean(temp)) %>% mutate(test = FALSE)
  
  pgdl_data <- item_file_download('5d915cb2e4b0c4f70d0ce523', names = 'me_season_predict_pgdl.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv() %>% pivot_longer(cols = c(-date, -exper_id, -exper_n), names_prefix = 'temp_', names_to = 'depth', values_to = 'pgdl') %>% 
    mutate(depth = as.numeric(depth))
  
  dl_data <- item_file_download('5d915cb2e4b0c4f70d0ce523', names = 'me_season_predict_dl.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv() %>% pivot_longer(cols = c(-date, -exper_id, -exper_n), names_prefix = 'temp_', names_to = 'depth', values_to = 'dl') %>% 
    mutate(depth = as.numeric(depth))
  
  pb_data <- item_file_download('5d915cb2e4b0c4f70d0ce523', names = 'me_season_predict_pb.csv', destinations = tempfile(fileext = '.csv'), overwrite_file = TRUE) %>% 
    readr::read_csv() %>% pivot_longer(cols = c(-date, -exper_id, -exper_n), names_prefix = 'temp_', names_to = 'depth', values_to = 'pb') %>% 
    mutate(depth = as.numeric(depth))
  
  # combine all test and train data, flatten and remove duplicates:
  observed <- rbind(test_data, train_data) %>% filter(depth %in% depths) %>% rename(observed = temp)
  modeled <- inner_join(pgdl_data, y = dl_data, by = c('date','exper_n','exper_id','depth')) %>% 
    inner_join(pb_data, by = c('date','exper_n','exper_id','depth')) %>% 
    filter(depth %in% depths, exper_n == !!exper_n) %>% select(-exper_n, -exper_id)
  
  return(left_join(modeled, observed, by = c('date','depth')))
}


viz_wrr_oobounds <- function(wrr_data, start = "2011-01-20", end = "2012-07-27", fileout = '~/Downloads/wrr_oobounds.png', type = c('obs','pgdl','pb','dl')){
  type <- match.arg(type)
  
  text_key <- c(pb = "Process-based", dl = 'Deep learning', pgdl = 'Process-guided DL')
  
  wrr_data <- mutate(wrr_data, col = case_when(
    depth == 0 ~ "#FDE725FF",
    depth == 10 ~ "#21908CFF",
    depth == 18 ~ "#440154FF"
  ))
  png(fileout, width = 13.33, height = 7.5, units = 'in', res = 250, bg = NA) # not quite to the edge
  
  par(omi = c(0,0,0.2,0.1), mai = c(0.8,1,0.2,0), las = 1, mgp = c(3,0.8,0))
  
  plot(NA, NA, xlim = as.Date(c(start, end)), ylim = c(-0.5,37), xlab = "",
       ylab = 'Water temperature (°C)', axes = FALSE, xaxs = 'i', yaxs = 'i', cex.lab = 2)
  
  axis(2, at = seq(-50,30, by = 5), las = 1, tck = -0.01, cex.axis = 1.5, lwd = 1.5)
  
  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'month'), to = lubridate::ceiling_date(as.Date(end), unit = 'month'), 'month')
  
  
  axis(1, date_ticks, labels = NA, tck = -0.01, lwd = 1.5)
  par(mgp = c(3,0.4,0))
  axis(1, date_ticks+15, labels = format(date_ticks, '%b'), tck = NA, lwd = NA, cex.axis = 1.2)
  
  date_ticks <- seq(lubridate::floor_date(as.Date(start), unit = 'year'), to = lubridate::ceiling_date(as.Date(end), unit = 'year'), 'year')
  
  axis(1, date_ticks, labels = NA, tck = -0.05, lwd = 1.5)
  par(mgp = c(3,2.1,0))
  axis(1, date_ticks+365/2, labels = format(date_ticks, '%Y'), cex.axis = 1.5, tck = NA, lwd = NA)
    
  x1 <- as.Date(start)+9
  y1 <- 31.5
  
  if (type == 'obs'){
    points(wrr_data$date, wrr_data$observed, pch = 16, col = wrr_data$col)
    
    txt <- c('0m','10m','18m')
    for (i in 1:3){
      points(x1, y1-i*1.1+1, pch = 16, col = wrr_data$col[i])
      text(x1, y = y1-i*1.1+1-0.1, pos = 4, txt[i], cex = 1.4, offset = 0.7)
    }
    text(x1+30, y1-1.4, 'Observed', cex = 1.5, pos = 4)
  
  } else {
    
    # #plot the other models in grey
    # for (mod in c('pb','dl','pgdl')){
    #   if (mod == type) break
    #   points(wrr_data$date, wrr_data[[mod]], pch = 16, col = "#99999966", type ='p', cex = 0.7)
    # }
    #plot obs
    points(wrr_data$date, wrr_data$observed, pch = 16, col = paste0(substring(wrr_data$col, first = 1, last = 7), '66'))
    
    x_txt <- mean(as.Date(c(start, end)))+30
    for (depth in unique(wrr_data$depth)){
      this_data <- filter(wrr_data, depth == !!depth)
      points(this_data$date, this_data[[type]], pch = 16, col = this_data$col, type ='l', lwd = 3, cex = 0.3) # 40% alpha 66
      x0 <- as.Date(start)+220
      y0 <- filter(this_data, date == !!x0)[[type]]
      
      segments(x0 = x0, x1 = x_txt, y0 = y0, y1 = 21, col = this_data$col)
    }
    
    text(x_txt, 21, text_key[[type]], cex = 2, offset = 0.6, pos = 4)
    
    txt <- c('0m','10m','18m')
    for (i in 1:3){
      points(x1, y1-i*1.1+1, pch = 16, col =  paste0(substring(wrr_data$col[i], first = 1, last = 7), '66'))
      text(x1, y = y1-i*1.1+1-0.1, pos = 4, txt[i], cex = 1.4, offset = 0.7)
    }
    text(x1+30, y1-1.4, 'Observed', cex = 1.5, pos = 4)
    
    y1 <- y1-4
    txt <- c('0m','10m','18m')
    for (i in 1:3){
      points(c(x1-2, x1+2), c(y1-i*1.1+1, y1-i*1.1+1), pch = 16, col = wrr_data$col[i], type ='o', lwd = 3, cex = 0.3)
      text(x1, y = y1-i*1.1+1-0.1, pos = 4, txt[i], cex = 1.4, offset = 0.7)
    }
    text(x1+30, y1-1.4, 'Modeled', cex = 1.5, pos = 4)
  }
  dev.off()
}

see compare file

@aappling-usgs
Copy link
Member

Looks like a useful starting point 👍 . Is the get_WRR_data function above actually used for the plotting? This is for stream predictions, right?

@jordansread
Copy link
Member Author

That was for another figure given in the same talk. get_WRR_data() and viz_wrr_oobounds() aren't part of DRB and shouldn't be added. Good catch

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants