Skip to content

Commit

Permalink
functions work with all sites from database
Browse files Browse the repository at this point in the history
  • Loading branch information
fawda123 committed Nov 9, 2018
1 parent c48a26e commit ae76718
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 191 deletions.
3 changes: 2 additions & 1 deletion R/chkinp.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ chkinp <- function(data, purge = FALSE, msgs = FALSE){
algae <- c('Microalgae Thickness', 'Macrophyte Cover', 'Macroalgae Cover, Attached', 'Macroalgae Cover, Unattached')
channelsinuosity <- c('Slope', 'Length, Segment', 'Elevation Difference', 'Bearing', 'Proportion', 'Length, Reach')
densiometer <- c('Canopy Cover')
sels <- c(algae, channelsinuosity, densiometer)
ripveg <- c('Riparian GroundCover Barren', 'Riparian GroundCover NonWoody Plants', 'Riparian GroundCover Woody Shrubs', 'Riparian Lower Canopy All Vegetation', 'Riparian Upper Canopy All Trees', 'Riparian Lower Canopy All Vegetation', 'Riparian Upper Canopy All Trees', 'Riparian GroundCover Woody Shrubs', 'Riparian GroundCover NonWoody Plants')
sels <- c(algae, channelsinuosity, densiometer, ripveg)

# see if duplicate id, locationcode, analytename
chk <- data %>%
Expand Down
4 changes: 2 additions & 2 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @examples
#' misc(sampdat)
misc <- function(data){

data <- data[which(data$AnalyteName %in% c('Riffle/Run Channel Alteration', 'Riffle/Run Epifaunal Substrate', 'Riffle/Run Sediment Deposition', 'Dominant Land Use', 'Evidence of Fire', 'Evidence of Recent Rainfall')),]
###Report###
data$VariableResult[data$ResQualCode=="NR"] <- NA
Expand Down Expand Up @@ -43,7 +43,7 @@ misc <- function(data){
dplyr::full_join(RBP_SED, by = 'id') %>%
as.data.frame(stringsAsFactors = F)

rownames(misc_metrics) <- gsub('\\s.*$', '', misc_metrics$id)
rownames(misc_metrics) <- misc_metrics$id
misc_metrics <- misc_metrics[, !names(misc_metrics) %in% 'id']

colnames(misc_metrics)<- c("NFC_DLU.result", "NFC_EFR.result", "NFC_ERN.result",
Expand Down
1 change: 1 addition & 0 deletions R/phabmetrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' }
phabmetrics <- function(data){
data <- phabformat(data)
data <- chkinp(data, purge = TRUE)
metrics <- list(bankmorph(data), channelmorph(data), channelsinuosity(data),
densiometer(data), habitat(data), disturbance(data), flow(data),
misc(data), bankstability(data), quality(data), ripveg(data),
Expand Down
94 changes: 54 additions & 40 deletions R/ripveg.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,19 @@ ripveg <- function(data){
barren <- data.frame(cbind(data$id[barrenindex], as.character(data$AnalyteName[barrenindex]),
as.character(data$VariableResult[barrenindex])))
colnames(barren) <- c("id", "AnalyteName", "result")
barren$result <- as.numeric(as.character(barren$result))
barren$result <- as.character(barren$result)

###Compute XGB###

for(i in (1:length(barren$result))[which(!is.na(barren$result))]){
if(barren$result[i] == 1){barren$result[i] <- 5} else
if(barren$result[i] == 2){barren$result[i] <- 25} else
if(barren$result[i] == 3){barren$result[i] <- 57.5} else
if(barren$result[i] == 4){barren$result[i] <- 87.5}
}

barren$result <- dplyr::case_when(
barren$result == '0' ~ 0,
barren$result == '1' ~ 5,
barren$result == '2' ~ 25,
barren$result == '3' ~ 57.5,
barren$result == '4' ~ 87.5,
TRUE ~ NaN
)

sumna <- function(data){
sum(data, na.rm = T)
}
Expand All @@ -45,15 +48,18 @@ ripveg <- function(data){
nonwoody <- data.frame(cbind(data$id[nonwoodyindex], as.character(data$AnalyteName[nonwoodyindex]),
as.character(data$VariableResult[nonwoodyindex])))
colnames(nonwoody) <- c("id", "AnalyteName", "result")
nonwoody$result <- as.numeric(as.character(nonwoody$result))
nonwoody$result <- as.character(nonwoody$result)

###Compute XGH###
for(i in (1:length(nonwoody$result))[which(!is.na(nonwoody$result))]){
if(nonwoody$result[i] == 1){nonwoody$result[i] <- 5} else
if(nonwoody$result[i] == 2){nonwoody$result[i] <- 25} else
if(nonwoody$result[i] == 3){nonwoody$result[i] <- 57.5} else
if(nonwoody$result[i] == 4){nonwoody$result[i] <- 87.5}
}
nonwoody$result <- dplyr::case_when(
nonwoody$result == '0' ~ 0,
nonwoody$result == '1' ~ 5,
nonwoody$result == '2' ~ 25,
nonwoody$result == '3' ~ 57.5,
nonwoody$result == '4' ~ 87.5,
TRUE ~ NaN
)

sumna <- function(data){
sum(data, na.rm = T)
}
Expand All @@ -78,16 +84,18 @@ ripveg <- function(data){

woody$result[woody$result %in% 'Not Recorded'] <- NA
woody$result <- as.numeric(as.character(woody$result))
head(woody)

###Compute XGW###

for(i in (1:length(woody$result))[which(!is.na(woody$result))]){
if(woody$result[i] == 1){woody$result[i] <- 5} else
if(woody$result[i] == 2){woody$result[i] <- 25} else
if(woody$result[i] == 3){woody$result[i] <- 57.5} else
if(woody$result[i] == 4){woody$result[i] <- 87.5}
}
woody$result <- dplyr::case_when(
woody$result == '0' ~ 0,
woody$result == '1' ~ 5,
woody$result == '2' ~ 25,
woody$result == '3' ~ 57.5,
woody$result == '4' ~ 87.5,
TRUE ~ NaN
)

sumna <- function(data){
sum(data, na.rm = T)
}
Expand All @@ -109,17 +117,19 @@ ripveg <- function(data){
lowercanopy <- data.frame(cbind(data$id[lowercanopyindex], as.character(data$AnalyteName[lowercanopyindex]),
as.character(data$VariableResult[lowercanopyindex])))
colnames(lowercanopy) <- c("id", "AnalyteName", "result")
lowercanopy$result <- as.numeric(as.character(lowercanopy$result))
head(lowercanopy)
lowercanopy$result <- as.character(lowercanopy$result)

###Compute XM###

for(i in (1:length(lowercanopy$result))[which(!is.na(lowercanopy$result))]){
if(lowercanopy$result[i] == 1){lowercanopy$result[i] <- 5} else
if(lowercanopy$result[i] == 2){lowercanopy$result[i] <- 25} else
if(lowercanopy$result[i] == 3){lowercanopy$result[i] <- 57.5} else
if(lowercanopy$result[i] == 4){lowercanopy$result[i] <- 87.5}
}
lowercanopy$result <- dplyr::case_when(
lowercanopy$result == '0' ~ 0,
lowercanopy$result == '1' ~ 5,
lowercanopy$result == '2' ~ 25,
lowercanopy$result == '3' ~ 57.5,
lowercanopy$result == '4' ~ 87.5,
TRUE ~ NaN
)

sumna <- function(data){
sum(data, na.rm = T)
}
Expand All @@ -142,17 +152,19 @@ ripveg <- function(data){
as.character(data$VariableResult[uppercanopyindex])))
colnames(uppercanopy) <- c("id", "AnalyteName", "result")

uppercanopy$result <- as.numeric(as.character(uppercanopy$result))
head(uppercanopy)
uppercanopy$result <- as.character(uppercanopy$result)

###Compute XC###

for(i in (1:length(uppercanopy$result))[which(!is.na(uppercanopy$result))]){
if(uppercanopy$result[i] == 1){uppercanopy$result[i] <- 5} else
if(uppercanopy$result[i] == 2){uppercanopy$result[i] <- 25} else
if(uppercanopy$result[i] == 3){uppercanopy$result[i] <- 57.5} else
if(uppercanopy$result[i] == 4){uppercanopy$result[i] <- 87.5}
}
uppercanopy$result <- dplyr::case_when(
uppercanopy$result == '0' ~ 0,
uppercanopy$result == '1' ~ 5,
uppercanopy$result == '2' ~ 25,
uppercanopy$result == '3' ~ 57.5,
uppercanopy$result == '4' ~ 87.5,
TRUE ~ NaN
)

sumna <- function(data){
sum(data, na.rm = T)
}
Expand Down Expand Up @@ -185,6 +197,7 @@ ripveg <- function(data){
lowercanopy <- data.frame(cbind(data$id[lowercanopyindex], as.character(data$AnalyteName[lowercanopyindex]),
as.character(data$VariableResult[lowercanopyindex])))
colnames(lowercanopy) <- c("id", "AnalyteName", "result")
lowercanopy$result[lowercanopy$result %in% 'Not Recorded'] <- NA
lowercanopy$result <- as.numeric(as.character(lowercanopy$result))

XPMID_total <- tapply(lowercanopy$result, lowercanopy$id, lengthna)
Expand All @@ -202,8 +215,8 @@ ripveg <- function(data){
uppercanopy <- data.frame(cbind(data$id[uppercanopyindex], as.character(data$AnalyteName[uppercanopyindex]),
as.character(data$VariableResult[uppercanopyindex])))
colnames(uppercanopy) <- c("id", "AnalyteName", "result")
uppercanopy$result[uppercanopy$result %in% 'Not Recorded'] <- NA
uppercanopy$result <- as.numeric(as.character(uppercanopy$result))
head(uppercanopy)

XPCAN_total <- tapply(uppercanopy$result, uppercanopy$id, lengthna)

Expand All @@ -213,7 +226,7 @@ ripveg <- function(data){
}
XPCAN_subcount <- tapply(uppercanopy$result, uppercanopy$id, XPCAN_subcountf)
XPCAN.result <- XPCAN_subcount/XPCAN_total

###Compute XPGVEG###
woodyindex <- which(data$AnalyteName == "Riparian GroundCover Woody Shrubs")
woody <- data.frame(cbind(data$id[woodyindex], as.character(data$AnalyteName[woodyindex]),
Expand All @@ -226,6 +239,7 @@ ripveg <- function(data){
nonwoody <- data.frame(cbind(data$id[nonwoodyindex], as.character(data$AnalyteName[nonwoodyindex]),
as.character(data$VariableResult[nonwoodyindex])))
colnames(nonwoody) <- c("id", "AnalyteName", "result")
nonwoody$result[nonwoody$result %in% 'Not Recorded'] <- NA
nonwoody$result <- as.numeric(as.character(nonwoody$result))
head(nonwoody)
woody$XPGVEG<-apply((cbind(woody$result, nonwoody$result)), 1, sum, na.rm=T)
Expand Down
22 changes: 16 additions & 6 deletions R/substrate.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,21 @@
#'
#' @export
#'
#' @importFrom magrittr "%>%"
#'
#' @examples
#' substrate(sampdat)
substrate <- function(data){
data <- data[which(data$AnalyteName %in% c('Substrate Size Class', 'Embeddedness', 'CPOM')),]

data <- data %>%
dplyr::select(id, LocationCode, AnalyteName, VariableResult, Result) %>%
unique %>%
tidyr::complete(id, LocationCode, AnalyteName) %>%
dplyr::group_by(id) %>%
dplyr::filter(!all(is.na(VariableResult) & !all(is.na(Result)))) %>%
dplyr::ungroup()

data$VariableResult <- as.character(data$VariableResult)
data$VariableResult[data$VariableResult=="a"]<-"SA"
data$VariableResult[data$VariableResult=="as"]<-"SA"
Expand All @@ -33,12 +44,11 @@ substrate <- function(data){
###Compute

metric <- c('RS', 'RR', 'RC', 'XB', 'SB', 'CB', 'GC', 'GF', 'SA', 'FN', 'HP', 'WD', 'OT')

sub$VariableResult <- lapply(sub$VariableResult, toupper)

lengths <- function(data){
length(which(((data != "NOT RECORDED") &(data != "NA"))&(data != "FNOT RECORDED")))}
totals <- tapply(sub$VariableResult, sub$id, lengths)
totals <- tapply(unlist(sub$VariableResult), sub$id, lengths)
tnames <- as.vector(dimnames(totals))
qq <-unlist(tnames)
l <- matrix(NA, ncol=length(metric), nrow=length(totals))
Expand Down Expand Up @@ -140,13 +150,13 @@ substrate <- function(data){
result$XEMBED.result <- XEMBED_sum/XEMBED_count
result$XEMBED.count <- XEMBED_count
result$XEMBED.sd <- tapply(embed$Result, embed$id, sdna)

cpom <- data[data$AnalyteName=="CPOM",]
present <- function(data){
sum(data == "Present")
sum(data == "Present", na.rm = TRUE)
}
cpomtotal <- function(data){
sum((data == "Present") | (data == "Absent"))
sum((data == "Present") | (data == "Absent"), na.rm = TRUE)
}
cpresent <- tapply(cpom$VariableResult, cpom$id, present)
ctotal <- tapply(cpom$VariableResult, cpom$id, cpomtotal)
Expand Down
17 changes: 14 additions & 3 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,25 @@ phabmetrics(sampdat)

# Required data checks

* Every unique `id`, `LocationCode`, `AnalyteName`, and `VariableResult` combination should have only one entry in `VariableResult` for `AnalyteName %in% c('Microalgae Thickness', 'Macrophyte Cover', 'Macroalgae Cover, Attached', 'Macroalgae Cover, Unattached')`. The `algae.R` function will remove duplicate entries but a checker should be built that verifies a unique value can be determined.
* For every function, make sure there are no duplicate or conflicting values for every unique combination of `id`, `LocationCode`, `AnalyteName`, and `VariableResult` (or `Result`). This should be specific to the metric classes just to be safe. For example, every combination should have only one entry in `VariableResult` for `AnalyteName %in% c('Microalgae Thickness', 'Macrophyte Cover', 'Macroalgae Cover, Attached', 'Macroalgae Cover, Unattached')` for the algae metrics. The `algae.R` function will remove duplicate entries but a checker should be built that verifies a unique value can be determined.

* Required column names, see those in `sampdat`.

* Check for required values in `AnalyteName` (note that `chkinp()` can check of the columns exist but we'll need a checker on data input to check for these and only these):
* `c('Microalgae Thickness', 'Macrophyte Cover', 'Macroalgae Cover, Attached', 'Macroalgae Cover, Unattached')` for `algae()`
* `c('Bankfull Height', 'Bankfull Width', 'StationWaterDepth', 'Wetted Width')` for `bankmorph()`
* `c(Slope', 'Length, Segment', 'Elevation Difference', 'Bearing', 'Proportion', 'Length, Reach')` for `channelsinuosity()`
* `c('Cascade/Falls', 'Dry', 'Glide', 'Pool', 'Rapid', 'Riffle', 'Run'))` for `channelmorph()`
* `c(Slope', 'Length, Segment', 'Elevation Difference', 'Bearing', 'Proportion', 'Length, Reach')` for `channelsinuosity()`
* `c('Canopy Cover')` for `densiometer()`
* `c('Distance from Bank', 'StationWaterDepth', 'Velocity', 'Distance, Float', 'Float Time', 'Wetted Width')` for `flow()`
* `c('Fish Cover Macrophytes', 'Fish Cover Artificial Structures', 'Fish Cover Boulders', 'Fish Cover Filamentous Algae', 'Fish Cover Woody Debris >0.3 m', 'Fish Cover Live Trees/Roots', 'Fish Cover Overhang.Veg', 'Fish Cover Woody Debris <0.3 m', 'Fish Cover Undercut Banks')` for `habitat()`
* `c('Riparian Bridges/Abutments', 'Riparian Buildings', 'Riparian Landfill/Trash', 'Riparian Logging', 'Riparian Mining', 'Riparian Orchards/Vineyards', 'Riparian Park/Lawn', 'Riparian Pasture/Range', 'Riparian Pavement', 'Riparian Pipes', 'Riparian Road', 'Riparian Row Crops', 'Riparian Vegetation Management', 'Riparian Wall/Dike')` for `disturbance()`
* `c('Riffle/Run Channel Alteration', 'Riffle/Run Epifaunal Substrate', 'Riffle/Run Sediment Deposition', 'Dominant Land Use', 'Evidence of Fire', 'Evidence of Recent Rainfall')` for `misc()`
* `c('Bank Stability')` for `bankstability()`
* `c("Alkalinity as CaCO3", "Oxygen, Dissolved", "pH", "Salinity", "SpecificConductivity", "Temperature", "Turbidity")` for `quality()`
* `c('Riparian GroundCover Barren', 'Riparian GroundCover NonWoody Plants', 'Riparian GroundCover Woody Shrubs', 'Riparian Lower Canopy All Vegetation', 'Riparian Upper Canopy All Trees', 'Riparian Lower Canopy All Vegetation', 'Riparian Upper Canopy All Trees', 'Riparian GroundCover Woody Shrubs', 'Riparian GroundCover NonWoody Plants')` for `ripveg()`
* `c('Substrate Size Class', 'Embeddedness', 'CPOM')` for `substrate()`

* Check for required values in `LocationCode` (note that `chkinp()` can check of the columns exist but we'll need a checker on data input to check for these and only these)
* Check for required values in `LocationCode` (note that `chkinp()` can check of the columns exist but we'll need a checker on data input to check for these and only these)

* Maybe we need to add a checker to make sure all values in each field are present but with appropriate NA values for `Result`, `VariableResult`, this can be done with `tidyr::complete()` but may be unnecessary since this will increase data volume
Loading

0 comments on commit ae76718

Please sign in to comment.