Skip to content

Commit

Permalink
filterProtein for Scaffold
Browse files Browse the repository at this point in the history
  • Loading branch information
Carol-seven committed May 15, 2024
1 parent 689e502 commit b09140d
Show file tree
Hide file tree
Showing 20 changed files with 80 additions and 65 deletions.
40 changes: 28 additions & 12 deletions R/filterings.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,15 +209,26 @@ filterOutIn <- function(dataSet,
#' @param dataSet The 2d data set of experimental values.
#'
#' @param proteinInformation The name of the .csv file containing protein information data
#' (including the path to the file, if needed). The file should include the following 4
#' columns: "PG.Genes", "PG.ProteinAccessions", "PG.ProteinDescriptions", and
#' "PG.ProteinNames". This file is automatically generated by the function
#' \code{\link[msDiaLogue]{preprocessing}}.
#' (including the path to the file, if needed). The file should include the following
#' columns:
#' \itemize{
#' \item For Spectronaut: "PG.Genes", "PG.ProteinAccessions", "PG.ProteinDescriptions",
#' and "PG.ProteinNames".
#' \item For Scaffold: "ProteinDescriptions", "AccessionNumber", and "AlternateID".
#' }
#' This file is automatically generated by the function
#' \code{\link[msDiaLogue]{preprocessing}} or
#' \code{\link[msDiaLogue]{preprocessing_scaffold}}.
#'
#' @param text A character vector of text used as the key for selecting or removing.
#'
#' @param by A character string specifying the information to which the \code{text} filter
#' is applied, with allowable options: "gene", "accession" and "description".
#' is applied, with allowable options:
#' \itemize{
#' \item For Spectronaut: "PG.Genes", "PG.ProteinAccessions", "PG.ProteinDescriptions",
#' and "PG.ProteinNames".
#' \item For Scaffold: "ProteinDescriptions", "AccessionNumber", and "AlternateID".
#' }
#'
#' @param removeList A boolean (default = TRUE) specifying whether the list of proteins
#' should be removed or selected.
Expand All @@ -230,8 +241,9 @@ filterOutIn <- function(dataSet,
#' current working directory. This option only works when \code{removeList = TRUE}.
#'
#' @details
#' The function is an extension of the function \code{\link[msDiaLogue]{preprocessing}}
#' that allows for filtering proteins based on additional information.
#' This function extends \code{\link[msDiaLogue]{preprocessing}} or
#' \code{\link[msDiaLogue]{preprocessing_scaffold}} by allowing proteins to be filtered
#' based on additional information.
#'
#' @import dplyr
#' @importFrom utils read.csv write.csv
Expand All @@ -245,7 +257,9 @@ filterOutIn <- function(dataSet,
filterProtein <- function(dataSet,
proteinInformation = "preprocess_protein_information.csv",
text = c(),
by = c("gene", "accession", "description", "name"),
by = c("PG.Genes", "PG.ProteinAccessions",
"PG.ProteinDescriptions", "PG.ProteinNames",
"ProteinDescriptions", "AccessionNumber", "AlternateID"),
removeList = TRUE,
saveRm = TRUE) {

Expand All @@ -255,12 +269,14 @@ filterProtein <- function(dataSet,
filteredData <- dataSet %>%
select(-c("R.Condition", "R.Replicate"))

by <- paste0("PG.", if(by != "gene") {"Protein"},
toupper(substr(by, 1, 1)), substring(by, 2), "s")

index <- grep(paste(text, collapse = "|"), proteinInformation[[by]])

proteinName <- proteinInformation[index,]$PG.ProteinNames
if (by %in% c("PG.Genes", "PG.ProteinAccessions",
"PG.ProteinDescriptions", "PG.ProteinNames")) {
proteinName <- proteinInformation[index,]$PG.ProteinNames
} else {
proteinName <- proteinInformation[index,]$AccessionNumber
}

result <- dataSet %>%
select(any_of(c("R.Condition", "R.Replicate", proteinName)))
Expand Down
2 changes: 1 addition & 1 deletion R/globals.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ utils::globalVariables(c(
"R.Replicate", # <preprocessing>
"PG.Quantity", # <preprocessing>
".", # <preprocessing_scaffold>
"ProteinAccessions", # <preprocessing_scaffold>
"AccessionNumber", # <preprocessing_scaffold>
"ConditionReplicate", # <preprocessing_scaffold>
"R.Condition", # <preprocessing_scaffold>
"R.Replicate", # <preprocessing_scaffold>
Expand Down
20 changes: 10 additions & 10 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -241,13 +241,13 @@ preprocessing_scaffold <- function(fileName, dataSet = NULL) {
select(-"#") %>%
slice(-n()) %>%
rename(ProteinDescriptions = names(.)[3],
ProteinAccessions = "Accession Number",
Genes = "Alternate ID",
AccessionNumber = "Accession Number",
AlternateID = "Alternate ID",
MolecularWeight = "Molecular Weight",
ProteinGroupingAmbiguity = "Protein Grouping Ambiguity")

infoColName <- c("Visible?", "Starred?",
"ProteinDescriptions", "ProteinAccessions", "Genes",
"ProteinDescriptions", "AccessionNumber", "AlternateID",
"MolecularWeight", "ProteinGroupingAmbiguity")

proteinInformation <- dataSet %>%
Expand All @@ -259,22 +259,22 @@ preprocessing_scaffold <- function(fileName, dataSet = NULL) {

## select columns necessary for analysis
selectedData <- dataSet %>%
select(-infoColName[infoColName != "ProteinAccessions"]) %>%
mutate(across(-ProteinAccessions, ~as.numeric(replace(., . == "Missing Value", NA)))) %>%
pivot_longer(-ProteinAccessions, names_to = "ConditionReplicate", values_to = "Quantity") %>%
# gather(ConditionReplicate, Quantity, -ProteinAccessions) %>%
select(-infoColName[infoColName != "AccessionNumber"]) %>%
mutate(across(-AccessionNumber, ~as.numeric(replace(., . == "Missing Value", NA)))) %>%
pivot_longer(-AccessionNumber, names_to = "ConditionReplicate", values_to = "Quantity") %>%
# gather(ConditionReplicate, Quantity, -AccessionNumber) %>%
mutate(ConditionReplicate = sub(".+_(.+)", "\\1", ConditionReplicate)) %>%
mutate(R.Condition = sub("^(\\d+|[a-zA-Z0-9]+).*", "\\1", ConditionReplicate),
R.Replicate = sub("^[^.]*[-]?([0-9]+)$", "\\1", ConditionReplicate)) %>%
select(R.Condition, R.Replicate, ProteinAccessions, Quantity) %>%
select(R.Condition, R.Replicate, AccessionNumber, Quantity) %>%
mutate(Quantity = replace(Quantity, Quantity %in% c(0,1), NA))

## reformat the data to present proteins as the columns and
## to group replicates under each protein
reformatedData <- selectedData %>%
pivot_wider(id_cols = c(R.Condition, R.Replicate),
names_from = ProteinAccessions, values_from = Quantity)
# spread(ProteinAccessions, Quantity)
names_from = AccessionNumber, values_from = Quantity)
# spread(AccessionNumber, Quantity)

## generate a histogram of the log2-transformed values for full data set
## note: the Scaffold is a preprocessed data report.
Expand Down
2 changes: 1 addition & 1 deletion docs/articles/scaffold.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified docs/articles/scaffold_files/figure-html/unnamed-chunk-4-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
24 changes: 1 addition & 23 deletions docs/articles/usage_template.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pkgdown_sha: ~
articles:
scaffold: scaffold.html
usage_template: usage_template.html
last_built: 2024-05-04T05:24Z
last_built: 2024-05-15T04:17Z
urls:
reference: https://uconn-scs.github.io/msDiaLogue/reference
article: https://uconn-scs.github.io/msDiaLogue/articles
Expand Down
24 changes: 16 additions & 8 deletions docs/reference/filterProtein.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/search.json

Large diffs are not rendered by default.

29 changes: 21 additions & 8 deletions man/filterProtein.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified tests/testData/Toy_Scaffold_Data.xls
Binary file not shown.

0 comments on commit b09140d

Please sign in to comment.