From 78b5dcd11abda9126808311fd4232e56fa53e6fc Mon Sep 17 00:00:00 2001 From: Carol-seven <542046605@qq.com> Date: Sun, 2 Jun 2024 02:32:05 -0400 Subject: [PATCH] center --- NAMESPACE | 2 +- R/center.R | 50 +++++++++++++++++++++ R/globals.R | 4 +- R/meanVarPlot.R | 34 ++++++++++++++ R/transform.R | 89 ------------------------------------- docs/pkgdown.yml | 2 +- docs/reference/center.html | 91 ++++++++++++++++++++++++++++++++++++++ docs/reference/index.html | 8 ++-- docs/search.json | 2 +- docs/sitemap.xml | 3 ++ man/center.Rd | 28 ++++++++++++ man/transform.Rd | 30 ------------- 12 files changed, 215 insertions(+), 128 deletions(-) create mode 100644 R/center.R create mode 100644 R/meanVarPlot.R delete mode 100644 R/transform.R create mode 100644 docs/reference/center.html create mode 100644 man/center.Rd delete mode 100644 man/transform.Rd diff --git a/NAMESPACE b/NAMESPACE index 8d4cb86..ff4be48 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,7 @@ # Generated by roxygen2: do not edit by hand +export(center) export(preprocess) -export(transform) import(dplyr) import(ggplot2) import(tidyr) diff --git a/R/center.R b/R/center.R new file mode 100644 index 0000000..5e97520 --- /dev/null +++ b/R/center.R @@ -0,0 +1,50 @@ +#' +#' Centering +#' +#' @description +#' Apply centering to the data. +#' +#' @param dataSet A data frame containing the data signals. +#' +#' @param names A vector of strings (default = c("gender", "treatment", "replicate")) +#' specifying the names of the attribute columns. +#' +#' @details +#' The function executes the following: +#' \enumerate{ +#' \item Plots the mean-variance relationship. +#' \item Centers the data. +#' \item Plots the mean-variance relationship again for comparison. +#' } +#' +#' @returns The centered data. 
+#'
+#' @autoglobal
+#'
+#' @export
+
+center <- function(dataSet,
+                   names = c("gender", "treatment", "replicate")) {
+
+  ## separate the signal columns from the attribute columns listed in names
+  dataPoints <- dataSet %>%
+    select(-any_of(names))
+
+  ## plot the mean-variance relationship before centering
+  plotPre <- meanVarPlot(dataPoints, title = "Pre-Centering")
+  print(plotPre)
+
+  ## centering: subtract each column's mean (NAs ignored) from that column
+  centerData <- colMeans(dataPoints, na.rm = TRUE)
+  centeredDataPoints <- sweep(dataPoints, 2L, centerData, check.margin = FALSE)
+
+  ## plot the mean-variance relationship after centering
+  plotPost <- meanVarPlot(centeredDataPoints, title = "Post-Centering")
+  print(plotPost)
+
+  ## recombine the labels and centered data; select() never drops a single label to a vector
+  centeredDataSet <- cbind(select(dataSet, any_of(names)), centeredDataPoints)
+
+  ## return the centered data
+  return(centeredDataSet)
+}
diff --git a/R/globals.R b/R/globals.R
index 3071134..70b82b2 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -1,9 +1,9 @@
 # Generated by roxyglobals: do not edit by hand
 
 utils::globalVariables(c(
-  "Compound", #
-  "name", #
   "Mean", #
   "Variance", #
+  "Compound", #
+  "name", #
   NULL
 ))
diff --git a/R/meanVarPlot.R b/R/meanVarPlot.R
new file mode 100644
index 0000000..d83c1fa
--- /dev/null
+++ b/R/meanVarPlot.R
@@ -0,0 +1,34 @@
+#'
+#' Plotting a graph of mean versus variance
+#'
+#' @description
+#' Take a set of metabolomics data organized by column, calculate the mean and variance of
+#' each column, and then plot those statistics.
+#'
+#' @param datMV A data frame containing the data signals.
+#'
+#' @param title A string with the desired title for the mean-variance plot.
+#'
+#' @import ggplot2
+#' @importFrom stats var median
+#'
+#' @returns A \code{ggplot} object.
+#'
+#' @autoglobal
+#'
+#' @noRd
+
+meanVarPlot <- function(datMV, title = "") {
+
+  ## per-column mean and variance (NAs ignored); vapply() pins a 2-row numeric result
+  plotData <- data.frame(t(vapply(datMV, function(x) {
+    c(Mean = mean(x, na.rm = TRUE), Variance = var(x, na.rm = TRUE))
+  }, FUN.VALUE = c(Mean = 0, Variance = 0))))
+
+  ## scatter Variance against Mean, one point per column, with a centered title
+  ggplot(plotData, aes(Mean, Variance)) +
+    geom_point() +
+    labs(title = title) +
+    theme_bw() +
+    theme(plot.title = element_text(hjust = 0.5))
+}
diff --git a/R/transform.R b/R/transform.R
deleted file mode 100644
index 03b1256..0000000
--- a/R/transform.R
+++ /dev/null
@@ -1,89 +0,0 @@
-#'
-#' Plotting a graph of mean versus variance
-#'
-#' @description
-#' Take a set of metabolics data organized by column, calculate the mean and variance of
-#' each column, and then plot those statistics.
-#'
-#' @param datMV A data frame containing the data signals.
-#'
-#' @param title A string with the desired title for the mean-variance plot.
-#'
-#' @import ggplot2
-#' @importFrom stats var median
-#'
-#' @returns An object of class \code{plot}.
-#'
-#' @autoglobal
-#'
-#' @noRd
-
-meanVarPlot <- function(datMV, title = "") {
-
-  ## calculate the mean and variance for each protein individually
-  plotData <- data.frame(t(sapply(datMV, function(x) {
-    c(Mean = mean(x, na.rm = TRUE), Variance = var(x, na.rm = TRUE))
-  })))
-
-  ## plot the mean-variance relationship
-  ggplot(plotData, aes(Mean, Variance)) +
-    geom_point() +
-    labs(title = title) +
-    theme_bw() +
-    theme(plot.title = element_text(hjust = 0.5))
-}
-
-
-##----------------------------------------------------------------------------------------
-#'
-#' Log-based transformation
-#'
-#' @description
-#' Apply a logarithmic transformation to the data to stabilize the variance.
-#'
-#' @param dataSet A data frame containing the data signals.
-#'
-#' @param names A vector of strings (default = c("gender", "treatment", "replicate"))
-#' specifying the names of the attribute columns.
-#' -#' @param logFold An integer specifying the base for the log transformation. -#' -#' @details -#' The function executes the following: -#' \enumerate{ -#' \item Plots the mean-variance relationship using \code{meanVariancePlot()}. -#' \item Log-transforms the data, using the specified base. -#' \item Plots the mean-variance relationship again for comparison. -#' } -#' -#' @returns The transformed data. -#' -#' @autoglobal -#' -#' @export - -transform <- function(dataSet, - names = c("gender", "treatment", "replicate"), - logFold = 2) { - - ## organize the data for transformation - dataPoints <- dataSet %>% - select(-names) - - ## calculate and plot a mean-variance plot - plotPre <- meanVarPlot(dataPoints, title = "Pre-Transformation") - print(plotPre) - - ## take the log of the numerical data - transDataPoints <- log(dataPoints, logFold) - - ## calculate and plot a mean-variance plot - plotPost <- meanVarPlot(transDataPoints, title = "Post-Transformation") - print(plotPost) - - ## recombine the labels and transformed data into a single data frame - transDataSet <- cbind(dataSet[,names], transDataPoints) - - ## return the transformed data - return(transDataSet) -} diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 1cb242a..a96db1d 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,7 +2,7 @@ pandoc: '3.2' pkgdown: 2.0.9 pkgdown_sha: ~ articles: {} -last_built: 2024-06-02T03:07Z +last_built: 2024-06-02T06:31Z urls: reference: https://uconn-scs.github.io/metastat/reference article: https://uconn-scs.github.io/metastat/articles diff --git a/docs/reference/center.html b/docs/reference/center.html new file mode 100644 index 0000000..2fa98a0 --- /dev/null +++ b/docs/reference/center.html @@ -0,0 +1,91 @@ + +Centering — center • metastat + Skip to contents + + +
+
+
+ +
+

Apply centering to the data.

+
+ +
+

Usage

+
center(dataSet, names = c("gender", "treatment", "replicate"))
+
+ +
+

Arguments

+
dataSet
+

A data frame containing the data signals.

+ + +
names
+

A vector of strings (default = c("gender", "treatment", "replicate")) +specifying the names of the attribute columns.

+ +
+
+

Value

+ + +

The centered data.

+
+
+

Details

+

The function executes the following:

  1. Plots the mean-variance relationship.

  2. +
  3. Centers the data.

  4. +
  5. Plots the mean-variance relationship again for comparison.

  6. +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index a416f53..1a5b427 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -47,14 +47,14 @@

All functionspreprocess() + center() -
Loading and reformatting of metabolomics data
+
Centering
- transform() + preprocess()
-
Log-based transformation
+
Loading and reformatting of metabolomics data
diff --git a/docs/search.json b/docs/search.json index 0db35c7..33cd6d4 100644 --- a/docs/search.json +++ b/docs/search.json @@ -1 +1 @@ -[{"path":"https://uconn-scs.github.io/metastat/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Shiying Xiao. Author, maintainer. Timothy Moore. Author.","code":""},{"path":"https://uconn-scs.github.io/metastat/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Xiao S, Moore T (2024). metastat: Analysis Visualization Metabolomics Data. R package version 0.1.0.","code":"@Manual{, title = {metastat: Analysis and Visualization of Metabolomics Data}, author = {Shiying Xiao and Timothy Moore}, year = {2024}, note = {R package version 0.1.0}, }"},{"path":[]},{"path":"https://uconn-scs.github.io/metastat/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"metastat","text":"can install development version metastat GitHub :","code":"# install.packages(\"devtools\") devtools::install_github(\"uconn-scs/metastat\")"},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading and reformatting of metabolomics data — preprocess","title":"Loading and reformatting of metabolomics data — preprocess","text":"Read data file, select columns necessary analysis, return reformatted data.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading and reformatting of metabolomics data — preprocess","text":"","code":"preprocess( fileName, dataSet = NULL, names = c(\"gender\", \"treatment\", \"replicate\") )"},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading and reformatting 
of metabolomics data — preprocess","text":"fileName name .csv file containing metabolomics data (including path file, needed). dataSet raw data set, already loaded R. names vector strings (default = c(\"gender\", \"treatment\", \"replicate\")) specifying names attribute columns.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading and reformatting of metabolomics data — preprocess","text":"2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading and reformatting of metabolomics data — preprocess","text":"function executes following: Reads file. Provides summary statistics histogram values reported data set. Re-formats data present individual compounds columns. Stores data data.frame prints levels attributes user.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading and reformatting of metabolomics data — preprocessing","title":"Loading and reformatting of metabolomics data — preprocessing","text":"Read data file, select columns necessary analysis, return reformatted data.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading and reformatting of metabolomics data — preprocessing","text":"","code":"preprocessing( fileName, dataSet = NULL, names = c(\"gender\", \"treatment\", \"replicate\") )"},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading and reformatting of metabolomics data — preprocessing","text":"fileName name .csv file containing metabolomics data (including path file, 
needed). dataSet raw data set, already loaded R. names vector strings (default = c(\"gender\", \"treatment\", \"replicate\")) specifying names attribute columns.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading and reformatting of metabolomics data — preprocessing","text":"2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading and reformatting of metabolomics data — preprocessing","text":"function executes following: Reads file. Provides summary statistics histogram values reported data set. Re-formats data present individual compounds columns. Stores data data.frame prints levels attributes user.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":null,"dir":"Reference","previous_headings":"","what":"Log-based transformation — transform","title":"Log-based transformation — transform","text":"Apply logarithmic transformation data stabilize variance.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Log-based transformation — transform","text":"","code":"transform(dataSet, names = c(\"gender\", \"treatment\", \"replicate\"), logFold = 2)"},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Log-based transformation — transform","text":"dataSet data frame containing data signals. names vector strings (default = c(\"gender\", \"treatment\", \"replicate\")) specifying names attribute columns. 
logFold integer specifying base log transformation.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Log-based transformation — transform","text":"transformed data.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Log-based transformation — transform","text":"function executes following: Plots mean-variance relationship using meanVariancePlot(). Log-transforms data, using specified base. Plots mean-variance relationship comparison.","code":""}] +[{"path":"https://uconn-scs.github.io/metastat/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Shiying Xiao. Author, maintainer. Timothy Moore. Author.","code":""},{"path":"https://uconn-scs.github.io/metastat/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Xiao S, Moore T (2024). metastat: Analysis Visualization Metabolomics Data. 
R package version 0.1.0.","code":"@Manual{, title = {metastat: Analysis and Visualization of Metabolomics Data}, author = {Shiying Xiao and Timothy Moore}, year = {2024}, note = {R package version 0.1.0}, }"},{"path":[]},{"path":"https://uconn-scs.github.io/metastat/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"metastat","text":"can install development version metastat GitHub :","code":"# install.packages(\"devtools\") devtools::install_github(\"uconn-scs/metastat\")"},{"path":"https://uconn-scs.github.io/metastat/reference/center.html","id":null,"dir":"Reference","previous_headings":"","what":"Centering — center","title":"Centering — center","text":"Apply centering data.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/center.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Centering — center","text":"","code":"center(dataSet, names = c(\"gender\", \"treatment\", \"replicate\"))"},{"path":"https://uconn-scs.github.io/metastat/reference/center.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Centering — center","text":"dataSet data frame containing data signals. names vector strings (default = c(\"gender\", \"treatment\", \"replicate\")) specifying names attribute columns.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/center.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Centering — center","text":"centered data.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/center.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Centering — center","text":"function executes following: Plots mean-variance relationship. Centers data. 
Plots mean-variance relationship comparison.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading and reformatting of metabolomics data — preprocess","title":"Loading and reformatting of metabolomics data — preprocess","text":"Read data file, select columns necessary analysis, return reformatted data.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading and reformatting of metabolomics data — preprocess","text":"","code":"preprocess( fileName, dataSet = NULL, names = c(\"gender\", \"treatment\", \"replicate\") )"},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading and reformatting of metabolomics data — preprocess","text":"fileName name .csv file containing metabolomics data (including path file, needed). dataSet raw data set, already loaded R. names vector strings (default = c(\"gender\", \"treatment\", \"replicate\")) specifying names attribute columns.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading and reformatting of metabolomics data — preprocess","text":"2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocess.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading and reformatting of metabolomics data — preprocess","text":"function executes following: Reads file. Provides summary statistics histogram values reported data set. Re-formats data present individual compounds columns. 
Stores data data.frame prints levels attributes user.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading and reformatting of metabolomics data — preprocessing","title":"Loading and reformatting of metabolomics data — preprocessing","text":"Read data file, select columns necessary analysis, return reformatted data.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading and reformatting of metabolomics data — preprocessing","text":"","code":"preprocessing( fileName, dataSet = NULL, names = c(\"gender\", \"treatment\", \"replicate\") )"},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading and reformatting of metabolomics data — preprocessing","text":"fileName name .csv file containing metabolomics data (including path file, needed). dataSet raw data set, already loaded R. names vector strings (default = c(\"gender\", \"treatment\", \"replicate\")) specifying names attribute columns.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading and reformatting of metabolomics data — preprocessing","text":"2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/preprocessing.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading and reformatting of metabolomics data — preprocessing","text":"function executes following: Reads file. Provides summary statistics histogram values reported data set. Re-formats data present individual compounds columns. 
Stores data data.frame prints levels attributes user.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":null,"dir":"Reference","previous_headings":"","what":"Log-based transformation — transform","title":"Log-based transformation — transform","text":"Apply logarithmic transformation data stabilize variance.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Log-based transformation — transform","text":"","code":"transform(dataSet, names = c(\"gender\", \"treatment\", \"replicate\"), logFold = 2)"},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Log-based transformation — transform","text":"dataSet data frame containing data signals. names vector strings (default = c(\"gender\", \"treatment\", \"replicate\")) specifying names attribute columns. logFold integer specifying base log transformation.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Log-based transformation — transform","text":"transformed data.","code":""},{"path":"https://uconn-scs.github.io/metastat/reference/transform.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Log-based transformation — transform","text":"function executes following: Plots mean-variance relationship using meanVariancePlot(). Log-transforms data, using specified base. 
Plots mean-variance relationship comparison.","code":""}] diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 08cfae3..f29a3e1 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -12,6 +12,9 @@ https://uconn-scs.github.io/metastat/index.html + + https://uconn-scs.github.io/metastat/reference/center.html + https://uconn-scs.github.io/metastat/reference/index.html diff --git a/man/center.Rd b/man/center.Rd new file mode 100644 index 0000000..5831d20 --- /dev/null +++ b/man/center.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/center.R +\name{center} +\alias{center} +\title{Centering} +\usage{ +center(dataSet, names = c("gender", "treatment", "replicate")) +} +\arguments{ +\item{dataSet}{A data frame containing the data signals.} + +\item{names}{A vector of strings (default = c("gender", "treatment", "replicate")) +specifying the names of the attribute columns.} +} +\value{ +The centered data. +} +\description{ +Apply centering to the data. +} +\details{ +The function executes the following: +\enumerate{ +\item Plots the mean-variance relationship. +\item Centers the data. +\item Plots the mean-variance relationship again for comparison. +} +} diff --git a/man/transform.Rd b/man/transform.Rd deleted file mode 100644 index 69e27de..0000000 --- a/man/transform.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/transform.R -\name{transform} -\alias{transform} -\title{Log-based transformation} -\usage{ -transform(dataSet, names = c("gender", "treatment", "replicate"), logFold = 2) -} -\arguments{ -\item{dataSet}{A data frame containing the data signals.} - -\item{names}{A vector of strings (default = c("gender", "treatment", "replicate")) -specifying the names of the attribute columns.} - -\item{logFold}{An integer specifying the base for the log transformation.} -} -\value{ -The transformed data. 
-} -\description{ -Apply a logarithmic transformation to the data to stabilize the variance. -} -\details{ -The function executes the following: -\enumerate{ -\item Plots the mean-variance relationship using \code{meanVariancePlot()}. -\item Log-transforms the data, using the specified base. -\item Plots the mean-variance relationship again for comparison. -} -}