forked from jpouch/qPCR-Biomark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
open_data.R
executable file
·50 lines (40 loc) · 2.38 KB
/
open_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#! /usr/bin/Rscript
### Opening file ###
#' Make sure you are in the correct working directory (to get current working directory use getwd())
#'
#' Enter your CSV file name (e.g. "file.csv")
#' @param skip = 11 : allows to skip the file header
#' @param sep = "," : specifies column separator
#' @param dec = "." : specifies type of decimal
#' @param header = TRUE : extracts header name of each column
#' @param fill = TRUE : bypasses error if some columns do not have the same row number
#' @param colClasses : specifies type of value stored in each column (character, number, factor...).
#' Use "NULL" for columns that you do not want to use.
#' @param na.strings = "999" : reads every 999 value as NA
#'
#' @example TaqMan chemistry data set
#' colClasses declares 12 columns; the 7 that are not "NULL" (positions 2-7 and 10) are kept,
#' the others are dropped while reading.
#' names() : optional; gives explicit names to all seven kept columns
#' (by default some of them are labelled "Name", "Type", "Name.1" and "Type.1")
taq_data <- read.csv(
  file = "file.csv",
  skip = 11,
  header = TRUE,
  sep = ",",
  dec = ".",
  fill = TRUE,
  na.strings = "999",
  colClasses = c(
    "NULL",                          # column 1: dropped
    rep("character", 2), "numeric",  # columns 2-4: sampleID, type, concentration
    rep("character", 2), "numeric",  # columns 5-7: geneID, reference, ct
    rep("NULL", 2),                  # columns 8-9: dropped
    "character",                     # column 10: status
    rep("NULL", 2)                   # columns 11-12: dropped
  )
)
names(taq_data) <- c(
  "sampleID", "type", "concentration",
  "geneID", "reference", "ct", "status"
)
#'
#' @example EvaGreen chemistry data set (more columns with melting curve data)
#' colClasses declares 15 columns; the 7 that are not "NULL" (positions 2-7 and 10) are kept,
#' the others (including the three extra trailing columns) are dropped while reading.
eva_data <- read.csv(
  file = "file.csv",
  skip = 11,
  header = TRUE,
  sep = ",",
  dec = ".",
  fill = TRUE,
  na.strings = "999",
  colClasses = c(
    "NULL",                          # column 1: dropped
    rep("character", 2), "numeric",  # columns 2-4: sampleID, type, concentration
    rep("character", 2), "numeric",  # columns 5-7: geneID, reference, ct
    rep("NULL", 2),                  # columns 8-9: dropped
    "character",                     # column 10: status
    rep("NULL", 5)                   # columns 11-15: dropped
  )
)
names(eva_data) <- c(
  "sampleID", "type", "concentration",
  "geneID", "reference", "ct", "status"
)
#' It is possible that the Fluidigm software calculates a Ct value but gives it a Fail status.
#' We find it best to exclude these values in order to work with only the valid values.
#'
#' df stands for data frame. To be replaced with your data frame name (e.g. taq_data)
#' @param !is.na(df$ct) : selects all non-NA values in the ct column
#' @param df$status %in% "Fail" : selects rows with Fail status. %in% is used instead of ==
#'   because == returns NA for a missing status, and an NA in the row selector makes the
#'   data frame assignment stop with an error; %in% returns FALSE for those rows.
#' @param "ct" : column containing Ct values, addressed by name rather than by position
#'   so the step still targets the right column if the column order differs
#' @param <- NA : assigns NA to ct values with Fail status
#'
#' @example
df[!is.na(df$ct) & df$status %in% "Fail", "ct"] <- NA
#' The status column can be removed after this step
#' @example
df$status <- NULL