-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMann_Whitney_Cell_Lines.R
70 lines (50 loc) · 2.24 KB
/
Mann_Whitney_Cell_Lines.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#Mann-Whitney U Test for multiple rows with Median and Z value
#First we need to find the significant differences between ALT and Tel in cell lines
library(readr)
library(dplyr)
library(stats)
# Work from the project folder so the relative CSV paths used below resolve
setwd("/Users/user/Documents/Genestack")
# Column names of the two sample groups compared row by row: Tel and ALT.
# The names are used verbatim to index the table, so the spelling must stay
# exactly as written here (presumably matching the CSV header -- confirm).
tel_columns <- c(
  "Tel_ovarian_1", "Tel_ovarian_2",
  "Tel_bladder_1", "Tel_bladder_2",
  "Tel_cervical_1", "Tel_cervical_2",
  "Tel_fibosarcoma_1", "Tel_fibosarcoma_2"
)
alt_columns <- c(
  "ALT_lung_1", "ALT_lung_2", "ALT_lung_3",
  "ALT_lung_4", "ALT_lung_5", "ALT_lung_6",
  "ALT_liver_1", "ALT_liver_2"
)
# Read the cleaned telomere table (prepared in Python, per the original note)
telomere_data_cleaned <- read_csv("telomere_data_cleaned.csv")
# Row-wise Mann-Whitney U (Wilcoxon rank-sum) test comparing the ALT columns
# against the Tel columns of `telomere_data_cleaned`.
# Produces four numeric vectors with one entry per row of the table:
#   p_values     two-sided p-value of the test
#   medians_alt  median of the ALT samples
#   medians_tel  median of the Tel samples
#   z_values     test statistic reported by wilcox.test() (see note below)
# The vectors are preallocated instead of being grown with c() on every
# iteration, which would copy them each time.
n_rows <- nrow(telomere_data_cleaned)
p_values <- numeric(n_rows)
medians_alt <- numeric(n_rows)
medians_tel <- numeric(n_rows)
z_values <- numeric(n_rows)
# seq_len() gives an empty sequence for a zero-row table, whereas 1:0 would
# iterate over c(1, 0) and index rows that do not exist.
for (i in seq_len(n_rows)) {
  # Flatten the i-th row's measurements for each group into plain vectors
  alt_values <- unlist(telomere_data_cleaned[i, alt_columns])
  tel_values <- unlist(telomere_data_cleaned[i, tel_columns])
  # Two-sided Mann-Whitney U test between the two groups
  mw_test <- wilcox.test(alt_values, tel_values, alternative = "two.sided")
  p_values[i] <- mw_test$p.value
  # Group medians for the ALT and Tel samples of this row
  medians_alt[i] <- median(alt_values)
  medians_tel[i] <- median(tel_values)
  # NOTE(review): wilcox.test() returns the rank-sum statistic W in
  # `$statistic`, not a standardized z-score. The `z_values` name is kept
  # because the result is stored under that name further down the script.
  z_values[i] <- unname(mw_test$statistic)
}
# Attach the per-row test results to the telomere table as new columns.
# `z_value` holds the statistic returned by wilcox.test() for each row.
telomere_data_cleaned$p_value <- p_values
telomere_data_cleaned$median_alt <- medians_alt
telomere_data_cleaned$median_tel <- medians_tel
telomere_data_cleaned$z_value <- z_values
# Save the annotated table to a new CSV file.
# row.names = FALSE: by default write.csv() emits the row index as an extra
# unnamed first column, which read.csv() would load back as a spurious `X`
# column and carry into any file derived from this one.
write.csv(telomere_data_cleaned, "Cell_lines_MW.csv", row.names = FALSE)
# Extract the rows with a significant ALT-vs-Tel difference.
# Reload the annotated table that was written to disk
telomere_data_mw <- read.csv(file = "Cell_lines_MW.csv")
# Keep rows whose p-value is below 0.05. The p-values are used as computed
# (no multiple-testing adjustment is applied in this script); filter() also
# drops rows where the comparison evaluates to NA.
significant_rows <- filter(telomere_data_mw, p_value < 0.05)
# Write the significant subset to its own CSV, without a row-index column
write.csv(
  x = significant_rows,
  file = "significant_genes_cell_lines.csv",
  row.names = FALSE
)