forked from jmcook1186/SpectraProcessing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCells_BBA_analysis.py
83 lines (60 loc) · 2.93 KB
/
Cells_BBA_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu May 24 22:21:07 2018
@author: joe
"""
import numpy as np
from scipy.stats.stats import pearsonr
import scipy.stats as stats
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels as sm
# Import BBA and cell count dataset from csv
DF = pd.read_csv('/home/joe/Code/Log_cells_BBA.csv')
xs = np.array(DF['Log_cells'])
ys = np.array(DF['BBA'])
# Define functions for regression, correlation and analysis of variance
def best_fit(xs,ys):
m = (((np.mean(xs)*np.mean(ys)) - np.mean(xs*ys)) / ((np.mean(xs)*np.mean(xs)) - np.mean(xs*xs)))
b = np.mean(ys) - m*np.mean(xs)
print('regression line equation = {}x + {}'.format(np.round(m,3),np.round(b,3)))
return m, b
def squared_error(ys_orig,ys_line):
return sum((ys_line - ys) * (ys_line - ys))
def coefficient_of_determination(ys,ys_line):
y_mean_line = [np.mean(ys) for y in ys]
squared_error_regr = squared_error(ys, ys_line)
squared_error_y_mean = squared_error(ys, y_mean_line)
r_squared = 1 - (squared_error_regr/squared_error_y_mean)
R,Rp = pearsonr(xs, ys)
return r_squared, R, Rp
def plots(xs,ys,line,R,Rp):
plt.style.use('seaborn')
fig = plt.figure(figsize=(10,10))
plt.scatter(xs,ys,color='b'),plt.plot(xs,line,color='r'),plt.xlabel('Log cells mL$^{-1}$',fontsize=22),
plt.ylabel('Broadband Albedo (0.35 - 2.5 $\mu$m)',fontsize=22),plt.xticks(fontsize=22),plt.yticks(fontsize=22),
plt.legend(loc='best'),plt.grid(None),plt.text(4.1,0.7,'$r^2$ = {}'.format(np.round(r_squared,2)),fontsize=22),
#plt.savefig('/home/joe/Desktop/Cells_vs_BBA.jpg',facecolor='white',dpi=150)
return
def ANOVA(DF):
HA_cells = DF.loc[DF['Class'] == 'HA']
HA_cells = np.array(HA_cells['Cells_mL'])
LA_cells = DF.loc[DF['Class'] == 'LA']
LA_cells = np.array(LA_cells['Cells_mL'])
CI_cells = DF.loc[DF['Class'] == 'CI']
CI_cells = np.array(CI_cells['Cells_mL'])
# SN_cells = DF.loc[DF['Class'] == 'SN']
# SN_cells = np.array(SN_cells['Cells_mL'])
F_stat, p = stats.mstats.f_oneway(HA_cells,LA_cells,CI_cells)#,SN_cells)
print('ANOVA F-stat = {} , ANOVA p = {}'.format(F_stat,p))
print('HA Summary: Max = {}, Min = {}, mean = {}, Std = {}'.format(np.max(HA_cells),np.min(HA_cells),np.mean(HA_cells),np.std(HA_cells)))
print('LA Summary: Max = {}, Min = {}, mean = {}, Std = {}'.format(np.max(LA_cells),np.min(LA_cells),np.mean(LA_cells),np.std(LA_cells)))
print('CI Summary: Max = {}, Min = {}, mean = {}, Std = {}'.format(np.max(CI_cells),np.min(CI_cells),np.mean(CI_cells),np.std(CI_cells)))
# print('SN Summary: Max = {}, Min = {}, mean = {}, Std = {}'.format(np.max(SN_cells),np.min(SN_cells),np.mean(SN_cells),np.std(SN_cells)))
return F_stat, p
m,b = best_fit(xs,ys)
line = [(m*x)+b for x in xs]
r_squared, R, Rp = coefficient_of_determination(ys,line)
plots(xs,ys,line, R, Rp)
ANOVA(DF)