added SPA (scripts of processing and analysis)
bioatmosphere committed Jun 29, 2022
1 parent 1336b52 commit d8b7beb
Showing 18 changed files with 951 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -53,6 +53,8 @@ git clone https://github.com/bioatmosphere/DEMENTpy

- output/: folder where the output object (a .pickle file) resides

- SPA/: Scripts for Processing and Analysis (SPA) of outputs

**Run DEMENTpy**:

- Configure Environment
47 changes: 47 additions & 0 deletions SPA/batchjob2.sh
@@ -0,0 +1,47 @@
#!/bin/bash

#--------------------------------------------------
# input1 loop
#--------------------------------------------------
for count in 1 2 3
do

jobname="sev$count"
sed -i -e "s/sev.*/$jobname/" dementpy.sh

outname="20191126$count"
#echo $outname
sed -i -e "s/20191126.*/$outname/" dementpy.sh

qsub dementpy.sh

done


#--------------------------------------------------
# input2 loop
#--------------------------------------------------

sed -i -e "s/input/input2/" dementpy.sh
sed -i -e "s/output/output2/" dementpy.sh

for ((count = 2; count >= 0; count--))
do
outname="20191126$count"
# anchor on the full token (20191126.*) so successive substitutions do not compound
sed -i -e "s/20191126.*/$outname/" dementpy.sh
qsub dementpy.sh
done

#--------------------------------------------------
# input3 loop
#--------------------------------------------------

# the previous block rewrote input->input2 and output->output2, so anchor on those
sed -i -e "s/input2/input3/" dementpy.sh
sed -i -e "s/output2/output3/" dementpy.sh

for ((count = 2; count >= 0; count--))
do
outname="20191126$count"
sed -i -e "s/20191126.*/$outname/" dementpy.sh
qsub dementpy.sh
done
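For context, these in-place sed edits assume dementpy.sh carries a job-name token beginning with "sev" and a date token beginning with "20191126". A minimal sketch of the two lines being rewritten (hypothetical content, not part of this commit):

#$ -N sev1                                  # rewritten by s/sev.*/$jobname/
python dementpy.py input output 201911261   # rewritten by s/20191126.*/$outname/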
20 changes: 20 additions & 0 deletions SPA/batchjob_test.sh
@@ -0,0 +1,20 @@
#!/bin/bash
for mode in 1 2 3
do
for count in 1 2 3
do
# job naming
jobname="m_${mode}_c_$count"   # braces keep $mode from being parsed as a variable named mode_c_
sed -i -e "s/m_.*/$jobname/" dementpy.sh

# output naming
outname="20191126$count"
sed -i -e "s/20191126.*/$outname/" dementpy.sh

# folder naming (note: $input is composed here but not yet applied to dementpy.sh)
input="mode$mode/input$count"

qsub dementpy.sh

done
done
59 changes: 59 additions & 0 deletions SPA/dementpy_array.sh
@@ -0,0 +1,59 @@
#!/bin/bash

#==========================
#======Set SGE options:
#==========================

#$ -cwd
#$ -S /bin/bash

#$ -N dementpy


#==============================================
#===Queue Resource Request
# -pe smp <N>:
# shared memory parallel environment
# -R y:
# The reserve option allows your job to get a foot in the door on a node,
# and prevents the node from being constantly loaded by single-core jobs.
# mem_free:
# This should be set to what you think your job will need (or a little more).
# This will reserve memory for your job on the node that it is run on.
# h_vmem:
# This is the “high water mark” for memory for your job. This should be set
# equal to your mem_free request.
# Refs:
# Memory usage and good citizenship: https://jhpce.jhu.edu/2017/05/17/memory_usage_analysis/
#==============================================
#$ -q mic
#$ -pe smp 8
#$ -R y
#$ -l mem_free=3G,h_vmem=3G
#$ -l h_rt=30:00:00
#$ -l s_rt=35:00:00
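# (For reference, an equivalent one-off request could be made on the qsub
#  command line instead of via these directives, e.g.:
#    qsub -q mic -pe smp 8 -R y -l mem_free=3G,h_vmem=3G dementpy_array.sh )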


########################
# Merge the standard error into the standard output stream
########################
#$ -j y
##$ -e output/
#$ -o output/

##$ -M [email protected]
#$ -m ea

## submit an array job
#$ -t 1-8

#==========================
#======The job itself
#==========================

#module load anaconda
#cd src
#python dementpy.py mode/input_ output 20200120


date > output/results.$SGE_TASK_ID
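
With #$ -t 1-8 above, SGE launches eight instances of this script, each with a distinct $SGE_TASK_ID. A minimal sketch of using the task ID to drive per-replicate runs, following the input-folder/output-folder/job-ID arguments shown in the commented python line above (the per-task folder naming is an assumption):

INPUT="mode/input_$SGE_TASK_ID"    # hypothetical per-task input folder
python dementpy.py "$INPUT" output "20200120$SGE_TASK_ID"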
13 changes: 13 additions & 0 deletions SPA/jupyternb.sh
@@ -0,0 +1,13 @@
#!/bin/bash
#--------------------------------------------------------#
#----Script submitting jupyter notebook jobs to HPC------#
#--------------------------------------------------------#
#$ -N dementjupyter
#$ -q mic
#$ -m beas

module load anaconda/3.7-5.3.0

# --ExecutePreprocessor.timeout=180 raises the per-cell timeout (in seconds) so long-running notebook cells are not killed.
jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute test_5.ipynb --output test_5.ipynb
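As with the other scripts here, this would typically be submitted to the scheduler with:

qsub jupyternb.sh

nbconvert then executes test_5.ipynb in place and overwrites it with the evaluated notebook.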
21 changes: 21 additions & 0 deletions SPA/mvoutputs.sh
@@ -0,0 +1,21 @@
#!/bin/bash
#----------------------------------------------------------------------------#
#--- Script moving output files from various folders over to one folder  ----#
#----------------------------------------------------------------------------#

cd dementpy/output
mv 2019111800.pickle ../../test_5
cd ../..

cd dementpy1/output
mv 2019111801.pickle ../../test_5
cd ../..

cd dementpy2/output
mv 2019111802.pickle ../../test_5
cd ../..

cd dementpy3/output
mv 2019111803.pickle ../../test_5
cd ../..
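The same moves could be written as a loop — a sketch assuming exactly the layout above and a start in the parent directory (note the first run folder carries no numeric suffix):

#!/bin/bash
for i in 0 1 2 3; do
    dir="dementpy$i"
    [ "$i" -eq 0 ] && dir="dementpy"   # the first run folder is unsuffixed
    mv "$dir/output/201911180$i.pickle" test_5/
done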
38 changes: 38 additions & 0 deletions SPA/replicate.sh
@@ -0,0 +1,38 @@
#!/bin/bash

# Make changes to dementpy.sh using for loops
for mode in 1
do
for scenario in bas mid sev
do
#input output folder
folder="mode$mode/input_$scenario output"
sed -i -e "s%mode.*.output%$folder%" dementpy.sh

for count in 1 2 3 4 5
do

jobname="dm_$mode$scenario$count"
sed -i -e "s/dm_.*/$jobname/" dementpy.sh

outname="20200120$count"_"$scenario"
echo "output name:" $outname
sed -i -e "s/20200120.*/$outname/" dementpy.sh

qsub dementpy.sh

done

done

done

# restore dementpy.sh to its original state
jobname="dm_"
sed -i -e "s/dm_.*/$jobname/" dementpy.sh

folder="mode/input_ output"
sed -i -e "s%mode.*.output%$folder%" dementpy.sh

outname="20200120"
sed -i -e "s/20200120.*/$outname/" dementpy.sh
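
Concretely, the sweep above submits 15 jobs (1 mode x 3 scenarios x 5 replicates); for mode=1, scenario=bas, count=2, for instance, dementpy.sh is rewritten with job name dm_1bas2 and output name 202001202_bas before being submitted.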
35 changes: 35 additions & 0 deletions SPA/replicate_test.sh
@@ -0,0 +1,35 @@
#!/bin/bash

# Make changes to dementpy.sh using for loops
for scenario in bas mid sev
do

#input output folder
folder="input_$scenario output"
sed -i -e "s/input_.*.output/$folder/" dementpy.sh

for count in 1 2 3 4
do

jobname="dm_$scenario$count"
sed -i -e "s/dm_.*/$jobname/" dementpy.sh

outname="20191126$count"_"$scenario"
echo "output name:" $outname
sed -i -e "s/20191126.*/$outname/" dementpy.sh

# submit to HPC
qsub dementpy.sh

done
done

# restore dementpy.sh to its original state
jobname="dm_"
sed -i -e "s/dm_.*/$jobname/" dementpy.sh

folder="input_ output"
sed -i -e "s/input_.*.output/$folder/" dementpy.sh

outname="20191126"
sed -i -e "s/20191126.*/$outname/" dementpy.sh
16 changes: 16 additions & 0 deletions SPA/scenario_name.sh
@@ -0,0 +1,16 @@
#!/bin/bash

cd dementpy0/input
rm climate.csv
mv base.csv climate.csv
cd ../..

cd dementpy1/input
rm climate.csv
mv scenario_2012.csv climate.csv
cd ../..

cd dementpy2/input
rm climate.csv
mv scenario_2013.csv climate.csv
cd ../..
91 changes: 91 additions & 0 deletions SPA/scripts/data_extraction.py
@@ -0,0 +1,91 @@
"""
Script of extracting data from pickle files
By Bin Wang
"""

#import numpy as np
import pandas as pd
import pickle
import sys
import os
import glob
import output  # DEMENTpy's output module; importing it lets pickle resolve the stored objects' class

# define a function extracting data from files in .pickle
def get_pickled_data(key):
    datalist = []

    # single run (superseded by the ensemble file list built below)
    #filelist = glob.glob(key + '_' + '20201' + '.pickle')

    ## ensemble runs -- 20 replicates (suffixes 1-20)
    filelist_19   = glob.glob(key + '_' + '2020' + '[1-9].pickle')
    filelist_1019 = glob.glob(key + '_' + '2020' + '1[0-9].pickle')
    filelist_2020 = glob.glob(key + '_' + '2020' + '20.pickle')
    filelist = filelist_19 + filelist_1019 + filelist_2020

    #filelist_2029 = glob.glob(key+'2[0-9].pickle')
    #filelist_3039 = glob.glob(key+'3[0-9].pickle')
    #filelist_4040 = glob.glob(key+'40.pickle')
    #filelist = filelist_19 + filelist_1019 + filelist_2029 + filelist_3039 + filelist_4040

    filelist.sort(reverse=False)
    for file in filelist:
        with open(file, "rb") as f:
            data = pickle.load(f)
            datalist.append(data)

    return filelist, datalist

def community_drought(data):
    """
    Calculate community-level (biomass-weighted) drought tolerance.
    """
    Relative_mass = data.MicrobesSeries.div(data.MicrobesSeries.sum(axis=0), axis=1)
    drought_tol = data.Microbial_traits['Drought_tolerance']
    community_drought = Relative_mass.mul(drought_tol, axis=0).sum(axis=0)

    return community_drought

def community_enzyme(data):
    """
    Calculate community-level (biomass-weighted) enzyme investment.
    """
    Relative_mass = data.MicrobesSeries.div(data.MicrobesSeries.sum(axis=0), axis=1)
    # inducible cost only:
    # enzyme_trait = data.Microbial_traits['Enz_Induci_Cost'] * data.Microbial_traits['Enz_Gene']
    enzyme_trait = (data.Microbial_traits['Enz_Induci_Cost'] + data.Microbial_traits['Enz_Consti_Cost']) * data.Microbial_traits['Enz_Gene']
    community_enzyme = Relative_mass.mul(enzyme_trait, axis=0).sum(axis=0)

    return community_enzyme

site = sys.argv[1] # base site name
key = sys.argv[2] # target site

os.chdir('../output_'+site)

filelist, datalist = get_pickled_data(key)
## sub-specific mass
#sub = pd.concat([data.SubstratesSeries for data in datalist], axis=1, sort=False)
## total mass
sub = pd.concat([data.SubstratesSeries.sum(axis=0) for data in datalist], axis=1, sort=False)
# export to csv
sub.to_csv('data/' + 'Sub_' + site +'_'+ key + '.csv')

# microbes
microbes = pd.concat([data.MicrobesSeries for data in datalist], axis=1, sort=False)
microbes.to_csv('data/' + 'Mic_' + site +'_'+ key + '.csv')

# taxon-specific enzyme
Enzyme = pd.concat([data.Enzyme_TaxonSeries for data in datalist], axis=1, sort=False)
Enzyme.to_csv('data/' + 'Enzyme_' + site +'_'+ key + '.csv')

# taxon-specific osmolyte
Osmolyte = pd.concat([data.Osmolyte_TaxonSeries for data in datalist], axis=1, sort=False)
Osmolyte.to_csv('data/' + 'Osmolyte_' + site +'_'+ key + '.csv')

# taxon-specific growth yield
Yield = pd.concat([data.Growth_yield for data in datalist], axis=1, sort=False)
Yield.to_csv('data/' + 'Yield_' + site +'_'+ key + '.csv')
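
A typical invocation, assuming the directory layout the script implies (an output_<site> folder one level up holding the .pickle files and a data/ subfolder for the CSVs; the site/key names here are hypothetical):

python data_extraction.py bas bas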
