added SPA (scripts of processing and analysis)
bioatmosphere committed Jun 29, 2022
1 parent 1336b52 commit d8b7beb
Showing 18 changed files with 951 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -53,6 +53,8 @@ git clone https://github.com/bioatmosphere/DEMENTpy

- output/: folder where the output object (a .pickle file) resides

- SPA/: Scripts for Processing and Analysis (SPA) of outputs

**Run DEMENTpy**:

- Configure Environment
47 changes: 47 additions & 0 deletions SPA/batchjob2.sh
@@ -0,0 +1,47 @@
#!/bin/bash

#--------------------------------------------------
# input1 loop
#--------------------------------------------------
for count in 1 2 3
do

jobname="sev$count"
sed -i -e "s/sev.*/$jobname/" dementpy.sh

outname="20191126$count"
#echo $outname
sed -i -e "s/20191126.*/$outname/" dementpy.sh

qsub dementpy.sh

done


#--------------------------------------------------
# input2 loop
#--------------------------------------------------

sed -i -e "s/input/input2/" dementpy.sh
sed -i -e "s/output/output2/" dementpy.sh

for ((count = 2; count >= 0; count--))
do
outname="20191126$count"
# anchor on the full token (20191126.*) so successive substitutions do not compound
sed -i -e "s/20191126.*/$outname/" dementpy.sh
qsub dementpy.sh
done

#--------------------------------------------------
# input3 loop
#--------------------------------------------------

# the previous block rewrote input->input2 and output->output2, so anchor on those
sed -i -e "s/input2/input3/" dementpy.sh
sed -i -e "s/output2/output3/" dementpy.sh

for ((count = 2; count >= 0; count--))
do
outname="20191126$count"
sed -i -e "s/20191126.*/$outname/" dementpy.sh
qsub dementpy.sh
done
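For context, these in-place sed edits assume dementpy.sh carries a job-name token beginning with "sev" and a date token beginning with "20191126". A minimal sketch of the two lines being rewritten (hypothetical content, not part of this commit):

#$ -N sev1                                  # rewritten by s/sev.*/$jobname/
python dementpy.py input output 201911261   # rewritten by s/20191126.*/$outname/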
20 changes: 20 additions & 0 deletions SPA/batchjob_test.sh
@@ -0,0 +1,20 @@
#!/bin/bash
for mode in 1 2 3
do
for count in 1 2 3
do
# job naming
jobname="m_${mode}_c_$count"   # braces keep $mode from being parsed as a variable named mode_c_
sed -i -e "s/m_.*/$jobname/" dementpy.sh

# output naming
outname="20191126$count"
sed -i -e "s/20191126.*/$outname/" dementpy.sh

# folder naming (note: $input is composed here but not yet applied to dementpy.sh)
input="mode$mode/input$count"

qsub dementpy.sh

done
done
59 changes: 59 additions & 0 deletions SPA/dementpy_array.sh
@@ -0,0 +1,59 @@
#!/bin/bash

#==========================
#======Set SGE options:
#==========================

#$ -cwd
#$ -S /bin/bash

#$ -N dementpy


#==============================================
#===Queue Resource Request
# -pe smp <N>:
# shared memory parallel environment
# -R y:
# The reserve option allows your job to get a foot in the door on a node,
# and prevents the node from being constantly loaded by single-core jobs.
# mem_free:
# This should be set to what you think your job will need (or a little more).
# This will reserve memory for your job on the node that it is run on.
# h_vmem:
# This is the “high water mark” for memory for your job. This should be set
# equal to your mem_free request.
# Refs:
# Memory usage and good citizenship: https://jhpce.jhu.edu/2017/05/17/memory_usage_analysis/
#==============================================
#$ -q mic
#$ -pe smp 8
#$ -R y
#$ -l mem_free=3G,h_vmem=3G
#$ -l h_rt=30:00:00
#$ -l s_rt=35:00:00
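# (For reference, an equivalent one-off request could be made on the qsub
#  command line instead of via these directives, e.g.:
#    qsub -q mic -pe smp 8 -R y -l mem_free=3G,h_vmem=3G dementpy_array.sh )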


########################
# Merge the standard error into the standard output stream
########################
#$ -j y
##$ -e output/
#$ -o output/

##$ -M [email protected]
#$ -m ea

## submit an array job
#$ -t 1-8

#==========================
#======The job itself
#==========================

#module load anaconda
#cd src
#python dementpy.py mode/input_ output 20200120


date > output/results.$SGE_TASK_ID
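
With #$ -t 1-8 above, SGE launches eight instances of this script, each with a distinct $SGE_TASK_ID. A minimal sketch of using the task ID to drive per-replicate runs, following the input-folder/output-folder/job-ID arguments shown in the commented python line above (the per-task folder naming is an assumption):

INPUT="mode/input_$SGE_TASK_ID"    # hypothetical per-task input folder
python dementpy.py "$INPUT" output "20200120$SGE_TASK_ID"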
13 changes: 13 additions & 0 deletions SPA/jupyternb.sh
@@ -0,0 +1,13 @@
#!/bin/bash
#--------------------------------------------------------#
#----Script submitting jupyter notebook jobs to HPC------#
#--------------------------------------------------------#
#$ -N dementjupyter
#$ -q mic
#$ -m beas

module load anaconda/3.7-5.3.0

# --ExecutePreprocessor.timeout=180 raises the per-cell timeout (in seconds) so long-running notebook cells are not killed.
jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute test_5.ipynb --output test_5.ipynb
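As with the other scripts here, this would typically be submitted to the scheduler with:

qsub jupyternb.sh

nbconvert then executes test_5.ipynb in place and overwrites it with the evaluated notebook.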
21 changes: 21 additions & 0 deletions SPA/mvoutputs.sh
@@ -0,0 +1,21 @@
#!/bin/bash
#----------------------------------------------------------------------------#
#--- Script moving output files from various folders over to one folder  ----#
#----------------------------------------------------------------------------#

cd dementpy/output
mv 2019111800.pickle ../../test_5
cd ../..

cd dementpy1/output
mv 2019111801.pickle ../../test_5
cd ../..

cd dementpy2/output
mv 2019111802.pickle ../../test_5
cd ../..

cd dementpy3/output
mv 2019111803.pickle ../../test_5
cd ../..
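The same moves could be written as a loop — a sketch assuming exactly the layout above and a start in the parent directory (note the first run folder carries no numeric suffix):

#!/bin/bash
for i in 0 1 2 3; do
    dir="dementpy$i"
    [ "$i" -eq 0 ] && dir="dementpy"   # the first run folder is unsuffixed
    mv "$dir/output/201911180$i.pickle" test_5/
done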
38 changes: 38 additions & 0 deletions SPA/replicate.sh
@@ -0,0 +1,38 @@
#!/bin/bash

# Make changes to dementpy.sh using for loops
for mode in 1
do
for scenario in bas mid sev
do
#input output folder
folder="mode$mode/input_$scenario output"
sed -i -e "s%mode.*.output%$folder%" dementpy.sh

for count in 1 2 3 4 5
do

jobname="dm_$mode$scenario$count"
sed -i -e "s/dm_.*/$jobname/" dementpy.sh

outname="20200120$count"_"$scenario"
echo "output name:" $outname
sed -i -e "s/20200120.*/$outname/" dementpy.sh

qsub dementpy.sh

done

done

done

# restore dementpy.sh to its original state
jobname="dm_"
sed -i -e "s/dm_.*/$jobname/" dementpy.sh

folder="mode/input_ output"
sed -i -e "s%mode.*.output%$folder%" dementpy.sh

outname="20200120"
sed -i -e "s/20200120.*/$outname/" dementpy.sh
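
Concretely, the sweep above submits 15 jobs (1 mode x 3 scenarios x 5 replicates); for mode=1, scenario=bas, count=2, for instance, dementpy.sh is rewritten with job name dm_1bas2 and output name 202001202_bas before being submitted.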
35 changes: 35 additions & 0 deletions SPA/replicate_test.sh
@@ -0,0 +1,35 @@
#!/bin/bash

# Make changes to dementpy.sh using for loops
for scenario in bas mid sev
do

#input output folder
folder="input_$scenario output"
sed -i -e "s/input_.*.output/$folder/" dementpy.sh

for count in 1 2 3 4
do

jobname="dm_$scenario$count"
sed -i -e "s/dm_.*/$jobname/" dementpy.sh

outname="20191126$count"_"$scenario"
echo "output name:" $outname
sed -i -e "s/20191126.*/$outname/" dementpy.sh

# submit to HPC
qsub dementpy.sh

done
done

# restore dementpy.sh to its original state
jobname="dm_"
sed -i -e "s/dm_.*/$jobname/" dementpy.sh

folder="input_ output"
sed -i -e "s/input_.*.output/$folder/" dementpy.sh

outname="20191126"
sed -i -e "s/20191126.*/$outname/" dementpy.sh
16 changes: 16 additions & 0 deletions SPA/scenario_name.sh
@@ -0,0 +1,16 @@
#!/bin/bash

cd dementpy0/input
rm climate.csv
mv base.csv climate.csv
cd ../..

cd dementpy1/input
rm climate.csv
mv scenario_2012.csv climate.csv
cd ../..

cd dementpy2/input
rm climate.csv
mv scenario_2013.csv climate.csv
cd ../..
91 changes: 91 additions & 0 deletions SPA/scripts/data_extraction.py
@@ -0,0 +1,91 @@
"""
Script of extracting data from pickle files
By Bin Wang
"""

#import numpy as np
import pandas as pd
import pickle
import sys
import os
import glob
import output  # DEMENTpy's output module; importing it lets pickle resolve the stored objects' class

# define a function extracting data from files in .pickle
def get_pickled_data(key):
    datalist = []

    # single run (superseded by the ensemble file list built below)
    #filelist = glob.glob(key + '_' + '20201' + '.pickle')

    ## ensemble runs -- 20 replicates (suffixes 1-20)
    filelist_19   = glob.glob(key + '_' + '2020' + '[1-9].pickle')
    filelist_1019 = glob.glob(key + '_' + '2020' + '1[0-9].pickle')
    filelist_2020 = glob.glob(key + '_' + '2020' + '20.pickle')
    filelist = filelist_19 + filelist_1019 + filelist_2020

    #filelist_2029 = glob.glob(key+'2[0-9].pickle')
    #filelist_3039 = glob.glob(key+'3[0-9].pickle')
    #filelist_4040 = glob.glob(key+'40.pickle')
    #filelist = filelist_19 + filelist_1019 + filelist_2029 + filelist_3039 + filelist_4040

    filelist.sort(reverse=False)
    for file in filelist:
        with open(file, "rb") as f:
            data = pickle.load(f)
            datalist.append(data)

    return filelist, datalist

def community_drought(data):
    """
    Calculate community-level (biomass-weighted) drought tolerance.
    """
    Relative_mass = data.MicrobesSeries.div(data.MicrobesSeries.sum(axis=0), axis=1)
    drought_tol = data.Microbial_traits['Drought_tolerance']
    community_drought = Relative_mass.mul(drought_tol, axis=0).sum(axis=0)

    return community_drought

def community_enzyme(data):
    """
    Calculate community-level (biomass-weighted) enzyme investment.
    """
    Relative_mass = data.MicrobesSeries.div(data.MicrobesSeries.sum(axis=0), axis=1)
    # inducible cost only:
    # enzyme_trait = data.Microbial_traits['Enz_Induci_Cost'] * data.Microbial_traits['Enz_Gene']
    enzyme_trait = (data.Microbial_traits['Enz_Induci_Cost'] + data.Microbial_traits['Enz_Consti_Cost']) * data.Microbial_traits['Enz_Gene']
    community_enzyme = Relative_mass.mul(enzyme_trait, axis=0).sum(axis=0)

    return community_enzyme

site = sys.argv[1] # base site name
key = sys.argv[2] # target site

os.chdir('../output_'+site)

filelist, datalist = get_pickled_data(key)
## sub-specific mass
#sub = pd.concat([data.SubstratesSeries for data in datalist], axis=1, sort=False)
## total mass
sub = pd.concat([data.SubstratesSeries.sum(axis=0) for data in datalist], axis=1, sort=False)
# export to csv
sub.to_csv('data/' + 'Sub_' + site +'_'+ key + '.csv')

# microbes
microbes = pd.concat([data.MicrobesSeries for data in datalist], axis=1, sort=False)
microbes.to_csv('data/' + 'Mic_' + site +'_'+ key + '.csv')

# taxon-specific enzyme
Enzyme = pd.concat([data.Enzyme_TaxonSeries for data in datalist], axis=1, sort=False)
Enzyme.to_csv('data/' + 'Enzyme_' + site +'_'+ key + '.csv')

# taxon-specific osmolyte
Osmolyte = pd.concat([data.Osmolyte_TaxonSeries for data in datalist], axis=1, sort=False)
Osmolyte.to_csv('data/' + 'Osmolyte_' + site +'_'+ key + '.csv')

# taxon-specific growth yield
Yield = pd.concat([data.Growth_yield for data in datalist], axis=1, sort=False)
Yield.to_csv('data/' + 'Yield_' + site +'_'+ key + '.csv')
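
A typical invocation, assuming the directory layout the script implies (an output_<site> folder one level up holding the .pickle files and a data/ subfolder for the CSVs; the site/key names here are hypothetical):

python data_extraction.py bas bas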
