From 029ef1fb9dfd90b7f98f8b67bedfecad1848a5bb Mon Sep 17 00:00:00 2001
From: arn-all <arnaud.allera@univ-lyon1.fr>
Date: Thu, 15 Apr 2021 16:04:31 +0200
Subject: [PATCH 01/12] Replaced post processing script

---
 .gitignore                 |   4 +
 example/alt_sample_plot.py | 137 ---------------------------
 pafi/__init__.py           |   1 +
 pafi/pafi.py               | 189 +++++++++++++++++++++++++++++++++++++
 4 files changed, 194 insertions(+), 137 deletions(-)
 delete mode 100644 example/alt_sample_plot.py
 create mode 100644 pafi/__init__.py
 create mode 100644 pafi/pafi.py

diff --git a/.gitignore b/.gitignore
index 7b31997..f716570 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,10 @@ example/*
 lib/*
 notes.py
 
+# Python
+pafi/__pycache__
+*.pyc
+
 # Prerequisites
 */*.d
 
diff --git a/example/alt_sample_plot.py b/example/alt_sample_plot.py
deleted file mode 100644
index 0504609..0000000
--- a/example/alt_sample_plot.py
+++ /dev/null
@@ -1,137 +0,0 @@
-"""
-VERY rough Python code to plot PAFI results
-
-(c) 2021 Tom Swinburne
-
-raw_ensemble_output has 1 + 4*nWorker colums:
-
-Column 0 : Number of workers with max_jump < user threshold at run time
--> nWorker+1 : av(dF), the gradient
--> 2*nWorker+1 : std(dF), the *worker* variance, *not* ensemble error, should not be zero even in infinite time limit!
--> 3*nWorker+1 : av(Psi) = av(dX).dX_0/|dX_0|^2, the tangent projection
--> 4*nWorker+1 : the maximum per-atom jump following the MD
-"""
-
-import glob,io
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy.integrate import cumtrapz
-from scipy.interpolate import interp1d
-
-
-discretization_error_estimate = 0.015 # estimate in eV, divided by 2
-
-
-
-# read in file line-by-line, returning ave and std for data with max displacement <= disp_thresh
-def raw_parser(file_name,disp_thresh = 0.4):
-    try:
-        f = open(file_name,'r')
-    except IOError:
-        print("raw data file %s not found" % file_name)
-        return np.zeros((1,3))
-
-    r_dFave_dFstd = []
-    r = 0.0
-    for line in f.readlines():
-        if line[0] != "#":
-            fields = np.loadtxt(io.StringIO(line.strip()))
-            n_data = (fields.size-1)//4
-            n_valid = (fields[-n_data:] < disp_thresh).sum()
-            if n_valid>0:
-                r_dFave_dFstd += [[r,fields[1:n_valid+1].mean(),fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
-            r += 1.0 # always increment even if n_valid==0
-    r_dFave_dFstd = np.r_[r_dFave_dFstd]
-    r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
-
-    return r_dFave_dFstd
-
-# splined rediscretization
-def remesh(data,density = 10):
-    spl_data = np.zeros((density*data.shape[0],data.shape[1]))
-    r_data = np.linspace(0.,1.,data.shape[0])
-    r_spl_data = np.linspace(0.,1.,spl_data.shape[0])
-    spl_data[:,0] = r_spl_data#interp1d(r_data, data[:,0],kind='linear')(r_spl_data)
-    spl_data[:,1] = interp1d(data[:,0], data[:,1],kind='linear')(spl_data[:,0])
-    spl_data[:,2] = interp1d(data[:,0], data[:,2],kind='linear')(spl_data[:,0])
-    return spl_data
-
-def integrate(data):
-    idata = np.zeros(data.shape)
-    idata[:,0] = data[:,0]
-    idata[1:,1] = -cumtrapz(data[:,1],data[:,0])
-    idata[1:,2] = cumtrapz(data[:,2],data[:,0]) +  discretization_error_estimate
-    run_min =  np.minimum.accumulate(idata[:,1])
-    run_min_shift =  np.minimum.accumulate(np.append(idata[1:,1],idata[-1][1]))
-    if (run_min == run_min_shift).sum()>0:
-        idata = idata[run_min == run_min_shift,:]
-    idata[:,1]-=idata[0][1]
-    return idata, idata[:,1].max(), idata[0][2] + idata[idata[:,1].argmax()][2]
-
-
-
-# file list- here we take epoch 5
-fl = glob.glob("50/dumps/raw*");
-print(fl)
-# temperatures
-T = np.r_[[int(f.split("_")[-2][:-1]) for f in fl]];
-
-
-fig,axs = plt.subplots(1,2,figsize=(8,4),dpi=144,sharey=True);
-
-bar = []
-for ii,i_f in enumerate(T.argsort()):
-    _bar = [T[i_f]]
-    r_dFave_dFstd = raw_parser(fl[i_f])
-    r_Fave_Fstd,barrier,error = integrate(r_dFave_dFstd)
-
-    _bar += [barrier]+[error]
-    r_dFave_dFstd_remesh = remesh(r_dFave_dFstd)
-    r_Fave_Fstd_remesh,barrier,error = integrate(r_dFave_dFstd_remesh)
-    _bar += [barrier]+[error]
-
-    axs[0].plot(r_Fave_Fstd[:,0],
-                r_Fave_Fstd[:,1],
-                'C%d%s' % (ii,'o--'),label='%dK' % T[i_f])
-
-    axs[0].fill_between(r_Fave_Fstd[:,0],
-            r_Fave_Fstd[:,1]-r_Fave_Fstd[:,2],
-            r_Fave_Fstd[:,1]+r_Fave_Fstd[:,2],
-            facecolor='0.8')
-
-    axs[0].plot(r_Fave_Fstd_remesh[:,0],
-                r_Fave_Fstd_remesh[:,1],
-                'C%d%s' % (ii,'-'),label='%dK (Splined)' % T[i_f])
-
-    axs[0].fill_between(r_Fave_Fstd_remesh[:,0],
-            r_Fave_Fstd_remesh[:,1]-r_Fave_Fstd_remesh[:,2],
-            r_Fave_Fstd_remesh[:,1]+r_Fave_Fstd_remesh[:,2],
-            facecolor='0.8')
-
-    bar += [_bar]
-bar = np.r_[bar]
-print(bar.shape)
-
-p = np.polyfit(bar[:3,0],bar[:3,1],1)
-
-kb = 8.617e-5
-
-axs[1].plot(bar[:,0],bar[:,1],'o-',label='Raw Data')
-axs[1].plot(bar[:,0],bar[:,3],'o--',label='Splined Data')
-
-axs[1].plot(bar[:,0],p[1]+p[0]*bar[:,0],'k--',label=r'$\Delta U_0=%2.3geV,\Delta S_0=%2.3g{\rm k_B}$' % (p[1],-p[0]/kb))
-axs[1].fill_between(bar[:,0],bar[:,1]-bar[:,2],bar[:,1]+bar[:,2],facecolor='0.8')
-axs[1].fill_between(bar[:,0],bar[:,3]-bar[:,4],bar[:,3]+bar[:,4],facecolor='0.8')
-
-
-axs[1].set_ylim(ymin=-discretization_error_estimate)
-axs[0].legend(fontsize=7)
-axs[1].legend(fontsize=7)
-
-axs[0].set_xlabel("Reaction Coordinate")
-axs[1].set_xlabel("Temperature [K]")
-
-axs[0].set_ylabel("Free energy barrier [eV]")
-
-plt.subplots_adjust(wspace=0)
-plt.savefig("PAFI_fig.pdf")
diff --git a/pafi/__init__.py b/pafi/__init__.py
new file mode 100644
index 0000000..233e1f8
--- /dev/null
+++ b/pafi/__init__.py
@@ -0,0 +1 @@
+from pafi.pafi import *
diff --git a/pafi/pafi.py b/pafi/pafi.py
new file mode 100644
index 0000000..48f87df
--- /dev/null
+++ b/pafi/pafi.py
@@ -0,0 +1,189 @@
+"""
+raw_ensemble_output has 1 + 4*nWorker colums:
+
+Column 0 : Number of workers with max_jump < user threshold at run time
+-> nWorker+1 : av(dF), the gradient
+-> 2*nWorker+1 : std(dF), the *worker* variance, *not* ensemble error, should not be zero even in infinite time limit!
+-> 3*nWorker+1 : av(Psi) = av(dX).dX_0/|dX_0|^2, the tangent projection
+-> 4*nWorker+1 : the maximum per-atom jump following the MD
+"""
+
+import io
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.integrate import cumtrapz
+from scipy.interpolate import interp1d
+import pandas as pd 
+import pathlib
+
+class Profile():
+    """Class for handling (free) energy profiles.
+    
+    Parameters
+    ----------
+
+            data : np.ndarray
+                A Nx3 shaped array that contains [reaction coordinate, free energy, and error] as columns.
+
+    Attributes
+    ----------
+
+        data : np.ndarray
+            The Nx3 array passed as argument.
+        barrier : float
+            The maximum Free Energy value.
+        error : float
+            error = error(R=0)+error(R=Rm), where Rm is the reaction corrdinate at which the Free Energy is maximal.
+    """
+
+    def __init__(self, data):
+        self.data = data
+        self.barrier = data[:,1].max()
+        self.error = data[0][2] + data[data[:,1].argmax()][2]
+
+class PafiResult():
+    """Class for manipulating PAFI simulation results.
+    
+    Parameters
+    ----------
+
+            file : pathlib.Path object or str
+                The path to the pafi raw_ensemble file to extract data from. 
+                Should have a name of the form "raw_ensemble_output_*K_*", unless optional temperature parameter is given.
+            temperature : int or float
+                The temperature of the simulation.
+
+    Attributes
+    ----------
+
+        temperature : str
+            The standard output passed by PreciSo during its execution.
+        file : preciso.Distribution object
+            A preciso.Distribution object, that contains all the data related to precipitates size distributions.
+        discrete_profile : pafi.Profile
+            The free energy profile as a pafi.Profile object. Contains the discrete data.
+        splined_profile : pafi.Profile
+            The free energy profile as a pafi.Profile object. Rediscretized in order to have a smooth profile curve.
+        bar :   np.array
+            A convenient array that contains commonly used data : [temperature, discrete_profile.barrier, discrete_profile.error, splined_profile.barrier, splined_profile.error]
+            See documentation of pafi.Profile for details on .barrier and .error attributes.
+    
+    Examples
+    --------
+
+        >>> import pafi
+        >>> import matplotlib.pyplot as plt
+        >>> p = pafi.PafiResult('raw_ensemble_output_0K_0')
+        >>> ax = p.plot()
+        >>> plt.show()
+    """
+
+    def __init__(self, file, temperature=None):
+        assert (isinstance(file, str)) or (isinstance(file, pathlib.Path)) ,"`file` must be str or pathlib.Path object."
+
+        self.file = file
+        if temperature is not None:
+            self.temperature = temperature
+        else:
+            self.temperature = self.get_temperature()
+
+        raw = self.raw_parser()
+        self.discrete_profile = Profile(PafiResult.integrate(raw))
+ 
+        remeshed = PafiResult.remesh(raw)
+        self.splined_profile = Profile(PafiResult.integrate(remeshed))
+
+        self.bar = self.get_bar()
+
+    def raw_parser(self, disp_thresh=0.7):
+        """Parse the raw data file"""
+        try:
+            f = open(self.file,'r')
+        except IOError:
+            print("raw data file %s not found" % self.file)
+            return np.zeros((1,3))
+
+        count_data = count_valid = 0
+        r_dFave_dFstd = []
+        r = 0.0
+        for line in f.readlines():
+            if line[0] != "#":
+                fields = np.loadtxt(io.StringIO(line.strip()))
+                n_data = (fields.size-1)//4
+                n_valid = (fields[-n_data:] < disp_thresh).sum()
+
+                count_data  += n_data
+                if n_valid>0:
+                    count_valid += n_valid
+                
+                    r_dFave_dFstd += [[r, 
+                                        fields[1:n_valid+1].mean(),
+                                        fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
+                r += 1.0 # always increment even if n_valid==0
+        # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
+        
+        r_dFave_dFstd = np.r_[r_dFave_dFstd]
+        r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
+
+        return r_dFave_dFstd
+    
+    def get_bar(self):
+        """This data format is used by some legacy functions"""
+        return np.array([self.temperature, 
+                            self.discrete_profile.barrier, 
+                            self.discrete_profile.error, 
+                            self.splined_profile.barrier, 
+                            self.splined_profile.error])
+
+    def get_temperature(self):
+        """The temperature of the run"""
+        return int(str(self.file).split("_")[-2][:-1])
+
+    def remesh(data, density=10):
+        """Remesh to obtained the splined version of the profile"""
+        spl_data = np.zeros((density*data.shape[0],data.shape[1]))
+        r_data = np.linspace(0.,1.,data.shape[0])
+        r_spl_data = np.linspace(0.,1.,spl_data.shape[0])
+        spl_data[:,0] = r_spl_data
+        spl_data[:,1] = interp1d(data[:,0], data[:,1],kind='linear',fill_value="extrapolate")(spl_data[:,0])
+        spl_data[:,2] = interp1d(data[:,0], data[:,2],kind='linear',fill_value="extrapolate")(spl_data[:,0])
+        return spl_data
+
+    def integrate(data, discretization_error_estimate=0.015):
+        idata = np.zeros(data.shape)
+        idata[:,0] = data[:,0]
+        idata[1:,1] = -cumtrapz(data[:,1],data[:,0])
+        idata[1:,2] = cumtrapz(data[:,2],data[:,0]) +  discretization_error_estimate
+        run_min =  np.minimum.accumulate(idata[:,1])
+        run_min_shift =  np.minimum.accumulate(np.append(idata[1:,1],idata[-1][1]))
+        if (run_min == run_min_shift).sum()>0:
+            idata = idata[run_min == run_min_shift,:]
+        idata[:,1]-=idata[0][1]
+        return idata
+
+    def plot(self):
+        
+        fig, ax = plt.subplots()
+        discrete = self.discrete_profile.data
+        splined = self.splined_profile.data
+
+        for i, profile in enumerate([self.discrete_profile.data, self.splined_profile.data]):
+            
+            style = ["o", "-"][i]
+            label = [f'{self.temperature}K', ''][i]
+            ax.plot(profile[:,0],
+                        profile[:,1],
+                        style, 
+                        color=f'C{i}',
+                        label=label)
+
+            ax.fill_between(profile[:,0],
+                    profile[:,1]-profile[:,2],
+                    profile[:,1]+profile[:,2],
+                    facecolor='0.8')
+
+        ax.set_xlabel("Reaction Coordinate")
+        ax.set_ylabel("Free energy of activation [eV]")
+        ax.set_xlabel("Temperature [K]")      
+        ax.legend(loc="best")  
+        return ax

From 628abad83db44d47faa86d3e61f81b390e4c536c Mon Sep 17 00:00:00 2001
From: arn-all <arnaud.allera@univ-lyon1.fr>
Date: Thu, 15 Apr 2021 16:17:38 +0200
Subject: [PATCH 02/12] restored alt_sample_plot

---
 example/alt_sample_plot.py | 137 +++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 example/alt_sample_plot.py

diff --git a/example/alt_sample_plot.py b/example/alt_sample_plot.py
new file mode 100644
index 0000000..0504609
--- /dev/null
+++ b/example/alt_sample_plot.py
@@ -0,0 +1,137 @@
+"""
+VERY rough Python code to plot PAFI results
+
+(c) 2021 Tom Swinburne
+
+raw_ensemble_output has 1 + 4*nWorker colums:
+
+Column 0 : Number of workers with max_jump < user threshold at run time
+-> nWorker+1 : av(dF), the gradient
+-> 2*nWorker+1 : std(dF), the *worker* variance, *not* ensemble error, should not be zero even in infinite time limit!
+-> 3*nWorker+1 : av(Psi) = av(dX).dX_0/|dX_0|^2, the tangent projection
+-> 4*nWorker+1 : the maximum per-atom jump following the MD
+"""
+
+import glob,io
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.integrate import cumtrapz
+from scipy.interpolate import interp1d
+
+
+discretization_error_estimate = 0.015 # estimate in eV, divided by 2
+
+
+
+# read in file line-by-line, returning ave and std for data with max displacement <= disp_thresh
+def raw_parser(file_name,disp_thresh = 0.4):
+    try:
+        f = open(file_name,'r')
+    except IOError:
+        print("raw data file %s not found" % file_name)
+        return np.zeros((1,3))
+
+    r_dFave_dFstd = []
+    r = 0.0
+    for line in f.readlines():
+        if line[0] != "#":
+            fields = np.loadtxt(io.StringIO(line.strip()))
+            n_data = (fields.size-1)//4
+            n_valid = (fields[-n_data:] < disp_thresh).sum()
+            if n_valid>0:
+                r_dFave_dFstd += [[r,fields[1:n_valid+1].mean(),fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
+            r += 1.0 # always increment even if n_valid==0
+    r_dFave_dFstd = np.r_[r_dFave_dFstd]
+    r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
+
+    return r_dFave_dFstd
+
+# splined rediscretization
+def remesh(data,density = 10):
+    spl_data = np.zeros((density*data.shape[0],data.shape[1]))
+    r_data = np.linspace(0.,1.,data.shape[0])
+    r_spl_data = np.linspace(0.,1.,spl_data.shape[0])
+    spl_data[:,0] = r_spl_data#interp1d(r_data, data[:,0],kind='linear')(r_spl_data)
+    spl_data[:,1] = interp1d(data[:,0], data[:,1],kind='linear')(spl_data[:,0])
+    spl_data[:,2] = interp1d(data[:,0], data[:,2],kind='linear')(spl_data[:,0])
+    return spl_data
+
+def integrate(data):
+    idata = np.zeros(data.shape)
+    idata[:,0] = data[:,0]
+    idata[1:,1] = -cumtrapz(data[:,1],data[:,0])
+    idata[1:,2] = cumtrapz(data[:,2],data[:,0]) +  discretization_error_estimate
+    run_min =  np.minimum.accumulate(idata[:,1])
+    run_min_shift =  np.minimum.accumulate(np.append(idata[1:,1],idata[-1][1]))
+    if (run_min == run_min_shift).sum()>0:
+        idata = idata[run_min == run_min_shift,:]
+    idata[:,1]-=idata[0][1]
+    return idata, idata[:,1].max(), idata[0][2] + idata[idata[:,1].argmax()][2]
+
+
+
+# file list- here we take epoch 5
+fl = glob.glob("50/dumps/raw*");
+print(fl)
+# temperatures
+T = np.r_[[int(f.split("_")[-2][:-1]) for f in fl]];
+
+
+fig,axs = plt.subplots(1,2,figsize=(8,4),dpi=144,sharey=True);
+
+bar = []
+for ii,i_f in enumerate(T.argsort()):
+    _bar = [T[i_f]]
+    r_dFave_dFstd = raw_parser(fl[i_f])
+    r_Fave_Fstd,barrier,error = integrate(r_dFave_dFstd)
+
+    _bar += [barrier]+[error]
+    r_dFave_dFstd_remesh = remesh(r_dFave_dFstd)
+    r_Fave_Fstd_remesh,barrier,error = integrate(r_dFave_dFstd_remesh)
+    _bar += [barrier]+[error]
+
+    axs[0].plot(r_Fave_Fstd[:,0],
+                r_Fave_Fstd[:,1],
+                'C%d%s' % (ii,'o--'),label='%dK' % T[i_f])
+
+    axs[0].fill_between(r_Fave_Fstd[:,0],
+            r_Fave_Fstd[:,1]-r_Fave_Fstd[:,2],
+            r_Fave_Fstd[:,1]+r_Fave_Fstd[:,2],
+            facecolor='0.8')
+
+    axs[0].plot(r_Fave_Fstd_remesh[:,0],
+                r_Fave_Fstd_remesh[:,1],
+                'C%d%s' % (ii,'-'),label='%dK (Splined)' % T[i_f])
+
+    axs[0].fill_between(r_Fave_Fstd_remesh[:,0],
+            r_Fave_Fstd_remesh[:,1]-r_Fave_Fstd_remesh[:,2],
+            r_Fave_Fstd_remesh[:,1]+r_Fave_Fstd_remesh[:,2],
+            facecolor='0.8')
+
+    bar += [_bar]
+bar = np.r_[bar]
+print(bar.shape)
+
+p = np.polyfit(bar[:3,0],bar[:3,1],1)
+
+kb = 8.617e-5
+
+axs[1].plot(bar[:,0],bar[:,1],'o-',label='Raw Data')
+axs[1].plot(bar[:,0],bar[:,3],'o--',label='Splined Data')
+
+axs[1].plot(bar[:,0],p[1]+p[0]*bar[:,0],'k--',label=r'$\Delta U_0=%2.3geV,\Delta S_0=%2.3g{\rm k_B}$' % (p[1],-p[0]/kb))
+axs[1].fill_between(bar[:,0],bar[:,1]-bar[:,2],bar[:,1]+bar[:,2],facecolor='0.8')
+axs[1].fill_between(bar[:,0],bar[:,3]-bar[:,4],bar[:,3]+bar[:,4],facecolor='0.8')
+
+
+axs[1].set_ylim(ymin=-discretization_error_estimate)
+axs[0].legend(fontsize=7)
+axs[1].legend(fontsize=7)
+
+axs[0].set_xlabel("Reaction Coordinate")
+axs[1].set_xlabel("Temperature [K]")
+
+axs[0].set_ylabel("Free energy barrier [eV]")
+
+plt.subplots_adjust(wspace=0)
+plt.savefig("PAFI_fig.pdf")

From 3f20525baa6b7c2b64bb247ace3650b5085ad8ed Mon Sep 17 00:00:00 2001
From: arn-all <arnaud.allera@univ-lyon1.fr>
Date: Thu, 15 Apr 2021 17:00:43 +0200
Subject: [PATCH 03/12] Added plot of activated Free energy vs temperature

---
 pafi/pafi.py | 41 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/pafi/pafi.py b/pafi/pafi.py
index 48f87df..151b389 100644
--- a/pafi/pafi.py
+++ b/pafi/pafi.py
@@ -16,6 +16,8 @@
 import pandas as pd 
 import pathlib
 
+kb = 8.617e-5
+
 class Profile():
     """Class for handling (free) energy profiles.
     
@@ -161,9 +163,12 @@ def integrate(data, discretization_error_estimate=0.015):
         idata[:,1]-=idata[0][1]
         return idata
 
-    def plot(self):
+    def plot(self, ax=None, color=None):
         
-        fig, ax = plt.subplots()
+        if ax is None:
+            fig, ax = plt.subplots()
+        if color is None:
+            color="C0"
         discrete = self.discrete_profile.data
         splined = self.splined_profile.data
 
@@ -174,7 +179,7 @@ def plot(self):
             ax.plot(profile[:,0],
                         profile[:,1],
                         style, 
-                        color=f'C{i}',
+                        color=color,
                         label=label)
 
             ax.fill_between(profile[:,0],
@@ -187,3 +192,33 @@ def plot(self):
         ax.set_xlabel("Temperature [K]")      
         ax.legend(loc="best")  
         return ax
+
+def free_energy_vs_temperature(flist, harmonic_until_T=100, ymin=-0.05):
+
+    fig, ax = plt.subplots(1,2, figsize=(8,4),dpi=144,sharey=True)
+
+    barriers = []
+    for i, file in enumerate(flist):
+        r = PafiResult(file)
+        a = r.plot(ax[0], color=f'C{i}')
+        barriers.append(r.bar)
+
+    bar = np.array(barriers)
+    bar = bar[np.argsort(bar[:, 0])]
+
+    # Fit a linear regression on the domain below harmonic_until_T K
+    harmonic_regime = bar[np.where((bar[:,0]<=harmonic_until_T))]
+    p = np.polyfit(harmonic_regime[:,0], harmonic_regime[:,1],1)
+
+    ax[1].plot(bar[:,0], bar[:,1], "-o")
+    ax[1].fill_between(bar[:,0],bar[:,1]-bar[:,2],bar[:,1]+bar[:,2],facecolor='0.93')
+    ax[1].fill_between(bar[:,0],bar[:,3]-bar[:,4],bar[:,3]+bar[:,4],facecolor='0.93')
+    # harmonic domain
+    ax[1].plot(bar[:4,0],p[1]+p[0]*bar[:4,0],'k--', label=r'$\Delta U_0=%2.3geV,\Delta S_0=%2.3g{\rm k_B}$' % (p[1], -p[0]/kb))
+    ax[1].set_xlabel("Temperature [K]")
+    ax[1].legend(loc='best')
+    ax[1].set_ylim(ymin=ymin)
+    plt.subplots_adjust(wspace=0)
+    plt.tight_layout()
+        
+    return ax
\ No newline at end of file

From 94d701f6004e435357aa24741093115e5ccbc329 Mon Sep 17 00:00:00 2001
From: arn-all <arnaud.allera@univ-lyon1.fr>
Date: Fri, 16 Apr 2021 14:06:48 +0200
Subject: [PATCH 04/12] Renamed pafi.py for clarity

---
 pafi/__init__.py                     | 2 +-
 pafi/{pafi.py => post_processing.py} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename pafi/{pafi.py => post_processing.py} (100%)

diff --git a/pafi/__init__.py b/pafi/__init__.py
index 233e1f8..1060cc1 100644
--- a/pafi/__init__.py
+++ b/pafi/__init__.py
@@ -1 +1 @@
-from pafi.pafi import *
+from pafi.post_processing import *
diff --git a/pafi/pafi.py b/pafi/post_processing.py
similarity index 100%
rename from pafi/pafi.py
rename to pafi/post_processing.py

From 0b219b83e32d894040fec8378e5bb2f3579a976e Mon Sep 17 00:00:00 2001
From: arn-all <arnaud.allera@univ-lyon1.fr>
Date: Fri, 16 Apr 2021 14:44:41 +0200
Subject: [PATCH 05/12] Python packaging to make pafi importable anywhere

---
 .gitignore       |  3 +++
 INSTALL.md       | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 pafi/__init__.py | 10 ++++++++
 pyproject.toml   |  3 +++
 setup.cfg        | 15 ++++++++++++
 setup.py         |  2 ++
 6 files changed, 93 insertions(+)
 create mode 100644 pyproject.toml
 create mode 100644 setup.cfg
 create mode 100644 setup.py

diff --git a/.gitignore b/.gitignore
index f716570..2f6d1fb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,8 @@ notes.py
 # Python
 pafi/__pycache__
 *.pyc
+dist
+*.egg-info
 
 # Prerequisites
 */*.d
@@ -47,3 +49,4 @@ pafi/__pycache__
 */*.exe
 */*.out
 */*.app
+
diff --git a/INSTALL.md b/INSTALL.md
index 361f8e1..ea54893 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -76,3 +76,63 @@ LMP_INC =	-DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64  -DLAMMPS_EXCEPTIONS
    cmake ..
    make # or try make -j4 for parallel make using 4 cores
 ```
+
+## Install Python post processing package
+
+Pafi is distributed with a Python package that enables easy visualization of the results.
+To install the python package, you need to install `python` and `pip`.
+Then, go to the main directory and do: 
+
+```bash
+   pip install -e .
+```
+
+This way, the package will be installed in developer mode, meaning that any change of the python file(s) `pafi/*.py` or any git pull of a newer version will take effect immediately.
+If you want to have to re-install the package manually each time instead, still from the main directory:
+
+```python
+   pip install build #
+   python -m build   # to build the package
+   pip install .     # run it each time you want to (re)install it 
+```
+
+The `pafi` package is now importable from python, at any location. 
+
+For plotting the results of a simulation:
+
+```python
+import pafi
+import matplotlib.pyplot as plt
+
+p = pafi.PafiResult('raw_ensemble_output_0K_0')
+ax = p.plot()
+plt.show()
+```
+
+It also allows plotting the results of a series of simulations conducted at different temperatures:
+
+```python
+import pafi
+import matplotlib.pyplot as plt
+import glob
+
+fl = glob.glob('raw_ensemble_output_*K_0')
+ax = pafi.free_energy_vs_temperature(fl, harmonic_until_T=100)
+plt.show()
+```
+
+A command line tool is also available.
+
+To display the result of a simulation in a new matplotlib GUI window :
+
+```bash
+pafi_plot example/raw_ensemble_output_0K_0
+```
+
+To save the graph to pdf or png format (does not open the GUI):
+
+```bash
+pafi_plot example/raw_ensemble_output_0K_0 graph.pdf
+or
+pafi_plot example/raw_ensemble_output_0K_0 graph.png
+```
\ No newline at end of file
diff --git a/pafi/__init__.py b/pafi/__init__.py
index 1060cc1..dea66d9 100644
--- a/pafi/__init__.py
+++ b/pafi/__init__.py
@@ -1 +1,11 @@
 from pafi.post_processing import *
+
+def cmd_plot():
+    import sys
+    import matplotlib.pyplot as plt
+    p = PafiResult(sys.argv[1])
+    ax = p.plot()
+    if len(sys.argv)==3:
+        plt.savefig(sys.argv[2])
+    else:
+        plt.show()
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..9787c3b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..8b5ba70
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,15 @@
+[metadata]
+name = pafi
+version = 0.0.1
+
+[options]
+packages = find:
+install_requires =
+    numpy
+    matplotlib
+    scipy
+    pandas; python_version == "3.7"
+
+[options.entry_points]
+console_scripts =
+    pafi_plot = pafi:cmd_plot
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..a4f49f9
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,2 @@
+import setuptools
+setuptools.setup()

From 2ab1fee44ff6106ba58d74f9087f5a711a442ca1 Mon Sep 17 00:00:00 2001
From: arn-all <arnaud.allera@univ-lyon1.fr>
Date: Fri, 16 Apr 2021 15:43:35 +0200
Subject: [PATCH 06/12] fix: sort profiles by temperature

---
 pafi/post_processing.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pafi/post_processing.py b/pafi/post_processing.py
index 151b389..429b27b 100644
--- a/pafi/post_processing.py
+++ b/pafi/post_processing.py
@@ -196,12 +196,13 @@ def plot(self, ax=None, color=None):
 def free_energy_vs_temperature(flist, harmonic_until_T=100, ymin=-0.05):
 
     fig, ax = plt.subplots(1,2, figsize=(8,4),dpi=144,sharey=True)
+    
+    data = [PafiResult(file) for file in flist]
+    data.sort(key=lambda x: x.temperature)
+    barriers = [x.bar for x in data]
 
-    barriers = []
-    for i, file in enumerate(flist):
-        r = PafiResult(file)
+    for i, r in enumerate(data):
         a = r.plot(ax[0], color=f'C{i}')
-        barriers.append(r.bar)
 
     bar = np.array(barriers)
     bar = bar[np.argsort(bar[:, 0])]

From e757ec1ae1a668f7114c5005afe00b2e15de6656 Mon Sep 17 00:00:00 2001
From: aallera <arnaud.allera@univ-lyon1.fr>
Date: Wed, 22 Sep 2021 12:07:27 +0200
Subject: [PATCH 07/12] support for new raw_data file format

---
 pafi/post_processing.py | 70 +++++++++++++++++++++++++++++------------
 1 file changed, 50 insertions(+), 20 deletions(-)

diff --git a/pafi/post_processing.py b/pafi/post_processing.py
index 429b27b..757341f 100644
--- a/pafi/post_processing.py
+++ b/pafi/post_processing.py
@@ -107,28 +107,58 @@ def raw_parser(self, disp_thresh=0.7):
 
         count_data = count_valid = 0
         r_dFave_dFstd = []
-        r = 0.0
-        for line in f.readlines():
-            if line[0] != "#":
-                fields = np.loadtxt(io.StringIO(line.strip()))
-                n_data = (fields.size-1)//4
-                n_valid = (fields[-n_data:] < disp_thresh).sum()
-
-                count_data  += n_data
-                if n_valid>0:
-                    count_valid += n_valid
-                
-                    r_dFave_dFstd += [[r, 
-                                        fields[1:n_valid+1].mean(),
-                                        fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
-                r += 1.0 # always increment even if n_valid==0
-        # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
-        
-        r_dFave_dFstd = np.r_[r_dFave_dFstd]
-        r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
 
-        return r_dFave_dFstd
+        if PafiResult.has_old_data_format(self.file):
+            # reaction coordinate is interpolated (may cause deviations at large T)
+            # needed for backward compatibility
+            r = 0.0
+            for line in f.readlines():
+                if line[0] != "#":
+                    fields = np.loadtxt(io.StringIO(line.strip()))
+                    n_data = (fields.size-1)//4
+                    n_valid = (fields[-n_data:] < disp_thresh).sum()
+
+                    count_data  += n_data
+                    if n_valid>0:
+                        count_valid += n_valid
+                    
+                        r_dFave_dFstd += [[r, 
+                                            fields[1:n_valid+1].mean(),
+                                            fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
+                    r += 1.0 # always increment even if n_valid==0
+            # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
+            r_dFave_dFstd = np.r_[r_dFave_dFstd]
+            r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
+            return r_dFave_dFstd
+        else:
+            # reaction coordinate is read from file
+            for line in f.readlines():
+                if line[0] != "#":
+                    fields = np.loadtxt(io.StringIO(line.strip()))
+                    n_data = (fields.size-2)//4
+                    n_valid = (fields[-n_data:] < disp_thresh).sum()
+
+                    count_data  += n_data
+                    if n_valid>0:
+                        count_valid += n_valid
+                    
+                        r_dFave_dFstd += [[fields[0],
+                                        fields[2:n_valid+2].mean(),
+                                        fields[2:n_valid+2].std()/np.sqrt(n_valid)]]
+
+            # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
+            r_dFave_dFstd = np.r_[r_dFave_dFstd]
+            return r_dFave_dFstd
     
+    def has_old_data_format(file):
+        d = np.loadtxt(file, max_rows=1)
+        if (d.shape[0]-1) //4 == (d.shape[0]-1) /4:
+            return True
+        elif (d.shape[0]-2 //4) == (d.shape[0]-2 /4):
+            return False
+        else: 
+            raise RuntimeError(r"File {file} has an unsupported format")
+
     def get_bar(self):
         """This data format is used by some legacy functions"""
         return np.array([self.temperature, 

From 24a614c08eada1dc318f7c17e281945f2a94f2ac Mon Sep 17 00:00:00 2001
From: aallera <arnaud.allera@univ-lyon1.fr>
Date: Wed, 22 Sep 2021 12:08:03 +0200
Subject: [PATCH 08/12] improvements to plotting functions

---
 pafi/post_processing.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/pafi/post_processing.py b/pafi/post_processing.py
index 757341f..3e5c4af 100644
--- a/pafi/post_processing.py
+++ b/pafi/post_processing.py
@@ -193,7 +193,7 @@ def integrate(data, discretization_error_estimate=0.015):
         idata[:,1]-=idata[0][1]
         return idata
 
-    def plot(self, ax=None, color=None):
+    def plot(self, ax=None, color=None, label_prepend=""):
         
         if ax is None:
             fig, ax = plt.subplots()
@@ -205,7 +205,7 @@ def plot(self, ax=None, color=None):
         for i, profile in enumerate([self.discrete_profile.data, self.splined_profile.data]):
             
             style = ["o", "-"][i]
-            label = [f'{self.temperature}K', ''][i]
+            label = [label_prepend + f'{self.temperature}K', ''][i]
             ax.plot(profile[:,0],
                         profile[:,1],
                         style, 
@@ -223,29 +223,31 @@ def plot(self, ax=None, color=None):
         ax.legend(loc="best")  
         return ax
 
-def free_energy_vs_temperature(flist, harmonic_until_T=100, ymin=-0.05):
+def free_energy_vs_temperature(flist, ax=None, fit_harmonic=True, harmonic_until_T=100, ymin=-0.05, label_prepend="", start_color_at_index=0):
 
-    fig, ax = plt.subplots(1,2, figsize=(8,4),dpi=144,sharey=True)
+    if (ax is None) or len(ax)!=2:
+        fig, ax = plt.subplots(1,2, figsize=(9,5),dpi=144,sharey=True)
     
     data = [PafiResult(file) for file in flist]
     data.sort(key=lambda x: x.temperature)
-    barriers = [x.bar for x in data]
+    bar = np.array([x.bar for x in data])
 
     for i, r in enumerate(data):
-        a = r.plot(ax[0], color=f'C{i}')
+        a = r.plot(ax[0], color=f'C{i+start_color_at_index}', label_prepend=label_prepend)
 
-    bar = np.array(barriers)
-    bar = bar[np.argsort(bar[:, 0])]
+    # bar = bar[np.argsort(bar[:, 0])]
 
-    # Fit a linear regression on the domain below harmonic_until_T K
-    harmonic_regime = bar[np.where((bar[:,0]<=harmonic_until_T))]
-    p = np.polyfit(harmonic_regime[:,0], harmonic_regime[:,1],1)
+    if fit_harmonic:
+        # Fit a linear regression on the domain below harmonic_until_T K
+        harmonic_regime = bar[np.where((bar[:,0]<=harmonic_until_T))]
+        p = np.polyfit(harmonic_regime[:,0], harmonic_regime[:,1],1)
 
-    ax[1].plot(bar[:,0], bar[:,1], "-o")
+    ax[1].plot(bar[:,0], bar[:,1], "-o", color=f'C{start_color_at_index}', label=label_prepend)
     ax[1].fill_between(bar[:,0],bar[:,1]-bar[:,2],bar[:,1]+bar[:,2],facecolor='0.93')
     ax[1].fill_between(bar[:,0],bar[:,3]-bar[:,4],bar[:,3]+bar[:,4],facecolor='0.93')
-    # harmonic domain
-    ax[1].plot(bar[:4,0],p[1]+p[0]*bar[:4,0],'k--', label=r'$\Delta U_0=%2.3geV,\Delta S_0=%2.3g{\rm k_B}$' % (p[1], -p[0]/kb))
+    if fit_harmonic:
+        # harmonic domain
+        ax[1].plot(bar[:4,0],p[1]+p[0]*bar[:4,0],'--', color=f'C{start_color_at_index}', label=label_prepend + r'$\Delta U_0=%2.3geV,\Delta S_0=%2.3g{\rm k_B}$' % (p[1], -p[0]/kb))
     ax[1].set_xlabel("Temperature [K]")
     ax[1].legend(loc='best')
     ax[1].set_ylim(ymin=ymin)

From f09b41381739419f262531abaca7cdb6eaa00a5e Mon Sep 17 00:00:00 2001
From: Arnaud Allera <45487966+arn-all@users.noreply.github.com>
Date: Wed, 22 Sep 2021 12:12:58 +0200
Subject: [PATCH 09/12] Update post_processing.py

---
 pafi/post_processing.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pafi/post_processing.py b/pafi/post_processing.py
index 3e5c4af..8d41824 100644
--- a/pafi/post_processing.py
+++ b/pafi/post_processing.py
@@ -1,11 +1,11 @@
 """
-raw_ensemble_output has 1 + 4*nWorker colums:
+raw_ensemble_output has i + 4*nWorker colums. i = 2 since commit 3ca8f884 (20/09/21). For older versions of PAFI, i=1.
 
 Column 0 : Number of workers with max_jump < user threshold at run time
--> nWorker+1 : av(dF), the gradient
--> 2*nWorker+1 : std(dF), the *worker* variance, *not* ensemble error, should not be zero even in infinite time limit!
--> 3*nWorker+1 : av(Psi) = av(dX).dX_0/|dX_0|^2, the tangent projection
--> 4*nWorker+1 : the maximum per-atom jump following the MD
+-> nWorker+i : av(dF), the gradient
+-> 2*nWorker+i : std(dF), the *worker* variance, *not* ensemble error, should not be zero even in infinite time limit!
+-> 3*nWorker+i : av(Psi) = av(dX).dX_0/|dX_0|^2, the tangent projection
+-> 4*nWorker+i : the maximum per-atom jump following the MD
 """
 
 import io
@@ -254,4 +254,4 @@ def free_energy_vs_temperature(flist, ax=None, fit_harmonic=True, harmonic_until
     plt.subplots_adjust(wspace=0)
     plt.tight_layout()
         
-    return ax
\ No newline at end of file
+    return ax

From 68fb0de6788e825a421fc3fbe682f3ee7ecdb312 Mon Sep 17 00:00:00 2001
From: aallera <arnaud.allera@univ-lyon1.fr>
Date: Wed, 13 Oct 2021 14:17:52 +0200
Subject: [PATCH 10/12] bugfix: reading new format raw files

---
 pafi/post_processing.py | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/pafi/post_processing.py b/pafi/post_processing.py
index 3e5c4af..0d43cef 100644
--- a/pafi/post_processing.py
+++ b/pafi/post_processing.py
@@ -148,16 +148,17 @@ def raw_parser(self, disp_thresh=0.7):
 
             # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
             r_dFave_dFstd = np.r_[r_dFave_dFstd]
+            # print(r_dFave_dFstd)
             return r_dFave_dFstd
     
     def has_old_data_format(file):
         d = np.loadtxt(file, max_rows=1)
         if (d.shape[0]-1) //4 == (d.shape[0]-1) /4:
             return True
-        elif (d.shape[0]-2 //4) == (d.shape[0]-2 /4):
+        elif (d.shape[0]-2) //4 == (d.shape[0]-2) /4:
             return False
         else: 
-            raise RuntimeError(r"File {file} has an unsupported format")
+            raise RuntimeError(f"File {file} has an unsupported format")
 
     def get_bar(self):
         """This data format is used by some legacy functions"""
@@ -181,15 +182,26 @@ def remesh(data, density=10):
         spl_data[:,2] = interp1d(data[:,0], data[:,2],kind='linear',fill_value="extrapolate")(spl_data[:,0])
         return spl_data
 
+    def selector(data):
+        """Start the path at the energy minimum (can be different from the initial state)"""
+
+        select = data
+        if np.argmax(data[:, 1])!=0:
+            select = data[np.argmin(data[:np.argmax(data[:, 1]), 1]):, :]
+        else:
+            run_min =  np.minimum.accumulate(data[:,1])
+            run_min_shift =  np.minimum.accumulate(np.append(data[1:,1],data[-1][1]))
+
+            if (run_min == run_min_shift).sum()>2:
+                select = data[run_min == run_min_shift,:]
+        return select
+
     def integrate(data, discretization_error_estimate=0.015):
         idata = np.zeros(data.shape)
         idata[:,0] = data[:,0]
-        idata[1:,1] = -cumtrapz(data[:,1],data[:,0])
-        idata[1:,2] = cumtrapz(data[:,2],data[:,0]) +  discretization_error_estimate
-        run_min =  np.minimum.accumulate(idata[:,1])
-        run_min_shift =  np.minimum.accumulate(np.append(idata[1:,1],idata[-1][1]))
-        if (run_min == run_min_shift).sum()>0:
-            idata = idata[run_min == run_min_shift,:]
+        idata[1:,1] = -cumtrapz(data[:,1],data[:,0]) # free energy
+        idata[1:,2] = cumtrapz(data[:,2],data[:,0]) +  discretization_error_estimate # stdev
+        idata = PafiResult.selector(idata)
         idata[:,1]-=idata[0][1]
         return idata
 
@@ -219,8 +231,8 @@ def plot(self, ax=None, color=None, label_prepend=""):
 
         ax.set_xlabel("Reaction Coordinate")
         ax.set_ylabel("Free energy of activation [eV]")
-        ax.set_xlabel("Temperature [K]")      
         ax.legend(loc="best")  
+        plt.tight_layout()
         return ax
 
 def free_energy_vs_temperature(flist, ax=None, fit_harmonic=True, harmonic_until_T=100, ymin=-0.05, label_prepend="", start_color_at_index=0):

From e65799da2a020f7037fe3d225335dd653fa26d23 Mon Sep 17 00:00:00 2001
From: aallera <arnaud.allera@univ-lyon1.fr>
Date: Wed, 13 Oct 2021 14:26:23 +0200
Subject: [PATCH 11/12] feat : better integration (TDS)

---
 pafi/post_processing.py | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/pafi/post_processing.py b/pafi/post_processing.py
index 0d43cef..c391569 100644
--- a/pafi/post_processing.py
+++ b/pafi/post_processing.py
@@ -177,31 +177,20 @@ def remesh(data, density=10):
         spl_data = np.zeros((density*data.shape[0],data.shape[1]))
         r_data = np.linspace(0.,1.,data.shape[0])
         r_spl_data = np.linspace(0.,1.,spl_data.shape[0])
-        spl_data[:,0] = r_spl_data
+        spl_data[:,0] = interp1d(r_data, data[:,0],kind='linear')(r_spl_data)
         spl_data[:,1] = interp1d(data[:,0], data[:,1],kind='linear',fill_value="extrapolate")(spl_data[:,0])
         spl_data[:,2] = interp1d(data[:,0], data[:,2],kind='linear',fill_value="extrapolate")(spl_data[:,0])
         return spl_data
 
-    def selector(data):
-        """Start the path at the energy minimum (can be different from the initial state)"""
-
-        select = data
-        if np.argmax(data[:, 1])!=0:
-            select = data[np.argmin(data[:np.argmax(data[:, 1]), 1]):, :]
-        else:
-            run_min =  np.minimum.accumulate(data[:,1])
-            run_min_shift =  np.minimum.accumulate(np.append(data[1:,1],data[-1][1]))
-
-            if (run_min == run_min_shift).sum()>2:
-                select = data[run_min == run_min_shift,:]
-        return select
-
     def integrate(data, discretization_error_estimate=0.015):
         idata = np.zeros(data.shape)
         idata[:,0] = data[:,0]
-        idata[1:,1] = -cumtrapz(data[:,1],data[:,0]) # free energy
-        idata[1:,2] = cumtrapz(data[:,2],data[:,0]) +  discretization_error_estimate # stdev
-        idata = PafiResult.selector(idata)
+        idata[1:,1] = -cumtrapz(data[:,1],data[:,0])
+        idata[1:,2] = cumtrapz(data[:,2],data[:,0]) +  discretization_error_estimate
+        run_min =  np.minimum.accumulate(idata[:,1])
+        run_min_shift =  np.minimum.accumulate(np.append(idata[1:,1],idata[-1][1]))
+        if (run_min == run_min_shift).sum()>0:
+            idata = idata[run_min == run_min_shift,:]
         idata[:,1]-=idata[0][1]
         return idata
 

From adf62ba7cbf0158b284fb92498a3e95600b70186 Mon Sep 17 00:00:00 2001
From: aallera <arnaud.allera@univ-lyon1.fr>
Date: Wed, 13 Oct 2021 14:26:31 +0200
Subject: [PATCH 12/12] feat : read r from logfile

---
 pafi/post_processing.py | 110 ++++++++++++++++++++++++++--------------
 1 file changed, 73 insertions(+), 37 deletions(-)

diff --git a/pafi/post_processing.py b/pafi/post_processing.py
index c391569..398b47c 100644
--- a/pafi/post_processing.py
+++ b/pafi/post_processing.py
@@ -1,11 +1,11 @@
 """
-raw_ensemble_output has 1 + 4*nWorker colums:
+raw_ensemble_output has i + 4*nWorker colums. i = 2 since commit 3ca8f884 (20/09/21). For older versions of PAFI, i=1.
 
 Column 0 : Number of workers with max_jump < user threshold at run time
--> nWorker+1 : av(dF), the gradient
--> 2*nWorker+1 : std(dF), the *worker* variance, *not* ensemble error, should not be zero even in infinite time limit!
--> 3*nWorker+1 : av(Psi) = av(dX).dX_0/|dX_0|^2, the tangent projection
--> 4*nWorker+1 : the maximum per-atom jump following the MD
+-> nWorker+i : av(dF), the gradient
+-> 2*nWorker+i : std(dF), the *worker* variance, *not* ensemble error, should not be zero even in infinite time limit!
+-> 3*nWorker+i : av(Psi) = av(dX).dX_0/|dX_0|^2, the tangent projection
+-> 4*nWorker+i : the maximum per-atom jump following the MD
 """
 
 import io
@@ -80,10 +80,15 @@ class PafiResult():
         >>> plt.show()
     """
 
-    def __init__(self, file, temperature=None):
+    def __init__(self, file, logfile=None, temperature=None):
         assert (isinstance(file, str)) or (isinstance(file, pathlib.Path)) ,"`file` must be str or pathlib.Path object."
-
+        
         self.file = file
+
+        self.r_coord = None
+        if logfile is not None:
+            self.r_coord = self.log_parser(logfile)
+        
         if temperature is not None:
             self.temperature = temperature
         else:
@@ -109,29 +114,52 @@ def raw_parser(self, disp_thresh=0.7):
         r_dFave_dFstd = []
 
         if PafiResult.has_old_data_format(self.file):
-            # reaction coordinate is interpolated (may cause deviations at large T)
-            # needed for backward compatibility
-            r = 0.0
-            for line in f.readlines():
-                if line[0] != "#":
-                    fields = np.loadtxt(io.StringIO(line.strip()))
-                    n_data = (fields.size-1)//4
-                    n_valid = (fields[-n_data:] < disp_thresh).sum()
-
-                    count_data  += n_data
-                    if n_valid>0:
-                        count_valid += n_valid
-                    
-                        r_dFave_dFstd += [[r, 
-                                            fields[1:n_valid+1].mean(),
-                                            fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
-                    r += 1.0 # always increment even if n_valid==0
-            # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
-            r_dFave_dFstd = np.r_[r_dFave_dFstd]
-            r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
-            return r_dFave_dFstd
+            if self.r_coord is not None:
+                # Use rcoord from logfile
+                print('Use rcoord from logfile')
+                for line, r_c in zip(f.readlines(), self.r_coord):
+                    if line[0] != "#":
+                        fields = np.loadtxt(io.StringIO(line.strip()))
+                        n_data = (fields.size-1)//4
+                        n_valid = (fields[-n_data:] < disp_thresh).sum()
+
+                        count_data  += n_data
+                        if n_valid>0:
+                            count_valid += n_valid
+                        
+                            r_dFave_dFstd += [[r_c, 
+                                                fields[1:n_valid+1].mean(),
+                                                fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
+                # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
+                r_dFave_dFstd = np.r_[r_dFave_dFstd]
+                r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
+                return r_dFave_dFstd, len(r_dFave_dFstd)
+            else:
+                # reaction coordinate is interpolated (may cause deviations at large T)
+                # needed for backward compatibility
+                print('reaction coordinate is interpolated (may cause deviations at high T)')
+                r = 0.0
+                for line in f.readlines():
+                    if line[0] != "#":
+                        fields = np.loadtxt(io.StringIO(line.strip()))
+                        n_data = (fields.size-1)//4
+                        n_valid = (fields[-n_data:] < disp_thresh).sum()
+
+                        count_data  += n_data
+                        if n_valid>0:
+                            count_valid += n_valid
+                        
+                            r_dFave_dFstd += [[r, 
+                                                fields[1:n_valid+1].mean(),
+                                                fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
+                        r += 1.0 # always increment even if n_valid==0
+                # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
+                r_dFave_dFstd = np.r_[r_dFave_dFstd]
+                r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
+                return r_dFave_dFstd, len(r_dFave_dFstd)
         else:
             # reaction coordinate is read from file
+            print("Reaction coordinate is read from raw file")
             for line in f.readlines():
                 if line[0] != "#":
                     fields = np.loadtxt(io.StringIO(line.strip()))
@@ -148,9 +176,16 @@ def raw_parser(self, disp_thresh=0.7):
 
             # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
             r_dFave_dFstd = np.r_[r_dFave_dFstd]
-            # print(r_dFave_dFstd)
             return r_dFave_dFstd
-    
+
+    def log_parser(self, logfile):
+        import re
+        with open(logfile, "r") as f:
+            content = f.read()
+            res = re.findall("""##.+\n[\s\t]*([\d\.]+)""", content)
+            R_coord = np.r_[[float(r) for r in res]]
+        return R_coord
+
     def has_old_data_format(file):
         d = np.loadtxt(file, max_rows=1)
         if (d.shape[0]-1) //4 == (d.shape[0]-1) /4:
@@ -219,12 +254,11 @@ def plot(self, ax=None, color=None, label_prepend=""):
                     facecolor='0.8')
 
         ax.set_xlabel("Reaction Coordinate")
-        ax.set_ylabel("Free energy of activation [eV]")
+        ax.set_ylabel("Gibbs energy profile (eV)")
         ax.legend(loc="best")  
-        plt.tight_layout()
         return ax
 
-def free_energy_vs_temperature(flist, ax=None, fit_harmonic=True, harmonic_until_T=100, ymin=-0.05, label_prepend="", start_color_at_index=0):
+def free_energy_vs_temperature(flist, ax=None, fit_harmonic=True, harmonic_until_T=100, ymin=-0.05, label_prepend="", start_color_at_index=0, return_poly=False):
 
     if (ax is None) or len(ax)!=2:
         fig, ax = plt.subplots(1,2, figsize=(9,5),dpi=144,sharey=True)
@@ -243,16 +277,18 @@ def free_energy_vs_temperature(flist, ax=None, fit_harmonic=True, harmonic_until
         harmonic_regime = bar[np.where((bar[:,0]<=harmonic_until_T))]
         p = np.polyfit(harmonic_regime[:,0], harmonic_regime[:,1],1)
 
-    ax[1].plot(bar[:,0], bar[:,1], "-o", color=f'C{start_color_at_index}', label=label_prepend)
+    ax[1].plot(bar[:,0], bar[:,3], "-o", color=f'C{start_color_at_index}', label=label_prepend)
     ax[1].fill_between(bar[:,0],bar[:,1]-bar[:,2],bar[:,1]+bar[:,2],facecolor='0.93')
     ax[1].fill_between(bar[:,0],bar[:,3]-bar[:,4],bar[:,3]+bar[:,4],facecolor='0.93')
     if fit_harmonic:
         # harmonic domain
         ax[1].plot(bar[:4,0],p[1]+p[0]*bar[:4,0],'--', color=f'C{start_color_at_index}', label=label_prepend + r'$\Delta U_0=%2.3geV,\Delta S_0=%2.3g{\rm k_B}$' % (p[1], -p[0]/kb))
-    ax[1].set_xlabel("Temperature [K]")
+    ax[1].set_xlabel("Temperature (K)")
+    ax[1].set_ylabel("Gibbs energy of activation (eV)")
     ax[1].legend(loc='best')
     ax[1].set_ylim(ymin=ymin)
     plt.subplots_adjust(wspace=0)
     plt.tight_layout()
-        
-    return ax
\ No newline at end of file
+    
+    if return_poly: return ax, p
+    else: return ax