tomswinburne · arn-all · Apr 15, 2021 · Apr 15, 2021 · Apr 15, 2021 · Apr 16, 2021
diff --git a/.gitignore b/.gitignore
@@ -15,6 +15,12 @@ example/*
 lib/*
 notes.py
 
+# Python
+pafi/__pycache__
+*.pyc
+dist
+*.egg-info
+
 # Prerequisites
 */*.d
 
@@ -43,3 +49,4 @@ notes.py
 */*.exe
 */*.out
 */*.app
+
diff --git a/INSTALL.md b/INSTALL.md
@@ -76,3 +76,63 @@ LMP_INC =	-DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64  -DLAMMPS_EXCEPTIONS
    cmake ..
    make # or try make -j4 for parallel make using 4 cores
 ```
+
+## Install Python post processing package
+
+Pafi is distributed with a Python package that enables easy visualization of the results.
+To install the python package, you need to install `python` and `pip`.
+Then, go to the main directory and do: 
+
+```bash
+   pip install -e .
+```
+
+This way, the package will be installed in developer mode, meaning that any change of the python file(s) `pafi/*.py` or any git pull of a newer version will take effect immediately.
+If you want to have to re-install the package manually each time instead, still from the main directory:
+
+```python
+   pip install build #
+   python -m build   # to build the package
+   pip install .     # run it each time you want to (re)install it 
+```
+
+The `pafi` package is now importable from python, at any location. 
+
+For plotting the results of a simulation:
+
+```python
+import pafi
+import matplotlib.pyplot as plt
+
+p = pafi.PafiResult('raw_ensemble_output_0K_0')
+ax = p.plot()
+plt.show()
+```
+
+It also allows plotting the results of a series of simulations conducted at different temperatures:
+
+```python
+import pafi
+import matplotlib.pyplot as plt
+import glob
+
+fl = glob.glob('raw_ensemble_output_*K_0')
+ax = pafi.free_energy_vs_temperature(fl, harmonic_until_T=100)
+plt.show()
+```
+
+A command line tool is also available.
+
+To display the result of a simulation in a new matplotlib GUI window :
+
+```bash
+pafi_plot example/raw_ensemble_output_0K_0
+```
+
+To save the graph to pdf or png format (does not open the GUI):
+
+```bash
+pafi_plot example/raw_ensemble_output_0K_0 graph.pdf
+or
+pafi_plot example/raw_ensemble_output_0K_0 graph.png
+```
diff --git a/pafi/__init__.py b/pafi/__init__.py
@@ -0,0 +1,11 @@
+from pafi.post_processing import *
+
+def cmd_plot():
+    import sys
+    import matplotlib.pyplot as plt
+    p = PafiResult(sys.argv[1])
+    ax = p.plot()
+    if len(sys.argv)==3:
+        plt.savefig(sys.argv[2])
+    else:
+        plt.show()
diff --git a/pafi/post_processing.py b/pafi/post_processing.py
@@ -0,0 +1,294 @@
+"""
+raw_ensemble_output has i + 4*nWorker colums. i = 2 since commit 3ca8f884 (20/09/21). For older versions of PAFI, i=1.
+
+Column 0 : Number of workers with max_jump < user threshold at run time
+-> nWorker+i : av(dF), the gradient
+-> 2*nWorker+i : std(dF), the *worker* variance, *not* ensemble error, should not be zero even in infinite time limit!
+-> 3*nWorker+i : av(Psi) = av(dX).dX_0/|dX_0|^2, the tangent projection
+-> 4*nWorker+i : the maximum per-atom jump following the MD
+"""
+
+import io
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.integrate import cumtrapz
+from scipy.interpolate import interp1d
+import pandas as pd 
+import pathlib
+
+kb = 8.617e-5
+
+class Profile():
+    """Class for handling (free) energy profiles.
+
+    Parameters
+    ----------
+
+            data : np.ndarray
+                A Nx3 shaped array that contains [reaction coordinate, free energy, and error] as columns.
+
+    Attributes
+    ----------
+
+        data : np.ndarray
+            The Nx3 array passed as argument.
+        barrier : float
+            The maximum Free Energy value.
+        error : float
+            error = error(R=0)+error(R=Rm), where Rm is the reaction corrdinate at which the Free Energy is maximal.
+    """
+
+    def __init__(self, data):
+        self.data = data
+        self.barrier = data[:,1].max()
+        self.error = data[0][2] + data[data[:,1].argmax()][2]
+
+class PafiResult():
+    """Class for manipulating PAFI simulation results.
+
+    Parameters
+    ----------
+
+            file : pathlib.Path object or str
+                The path to the pafi raw_ensemble file to extract data from. 
+                Should have a name of the form "raw_ensemble_output_*K_*", unless optional temperature parameter is given.
+            temperature : int or float
+                The temperature of the simulation.
+
+    Attributes
+    ----------
+
+        temperature : str
+            The standard output passed by PreciSo during its execution.
+        file : preciso.Distribution object
+            A preciso.Distribution object, that contains all the data related to precipitates size distributions.
+        discrete_profile : pafi.Profile
+            The free energy profile as a pafi.Profile object. Contains the discrete data.
+        splined_profile : pafi.Profile
+            The free energy profile as a pafi.Profile object. Rediscretized in order to have a smooth profile curve.
+        bar :   np.array
+            A convenient array that contains commonly used data : [temperature, discrete_profile.barrier, discrete_profile.error, splined_profile.barrier, splined_profile.error]
+            See documentation of pafi.Profile for details on .barrier and .error attributes.
+
+    Examples
+    --------
+
+        >>> import pafi
+        >>> import matplotlib.pyplot as plt
+        >>> p = pafi.PafiResult('raw_ensemble_output_0K_0')
+        >>> ax = p.plot()
+        >>> plt.show()
+    """
+
+    def __init__(self, file, logfile=None, temperature=None):
+        assert (isinstance(file, str)) or (isinstance(file, pathlib.Path)) ,"`file` must be str or pathlib.Path object."
+
+        self.file = file
+
+        self.r_coord = None
+        if logfile is not None:
+            self.r_coord = self.log_parser(logfile)
+
+        if temperature is not None:
+            self.temperature = temperature
+        else:
+            self.temperature = self.get_temperature()
+
+        raw = self.raw_parser()
+        self.discrete_profile = Profile(PafiResult.integrate(raw))
+
+        remeshed = PafiResult.remesh(raw)
+        self.splined_profile = Profile(PafiResult.integrate(remeshed))
+
+        self.bar = self.get_bar()
+
+    def raw_parser(self, disp_thresh=0.7):
+        """Parse the raw data file"""
+        try:
+            f = open(self.file,'r')
+        except IOError:
+            print("raw data file %s not found" % self.file)
+            return np.zeros((1,3))
+
+        count_data = count_valid = 0
+        r_dFave_dFstd = []
+
+        if PafiResult.has_old_data_format(self.file):
+            if self.r_coord is not None:
+                # Use rcoord from logfile
+                print('Use rcoord from logfile')
+                for line, r_c in zip(f.readlines(), self.r_coord):
+                    if line[0] != "#":
+                        fields = np.loadtxt(io.StringIO(line.strip()))
+                        n_data = (fields.size-1)//4
+                        n_valid = (fields[-n_data:] < disp_thresh).sum()
+
+                        count_data  += n_data
+                        if n_valid>0:
+                            count_valid += n_valid
+
+                            r_dFave_dFstd += [[r_c, 
+                                                fields[1:n_valid+1].mean(),
+                                                fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
+                # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
+                r_dFave_dFstd = np.r_[r_dFave_dFstd]
+                r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
+                return r_dFave_dFstd, len(r_dFave_dFstd)
+            else:
+                # reaction coordinate is interpolated (may cause deviations at large T)
+                # needed for backward compatibility
+                print('reaction coordinate is interpolated (may cause deviations at high T)')
+                r = 0.0
+                for line in f.readlines():
+                    if line[0] != "#":
+                        fields = np.loadtxt(io.StringIO(line.strip()))
+                        n_data = (fields.size-1)//4
+                        n_valid = (fields[-n_data:] < disp_thresh).sum()
+
+                        count_data  += n_data
+                        if n_valid>0:
+                            count_valid += n_valid
+
+                            r_dFave_dFstd += [[r, 
+                                                fields[1:n_valid+1].mean(),
+                                                fields[1:n_valid+1].std()/np.sqrt(n_valid)]]
+                        r += 1.0 # always increment even if n_valid==0
+                # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
+                r_dFave_dFstd = np.r_[r_dFave_dFstd]
+                r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1
+                return r_dFave_dFstd, len(r_dFave_dFstd)
+        else:
+            # reaction coordinate is read from file
+            print("Reaction coordinate is read from raw file")
+            for line in f.readlines():
+                if line[0] != "#":
+                    fields = np.loadtxt(io.StringIO(line.strip()))
+                    n_data = (fields.size-2)//4
+                    n_valid = (fields[-n_data:] < disp_thresh).sum()
+
+                    count_data  += n_data
+                    if n_valid>0:
+                        count_valid += n_valid
+
+                        r_dFave_dFstd += [[fields[0],
+                                        fields[2:n_valid+2].mean(),
+                                        fields[2:n_valid+2].std()/np.sqrt(n_valid)]]
+
+            # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100)))
+            r_dFave_dFstd = np.r_[r_dFave_dFstd]
+            return r_dFave_dFstd
+
+    def log_parser(self, logfile):
+        import re
+        with open(logfile, "r") as f:
+            content = f.read()
+            res = re.findall("""##.+\n[\s\t]*([\d\.]+)""", content)
+            R_coord = np.r_[[float(r) for r in res]]
+        return R_coord
+
+    def has_old_data_format(file):
+        d = np.loadtxt(file, max_rows=1)
+        if (d.shape[0]-1) //4 == (d.shape[0]-1) /4:
+            return True
+        elif (d.shape[0]-2) //4 == (d.shape[0]-2) /4:
+            return False
+        else: 
+            raise RuntimeError(f"File {file} has an unsupported format")
+
+    def get_bar(self):
+        """This data format is used by some legacy functions"""
+        return np.array([self.temperature, 
+                            self.discrete_profile.barrier, 
+                            self.discrete_profile.error, 
+                            self.splined_profile.barrier, 
+                            self.splined_profile.error])
+
+    def get_temperature(self):
+        """The temperature of the run"""
+        return int(str(self.file).split("_")[-2][:-1])
+
+    def remesh(data, density=10):
+        """Remesh to obtained the splined version of the profile"""
+        spl_data = np.zeros((density*data.shape[0],data.shape[1]))
+        r_data = np.linspace(0.,1.,data.shape[0])
+        r_spl_data = np.linspace(0.,1.,spl_data.shape[0])
+        spl_data[:,0] = interp1d(r_data, data[:,0],kind='linear')(r_spl_data)
+        spl_data[:,1] = interp1d(data[:,0], data[:,1],kind='linear',fill_value="extrapolate")(spl_data[:,0])
+        spl_data[:,2] = interp1d(data[:,0], data[:,2],kind='linear',fill_value="extrapolate")(spl_data[:,0])
+        return spl_data
+
+    def integrate(data, discretization_error_estimate=0.015):
+        idata = np.zeros(data.shape)
+        idata[:,0] = data[:,0]
+        idata[1:,1] = -cumtrapz(data[:,1],data[:,0])
+        idata[1:,2] = cumtrapz(data[:,2],data[:,0]) +  discretization_error_estimate
+        run_min =  np.minimum.accumulate(idata[:,1])
+        run_min_shift =  np.minimum.accumulate(np.append(idata[1:,1],idata[-1][1]))
+        if (run_min == run_min_shift).sum()>0:
+            idata = idata[run_min == run_min_shift,:]
+        idata[:,1]-=idata[0][1]
+        return idata
+
+    def plot(self, ax=None, color=None, label_prepend=""):
+
+        if ax is None:
+            fig, ax = plt.subplots()
+        if color is None:
+            color="C0"
+        discrete = self.discrete_profile.data
+        splined = self.splined_profile.data
+
+        for i, profile in enumerate([self.discrete_profile.data, self.splined_profile.data]):
+
+            style = ["o", "-"][i]
+            label = [label_prepend + f'{self.temperature}K', ''][i]
+            ax.plot(profile[:,0],
+                        profile[:,1],
+                        style, 
+                        color=color,
+                        label=label)
+
+            ax.fill_between(profile[:,0],
+                    profile[:,1]-profile[:,2],
+                    profile[:,1]+profile[:,2],
+                    facecolor='0.8')
+
+        ax.set_xlabel("Reaction Coordinate")
+        ax.set_ylabel("Gibbs energy profile (eV)")
+        ax.legend(loc="best")  
+        return ax
+
+def free_energy_vs_temperature(flist, ax=None, fit_harmonic=True, harmonic_until_T=100, ymin=-0.05, label_prepend="", start_color_at_index=0, return_poly=False):
+
+    if (ax is None) or len(ax)!=2:
+        fig, ax = plt.subplots(1,2, figsize=(9,5),dpi=144,sharey=True)
+
+    data = [PafiResult(file) for file in flist]
+    data.sort(key=lambda x: x.temperature)
+    bar = np.array([x.bar for x in data])
+
+    for i, r in enumerate(data):
+        a = r.plot(ax[0], color=f'C{i+start_color_at_index}', label_prepend=label_prepend)
+
+    # bar = bar[np.argsort(bar[:, 0])]
+
+    if fit_harmonic:
+        # Fit a linear regression on the domain below harmonic_until_T K
+        harmonic_regime = bar[np.where((bar[:,0]<=harmonic_until_T))]
+        p = np.polyfit(harmonic_regime[:,0], harmonic_regime[:,1],1)
+
+    ax[1].plot(bar[:,0], bar[:,3], "-o", color=f'C{start_color_at_index}', label=label_prepend)
+    ax[1].fill_between(bar[:,0],bar[:,1]-bar[:,2],bar[:,1]+bar[:,2],facecolor='0.93')
+    ax[1].fill_between(bar[:,0],bar[:,3]-bar[:,4],bar[:,3]+bar[:,4],facecolor='0.93')
+    if fit_harmonic:
+        # harmonic domain
+        ax[1].plot(bar[:4,0],p[1]+p[0]*bar[:4,0],'--', color=f'C{start_color_at_index}', label=label_prepend + r'$\Delta U_0=%2.3geV,\Delta S_0=%2.3g{\rm k_B}$' % (p[1], -p[0]/kb))
+    ax[1].set_xlabel("Temperature (K)")
+    ax[1].set_ylabel("Gibbs energy of activation (eV)")
+    ax[1].legend(loc='best')
+    ax[1].set_ylim(ymin=ymin)
+    plt.subplots_adjust(wspace=0)
+    plt.tight_layout()
+
+    if return_poly: return ax, p
+    else: return ax
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"