From f147c17beed98fb409208dd8e09b951960f5025f Mon Sep 17 00:00:00 2001
From: William F Godoy <williamfgc@yahoo.com>
Date: Wed, 25 Oct 2023 22:19:18 -0400
Subject: [PATCH] clang-format-16 on new files

Fix license file headers

Fix typo T1 -> T in the reinterpret_cast of VectorSoaContainer::copyOut
---
 src/Containers/OhmmsSoA/VectorSoaContainer.h  |    2 +-
 src/Particle/DistanceTableT.h                 |  571 ++-
 src/Particle/DynamicCoordinatesT.cpp          |   30 +-
 src/Particle/DynamicCoordinatesT.h            |  144 +-
 src/Particle/InitMolecularSystemT.cpp         |  483 ++-
 src/Particle/InitMolecularSystemT.h           |   50 +-
 src/Particle/LongRange/KContainerT.cpp        |  398 +-
 src/Particle/LongRange/KContainerT.h          |   95 +-
 src/Particle/LongRange/StructFactT.cpp        |  346 +-
 src/Particle/LongRange/StructFactT.h          |  124 +-
 src/Particle/MCCoordsT.hpp                    |   47 +-
 src/Particle/MCWalkerConfigurationT.cpp       |  398 +-
 src/Particle/MCWalkerConfigurationT.h         |  283 +-
 src/Particle/ParticleSetPoolT.cpp             |  362 +-
 src/Particle/ParticleSetPoolT.h               |  127 +-
 src/Particle/ParticleSetT.cpp                 | 2203 ++++++-----
 src/Particle/ParticleSetT.h                   |  942 ++---
 src/Particle/ParticleSetTraits.h              |  119 +-
 src/Particle/RealSpacePositionsT.h            |   87 +-
 src/Particle/RealSpacePositionsTOMPTarget.h   |  424 +--
 src/Particle/ReptileT.h                       |  520 ++-
 src/Particle/SampleStackT.cpp                 |   61 +-
 src/Particle/SampleStackT.h                   |   68 +-
 src/Particle/SimulationCellT.cpp              |   81 +-
 src/Particle/SimulationCellT.h                |   57 +-
 src/Particle/SoaDistanceTableAAT.h            |  319 +-
 src/Particle/SoaDistanceTableAATOMPTarget.h   |  899 ++---
 src/Particle/SoaDistanceTableABT.h            |  232 +-
 src/Particle/SoaDistanceTableABTOMPTarget.h   |  773 ++--
 src/Particle/VirtualParticleSetT.cpp          |  378 +-
 src/Particle/VirtualParticleSetT.h            |  190 +-
 src/Particle/WalkerConfigurationsT.cpp        |  203 +-
 src/Particle/WalkerConfigurationsT.h          |   20 +-
 src/Particle/createDistanceTableT.cpp         |  362 +-
 src/Particle/createDistanceTableT.h           |   76 +-
 .../createDistanceTableTOMPTarget.cpp         |  372 +-
 src/QMCWaveFunctions/BasisSetBaseT.h          |  341 +-
 .../BsplineFactory/BsplineReaderBaseT.cpp     |  384 +-
 .../BsplineFactory/BsplineReaderBaseT.h       |  299 +-
 .../BsplineFactory/BsplineSetT.h              |  374 +-
 .../BsplineFactory/HybridRepCenterOrbitalsT.h | 1427 ++++---
 .../BsplineFactory/HybridRepCplxT.h           |    1 -
 .../BsplineFactory/HybridRepRealT.h           |    1 -
 .../BsplineFactory/HybridRepSetReaderT.h      |  837 ++--
 .../BsplineFactory/SplineC2COMPTargetT.cpp    | 2394 ++++++------
 .../BsplineFactory/SplineC2COMPTargetT.h      |  556 ++-
 .../BsplineFactory/SplineC2CT.cpp             |  104 +-
 .../BsplineFactory/SplineC2CT.h               |  346 +-
 .../BsplineFactory/SplineC2ROMPTargetT.cpp    | 3369 ++++++++---------
 .../BsplineFactory/SplineC2ROMPTargetT.h      |  559 ++-
 .../BsplineFactory/SplineC2RT.cpp             |   15 +-
 .../BsplineFactory/SplineC2RT.h               |  351 +-
 .../BsplineFactory/SplineR2RT.cpp             |   13 +-
 .../BsplineFactory/SplineR2RT.h               |  389 +-
 .../BsplineFactory/SplineSetReaderT.h         |  491 ++-
 .../BsplineFactory/createBsplineReaderT.cpp   |    2 +-
 .../BsplineFactory/createBsplineReaderT.h     |   36 +-
 src/QMCWaveFunctions/CompositeSPOSetT.cpp     |  284 +-
 src/QMCWaveFunctions/CompositeSPOSetT.h       |  183 +-
 src/QMCWaveFunctions/EinsplineSetBuilderT.cpp |   16 +
 src/QMCWaveFunctions/EinsplineSetBuilderT.h   |   20 +-
 .../EinsplineSpinorSetBuilderT.h              |    2 +-
 .../ElectronGas/FreeOrbitalBuilder.h          |   16 +
 .../ElectronGas/FreeOrbitalBuilderT.cpp       |   22 +-
 .../ElectronGas/FreeOrbitalBuilderT.h         |   22 +-
 .../ElectronGas/FreeOrbitalT.cpp              | 1155 +++---
 .../ElectronGas/FreeOrbitalT.h                |  127 +-
 .../HarmonicOscillator/SHOSetBuilderT.cpp     |  327 +-
 .../HarmonicOscillator/SHOSetBuilderT.h       |   62 +-
 .../HarmonicOscillator/SHOSetT.cpp            |  909 ++---
 .../HarmonicOscillator/SHOSetT.h              |  273 +-
 src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp | 1669 ++++----
 .../LCAO/CuspCorrectionConstructionT.cpp      | 1320 +++----
 .../LCAO/CuspCorrectionConstructionT.h        |  380 +-
 src/QMCWaveFunctions/LCAO/CuspCorrectionT.h   |  104 +-
 .../LCAO/LCAOSpinorBuilderT.cpp               |  327 +-
 .../LCAO/LCAOSpinorBuilderT.h                 |   34 +-
 .../LCAO/LCAOrbitalBuilderT.cpp               | 1949 +++++-----
 .../LCAO/LCAOrbitalBuilderT.h                 |  167 +-
 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp  | 1775 ++++-----
 src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h    |  613 ++-
 .../LCAO/LCAOrbitalSetWithCorrectionT.cpp     |   77 +-
 .../LCAO/LCAOrbitalSetWithCorrectionT.h       |   60 +-
 .../LCAO/SoaAtomicBasisSetT.h                 | 1397 ++++---
 .../LCAO/SoaCuspCorrectionT.cpp               |  253 +-
 .../LCAO/SoaCuspCorrectionT.h                 |  116 +-
 .../LCAO/SoaLocalizedBasisSetT.cpp            |  634 ++--
 .../LCAO/SoaLocalizedBasisSetT.h              |  173 +-
 src/QMCWaveFunctions/OptimizableObjectT.h     |   59 +-
 src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp   |   10 +-
 src/QMCWaveFunctions/PlaneWave/PWBasisT.h     |  464 ++-
 .../PlaneWave/PWOrbitalSetT.cpp               |  206 +-
 .../PlaneWave/PWOrbitalSetT.h                 |  181 +-
 src/QMCWaveFunctions/RotatedSPOsT.cpp         | 2773 +++++++-------
 src/QMCWaveFunctions/RotatedSPOsT.h           |  917 +++--
 .../SPOSetBuilderFactoryT.cpp                 |   34 +-
 src/QMCWaveFunctions/SPOSetBuilderFactoryT.h  |   72 +-
 src/QMCWaveFunctions/SPOSetScannerT.h         |  425 +--
 src/QMCWaveFunctions/SPOSetT.cpp              |   25 +-
 src/QMCWaveFunctions/SPOSetT.h                |  654 ++--
 src/QMCWaveFunctions/SpinorSetT.cpp           |  952 +++--
 src/QMCWaveFunctions/SpinorSetT.h             |  285 +-
 src/QMCWaveFunctions/VariableSetT.cpp         |  541 ++-
 .../tests/ConstantSPOSetT.cpp                 |  144 +-
 src/QMCWaveFunctions/tests/ConstantSPOSetT.h  |   93 +-
 src/QMCWaveFunctions/tests/FakeSPOT.cpp       |  216 +-
 src/QMCWaveFunctions/tests/FakeSPOT.h         |   60 +-
 .../tests/test_RotatedSPOsT.cpp               | 1487 ++++----
 108 files changed, 22831 insertions(+), 25368 deletions(-)

diff --git a/src/Containers/OhmmsSoA/VectorSoaContainer.h b/src/Containers/OhmmsSoA/VectorSoaContainer.h
index 0e3a1a98a24..58068188040 100644
--- a/src/Containers/OhmmsSoA/VectorSoaContainer.h
+++ b/src/Containers/OhmmsSoA/VectorSoaContainer.h
@@ -219,7 +219,7 @@ struct VectorSoaContainer
        */
   void copyOut(Vector<TinyVector<T, D>>& out) const
   {
-    PosSoA2AoS(nLocal, D, myData, nGhosts, reinterpret_cast<T1*>(out.first_address()), D);
+    PosSoA2AoS(nLocal, D, myData, nGhosts, reinterpret_cast<T*>(out.first_address()), D);
   }
 
   /** return TinyVector<T,D>
diff --git a/src/Particle/DistanceTableT.h b/src/Particle/DistanceTableT.h
index 5eaba1bd44c..5a69c4990d0 100644
--- a/src/Particle/DistanceTableT.h
+++ b/src/Particle/DistanceTableT.h
@@ -4,15 +4,12 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jaron T. Krogel,
-//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Mark A.
-//                    Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_DISTANCETABLEDATAIMPLT_H
@@ -41,128 +38,95 @@ class ResourceCollection;
  * update/compute invoked by ParticleSet. Derived AA/AB classes handle the
  * actual storage and data access.
  */
-template <typename T>
+template<typename T>
 class DistanceTableT
 {
 public:
-    static constexpr unsigned DIM = OHMMS_DIM;
+  static constexpr unsigned DIM = OHMMS_DIM;
 
-    using IndexType = typename ParticleSetTraits<T>::IndexType;
-    using RealType = typename ParticleSetTraits<T>::RealType;
-    using PosType = typename ParticleSetTraits<T>::PosType;
-    using DistRow = Vector<RealType, aligned_allocator<RealType>>;
-    using DisplRow = VectorSoaContainer<RealType, DIM>;
+  using IndexType = typename ParticleSetTraits<T>::IndexType;
+  using RealType  = typename ParticleSetTraits<T>::RealType;
+  using PosType   = typename ParticleSetTraits<T>::PosType;
+  using DistRow   = Vector<RealType, aligned_allocator<RealType>>;
+  using DisplRow  = VectorSoaContainer<RealType, DIM>;
 
 protected:
-    // FIXME. once DT takes only DynamicCoordinates, change this type as well.
-    const ParticleSetT<T>& origin_;
+  // FIXME. once DT takes only DynamicCoordinates, change this type as well.
+  const ParticleSetT<T>& origin_;
 
-    const size_t num_sources_;
-    const size_t num_targets_;
+  const size_t num_sources_;
+  const size_t num_targets_;
 
-    /// name of the table
-    const std::string name_;
+  /// name of the table
+  const std::string name_;
 
-    /// operation modes defined by DTModes
-    DTModes modes_;
+  /// operation modes defined by DTModes
+  DTModes modes_;
 
 public:
-    /// constructor using source and target ParticleSet
-    DistanceTableT(const ParticleSetT<T>& source, const ParticleSetT<T>& target,
-        DTModes modes) :
-        origin_(source),
+  /// constructor using source and target ParticleSet
+  DistanceTableT(const ParticleSetT<T>& source, const ParticleSetT<T>& target, DTModes modes)
+      : origin_(source),
         num_sources_(source.getTotalNum()),
         num_targets_(target.getTotalNum()),
         name_(source.getName() + "_" + target.getName()),
         modes_(modes)
-    {
-    }
-
-    /// copy constructor. deleted
-    DistanceTableT(const DistanceTableT&) = delete;
-
-    /// virutal destructor
-    virtual ~DistanceTableT() = default;
-
-    /// get modes
-    inline DTModes
-    getModes() const
-    {
-        return modes_;
-    }
-
-    /// set modes
-    inline void
-    setModes(DTModes modes)
-    {
-        modes_ = modes;
-    }
-
-    /// return the name of table
-    inline const std::string&
-    getName() const
-    {
-        return name_;
-    }
-
-    /// returns the reference the origin particleset
-    const ParticleSetT<T>&
-    get_origin() const
-    {
-        return origin_;
-    }
-
-    /// returns the number of centers
-    inline size_t
-    centers() const
-    {
-        return origin_.getTotalNum();
-    }
-
-    /// returns the number of centers
-    inline size_t
-    targets() const
-    {
-        return num_targets_;
-    }
-
-    /// returns the number of source particles
-    inline size_t
-    sources() const
-    {
-        return num_sources_;
-    }
-
-    /** evaluate the full Distance Table
+  {}
+
+  /// copy constructor. deleted
+  DistanceTableT(const DistanceTableT&) = delete;
+
+  /// virtual destructor
+  virtual ~DistanceTableT() = default;
+
+  /// get modes
+  inline DTModes getModes() const { return modes_; }
+
+  /// set modes
+  inline void setModes(DTModes modes) { modes_ = modes; }
+
+  /// return the name of table
+  inline const std::string& getName() const { return name_; }
+
+  /// returns the reference to the origin particleset
+  const ParticleSetT<T>& get_origin() const { return origin_; }
+
+  /// returns the number of centers
+  inline size_t centers() const { return origin_.getTotalNum(); }
+
+  /// returns the number of target particles
+  inline size_t targets() const { return num_targets_; }
+
+  /// returns the number of source particles
+  inline size_t sources() const { return num_sources_; }
+
+  /** evaluate the full Distance Table
      * @param P the target particle set
      */
-    virtual void
-    evaluate(ParticleSetT<T>& P) = 0;
-    virtual void
-    mw_evaluate(const RefVectorWithLeader<DistanceTableT>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list) const
-    {
-        for (int iw = 0; iw < dt_list.size(); iw++)
-            dt_list[iw].evaluate(p_list[iw]);
-    }
-
-    /** recompute multi walker internal data, recompute
+  virtual void evaluate(ParticleSetT<T>& P) = 0;
+  virtual void mw_evaluate(const RefVectorWithLeader<DistanceTableT>& dt_list,
+                           const RefVectorWithLeader<ParticleSetT<T>>& p_list) const
+  {
+    for (int iw = 0; iw < dt_list.size(); iw++)
+      dt_list[iw].evaluate(p_list[iw]);
+  }
+
+  /** recompute multi walker internal data, recompute
      * @param dt_list the distance table batch
      * @param p_list the target particle set batch
      * @param recompute if true, must recompute. Otherwise, implementation
      * dependent.
      */
-    virtual void
-    mw_recompute(const RefVectorWithLeader<DistanceTableT>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
-        const std::vector<bool>& recompute) const
-    {
-        for (int iw = 0; iw < dt_list.size(); iw++)
-            if (recompute[iw])
-                dt_list[iw].evaluate(p_list[iw]);
-    }
-
-    /** evaluate the temporary pair relations when a move is proposed
+  virtual void mw_recompute(const RefVectorWithLeader<DistanceTableT>& dt_list,
+                            const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                            const std::vector<bool>& recompute) const
+  {
+    for (int iw = 0; iw < dt_list.size(); iw++)
+      if (recompute[iw])
+        dt_list[iw].evaluate(p_list[iw]);
+  }
+
+  /** evaluate the temporary pair relations when a move is proposed
      * @param P the target particle set
      * @param rnew proposed new position
      * @param iat the particle to be moved
@@ -175,81 +139,73 @@ class DistanceTableT
      * move. Drivers/Hamiltonians know whether moves will be accepted or not and
      * manage this flag when calling ParticleSet::makeMoveXXX functions.
      */
-    virtual void
-    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
-        bool prepare_old = true) = 0;
+  virtual void move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat, bool prepare_old = true) = 0;
 
-    /** walker batched version of move. this function may be implemented
+  /** walker batched version of move. this function may be implemented
      * asynchronously. Additional synchroniziation for collecting results should
      * be handled by the caller. If DTModes::NEED_TEMP_DATA_ON_HOST, host data
      * will be updated. If no consumer requests data on the host, the transfer
      * is skipped.
      */
-    virtual void
-    mw_move(const RefVectorWithLeader<DistanceTableT>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
-        const std::vector<PosType>& rnew_list, const IndexType iat,
-        bool prepare_old = true) const
-    {
-        for (int iw = 0; iw < dt_list.size(); iw++)
-            dt_list[iw].move(p_list[iw], rnew_list[iw], iat, prepare_old);
-    }
-
-    /** update the distance table by the pair relations from the temporal
+  virtual void mw_move(const RefVectorWithLeader<DistanceTableT>& dt_list,
+                       const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                       const std::vector<PosType>& rnew_list,
+                       const IndexType iat,
+                       bool prepare_old = true) const
+  {
+    for (int iw = 0; iw < dt_list.size(); iw++)
+      dt_list[iw].move(p_list[iw], rnew_list[iw], iat, prepare_old);
+  }
+
+  /** update the distance table by the pair relations from the temporary
      * position. Used when a move is accepted in regular mode
      * @param iat the particle with an accepted move
      */
-    virtual void
-    update(IndexType jat) = 0;
+  virtual void update(IndexType jat) = 0;
 
-    /** fill partially the distance table by the pair relations from the
+  /** fill partially the distance table by the pair relations from the
      * temporary or old particle position. Used in forward mode when a move is
      * reject
      * @param iat the particle with an accepted move
      * @param from_temp if true, copy from temp. if false, copy from old
      */
-    virtual void
-    updatePartial(IndexType jat, bool from_temp)
-    {
-        if (from_temp)
-            update(jat);
-    }
-
-    /** walker batched version of updatePartial.
+  virtual void updatePartial(IndexType jat, bool from_temp)
+  {
+    if (from_temp)
+      update(jat);
+  }
+
+  /** walker batched version of updatePartial.
      * If not DTModes::NEED_TEMP_DATA_ON_HOST, host data is not up-to-date and
      * host distance table will not be updated.
      */
-    virtual void
-    mw_updatePartial(const RefVectorWithLeader<DistanceTableT>& dt_list,
-        IndexType jat, const std::vector<bool>& from_temp)
-    {
-        for (int iw = 0; iw < dt_list.size(); iw++)
-            dt_list[iw].updatePartial(jat, from_temp[iw]);
-    }
-
-    /** finalize distance table calculation after particle-by-particle moves
+  virtual void mw_updatePartial(const RefVectorWithLeader<DistanceTableT>& dt_list,
+                                IndexType jat,
+                                const std::vector<bool>& from_temp)
+  {
+    for (int iw = 0; iw < dt_list.size(); iw++)
+      dt_list[iw].updatePartial(jat, from_temp[iw]);
+  }
+
+  /** finalize distance table calculation after particle-by-particle moves
      * if update() doesn't make the table up-to-date during p-by-p moves
      * finalizePbyP takes action to bring the table up-to-date
      */
-    virtual void
-    finalizePbyP(const ParticleSetT<T>& P)
-    {
-    }
+  virtual void finalizePbyP(const ParticleSetT<T>& P) {}
 
-    /** walker batched version of finalizePbyP
+  /** walker batched version of finalizePbyP
      * If not DTModes::NEED_TEMP_DATA_ON_HOST, host distance table data is not
      * updated at all during p-by-p Thus, a recompute is necessary to update the
      * whole host distance table for consumers like the Coulomb potential.
      */
-    virtual void
-    mw_finalizePbyP(const RefVectorWithLeader<DistanceTableT>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list) const
-    {
-        for (int iw = 0; iw < dt_list.size(); iw++)
-            dt_list[iw].finalizePbyP(p_list[iw]);
-    }
-
-    /** find the first nearest neighbor
+  virtual void mw_finalizePbyP(const RefVectorWithLeader<DistanceTableT>& dt_list,
+                               const RefVectorWithLeader<ParticleSetT<T>>& p_list) const
+  {
+    for (int iw = 0; iw < dt_list.size(); iw++)
+      dt_list[iw].finalizePbyP(p_list[iw]);
+  }
+
+  /** find the first nearest neighbor
      * @param iat source particle id
      * @param r distance
      * @param dr displacement
@@ -258,48 +214,36 @@ class DistanceTableT
      * displacements_[iat]
      * @return the id of the nearest particle, -1 not found
      */
-    virtual int
-    get_first_neighbor(
-        IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0;
-
-    [[noreturn]] inline void
-    print(std::ostream& os)
-    {
-        throw std::runtime_error("DistanceTable::print is not supported");
-    }
-
-    /// initialize a shared resource and hand it to a collection
-    virtual void
-    createResource(ResourceCollection& collection) const
-    {
-    }
-
-    /// acquire a shared resource from a collection
-    virtual void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DistanceTableT>& dt_list) const
-    {
-    }
-
-    /// return a shared resource to a collection
-    virtual void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DistanceTableT>& dt_list) const
-    {
-    }
+  virtual int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const = 0;
+
+  [[noreturn]] inline void print(std::ostream& os)
+  {
+    throw std::runtime_error("DistanceTable::print is not supported");
+  }
+
+  /// initialize a shared resource and hand it to a collection
+  virtual void createResource(ResourceCollection& collection) const {}
+
+  /// acquire a shared resource from a collection
+  virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<DistanceTableT>& dt_list) const
+  {}
+
+  /// return a shared resource to a collection
+  virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<DistanceTableT>& dt_list) const
+  {}
 };
 
 /** AA type of DistanceTable containing storage */
-template <typename T>
+template<typename T>
 class DistanceTableAAT : public DistanceTableT<T>
 {
 public:
-    using DistRow = typename DistanceTableT<T>::DistRow;
-    using DisplRow = typename DistanceTableT<T>::DisplRow;
-    using RealType = typename DistanceTableT<T>::RealType;
+  using DistRow  = typename DistanceTableT<T>::DistRow;
+  using DisplRow = typename DistanceTableT<T>::DisplRow;
+  using RealType = typename DistanceTableT<T>::RealType;
 
 protected:
-    /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]|
+  /** distances_[num_targets_][num_sources_], [i][j] = |r_A2[j] - r_A1[i]|
      *  Note: Derived classes decide if it is a memory view or the actual
      * storage For only the lower triangle (j<i) data can be accessed safely.
      *            There is no bound check to protect j>=i terms as the nature of
@@ -307,223 +251,152 @@ class DistanceTableAAT : public DistanceTableT<T>
      * segment, out-of-bound access is still within the segment and thus doesn't
      * trigger an alarm by the address sanitizer.
      */
-    std::vector<DistRow> distances_;
+  std::vector<DistRow> distances_;
 
-    /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] -
+  /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] -
      * r_A1[i] Note: Derived classes decide if it is a memory view or the actual
      * storage only the lower triangle (j<i) is defined. See the note of
      * distances_.
      */
-    std::vector<DisplRow> displacements_;
+  std::vector<DisplRow> displacements_;
 
-    /// temp_r
-    DistRow temp_r_;
+  /// temp_r
+  DistRow temp_r_;
 
-    /// temp_dr
-    DisplRow temp_dr_;
+  /// temp_dr
+  DisplRow temp_dr_;
 
-    /// old distances
-    DistRow old_r_;
+  /// old distances
+  DistRow old_r_;
 
-    /// old displacements
-    DisplRow old_dr_;
+  /// old displacements
+  DisplRow old_dr_;
 
 public:
-    /// constructor using source and target ParticleSet
-    DistanceTableAAT(const ParticleSetT<T>& target, DTModes modes) :
-        DistanceTableT<T>(target, target, modes)
-    {
-    }
+  /// constructor using source and target ParticleSet
+  DistanceTableAAT(const ParticleSetT<T>& target, DTModes modes) : DistanceTableT<T>(target, target, modes) {}
 
-    /** return full table distances
+  /** return full table distances
      */
-    const std::vector<DistRow>&
-    getDistances() const
-    {
-        return distances_;
-    }
+  const std::vector<DistRow>& getDistances() const { return distances_; }
 
-    /** return full table displacements
+  /** return full table displacements
      */
-    const std::vector<DisplRow>&
-    getDisplacements() const
-    {
-        return displacements_;
-    }
+  const std::vector<DisplRow>& getDisplacements() const { return displacements_; }
 
-    /** return a row of distances for a given target particle
+  /** return a row of distances for a given target particle
      */
-    const DistRow&
-    getDistRow(int iel) const
-    {
-        return distances_[iel];
-    }
+  const DistRow& getDistRow(int iel) const { return distances_[iel]; }
 
-    /** return a row of displacements for a given target particle
+  /** return a row of displacements for a given target particle
      */
-    const DisplRow&
-    getDisplRow(int iel) const
-    {
-        return displacements_[iel];
-    }
+  const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; }
 
-    /** return the temporary distances when a move is proposed
+  /** return the temporary distances when a move is proposed
      */
-    const DistRow&
-    getTempDists() const
-    {
-        return temp_r_;
-    }
+  const DistRow& getTempDists() const { return temp_r_; }
 
-    /** return the temporary displacements when a move is proposed
+  /** return the temporary displacements when a move is proposed
      */
-    const DisplRow&
-    getTempDispls() const
-    {
-        return temp_dr_;
-    }
+  const DisplRow& getTempDispls() const { return temp_dr_; }
 
-    /** return old distances set up by move() for optimized distance table
+  /** return old distances set up by move() for optimized distance table
      * consumers
      */
-    const DistRow&
-    getOldDists() const
-    {
-        return old_r_;
-    }
+  const DistRow& getOldDists() const { return old_r_; }
 
-    /** return old displacements set up by move() for optimized distance table
+  /** return old displacements set up by move() for optimized distance table
      * consumers
      */
-    const DisplRow&
-    getOldDispls() const
-    {
-        return old_dr_;
-    }
-
-    virtual size_t
-    get_num_particls_stored() const
-    {
-        return 0;
-    }
-
-    /// return multi walker temporary pair distance table data pointer
-    [[noreturn]] virtual const RealType*
-    getMultiWalkerTempDataPtr() const
-    {
-        throw std::runtime_error(
-            this->name_ + " multi walker data pointer for temp not supported");
-    }
-
-    virtual const RealType*
-    mw_evalDistsInRange(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list, size_t range_begin,
-        size_t range_end) const
-    {
-        return nullptr;
-    }
+  const DisplRow& getOldDispls() const { return old_dr_; }
+
+  virtual size_t get_num_particls_stored() const { return 0; }
+
+  /// return multi walker temporary pair distance table data pointer
+  [[noreturn]] virtual const RealType* getMultiWalkerTempDataPtr() const
+  {
+    throw std::runtime_error(this->name_ + " multi walker data pointer for temp not supported");
+  }
+
+  virtual const RealType* mw_evalDistsInRange(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+                                              const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                                              size_t range_begin,
+                                              size_t range_end) const
+  {
+    return nullptr;
+  }
 };
 
 /** AB type of DistanceTable containing storage */
-template <typename T>
+template<typename T>
 class DistanceTableABT : public DistanceTableT<T>
 {
 public:
-    using DistRow = typename DistanceTableT<T>::DistRow;
-    using DisplRow = typename DistanceTableT<T>::DisplRow;
-    using RealType = typename DistanceTableT<T>::RealType;
+  using DistRow  = typename DistanceTableT<T>::DistRow;
+  using DisplRow = typename DistanceTableT<T>::DisplRow;
+  using RealType = typename DistanceTableT<T>::RealType;
 
 protected:
-    /** distances_[num_targets_][num_sources_], [i][3][j] = |r_A2[j] - r_A1[i]|
+  /** distances_[num_targets_][num_sources_], [i][j] = |r_A2[j] - r_A1[i]|
      *  Note: Derived classes decide if it is a memory view or the actual
      * storage
      */
-    std::vector<DistRow> distances_;
+  std::vector<DistRow> distances_;
 
-    /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] -
+  /** displacements_[num_targets_][3][num_sources_], [i][3][j] = r_A2[j] -
      * r_A1[i] Note: Derived classes decide if it is a memory view or the actual
      * storage
      */
-    std::vector<DisplRow> displacements_;
+  std::vector<DisplRow> displacements_;
 
-    /// temp_r
-    DistRow temp_r_;
+  /// temp_r
+  DistRow temp_r_;
 
-    /// temp_dr
-    DisplRow temp_dr_;
+  /// temp_dr
+  DisplRow temp_dr_;
 
 public:
-    /// constructor using source and target ParticleSet
-    DistanceTableABT(const ParticleSetT<T>& source,
-        const ParticleSetT<T>& target, DTModes modes) :
-        DistanceTableT<T>(source, target, modes)
-    {
-    }
-
-    /** return full table distances
+  /// constructor using source and target ParticleSet
+  DistanceTableABT(const ParticleSetT<T>& source, const ParticleSetT<T>& target, DTModes modes)
+      : DistanceTableT<T>(source, target, modes)
+  {}
+
+  /** return full table distances
      */
-    const std::vector<DistRow>&
-    getDistances() const
-    {
-        return distances_;
-    }
+  const std::vector<DistRow>& getDistances() const { return distances_; }
 
-    /** return full table displacements
+  /** return full table displacements
      */
-    const std::vector<DisplRow>&
-    getDisplacements() const
-    {
-        return displacements_;
-    }
+  const std::vector<DisplRow>& getDisplacements() const { return displacements_; }
 
-    /** return a row of distances for a given target particle
+  /** return a row of distances for a given target particle
      */
-    const DistRow&
-    getDistRow(int iel) const
-    {
-        return distances_[iel];
-    }
+  const DistRow& getDistRow(int iel) const { return distances_[iel]; }
 
-    /** return a row of displacements for a given target particle
+  /** return a row of displacements for a given target particle
      */
-    const DisplRow&
-    getDisplRow(int iel) const
-    {
-        return displacements_[iel];
-    }
+  const DisplRow& getDisplRow(int iel) const { return displacements_[iel]; }
 
-    /** return the temporary distances when a move is proposed
+  /** return the temporary distances when a move is proposed
      */
-    const DistRow&
-    getTempDists() const
-    {
-        return temp_r_;
-    }
+  const DistRow& getTempDists() const { return temp_r_; }
 
-    /** return the temporary displacements when a move is proposed
+  /** return the temporary displacements when a move is proposed
      */
-    const DisplRow&
-    getTempDispls() const
-    {
-        return temp_dr_;
-    }
-
-    /// return multi-walker full (all pairs) distance table data pointer
-    [[noreturn]] virtual const RealType*
-    getMultiWalkerDataPtr() const
-    {
-        throw std::runtime_error(
-            this->name_ + " multi walker data pointer not supported");
-    }
-
-    /// return stride of per target pctl data. full table data = stride * num of
-    /// target particles
-    [[noreturn]] virtual size_t
-    getPerTargetPctlStrideSize() const
-    {
-        throw std::runtime_error(
-            this->name_ + " getPerTargetPctlStrideSize not supported");
-    }
+  const DisplRow& getTempDispls() const { return temp_dr_; }
+
+  /// return multi-walker full (all pairs) distance table data pointer
+  [[noreturn]] virtual const RealType* getMultiWalkerDataPtr() const
+  {
+    throw std::runtime_error(this->name_ + " multi walker data pointer not supported");
+  }
+
+  /// return stride of per target pctl data. full table data = stride * num of
+  /// target particles
+  [[noreturn]] virtual size_t getPerTargetPctlStrideSize() const
+  {
+    throw std::runtime_error(this->name_ + " getPerTargetPctlStrideSize not supported");
+  }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/DynamicCoordinatesT.cpp b/src/Particle/DynamicCoordinatesT.cpp
index b563d264c18..a16ddcdfe48 100644
--- a/src/Particle/DynamicCoordinatesT.cpp
+++ b/src/Particle/DynamicCoordinatesT.cpp
@@ -18,26 +18,22 @@ namespace qmcplusplus
 {
 /** create DynamicCoordinates based on kind
  */
-template <typename T>
-std::unique_ptr<DynamicCoordinatesT<T>>
-createDynamicCoordinatesT(const DynamicCoordinateKind kind)
+template<typename T>
+std::unique_ptr<DynamicCoordinatesT<T>> createDynamicCoordinatesT(const DynamicCoordinateKind kind)
 {
-    if (kind == DynamicCoordinateKind::DC_POS)
-        return std::make_unique<RealSpacePositionsT<T>>();
-    else if (kind == DynamicCoordinateKind::DC_POS_OFFLOAD)
-        return std::make_unique<RealSpacePositionsTOMPTarget<T>>();
-    // dummy return
-    return std::unique_ptr<RealSpacePositionsT<T>>();
+  if (kind == DynamicCoordinateKind::DC_POS)
+    return std::make_unique<RealSpacePositionsT<T>>();
+  else if (kind == DynamicCoordinateKind::DC_POS_OFFLOAD)
+    return std::make_unique<RealSpacePositionsTOMPTarget<T>>();
+  // dummy return
+  return std::unique_ptr<RealSpacePositionsT<T>>();
 }
 
-template std::unique_ptr<DynamicCoordinatesT<double>>
-createDynamicCoordinatesT<double>(const DynamicCoordinateKind kind);
-template std::unique_ptr<DynamicCoordinatesT<float>>
-createDynamicCoordinatesT<float>(const DynamicCoordinateKind kind);
-template std::unique_ptr<DynamicCoordinatesT<std::complex<double>>>
-createDynamicCoordinatesT<std::complex<double>>(
+template std::unique_ptr<DynamicCoordinatesT<double>> createDynamicCoordinatesT<double>(
     const DynamicCoordinateKind kind);
-template std::unique_ptr<DynamicCoordinatesT<std::complex<float>>>
-createDynamicCoordinatesT<std::complex<float>>(
+template std::unique_ptr<DynamicCoordinatesT<float>> createDynamicCoordinatesT<float>(const DynamicCoordinateKind kind);
+template std::unique_ptr<DynamicCoordinatesT<std::complex<double>>> createDynamicCoordinatesT<std::complex<double>>(
+    const DynamicCoordinateKind kind);
+template std::unique_ptr<DynamicCoordinatesT<std::complex<float>>> createDynamicCoordinatesT<std::complex<float>>(
     const DynamicCoordinateKind kind);
 } // namespace qmcplusplus
diff --git a/src/Particle/DynamicCoordinatesT.h b/src/Particle/DynamicCoordinatesT.h
index 36d94868d66..18d616bb9ae 100644
--- a/src/Particle/DynamicCoordinatesT.h
+++ b/src/Particle/DynamicCoordinatesT.h
@@ -9,8 +9,6 @@
 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
-/** @file DynamicCoordinatesT.h
- */
 #ifndef QMCPLUSPLUS_DYNAMICCOORDINATEST_H
 #define QMCPLUSPLUS_DYNAMICCOORDINATEST_H
 
@@ -34,119 +32,91 @@ enum class DynamicCoordinateKind
 
 /** quantum variables of all the particles
  */
-template <typename T>
+template<typename T>
 class DynamicCoordinatesT
 {
 public:
-    using RealType = typename ParticleSetTraits<T>::RealType;
-    using PosType = typename ParticleSetTraits<T>::PosType;
-    using ParticlePos = typename LatticeParticleTraits<T>::ParticlePos;
-    using PosVectorSoa =
-        VectorSoaContainer<RealType, ParticleSetTraits<T>::DIM>;
+  using RealType     = typename ParticleSetTraits<T>::RealType;
+  using PosType      = typename ParticleSetTraits<T>::PosType;
+  using ParticlePos  = typename LatticeParticleTraits<T>::ParticlePos;
+  using PosVectorSoa = VectorSoaContainer<RealType, ParticleSetTraits<T>::DIM>;
 
-    DynamicCoordinatesT(const DynamicCoordinateKind kind_in) :
-        variable_kind_(kind_in)
-    {
-    }
+  DynamicCoordinatesT(const DynamicCoordinateKind kind_in) : variable_kind_(kind_in) {}
 
-    DynamicCoordinatesT(const DynamicCoordinatesT&) = default;
-    DynamicCoordinatesT&
-    operator=(const DynamicCoordinatesT&) = delete;
+  DynamicCoordinatesT(const DynamicCoordinatesT&)            = default;
+  DynamicCoordinatesT& operator=(const DynamicCoordinatesT&) = delete;
 
-    DynamicCoordinateKind
-    getKind() const
-    {
-        return variable_kind_;
-    }
+  DynamicCoordinateKind getKind() const { return variable_kind_; }
 
-    virtual ~DynamicCoordinatesT() = default;
+  virtual ~DynamicCoordinatesT() = default;
 
-    virtual std::unique_ptr<DynamicCoordinatesT>
-    makeClone() = 0;
+  virtual std::unique_ptr<DynamicCoordinatesT> makeClone() = 0;
 
-    /** resize internal storages based on the number of particles
+  /** resize internal storages based on the number of particles
      *  @param n the number of particles
      */
-    virtual void
-    resize(size_t n) = 0;
-    /// return the number of particles
-    virtual size_t
-    size() const = 0;
-
-    /// overwrite the positions of all the particles.
-    virtual void
-    setAllParticlePos(const ParticlePos& R) = 0;
-    /// overwrite the position of one the particle.
-    virtual void
-    setOneParticlePos(const PosType& pos, size_t iat) = 0;
-    /** copy the active positions of particles with a uniform id in all the
+  virtual void resize(size_t n) = 0;
+  /// return the number of particles
+  virtual size_t size() const = 0;
+
+  /// overwrite the positions of all the particles.
+  virtual void setAllParticlePos(const ParticlePos& R) = 0;
+  /// overwrite the position of one particle.
+  virtual void setOneParticlePos(const PosType& pos, size_t iat) = 0;
+  /** copy the active positions of particles with a uniform id in all the
      * walkers to a single internal buffer.
      *  @param coords_list a batch of DynamicCoordinates
      *  @param iat paricle id, uniform across coords_list
      *  @param new_positions proposed positions
      */
-    virtual void
-    mw_copyActivePos(
-        const RefVectorWithLeader<DynamicCoordinatesT>& coords_list, size_t iat,
-        const std::vector<PosType>& new_positions) const
-    {
-        assert(this == &coords_list.getLeader());
-    }
-
-    /** overwrite the positions of particles with a uniform id in all the
+  virtual void mw_copyActivePos(const RefVectorWithLeader<DynamicCoordinatesT>& coords_list,
+                                size_t iat,
+                                const std::vector<PosType>& new_positions) const
+  {
+    assert(this == &coords_list.getLeader());
+  }
+
+  /** overwrite the positions of particles with a uniform id in all the
      * walkers upon acceptance.
      *  @param coords_list a batch of DynamicCoordinates
      *  @param iat paricle id, uniform across coords_list
      *  @param new_positions proposed positions
      *  @param isAccepted accept/reject info
      */
-    virtual void
-    mw_acceptParticlePos(
-        const RefVectorWithLeader<DynamicCoordinatesT>& coords_list, size_t iat,
-        const std::vector<PosType>& new_positions,
-        const std::vector<bool>& isAccepted) const = 0;
-
-    /// all particle position accessor
-    virtual const PosVectorSoa&
-    getAllParticlePos() const = 0;
-    /// one particle position accessor
-    virtual PosType
-    getOneParticlePos(size_t iat) const = 0;
-
-    /// secure internal data consistency after p-by-p moves
-    virtual void
-    donePbyP()
-    {
-    }
-
-    /// initialize a shared resource and hand it to a collection
-    virtual void
-    createResource(ResourceCollection& collection) const
-    {
-    }
-
-    /// acquire a shared resource from a collection
-    virtual void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DynamicCoordinatesT>& coords_list) const
-    {
-    }
-
-    /// return a shared resource to a collection
-    virtual void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DynamicCoordinatesT>& coords_list) const
-    {
-    }
+  virtual void mw_acceptParticlePos(const RefVectorWithLeader<DynamicCoordinatesT>& coords_list,
+                                    size_t iat,
+                                    const std::vector<PosType>& new_positions,
+                                    const std::vector<bool>& isAccepted) const = 0;
+
+  /// all particle position accessor
+  virtual const PosVectorSoa& getAllParticlePos() const = 0;
+  /// one particle position accessor
+  virtual PosType getOneParticlePos(size_t iat) const = 0;
+
+  /// secure internal data consistency after p-by-p moves
+  virtual void donePbyP() {}
+
+  /// initialize a shared resource and hand it to a collection
+  virtual void createResource(ResourceCollection& collection) const {}
+
+  /// acquire a shared resource from a collection
+  virtual void acquireResource(ResourceCollection& collection,
+                               const RefVectorWithLeader<DynamicCoordinatesT>& coords_list) const
+  {}
+
+  /// return a shared resource to a collection
+  virtual void releaseResource(ResourceCollection& collection,
+                               const RefVectorWithLeader<DynamicCoordinatesT>& coords_list) const
+  {}
 
 protected:
-    /// type of dynamic coordinates
-    const DynamicCoordinateKind variable_kind_;
+  /// type of dynamic coordinates
+  const DynamicCoordinateKind variable_kind_;
 };
 
 /** create DynamicCoordinates based on kind
  */
-template <typename T>
+template<typename T>
 std::unique_ptr<DynamicCoordinatesT<T>> createDynamicCoordinatesT(
     const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS);
 } // namespace qmcplusplus
diff --git a/src/Particle/InitMolecularSystemT.cpp b/src/Particle/InitMolecularSystemT.cpp
index 896a6c1d22f..8d6b848095d 100644
--- a/src/Particle/InitMolecularSystemT.cpp
+++ b/src/Particle/InitMolecularSystemT.cpp
@@ -4,20 +4,15 @@
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Jordan E. Vincent, University of Illinois at
-// Urbana-Champaign
-//                    Luke Shulenburger, lshulen@sandia.gov, Sandia National
-//                    Laboratories Jeremy McMinnis, jmcminis@gmail.com,
-//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Miguel Morales, moralessilva2@llnl.gov,
-//                    Lawrence Livermore National Laboratory Mark Dewing,
-//                    markdewing@gmail.com, University of Illinois at
-//                    Urbana-Champaign Mark A. Berrill, berrillma@ornl.gov, Oak
-//                    Ridge National Laboratory
+// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign
+//                    Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "InitMolecularSystemT.h"
@@ -29,282 +24,270 @@
 
 namespace qmcplusplus
 {
-template <typename T>
-InitMolecularSystemT<T>::InitMolecularSystemT(
-    ParticleSetPoolT<T>& pset, const char* aname) :
-    OhmmsElementBase(aname),
-    ptclPool(pset)
-{
-}
+template<typename T>
+InitMolecularSystemT<T>::InitMolecularSystemT(ParticleSetPoolT<T>& pset, const char* aname)
+    : OhmmsElementBase(aname), ptclPool(pset)
+{}
 
-template <typename T>
-bool
-InitMolecularSystemT<T>::put(xmlNodePtr cur)
+template<typename T>
+bool InitMolecularSystemT<T>::put(xmlNodePtr cur)
 {
-    std::string target("e"), source("i"), volume("no");
-    OhmmsAttributeSet hAttrib;
-    hAttrib.add(target, "target");
-    hAttrib.add(source, "source");
-    hAttrib.add(volume, "use_volume");
-    hAttrib.put(cur);
-    ParticleSetT<T>* els = ptclPool.getParticleSet(target);
-    if (els == 0) {
-        ERRORMSG("No target particle " << target << " exists.")
-        return false;
-    }
-    ParticleSetT<T>* ions = ptclPool.getParticleSet(source);
-    if (ions == 0) {
-        ERRORMSG("No source particle " << source << " exists.")
-        return false;
-    }
-
-    app_log() << "<init source=\"" << source << "\" target=\"" << target
-              << "\">" << std::endl;
-
-    if (volume == "yes")
-        initWithVolume(ions, els);
-    else
-        initMolecule(ions, els);
-
-    makeUniformRandom(els->spins);
-    els->spins *= 2 * M_PI;
-
-    app_log() << "</init>" << std::endl;
-    app_log().flush();
-
-    return true;
+  std::string target("e"), source("i"), volume("no");
+  OhmmsAttributeSet hAttrib;
+  hAttrib.add(target, "target");
+  hAttrib.add(source, "source");
+  hAttrib.add(volume, "use_volume");
+  hAttrib.put(cur);
+  ParticleSetT<T>* els = ptclPool.getParticleSet(target);
+  if (els == 0)
+  {
+    ERRORMSG("No target particle " << target << " exists.")
+    return false;
+  }
+  ParticleSetT<T>* ions = ptclPool.getParticleSet(source);
+  if (ions == 0)
+  {
+    ERRORMSG("No source particle " << source << " exists.")
+    return false;
+  }
+
+  app_log() << "<init source=\"" << source << "\" target=\"" << target << "\">" << std::endl;
+
+  if (volume == "yes")
+    initWithVolume(ions, els);
+  else
+    initMolecule(ions, els);
+
+  makeUniformRandom(els->spins);
+  els->spins *= 2 * M_PI;
+
+  app_log() << "</init>" << std::endl;
+  app_log().flush();
+
+  return true;
 }
 
-template <typename T>
-void
-InitMolecularSystemT<T>::initAtom(ParticleSetT<T>* ions, ParticleSetT<T>* els)
+template<typename T>
+void InitMolecularSystemT<T>::initAtom(ParticleSetT<T>* ions, ParticleSetT<T>* els)
 {
-    // 3N-dimensional Gaussian
-    typename ParticleSetT<T>::ParticlePos chi(els->getTotalNum());
-    makeGaussRandom(chi);
-    RealType q = std::sqrt(static_cast<RealType>(els->getTotalNum())) * 0.5;
-    int nel(els->getTotalNum()), items(0);
-    while (nel) {
-        els->R[items] = ions->R[0] + q * chi[items];
-        --nel;
-        ++items;
-    }
+  // 3N-dimensional Gaussian
+  typename ParticleSetT<T>::ParticlePos chi(els->getTotalNum());
+  makeGaussRandom(chi);
+  RealType q = std::sqrt(static_cast<RealType>(els->getTotalNum())) * 0.5;
+  int nel(els->getTotalNum()), items(0);
+  while (nel)
+  {
+    els->R[items] = ions->R[0] + q * chi[items];
+    --nel;
+    ++items;
+  }
 }
 
-template <typename TReal>
+template<typename TReal>
 struct LoneElectronT
 {
-    using RealType = TReal;
-    int ID;
-    RealType BondLength;
-    inline LoneElectronT(int id, RealType bl) : ID(id), BondLength(bl)
-    {
-    }
+  using RealType = TReal;
+  int ID;
+  RealType BondLength;
+  inline LoneElectronT(int id, RealType bl) : ID(id), BondLength(bl) {}
 };
 
-template <typename T>
-void
-InitMolecularSystemT<T>::initMolecule(
-    ParticleSetT<T>* ions, ParticleSetT<T>* els)
+template<typename T>
+void InitMolecularSystemT<T>::initMolecule(ParticleSetT<T>* ions, ParticleSetT<T>* els)
 {
-    if (ions->getTotalNum() == 1)
-        return initAtom(ions, els);
-
-    const int d_ii_ID = ions->addTable(*ions);
-    ions->update();
-    const typename ParticleSetT<T>::ParticleIndex& grID(ions->GroupID);
-    SpeciesSet& Species(ions->getSpeciesSet());
-    int Centers = ions->getTotalNum();
-    std::vector<int> Qtot(Centers), Qcore(Centers), Qval(Centers, 0);
-    // use charge as the core electrons first
-    int icharge = Species.addAttribute("charge");
-    // Assign default core charge
-    for (int iat = 0; iat < Centers; iat++)
-        Qtot[iat] = static_cast<int>(Species(icharge, grID[iat]));
-    // cutoff radius (Bohr) this a random choice
-    RealType cutoff = 4.0;
-    typename ParticleSetT<T>::ParticlePos chi(els->getTotalNum());
-    // makeGaussRandom(chi);
-    makeSphereRandom(chi);
-    // the upper limit of the electron index with spin up
-    const int numUp = els->last(0);
-    // the upper limit of the electron index with spin down. Pay attention to
-    // the no spin down electron case.
-    const int numDown = els->last(els->groups() > 1 ? 1 : 0) - els->first(0);
-    // consumer counter of random numbers chi
-    int random_number_counter = 0;
-    int nup_tot = 0, ndown_tot = numUp;
-    std::vector<LoneElectronT<RealType>> loneQ;
-    RealType rmin = cutoff;
-    typename ParticleSetT<T>::SingleParticlePos cm;
-
-    const auto& dist = ions->getDistTableAA(d_ii_ID).getDistances();
-    // Step 1. Distribute even Q[iat] of atomic center iat. If Q[iat] is odd,
-    // put Q[iat]-1 and save the lone electron.
-    for (size_t iat = 0; iat < Centers; iat++) {
-        cm += ions->R[iat];
-        for (size_t jat = iat + 1; jat < Centers; ++jat) {
-            rmin = std::min(rmin, dist[jat][iat]);
-        }
-        // use 40% of the minimum bond
-        RealType sep = rmin * 0.4;
-        int v2 = Qtot[iat] / 2;
-        if (Qtot[iat] > v2 * 2) {
-            loneQ.push_back(LoneElectronT<RealType>(iat, sep));
-        }
-        for (int k = 0; k < v2; k++) {
-            // initialize electron positions in pairs
-            if (nup_tot < numUp)
-                els->R[nup_tot++] =
-                    ions->R[iat] + sep * chi[random_number_counter++];
-            if (ndown_tot < numDown)
-                els->R[ndown_tot++] =
-                    ions->R[iat] + sep * chi[random_number_counter++];
-        }
+  if (ions->getTotalNum() == 1)
+    return initAtom(ions, els);
+
+  const int d_ii_ID = ions->addTable(*ions);
+  ions->update();
+  const typename ParticleSetT<T>::ParticleIndex& grID(ions->GroupID);
+  SpeciesSet& Species(ions->getSpeciesSet());
+  int Centers = ions->getTotalNum();
+  std::vector<int> Qtot(Centers), Qcore(Centers), Qval(Centers, 0);
+  // use charge as the core electrons first
+  int icharge = Species.addAttribute("charge");
+  // Assign default core charge
+  for (int iat = 0; iat < Centers; iat++)
+    Qtot[iat] = static_cast<int>(Species(icharge, grID[iat]));
+  // cutoff radius (Bohr); this is an arbitrary choice
+  RealType cutoff = 4.0;
+  typename ParticleSetT<T>::ParticlePos chi(els->getTotalNum());
+  // makeGaussRandom(chi);
+  makeSphereRandom(chi);
+  // the upper limit of the electron index with spin up
+  const int numUp = els->last(0);
+  // the upper limit of the electron index with spin down. Pay attention to
+  // the no spin down electron case.
+  const int numDown = els->last(els->groups() > 1 ? 1 : 0) - els->first(0);
+  // consumer counter of random numbers chi
+  int random_number_counter = 0;
+  int nup_tot = 0, ndown_tot = numUp;
+  std::vector<LoneElectronT<RealType>> loneQ;
+  RealType rmin = cutoff;
+  typename ParticleSetT<T>::SingleParticlePos cm;
+
+  const auto& dist = ions->getDistTableAA(d_ii_ID).getDistances();
+  // Step 1. Distribute even Q[iat] of atomic center iat. If Q[iat] is odd,
+  // put Q[iat]-1 and save the lone electron.
+  for (size_t iat = 0; iat < Centers; iat++)
+  {
+    cm += ions->R[iat];
+    for (size_t jat = iat + 1; jat < Centers; ++jat)
+    {
+      rmin = std::min(rmin, dist[jat][iat]);
     }
-
-    // Step 2. Distribute the electrons left alone
-    // mmorales: changed order of spin assignment to help with spin
-    // imbalances in molecules at large distances.
-    // Not guaranteed to work, but should help in most cases
-    // as long as atoms in molecules are defined sequencially
-    typename std::vector<LoneElectronT<RealType>>::iterator it(loneQ.begin());
-    typename std::vector<LoneElectronT<RealType>>::iterator it_end(loneQ.end());
-    while (it != it_end && nup_tot != numUp && ndown_tot != numDown) {
-        if (nup_tot < numUp) {
-            els->R[nup_tot++] = ions->R[(*it).ID] +
-                (*it).BondLength * chi[random_number_counter++];
-            ++it;
-        }
-        if (ndown_tot < numDown && it != it_end) {
-            els->R[ndown_tot++] = ions->R[(*it).ID] +
-                (*it).BondLength * chi[random_number_counter++];
-            ++it;
-        }
+    // use 40% of the minimum bond
+    RealType sep = rmin * 0.4;
+    int v2       = Qtot[iat] / 2;
+    if (Qtot[iat] > v2 * 2)
+    {
+      loneQ.push_back(LoneElectronT<RealType>(iat, sep));
     }
-
-    // Step 3. Handle more than neutral electrons
-    // extra electrons around the geometric center
-    RealType cnorm = 1.0 / static_cast<RealType>(Centers);
-    RealType sep = rmin * 2;
-    cm = cnorm * cm;
+    for (int k = 0; k < v2; k++)
+    {
+      // initialize electron positions in pairs
+      if (nup_tot < numUp)
+        els->R[nup_tot++] = ions->R[iat] + sep * chi[random_number_counter++];
+      if (ndown_tot < numDown)
+        els->R[ndown_tot++] = ions->R[iat] + sep * chi[random_number_counter++];
+    }
+  }
+
+  // Step 2. Distribute the electrons left alone
+  // mmorales: changed order of spin assignment to help with spin
+  // imbalances in molecules at large distances.
+  // Not guaranteed to work, but should help in most cases
+  // as long as atoms in molecules are defined sequentially
+  typename std::vector<LoneElectronT<RealType>>::iterator it(loneQ.begin());
+  typename std::vector<LoneElectronT<RealType>>::iterator it_end(loneQ.end());
+  while (it != it_end && nup_tot != numUp && ndown_tot != numDown)
+  {
     if (nup_tot < numUp)
-        while (nup_tot < numUp)
-            els->R[nup_tot++] = cm + sep * chi[random_number_counter++];
-    if (ndown_tot < numDown)
-        while (ndown_tot < numDown)
-            els->R[ndown_tot++] = cm + sep * chi[random_number_counter++];
-
-    // safety check. all the random numbers should have been consumed once and
-    // only once.
-    if (random_number_counter != chi.size())
-        throw std::runtime_error("initMolecule unexpected random number "
-                                 "consumption. Please report a bug!");
-
-    // put all the electrons in a unit box
-    if (els->getLattice().SuperCellEnum != SUPERCELL_OPEN) {
-        els->R.setUnit(PosUnit::Cartesian);
-        els->applyBC(els->R);
-        els->update(false);
+    {
+      els->R[nup_tot++] = ions->R[(*it).ID] + (*it).BondLength * chi[random_number_counter++];
+      ++it;
     }
+    if (ndown_tot < numDown && it != it_end)
+    {
+      els->R[ndown_tot++] = ions->R[(*it).ID] + (*it).BondLength * chi[random_number_counter++];
+      ++it;
+    }
+  }
+
+  // Step 3. Handle more than neutral electrons
+  // extra electrons around the geometric center
+  RealType cnorm = 1.0 / static_cast<RealType>(Centers);
+  RealType sep   = rmin * 2;
+  cm             = cnorm * cm;
+  if (nup_tot < numUp)
+    while (nup_tot < numUp)
+      els->R[nup_tot++] = cm + sep * chi[random_number_counter++];
+  if (ndown_tot < numDown)
+    while (ndown_tot < numDown)
+      els->R[ndown_tot++] = cm + sep * chi[random_number_counter++];
+
+  // safety check. all the random numbers should have been consumed once and
+  // only once.
+  if (random_number_counter != chi.size())
+    throw std::runtime_error("initMolecule unexpected random number "
+                             "consumption. Please report a bug!");
+
+  // put all the electrons in a unit box
+  if (els->getLattice().SuperCellEnum != SUPERCELL_OPEN)
+  {
+    els->R.setUnit(PosUnit::Cartesian);
+    els->applyBC(els->R);
+    els->update(false);
+  }
 }
 
 /// helper function to determine the lower bound of a domain (need to move up)
-template <typename T>
-inline TinyVector<T, 3>
-lower_bound(const TinyVector<T, 3>& a, const TinyVector<T, 3>& b)
+template<typename T>
+inline TinyVector<T, 3> lower_bound(const TinyVector<T, 3>& a, const TinyVector<T, 3>& b)
 {
-    return TinyVector<T, 3>(
-        std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]));
+  return TinyVector<T, 3>(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]));
 }
 
 /// helper function to determine the upper bound of a domain (need to move up)
-template <typename T>
-inline TinyVector<T, 3>
-upper_bound(const TinyVector<T, 3>& a, const TinyVector<T, 3>& b)
+template<typename T>
+inline TinyVector<T, 3> upper_bound(const TinyVector<T, 3>& a, const TinyVector<T, 3>& b)
 {
-    return TinyVector<T, 3>(
-        std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]));
+  return TinyVector<T, 3>(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]));
 }
 
-template <typename T>
-void
-InitMolecularSystemT<T>::initWithVolume(
-    ParticleSetT<T>* ions, ParticleSetT<T>* els)
+template<typename T>
+void InitMolecularSystemT<T>::initWithVolume(ParticleSetT<T>* ions, ParticleSetT<T>* els)
 {
-    TinyVector<RealType, OHMMS_DIM> start(1.0);
-    TinyVector<RealType, OHMMS_DIM> end(0.0);
-
-    typename ParticleSetT<T>::ParticlePos Ru(ions->getTotalNum());
-    Ru.setUnit(PosUnit::Lattice);
-    ions->applyBC(ions->R, Ru);
-
-    for (int iat = 0; iat < Ru.size(); iat++) {
-        start = lower_bound(Ru[iat], start);
-        end = upper_bound(Ru[iat], end);
-    }
-
-    TinyVector<RealType, OHMMS_DIM> shift;
-    Tensor<RealType, OHMMS_DIM> newbox(ions->getLattice().R);
-
-    RealType buffer = 2.0; // buffer 2 bohr
-    for (int idim = 0; idim < OHMMS_DIM; ++idim) {
-        // if(ions->getLattice().BoxBConds[idim])
-        //{
-        //   start[idim]=0.0;
-        //   end[idim]=1.0;
-        //   shift[idim]=0.0;
-        // }
-        // else
-        {
-            RealType buffer_r = buffer * ions->getLattice().OneOverLength[idim];
-            start[idim] = std::max((RealType)0.0, (start[idim] - buffer_r));
-            end[idim] = std::min((RealType)1.0, (end[idim] + buffer_r));
-            shift[idim] = start[idim] * ions->getLattice().Length[idim];
-            if (std::abs(end[idim] = start[idim]) <
-                buffer) { // handle singular case
-                start[idim] = std::max(0.0, start[idim] - buffer_r / 2.0);
-                end[idim] = std::min(1.0, end[idim] + buffer_r / 2.0);
-            }
-
-            newbox(idim, idim) =
-                (end[idim] - start[idim]) * ions->getLattice().Length[idim];
-        }
+  TinyVector<RealType, OHMMS_DIM> start(1.0);
+  TinyVector<RealType, OHMMS_DIM> end(0.0);
+
+  typename ParticleSetT<T>::ParticlePos Ru(ions->getTotalNum());
+  Ru.setUnit(PosUnit::Lattice);
+  ions->applyBC(ions->R, Ru);
+
+  for (int iat = 0; iat < Ru.size(); iat++)
+  {
+    start = lower_bound(Ru[iat], start);
+    end   = upper_bound(Ru[iat], end);
+  }
+
+  TinyVector<RealType, OHMMS_DIM> shift;
+  Tensor<RealType, OHMMS_DIM> newbox(ions->getLattice().R);
+
+  RealType buffer = 2.0; // buffer 2 bohr
+  for (int idim = 0; idim < OHMMS_DIM; ++idim)
+  {
+    // if(ions->getLattice().BoxBConds[idim])
+    //{
+    //   start[idim]=0.0;
+    //   end[idim]=1.0;
+    //   shift[idim]=0.0;
+    // }
+    // else
+    {
+      RealType buffer_r = buffer * ions->getLattice().OneOverLength[idim]; // buffer scaled by 1/Length of this lattice direction
+      start[idim]       = std::max((RealType)0.0, (start[idim] - buffer_r)); // pad the lower bound, clamped to 0
+      end[idim]         = std::min((RealType)1.0, (end[idim] + buffer_r));   // pad the upper bound, clamped to 1
+      shift[idim]       = start[idim] * ions->getLattice().Length[idim];     // offset later added to the Cartesian electron positions
+      if (std::abs(end[idim] - start[idim]) < buffer) // extent of the padded range; a subtraction so end[idim] is not overwritten
+      { // handle singular case. NOTE(review): extent is in lattice units while buffer is in bohr; buffer_r may be the intended threshold - confirm
+        start[idim] = std::max(0.0, start[idim] - buffer_r / 2.0);
+        end[idim]   = std::min(1.0, end[idim] + buffer_r / 2.0);
+      }
+
+      newbox(idim, idim) = (end[idim] - start[idim]) * ions->getLattice().Length[idim]; // diagonal length of the reduced box
+    }
+  }
 
-    typename ParticleSetT<T>::ParticleLayout slattice(ions->getLattice());
-    slattice.set(newbox);
+  typename ParticleSetT<T>::ParticleLayout slattice(ions->getLattice());
+  slattice.set(newbox);
 
-    app_log() << "  InitMolecularSystem::initWithVolume " << std::endl;
-    app_log() << "  Effective Lattice shifted by  " << shift << std::endl;
-    app_log() << newbox << std::endl;
+  app_log() << "  InitMolecularSystem::initWithVolume " << std::endl;
+  app_log() << "  Effective Lattice shifted by  " << shift << std::endl;
+  app_log() << newbox << std::endl;
 
-    Ru.resize(els->getTotalNum());
-    makeUniformRandom(Ru);
-    for (int iat = 0; iat < Ru.size(); ++iat)
-        els->R[iat] = slattice.toCart(Ru[iat]) + shift;
-    els->R.setUnit(PosUnit::Cartesian);
+  Ru.resize(els->getTotalNum());
+  makeUniformRandom(Ru);
+  for (int iat = 0; iat < Ru.size(); ++iat)
+    els->R[iat] = slattice.toCart(Ru[iat]) + shift;
+  els->R.setUnit(PosUnit::Cartesian);
 }
 
-template <typename T>
-bool
-InitMolecularSystemT<T>::put(std::istream& is)
+template<typename T>
+bool InitMolecularSystemT<T>::put(std::istream& is)
 {
-    return true;
+  return true;
 }
 
-template <typename T>
-bool
-InitMolecularSystemT<T>::get(std::ostream& os) const
+template<typename T>
+bool InitMolecularSystemT<T>::get(std::ostream& os) const
 {
-    return true;
+  return true;
 }
 
-template <typename T>
-void
-InitMolecularSystemT<T>::reset()
-{
-}
+template<typename T>
+void InitMolecularSystemT<T>::reset()
+{}
 
 #ifndef QMC_COMPLEX
 #ifndef MIXED_PRECISION
diff --git a/src/Particle/InitMolecularSystemT.h b/src/Particle/InitMolecularSystemT.h
index 3bfe148db5e..a5d7125adfe 100644
--- a/src/Particle/InitMolecularSystemT.h
+++ b/src/Particle/InitMolecularSystemT.h
@@ -4,13 +4,10 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_INITMOLECULARSYSTEMT_H
@@ -23,57 +20,50 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class ParticleSetT;
-template <typename T>
+template<typename T>
 class ParticleSetPoolT;
 
 /* Engine to initialize the initial electronic structure for a molecular system
  */
-template <typename T>
+template<typename T>
 class InitMolecularSystemT : public OhmmsElementBase
 {
 public:
-    using RealType = typename ParticleSetTraits<T>::RealType;
+  using RealType = typename ParticleSetTraits<T>::RealType;
 
-    InitMolecularSystemT(ParticleSetPoolT<T>& pset, const char* aname = "mosystem");
+  InitMolecularSystemT(ParticleSetPoolT<T>& pset, const char* aname = "mosystem");
 
-    bool
-    get(std::ostream& os) const override;
-    bool
-    put(std::istream& is) override;
-    bool
-    put(xmlNodePtr cur) override;
-    void
-    reset() override;
+  bool get(std::ostream& os) const override;
+  bool put(std::istream& is) override;
+  bool put(xmlNodePtr cur) override;
+  void reset() override;
 
-    /** initialize els for an atom
+  /** initialize els for an atom
      */
-    void
-    initAtom(ParticleSetT<T>* ions, ParticleSetT<T>* els);
-    /** initialize els position for a molecule
+  void initAtom(ParticleSetT<T>* ions, ParticleSetT<T>* els);
+  /** initialize els position for a molecule
      *
      * Use the valence of each ionic species on a sphere
      */
-    void
-    initMolecule(ParticleSetT<T>* ions, ParticleSetT<T>* els);
-    /** initialize els for the systems with a mixed boundary
+  void initMolecule(ParticleSetT<T>* ions, ParticleSetT<T>* els);
+  /** initialize els for the systems with a mixed boundary
      *
      * Use the bound of the ionic systems and uniform random positions within a
      * reduced box
      */
-    void
-    initWithVolume(ParticleSetT<T>* ions, ParticleSetT<T>* els);
+  void initWithVolume(ParticleSetT<T>* ions, ParticleSetT<T>* els);
 
 private:
-    /** pointer to ParticleSetPool
+  /** pointer to ParticleSetPool
      *
      * QMCHamiltonian needs to know which ParticleSet object
      * is used as an input object for the evaluations.
      * Any number of ParticleSet can be used to describe
      * a QMCHamiltonian.
      */
-    ParticleSetPoolT<T>& ptclPool;
+  ParticleSetPoolT<T>& ptclPool;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/LongRange/KContainerT.cpp b/src/Particle/LongRange/KContainerT.cpp
index eee850387db..48679cfa892 100644
--- a/src/Particle/LongRange/KContainerT.cpp
+++ b/src/Particle/LongRange/KContainerT.cpp
@@ -4,14 +4,11 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "KContainerT.h"
@@ -25,33 +22,34 @@
 
 namespace qmcplusplus
 {
-template <typename T>
-void
-KContainerT<T>::updateKLists(const ParticleLayout& lattice, RealType kc,
-    unsigned ndim, const PosType& twist, bool useSphere)
+template<typename T>
+void KContainerT<T>::updateKLists(const ParticleLayout& lattice,
+                                  RealType kc,
+                                  unsigned ndim,
+                                  const PosType& twist,
+                                  bool useSphere)
 {
-    kcutoff = kc;
-    if (kcutoff <= 0.0) {
-        APP_ABORT("  Illegal cutoff for KContainer");
-    }
-    findApproxMMax(lattice, ndim);
-    BuildKLists(lattice, twist, useSphere);
+  kcutoff = kc;
+  if (kcutoff <= 0.0)
+  {
+    APP_ABORT("  Illegal cutoff for KContainer");
+  }
+  findApproxMMax(lattice, ndim);
+  BuildKLists(lattice, twist, useSphere);
 
-    app_log() << "  KContainer initialised with cutoff " << kcutoff
-              << std::endl;
-    app_log() << "   # of K-shell  = " << kshell.size() << std::endl;
-    app_log() << "   # of K points = " << kpts.size() << std::endl;
-    app_log() << std::endl;
+  app_log() << "  KContainer initialised with cutoff " << kcutoff << std::endl;
+  app_log() << "   # of K-shell  = " << kshell.size() << std::endl;
+  app_log() << "   # of K points = " << kpts.size() << std::endl;
+  app_log() << std::endl;
 }
 
-template <typename T>
-void
-KContainerT<T>::findApproxMMax(const ParticleLayout& lattice, unsigned ndim)
+template<typename T>
+void KContainerT<T>::findApproxMMax(const ParticleLayout& lattice, unsigned ndim)
 {
-    // Estimate the size of the parallelpiped that encompasses a sphere of
-    // kcutoff. mmax is stored as integer translations of the reciprocal cell
-    // vectors. Does not require an orthorhombic cell.
-    /* Old method.
+  // Estimate the size of the parallelepiped that encompasses a sphere of
+  // kcutoff. mmax is stored as integer translations of the reciprocal cell
+  // vectors. Does not require an orthorhombic cell.
+  /* Old method.
     //2pi is not included in lattice.b
     Matrix<RealType> mmat;
     mmat.resize(3,3);
@@ -84,185 +82,199 @@ KContainerT<T>::findApproxMMax(const ParticleLayout& lattice, unsigned ndim)
       mmax[idim] = static_cast<int>(sqrt(4.0*kcut2/tempr)) + 1;
     }
     */
-    // see rmm, Electronic Structure, p. 85 for details
-    for (int i = 0; i < DIM; i++)
-        mmax[i] = static_cast<int>(
-                      std::floor(std::sqrt(dot(lattice.a(i), lattice.a(i))) *
-                          kcutoff / (2 * M_PI))) +
-            1;
+  // see rmm, Electronic Structure, p. 85 for details
+  for (int i = 0; i < DIM; i++)
+    mmax[i] = static_cast<int>(std::floor(std::sqrt(dot(lattice.a(i), lattice.a(i))) * kcutoff / (2 * M_PI))) + 1;
 
-    mmax[DIM] = mmax[0];
-    for (int i = 1; i < DIM; ++i)
-        mmax[DIM] = std::max(mmax[i], mmax[DIM]);
+  mmax[DIM] = mmax[0];
+  for (int i = 1; i < DIM; ++i)
+    mmax[DIM] = std::max(mmax[i], mmax[DIM]);
 
-    // overwrite the non-periodic directon to be zero
-    if (LRCoulombSingleton::isQuasi2D()) {
-        app_log() << "  No kspace sum perpendicular to slab " << std::endl;
-        mmax[2] = 0;
-    }
-    if (ndim < 3) {
-        app_log() << "  No kspace sum along z " << std::endl;
-        mmax[2] = 0;
-    }
-    if (ndim < 2)
-        mmax[1] = 0;
+  // overwrite the non-periodic direction to be zero
+  if (LRCoulombSingleton::isQuasi2D())
+  {
+    app_log() << "  No kspace sum perpendicular to slab " << std::endl;
+    mmax[2] = 0;
+  }
+  if (ndim < 3)
+  {
+    app_log() << "  No kspace sum along z " << std::endl;
+    mmax[2] = 0;
+  }
+  if (ndim < 2)
+    mmax[1] = 0;
 }
 
-template <typename T>
-void
-KContainerT<T>::BuildKLists(
-    const ParticleLayout& lattice, const PosType& twist, bool useSphere)
+template<typename T>
+void KContainerT<T>::BuildKLists(const ParticleLayout& lattice, const PosType& twist, bool useSphere)
 {
-    TinyVector<int, DIM + 1> TempActualMax;
-    TinyVector<int, DIM> kvec;
-    TinyVector<RealType, DIM> kvec_cart;
-    RealType modk2;
-    std::vector<TinyVector<int, DIM>> kpts_tmp;
-    std::vector<PosType> kpts_cart_tmp;
-    std::vector<RealType> ksq_tmp;
-    // reserve the space for memory efficiency
-    if (useSphere) {
-        const RealType kcut2 = kcutoff * kcutoff;
-        // Loop over guesses for valid k-points.
-        for (int i = -mmax[0]; i <= mmax[0]; i++) {
-            kvec[0] = i;
-            for (int j = -mmax[1]; j <= mmax[1]; j++) {
-                kvec[1] = j;
-                for (int k = -mmax[2]; k <= mmax[2]; k++) {
-                    kvec[2] = k;
-                    // Do not include k=0 in evaluations.
-                    if (i == 0 && j == 0 && k == 0)
-                        continue;
-                    // Convert kvec to Cartesian
-                    kvec_cart = lattice.k_cart(kvec + twist);
-                    // Find modk
-                    modk2 = dot(kvec_cart, kvec_cart);
-                    if (modk2 > kcut2)
-                        continue; // Inside cutoff?
-                    // This k-point should be added to the list
-                    kpts_tmp.push_back(kvec);
-                    kpts_cart_tmp.push_back(kvec_cart);
-                    ksq_tmp.push_back(modk2);
-                    // Update record of the allowed maximum translation.
-                    for (int idim = 0; idim < 3; idim++)
-                        if (std::abs(kvec[idim]) > TempActualMax[idim])
-                            TempActualMax[idim] = std::abs(kvec[idim]);
-                }
-            }
+  TinyVector<int, DIM + 1> TempActualMax;
+  TinyVector<int, DIM> kvec;
+  TinyVector<RealType, DIM> kvec_cart;
+  RealType modk2;
+  std::vector<TinyVector<int, DIM>> kpts_tmp;
+  std::vector<PosType> kpts_cart_tmp;
+  std::vector<RealType> ksq_tmp;
+  // reserve the space for memory efficiency
+  if (useSphere)
+  {
+    const RealType kcut2 = kcutoff * kcutoff;
+    // Loop over guesses for valid k-points.
+    for (int i = -mmax[0]; i <= mmax[0]; i++)
+    {
+      kvec[0] = i;
+      for (int j = -mmax[1]; j <= mmax[1]; j++)
+      {
+        kvec[1] = j;
+        for (int k = -mmax[2]; k <= mmax[2]; k++)
+        {
+          kvec[2] = k;
+          // Do not include k=0 in evaluations.
+          if (i == 0 && j == 0 && k == 0)
+            continue;
+          // Convert kvec to Cartesian
+          kvec_cart = lattice.k_cart(kvec + twist);
+          // Find modk
+          modk2 = dot(kvec_cart, kvec_cart);
+          if (modk2 > kcut2)
+            continue; // Inside cutoff?
+          // This k-point should be added to the list
+          kpts_tmp.push_back(kvec);
+          kpts_cart_tmp.push_back(kvec_cart);
+          ksq_tmp.push_back(modk2);
+          // Update record of the allowed maximum translation.
+          for (int idim = 0; idim < 3; idim++)
+            if (std::abs(kvec[idim]) > TempActualMax[idim])
+              TempActualMax[idim] = std::abs(kvec[idim]);
         }
+      }
     }
-    else {
-        // Loop over all k-points in the parallelpiped and add them to
-        // kcontainer note layout is for interfacing with fft, so for each
-        // dimension, the positive indexes come first then the negative indexes
-        // backwards e.g.    0, 1, .... mmax, -mmax+1, -mmax+2, ... -1
-        const int idimsize = mmax[0] * 2;
-        const int jdimsize = mmax[1] * 2;
-        const int kdimsize = mmax[2] * 2;
-        for (int i = 0; i < idimsize; i++) {
-            kvec[0] = i;
-            if (kvec[0] > mmax[0])
-                kvec[0] -= idimsize;
-            for (int j = 0; j < jdimsize; j++) {
-                kvec[1] = j;
-                if (kvec[1] > mmax[1])
-                    kvec[1] -= jdimsize;
-                for (int k = 0; k < kdimsize; k++) {
-                    kvec[2] = k;
-                    if (kvec[2] > mmax[2])
-                        kvec[2] -= kdimsize;
-                    // get cartesian location and modk2
-                    kvec_cart = lattice.k_cart(kvec);
-                    modk2 = dot(kvec_cart, kvec_cart);
-                    // add k-point to lists
-                    kpts_tmp.push_back(kvec);
-                    kpts_cart_tmp.push_back(kvec_cart);
-                    ksq_tmp.push_back(modk2);
-                }
-            }
+  }
+  else
+  {
+    // Loop over all k-points in the parallelepiped and add them to
+    // kcontainer. Note: the layout is for interfacing with fft, so for each
+    // dimension, the positive indexes come first then the negative indexes
+    // backwards e.g.    0, 1, .... mmax, -mmax+1, -mmax+2, ... -1
+    const int idimsize = mmax[0] * 2;
+    const int jdimsize = mmax[1] * 2;
+    const int kdimsize = mmax[2] * 2;
+    for (int i = 0; i < idimsize; i++)
+    {
+      kvec[0] = i;
+      if (kvec[0] > mmax[0])
+        kvec[0] -= idimsize;
+      for (int j = 0; j < jdimsize; j++)
+      {
+        kvec[1] = j;
+        if (kvec[1] > mmax[1])
+          kvec[1] -= jdimsize;
+        for (int k = 0; k < kdimsize; k++)
+        {
+          kvec[2] = k;
+          if (kvec[2] > mmax[2])
+            kvec[2] -= kdimsize;
+          // get cartesian location and modk2
+          kvec_cart = lattice.k_cart(kvec);
+          modk2     = dot(kvec_cart, kvec_cart);
+          // add k-point to lists
+          kpts_tmp.push_back(kvec);
+          kpts_cart_tmp.push_back(kvec_cart);
+          ksq_tmp.push_back(modk2);
         }
-        // set allowed maximum translation
-        TempActualMax[0] = mmax[0];
-        TempActualMax[1] = mmax[1];
-        TempActualMax[2] = mmax[2];
+      }
     }
+    // set allowed maximum translation
+    TempActualMax[0] = mmax[0];
+    TempActualMax[1] = mmax[1];
+    TempActualMax[2] = mmax[2];
+  }
 
-    // Update a record of the number of k vectors
-    numk = kpts_tmp.size();
-    std::map<int64_t, std::vector<int>*> kpts_sorted;
-    // create the map: use simple integer with resolution of 0.00000001 in ksq
-    for (int ik = 0; ik < numk; ik++) {
-        // This is a workaround for ewald bug (Issue #2105).  Basically, 1e-7 is
-        // the resolution of |k|^2 for doubles, so we jack up the tolerance to
-        // match that.
-        const int64_t k_ind = static_cast<int64_t>(ksq_tmp[ik] * 10000000);
-        auto it(kpts_sorted.find(k_ind));
-        if (it == kpts_sorted.end()) {
-            std::vector<int>* newSet = new std::vector<int>;
-            kpts_sorted[k_ind] = newSet;
-            newSet->push_back(ik);
-        }
-        else {
-            (*it).second->push_back(ik);
-        }
-    }
-    std::map<int64_t, std::vector<int>*>::iterator it(kpts_sorted.begin());
-    kpts.resize(numk);
-    kpts_cart.resize(numk);
-    kpts_cart_soa_.resize(numk);
-    ksq.resize(numk);
-    kshell.resize(kpts_sorted.size() + 1, 0);
-    int ok = 0, ish = 0;
-    while (it != kpts_sorted.end()) {
-        std::vector<int>::iterator vit((*it).second->begin());
-        while (vit != (*it).second->end()) {
-            int ik = (*vit);
-            kpts[ok] = kpts_tmp[ik];
-            kpts_cart[ok] = kpts_cart_tmp[ik];
-            kpts_cart_soa_(ok) = kpts_cart_tmp[ik];
-            ksq[ok] = ksq_tmp[ik];
-            ++vit;
-            ++ok;
-        }
-        kshell[ish + 1] = kshell[ish] + (*it).second->size();
-        ++it;
-        ++ish;
+  // Update a record of the number of k vectors
+  numk = kpts_tmp.size();
+  std::map<int64_t, std::vector<int>*> kpts_sorted;
+  // create the map: use simple integer with resolution of 0.0000001 in ksq
+  for (int ik = 0; ik < numk; ik++)
+  {
+    // This is a workaround for ewald bug (Issue #2105).  Basically, 1e-7 is
+    // the resolution of |k|^2 for doubles, so we jack up the tolerance to
+    // match that.
+    const int64_t k_ind = static_cast<int64_t>(ksq_tmp[ik] * 10000000);
+    auto it(kpts_sorted.find(k_ind));
+    if (it == kpts_sorted.end())
+    {
+      std::vector<int>* newSet = new std::vector<int>;
+      kpts_sorted[k_ind]       = newSet;
+      newSet->push_back(ik);
     }
-    kpts_cart_soa_.updateTo();
-    it = kpts_sorted.begin();
-    std::map<int64_t, std::vector<int>*>::iterator e_it(kpts_sorted.end());
-    while (it != e_it) {
-        delete it->second;
-        it++;
+    else
+    {
+      (*it).second->push_back(ik);
     }
-    // Finished searching k-points. Copy list of maximum translations.
-    mmax[DIM] = 0;
-    for (int idim = 0; idim < DIM; idim++) {
-        mmax[idim] = TempActualMax[idim];
-        mmax[DIM] = std::max(mmax[idim], mmax[DIM]);
-        // if(mmax[idim] > mmax[DIM]) mmax[DIM] = mmax[idim];
+  }
+  std::map<int64_t, std::vector<int>*>::iterator it(kpts_sorted.begin());
+  kpts.resize(numk);
+  kpts_cart.resize(numk);
+  kpts_cart_soa_.resize(numk);
+  ksq.resize(numk);
+  kshell.resize(kpts_sorted.size() + 1, 0);
+  int ok = 0, ish = 0;
+  while (it != kpts_sorted.end())
+  {
+    std::vector<int>::iterator vit((*it).second->begin());
+    while (vit != (*it).second->end())
+    {
+      int ik             = (*vit);
+      kpts[ok]           = kpts_tmp[ik];
+      kpts_cart[ok]      = kpts_cart_tmp[ik];
+      kpts_cart_soa_(ok) = kpts_cart_tmp[ik];
+      ksq[ok]            = ksq_tmp[ik];
+      ++vit;
+      ++ok;
     }
-    // Now fill the array that returns the index of -k when given the index of
-    // k.
-    minusk.resize(numk);
+    kshell[ish + 1] = kshell[ish] + (*it).second->size();
+    ++it;
+    ++ish;
+  }
+  kpts_cart_soa_.updateTo();
+  it = kpts_sorted.begin();
+  std::map<int64_t, std::vector<int>*>::iterator e_it(kpts_sorted.end());
+  while (it != e_it)
+  {
+    delete it->second;
+    it++;
+  }
+  // Finished searching k-points. Copy list of maximum translations.
+  mmax[DIM] = 0;
+  for (int idim = 0; idim < DIM; idim++)
+  {
+    mmax[idim] = TempActualMax[idim];
+    mmax[DIM]  = std::max(mmax[idim], mmax[DIM]);
+    // if(mmax[idim] > mmax[DIM]) mmax[DIM] = mmax[idim];
+  }
+  // Now fill the array that returns the index of -k when given the index of
+  // k.
+  minusk.resize(numk);
 
-    // Assigns a unique hash value to each kpoint.
-    auto getHashOfVec = [](const auto& inpv, int hashparam) -> int64_t {
-        int64_t hash = 0; // this will cause integral promotion below
-        for (int i = 0; i < inpv.Size; ++i)
-            hash += inpv[i] + hash * hashparam;
-        return hash;
-    };
+  // Assigns a unique hash value to each kpoint.
+  auto getHashOfVec = [](const auto& inpv, int hashparam) -> int64_t {
+    int64_t hash = 0; // this will cause integral promotion below
+    for (int i = 0; i < inpv.Size; ++i)
+      hash += inpv[i] + hash * hashparam;
+    return hash;
+  };
 
-    // Create a map from the hash value for each k vector to the index
-    std::map<int64_t, int> hashToIndex;
-    for (int ki = 0; ki < numk; ki++) {
-        hashToIndex[getHashOfVec(kpts[ki], numk)] = ki;
-    }
-    // Use the map to find the index of -k from the index of k
-    for (int ki = 0; ki < numk; ki++) {
-        minusk[ki] = hashToIndex[getHashOfVec(-1 * kpts[ki], numk)];
-    }
+  // Create a map from the hash value for each k vector to the index
+  std::map<int64_t, int> hashToIndex;
+  for (int ki = 0; ki < numk; ki++)
+  {
+    hashToIndex[getHashOfVec(kpts[ki], numk)] = ki;
+  }
+  // Use the map to find the index of -k from the index of k
+  for (int ki = 0; ki < numk; ki++)
+  {
+    minusk[ki] = hashToIndex[getHashOfVec(-1 * kpts[ki], numk)];
+  }
 }
 
 template class KContainerT<double>;
diff --git a/src/Particle/LongRange/KContainerT.h b/src/Particle/LongRange/KContainerT.h
index 2f975569cc8..20b98af3474 100644
--- a/src/Particle/LongRange/KContainerT.h
+++ b/src/Particle/LongRange/KContainerT.h
@@ -1,17 +1,14 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source
-// License. See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_KCONTAINERT_H
@@ -29,85 +26,79 @@ namespace qmcplusplus
  * reciprocal-space cell. K-points are generated within a spherical cutoff set
  * by the supercell
  */
-template <typename T>
+template<typename T>
 class KContainerT
 {
 public:
-    static constexpr auto DIM = ParticleSetTraits<T>::DIM;
-    using RealType = typename ParticleSetTraits<T>::RealType;
-    using PosType = typename ParticleSetTraits<T>::PosType;
+  static constexpr auto DIM = ParticleSetTraits<T>::DIM;
+  using RealType            = typename ParticleSetTraits<T>::RealType;
+  using PosType             = typename ParticleSetTraits<T>::PosType;
 
 private:
-    /// The cutoff up to which k-vectors are generated.
-    RealType kcutoff;
+  /// The cutoff up to which k-vectors are generated.
+  RealType kcutoff;
 
 public:
-    // Typedef for the lattice-type
-    using ParticleLayout = typename LatticeParticleTraits<T>::ParticleLayout;
+  // Typedef for the lattice-type
+  using ParticleLayout = typename LatticeParticleTraits<T>::ParticleLayout;
 
-    /// number of k-points
-    int numk;
+  /// number of k-points
+  int numk;
 
-    /** maximum integer translations of reciprocal cell within kc.
+  /** maximum integer translations of reciprocal cell within kc.
      *
      * Last index is max. of first dimension+1
      */
-    TinyVector<int, DIM + 1> mmax;
+  TinyVector<int, DIM + 1> mmax;
 
-    /** K-vector in reduced coordinates
+  /** K-vector in reduced coordinates
      */
-    std::vector<TinyVector<int, DIM>> kpts;
-    /** K-vector in Cartesian coordinates
+  std::vector<TinyVector<int, DIM>> kpts;
+  /** K-vector in Cartesian coordinates
      */
-    std::vector<PosType> kpts_cart;
-    /** squre of kpts in Cartesian coordniates
+  std::vector<PosType> kpts_cart;
+  /** square of kpts in Cartesian coordinates
      */
-    std::vector<RealType> ksq;
-    /** Given a k index, return index to -k
+  std::vector<RealType> ksq;
+  /** Given a k index, return index to -k
      */
-    std::vector<int> minusk;
-    /** kpts which belong to the ith-shell [kshell[i], kshell[i+1]) */
-    std::vector<int> kshell;
+  std::vector<int> minusk;
+  /** kpts which belong to the ith-shell [kshell[i], kshell[i+1]) */
+  std::vector<int> kshell;
 
-    /** k points sorted by the |k|  excluding |k|=0
+  /** k points sorted by the |k|  excluding |k|=0
      *
      * The first for |k|
      * The second for a map to the full index. The size of the second is the
      * degeneracy.
      */
-    // std::map<int,std::vector<int>*>  kpts_sorted;
+  // std::map<int,std::vector<int>*>  kpts_sorted;
 
-    /** update k-vectors
+  /** update k-vectors
      * @param sc supercell
      * @param kc cutoff radius in the K
      * @param twist shifts the center of the grid of k-vectors
      * @param useSphere if true, use the |K|
      */
-    void
-    updateKLists(const ParticleLayout& lattice, RealType kc, unsigned ndim,
-        const PosType& twist = PosType(), bool useSphere = true);
+  void updateKLists(const ParticleLayout& lattice,
+                    RealType kc,
+                    unsigned ndim,
+                    const PosType& twist = PosType(),
+                    bool useSphere       = true);
 
-    const auto&
-    get_kpts_cart_soa() const
-    {
-        return kpts_cart_soa_;
-    }
+  const auto& get_kpts_cart_soa() const { return kpts_cart_soa_; }
 
 private:
-    /** compute approximate parallelpiped that surrounds kc
+  /** compute approximate parallelepiped that surrounds kc
      * @param lattice supercell
      */
-    void
-    findApproxMMax(const ParticleLayout& lattice, unsigned ndim);
-    /** construct the container for k-vectors */
-    void
-    BuildKLists(
-        const ParticleLayout& lattice, const PosType& twist, bool useSphere);
+  void findApproxMMax(const ParticleLayout& lattice, unsigned ndim);
+  /** construct the container for k-vectors */
+  void BuildKLists(const ParticleLayout& lattice, const PosType& twist, bool useSphere);
 
-    /** K-vector in Cartesian coordinates in SoA layout
+  /** K-vector in Cartesian coordinates in SoA layout
      */
-    VectorSoaContainer<RealType, DIM, OffloadAllocator<RealType>>
-        kpts_cart_soa_;
+  VectorSoaContainer<RealType, DIM, OffloadAllocator<RealType>> kpts_cart_soa_;
 };
 
 } // namespace qmcplusplus
diff --git a/src/Particle/LongRange/StructFactT.cpp b/src/Particle/LongRange/StructFactT.cpp
index 363d364c686..6360fd9f9d4 100644
--- a/src/Particle/LongRange/StructFactT.cpp
+++ b/src/Particle/LongRange/StructFactT.cpp
@@ -5,14 +5,12 @@
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
 // File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Mark A. Berrill, berrillma@ornl.gov, Oak
-//                    Ridge National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "StructFactT.h"
@@ -30,211 +28,197 @@
 namespace qmcplusplus
 {
 // Constructor - pass arguments to k_lists_' constructor
-template <typename T>
-StructFactT<T>::StructFactT(
-    const ParticleLayout& lattice, const KContainerT<T>& k_lists) :
-    SuperCellEnum(SUPERCELL_BULK),
-    k_lists_(k_lists),
-    StorePerParticle(false),
-    update_all_timer_(
-        createGlobalTimer("StructFact::update_all_part", timer_level_fine))
+template<typename T>
+StructFactT<T>::StructFactT(const ParticleLayout& lattice, const KContainerT<T>& k_lists)
+    : SuperCellEnum(SUPERCELL_BULK),
+      k_lists_(k_lists),
+      StorePerParticle(false),
+      update_all_timer_(createGlobalTimer("StructFact::update_all_part", timer_level_fine))
 {
-    if (LRCoulombSingleton::isQuasi2D()) {
-        app_log() << "  Setting StructFact::SuperCellEnum=SUPERCELL_SLAB "
-                  << std::endl;
-        SuperCellEnum = SUPERCELL_SLAB;
-    }
+  if (LRCoulombSingleton::isQuasi2D())
+  {
+    app_log() << "  Setting StructFact::SuperCellEnum=SUPERCELL_SLAB " << std::endl;
+    SuperCellEnum = SUPERCELL_SLAB;
+  }
 }
 
 // Destructor
-template <typename T>
+template<typename T>
 StructFactT<T>::~StructFactT() = default;
 
-template <typename T>
-void
-StructFactT<T>::resize(int nkpts, int num_species, int num_ptcls)
+template<typename T>
+void StructFactT<T>::resize(int nkpts, int num_species, int num_ptcls)
 {
-    rhok_r.resize(num_species, nkpts);
-    rhok_i.resize(num_species, nkpts);
-    if (StorePerParticle) {
-        eikr_r.resize(num_ptcls, nkpts);
-        eikr_i.resize(num_ptcls, nkpts);
-    }
+  rhok_r.resize(num_species, nkpts);
+  rhok_i.resize(num_species, nkpts);
+  if (StorePerParticle)
+  {
+    eikr_r.resize(num_ptcls, nkpts);
+    eikr_i.resize(num_ptcls, nkpts);
+  }
 }
 
-template <typename T>
-void
-StructFactT<T>::updateAllPart(const ParticleSetT<T>& P)
+template<typename T>
+void StructFactT<T>::updateAllPart(const ParticleSetT<T>& P)
 {
-    ScopedTimer local(update_all_timer_);
-    computeRhok(P);
+  ScopedTimer local(update_all_timer_);
+  computeRhok(P);
 }
 
-template <typename T>
-void
-StructFactT<T>::mw_updateAllPart(
-    const RefVectorWithLeader<StructFactT>& sk_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& p_list,
-    SKMultiWalkerMemT<T>& mw_mem)
+template<typename T>
+void StructFactT<T>::mw_updateAllPart(const RefVectorWithLeader<StructFactT>& sk_list,
+                                      const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                                      SKMultiWalkerMemT<T>& mw_mem)
 {
-    auto& sk_leader = sk_list.getLeader();
-    auto& p_leader = p_list.getLeader();
-    ScopedTimer local(sk_leader.update_all_timer_);
-    if (p_leader.getCoordinates().getKind() !=
-            DynamicCoordinateKind::DC_POS_OFFLOAD ||
-        sk_leader.StorePerParticle)
-        for (int iw = 0; iw < sk_list.size(); iw++)
-            sk_list[iw].computeRhok(p_list[iw]);
-    else {
-        const size_t nw = p_list.size();
-        const size_t num_species = p_leader.groups();
-        const auto& kpts_cart = sk_leader.k_lists_.get_kpts_cart_soa();
-        const size_t nk = sk_leader.k_lists_.numk;
-        const size_t nk_padded = kpts_cart.capacity();
-
-        auto& coordinates_leader =
-            static_cast<const RealSpacePositionsTOMPTarget<T>&>(
-                p_leader.getCoordinates());
-        auto& mw_rsoa_dev_ptrs =
-            coordinates_leader.getMultiWalkerRSoADevicePtrs();
-        const size_t np_padded =
-            p_leader.getCoordinates().getAllParticlePos().capacity();
-
-        constexpr size_t cplx_stride = 2;
-        mw_mem.nw_rhok.resize(nw * num_species * cplx_stride, nk_padded);
-
-        // make the compute over nk by blocks
-        constexpr size_t kblock_size = 512;
-        const size_t num_kblocks = (nk + kblock_size) / kblock_size;
-
-        auto* mw_rsoa_ptr = mw_rsoa_dev_ptrs.data();
-        auto* kpts_cart_ptr = kpts_cart.data();
-        auto* mw_rhok_ptr = mw_mem.nw_rhok.data();
-        auto* group_offsets = p_leader.get_group_offsets().data();
-
-        PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+  auto& sk_leader = sk_list.getLeader();
+  auto& p_leader  = p_list.getLeader();
+  ScopedTimer local(sk_leader.update_all_timer_);
+  if (p_leader.getCoordinates().getKind() != DynamicCoordinateKind::DC_POS_OFFLOAD || sk_leader.StorePerParticle)
+    for (int iw = 0; iw < sk_list.size(); iw++)
+      sk_list[iw].computeRhok(p_list[iw]);
+  else
+  {
+    const size_t nw          = p_list.size();
+    const size_t num_species = p_leader.groups();
+    const auto& kpts_cart    = sk_leader.k_lists_.get_kpts_cart_soa();
+    const size_t nk          = sk_leader.k_lists_.numk;
+    const size_t nk_padded   = kpts_cart.capacity();
+
+    auto& coordinates_leader = static_cast<const RealSpacePositionsTOMPTarget<T>&>(p_leader.getCoordinates());
+    auto& mw_rsoa_dev_ptrs   = coordinates_leader.getMultiWalkerRSoADevicePtrs();
+    const size_t np_padded   = p_leader.getCoordinates().getAllParticlePos().capacity();
+
+    constexpr size_t cplx_stride = 2;
+    mw_mem.nw_rhok.resize(nw * num_species * cplx_stride, nk_padded);
+
+    // make the compute over nk by blocks
+    constexpr size_t kblock_size = 512;
+    const size_t num_kblocks     = (nk + kblock_size) / kblock_size;
+
+    auto* mw_rsoa_ptr   = mw_rsoa_dev_ptrs.data();
+    auto* kpts_cart_ptr = kpts_cart.data();
+    auto* mw_rhok_ptr   = mw_mem.nw_rhok.data();
+    auto* group_offsets = p_leader.get_group_offsets().data();
+
+    PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
                 map(always, from : mw_rhok_ptr[:mw_mem.nw_rhok.size()])")
-        for (int iw = 0; iw < nw; iw++)
-            for (int ib = 0; ib < num_kblocks; ib++) {
-                const size_t offset = ib * kblock_size;
-                const size_t this_block_size =
-                    omptarget::min(kblock_size, nk - offset);
-                const auto* rsoa_ptr = mw_rsoa_ptr[iw];
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int ik = 0; ik < this_block_size; ik++)
-                    for (int is = 0; is < num_species; is++) {
-                        RealType rhok_r(0), rhok_i(0);
-
-                        for (int ip = group_offsets[is];
-                             ip < group_offsets[is + 1]; ip++) {
-                            RealType s, c, phase(0);
-                            for (int idim = 0; idim < DIM; idim++)
-                                phase += kpts_cart_ptr[ik + offset +
-                                             nk_padded * idim] *
-                                    rsoa_ptr[ip + idim * np_padded];
-                            omptarget::sincos(phase, &s, &c);
-                            rhok_r += c;
-                            rhok_i += s;
-                        }
-
-                        mw_rhok_ptr[(iw * num_species + is) * cplx_stride *
-                                nk_padded +
-                            offset + ik] = rhok_r;
-                        mw_rhok_ptr[(iw * num_species + is) * cplx_stride *
-                                nk_padded +
-                            nk_padded + offset + ik] = rhok_i;
-                    }
+    for (int iw = 0; iw < nw; iw++)
+      for (int ib = 0; ib < num_kblocks; ib++)
+      {
+        const size_t offset          = ib * kblock_size;
+        const size_t this_block_size = omptarget::min(kblock_size, nk - offset);
+        const auto* rsoa_ptr         = mw_rsoa_ptr[iw];
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int ik = 0; ik < this_block_size; ik++)
+          for (int is = 0; is < num_species; is++)
+          {
+            RealType rhok_r(0), rhok_i(0);
+
+            for (int ip = group_offsets[is]; ip < group_offsets[is + 1]; ip++)
+            {
+              RealType s, c, phase(0);
+              for (int idim = 0; idim < DIM; idim++)
+                phase += kpts_cart_ptr[ik + offset + nk_padded * idim] * rsoa_ptr[ip + idim * np_padded];
+              omptarget::sincos(phase, &s, &c);
+              rhok_r += c;
+              rhok_i += s;
             }
 
-        for (int iw = 0; iw < nw; iw++)
-            for (int is = 0; is < num_species; is++) {
-                std::copy_n(
-                    mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride], nk,
-                    sk_list[iw].rhok_r[is]);
-                std::copy_n(
-                    mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride + 1],
-                    nk, sk_list[iw].rhok_i[is]);
-            }
-    }
+            mw_rhok_ptr[(iw * num_species + is) * cplx_stride * nk_padded + offset + ik]             = rhok_r;
+            mw_rhok_ptr[(iw * num_species + is) * cplx_stride * nk_padded + nk_padded + offset + ik] = rhok_i;
+          }
+      }
+
+    for (int iw = 0; iw < nw; iw++)
+      for (int is = 0; is < num_species; is++)
+      {
+        std::copy_n(mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride], nk, sk_list[iw].rhok_r[is]);
+        std::copy_n(mw_mem.nw_rhok[(iw * num_species + is) * cplx_stride + 1], nk, sk_list[iw].rhok_i[is]);
+      }
+  }
 }
 
 /** evaluate rok per species, eikr  per particle
  */
-template <typename T>
-void
-StructFactT<T>::computeRhok(const ParticleSetT<T>& P)
+template<typename T>
+void StructFactT<T>::computeRhok(const ParticleSetT<T>& P)
 {
-    const size_t num_ptcls = P.getTotalNum();
-    const size_t num_species = P.groups();
-    const size_t nk = k_lists_.numk;
-    resize(nk, num_species, num_ptcls);
-
-    rhok_r = 0.0;
-    rhok_i = 0.0;
-    if (StorePerParticle) {
-        // save per particle and species value
-        for (int i = 0; i < num_ptcls; ++i) {
-            const auto& pos = P.R[i];
-            auto* restrict eikr_r_ptr = eikr_r[i];
-            auto* restrict eikr_i_ptr = eikr_i[i];
-            auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)];
-            auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)];
+  const size_t num_ptcls   = P.getTotalNum();
+  const size_t num_species = P.groups();
+  const size_t nk          = k_lists_.numk;
+  resize(nk, num_species, num_ptcls);
+
+  rhok_r = 0.0;
+  rhok_i = 0.0;
+  if (StorePerParticle)
+  {
+    // save per particle and species value
+    for (int i = 0; i < num_ptcls; ++i)
+    {
+      const auto& pos           = P.R[i];
+      auto* restrict eikr_r_ptr = eikr_r[i];
+      auto* restrict eikr_i_ptr = eikr_i[i];
+      auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)];
+      auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)];
 #pragma omp simd
-            for (int ki = 0; ki < nk; ki++) {
-                qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos),
-                    &eikr_i_ptr[ki], &eikr_r_ptr[ki]);
-                rhok_r_ptr[ki] += eikr_r_ptr[ki];
-                rhok_i_ptr[ki] += eikr_i_ptr[ki];
-            }
-        }
+      for (int ki = 0; ki < nk; ki++)
+      {
+        qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos), &eikr_i_ptr[ki], &eikr_r_ptr[ki]);
+        rhok_r_ptr[ki] += eikr_r_ptr[ki];
+        rhok_i_ptr[ki] += eikr_i_ptr[ki];
+      }
     }
-    else {
-        // save per species value
-        for (int i = 0; i < num_ptcls; ++i) {
-            const auto& pos = P.R[i];
-            auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)];
-            auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)];
+  }
+  else
+  {
+    // save per species value
+    for (int i = 0; i < num_ptcls; ++i)
+    {
+      const auto& pos           = P.R[i];
+      auto* restrict rhok_r_ptr = rhok_r[P.getGroupID(i)];
+      auto* restrict rhok_i_ptr = rhok_i[P.getGroupID(i)];
 #if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
 #pragma omp simd
-            for (int ki = 0; ki < nk; ki++) {
-                RealType s, c;
-                qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos), &s, &c);
-                rhok_r_ptr[ki] += c;
-                rhok_i_ptr[ki] += s;
-            }
+      for (int ki = 0; ki < nk; ki++)
+      {
+        RealType s, c;
+        qmcplusplus::sincos(dot(k_lists_.kpts_cart[ki], pos), &s, &c);
+        rhok_r_ptr[ki] += c;
+        rhok_i_ptr[ki] += s;
+      }
 #else
-            // make the compute over nk by blocks
-            constexpr size_t kblock_size = 512;
-            const size_t num_kblocks = (nk + kblock_size) / kblock_size;
-            RealType phiV[kblock_size], eikr_r_temp[kblock_size],
-                eikr_i_temp[kblock_size];
-
-            for (int ib = 0; ib < num_kblocks; ib++) {
-                const size_t offset = ib * kblock_size;
-                const size_t this_block_size =
-                    std::min(kblock_size, nk - offset);
-                for (int ki = 0; ki < this_block_size; ki++)
-                    phiV[ki] = dot(k_lists_.kpts_cart[ki + offset], pos);
-                eval_e2iphi(this_block_size, phiV, eikr_r_temp, eikr_i_temp);
-                for (int ki = 0; ki < this_block_size; ki++) {
-                    rhok_r_ptr[ki + offset] += eikr_r_temp[ki];
-                    rhok_i_ptr[ki + offset] += eikr_i_temp[ki];
-                }
-            }
-#endif
+      // make the compute over nk by blocks
+      constexpr size_t kblock_size = 512;
+      const size_t num_kblocks     = (nk + kblock_size) / kblock_size;
+      RealType phiV[kblock_size], eikr_r_temp[kblock_size], eikr_i_temp[kblock_size];
+
+      for (int ib = 0; ib < num_kblocks; ib++)
+      {
+        const size_t offset          = ib * kblock_size;
+        const size_t this_block_size = std::min(kblock_size, nk - offset);
+        for (int ki = 0; ki < this_block_size; ki++)
+          phiV[ki] = dot(k_lists_.kpts_cart[ki + offset], pos);
+        eval_e2iphi(this_block_size, phiV, eikr_r_temp, eikr_i_temp);
+        for (int ki = 0; ki < this_block_size; ki++)
+        {
+          rhok_r_ptr[ki + offset] += eikr_r_temp[ki];
+          rhok_i_ptr[ki + offset] += eikr_i_temp[ki];
         }
+      }
+#endif
     }
+  }
 }
 
-template <typename T>
-void
-StructFactT<T>::turnOnStorePerParticle(const ParticleSetT<T>& P)
+template<typename T>
+void StructFactT<T>::turnOnStorePerParticle(const ParticleSetT<T>& P)
 {
-    if (!StorePerParticle) {
-        StorePerParticle = true;
-        computeRhok(P);
-    }
+  if (!StorePerParticle)
+  {
+    StorePerParticle = true;
+    computeRhok(P);
+  }
 }
 
 template class StructFactT<double>;
diff --git a/src/Particle/LongRange/StructFactT.h b/src/Particle/LongRange/StructFactT.h
index b6f3a9269ee..264ddd0e11f 100644
--- a/src/Particle/LongRange/StructFactT.h
+++ b/src/Particle/LongRange/StructFactT.h
@@ -4,13 +4,10 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_STRUCTFACTT_H
@@ -27,9 +24,9 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class ParticleSetT;
-template <typename T>
+template<typename T>
 struct SKMultiWalkerMemT;
 
 /** @ingroup longrange
@@ -39,119 +36,98 @@ struct SKMultiWalkerMemT;
  *   Rhok[alpha][k] \f$ \equiv \rho_{k}^{\alpha} = \sum_{i} e^{i{\bf k}\cdot{\bf
  *r_i}}\f$ Structure factor per particle eikr[i][k]
  */
-template <typename T>
+template<typename T>
 class StructFactT
 {
 public:
-    // Typedef for the lattice-type
-    using ParticleLayout = typename LatticeParticleTraits<T>::ParticleLayout;
-    using RealType = typename ParticleSetTraits<T>::RealType;
+  // Typedef for the lattice-type
+  using ParticleLayout = typename LatticeParticleTraits<T>::ParticleLayout;
+  using RealType       = typename ParticleSetTraits<T>::RealType;
 
-    static constexpr auto DIM = ParticleSetTraits<T>::DIM;
+  static constexpr auto DIM = ParticleSetTraits<T>::DIM;
 
-    /** enumeration for the methods to handle mixed bconds
+  /** enumeration for the methods to handle mixed bconds
      *
      * Allow overwriting lattice::SuperCellEnum to use D-dim k-point sets with
      * mixed BC
      */
-    int SuperCellEnum;
-    /// 2-D container for the phase
-    Matrix<RealType> rhok_r, rhok_i;
-    Matrix<RealType> eikr_r, eikr_i;
-    /** Constructor - copy ParticleSet and init. k-shells
+  int SuperCellEnum;
+  /// 2-D container for the phase
+  Matrix<RealType> rhok_r, rhok_i;
+  Matrix<RealType> eikr_r, eikr_i;
+  /** Constructor - copy ParticleSet and init. k-shells
      * @param lattice long range box
      * @param kc cutoff for k
      *
      * At least in the batched version Structure factor is _NOT_ valid
      * after construction.
      */
-    StructFactT(const ParticleLayout& lattice, const KContainerT<T>& k_lists);
-    /// desructor
-    ~StructFactT();
+  StructFactT(const ParticleLayout& lattice, const KContainerT<T>& k_lists);
+  /// destructor
+  ~StructFactT();
 
-    /**  Update Rhok if all particles moved
+  /**  Update Rhok if all particles moved
      */
-    void
-    updateAllPart(const ParticleSetT<T>& P);
+  void updateAllPart(const ParticleSetT<T>& P);
 
-    /** Update RhoK for all particles for multiple walkers particles.
+  /** Update RhoK for all particles of multiple walkers.
      *
      *  In batched context until this is called StructFact is invalid and will
      * cause a crash if any Hamiltonian using StructFact indirectly through
      * ParticleSet is evaluated.
      */
-    static void
-    mw_updateAllPart(const RefVectorWithLeader<StructFactT>& sk_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
-        SKMultiWalkerMemT<T>& mw_mem);
+  static void mw_updateAllPart(const RefVectorWithLeader<StructFactT>& sk_list,
+                               const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                               SKMultiWalkerMemT<T>& mw_mem);
 
-    /** @brief switch on the storage per particle
+  /** @brief switch on the storage per particle
      * if StorePerParticle was false, this function allocates memory and
      * precompute data if StorePerParticle was true, this function is no-op
      */
-    void
-    turnOnStorePerParticle(const ParticleSetT<T>& P);
-
-    /// accessor of StorePerParticle
-    bool
-    isStorePerParticle() const
-    {
-        return StorePerParticle;
-    }
-
-    /// accessor of k_lists_
-    const KContainerT<T>&
-    getKLists() const
-    {
-        return k_lists_;
-    }
+  void turnOnStorePerParticle(const ParticleSetT<T>& P);
+
+  /// accessor of StorePerParticle
+  bool isStorePerParticle() const { return StorePerParticle; }
+
+  /// accessor of k_lists_
+  const KContainerT<T>& getKLists() const { return k_lists_; }
 
 private:
-    /// Compute all rhok elements from the start
-    void
-    computeRhok(const ParticleSetT<T>& P);
-    /** resize the internal data
+  /// Compute all rhok elements from the start
+  void computeRhok(const ParticleSetT<T>& P);
+  /** resize the internal data
      * @param nkpts
      * @param num_species number of species
      * @param num_ptcls number of particles
      */
-    void
-    resize(int nkpts, int num_species, int num_ptcls);
+  void resize(int nkpts, int num_species, int num_ptcls);
 
-    /// K-Vector List.
-    const KContainerT<T>& k_lists_;
-    /** Whether intermediate data is stored per particle. default false
+  /// K-Vector List.
+  const KContainerT<T>& k_lists_;
+  /** Whether intermediate data is stored per particle. default false
      * storing data per particle needs significant amount of memory but some
      * calculation may request it. storing data per particle specie is more
      * cost-effective
      */
-    bool StorePerParticle;
-    /// timer for updateAllPart
-    NewTimer& update_all_timer_;
+  bool StorePerParticle;
+  /// timer for updateAllPart
+  NewTimer& update_all_timer_;
 };
 
 /// multi walker shared memory buffer
-template <typename T>
+template<typename T>
 struct SKMultiWalkerMemT : public Resource
 {
-    using RealType = typename StructFactT<T>::RealType;
+  using RealType = typename StructFactT<T>::RealType;
 
-    /// dist displ for temporary and old pairs
-    Matrix<RealType, OffloadPinnedAllocator<RealType>> nw_rhok;
+  /// rhok values for all walkers and species; real and imaginary parts are kept in separate rows
+  Matrix<RealType, OffloadPinnedAllocator<RealType>> nw_rhok;
 
-    SKMultiWalkerMemT() : Resource("SKMultiWalkerMem")
-    {
-    }
+  SKMultiWalkerMemT() : Resource("SKMultiWalkerMem") {}
 
-    SKMultiWalkerMemT(const SKMultiWalkerMemT&) : SKMultiWalkerMemT()
-    {
-    }
+  SKMultiWalkerMemT(const SKMultiWalkerMemT&) : SKMultiWalkerMemT() {}
 
-    std::unique_ptr<Resource>
-    makeClone() const override
-    {
-        return std::make_unique<SKMultiWalkerMemT>(*this);
-    }
+  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<SKMultiWalkerMemT>(*this); }
 };
 
 } // namespace qmcplusplus
diff --git a/src/Particle/MCCoordsT.hpp b/src/Particle/MCCoordsT.hpp
index 1ca99aba3f0..f50015001da 100644
--- a/src/Particle/MCCoordsT.hpp
+++ b/src/Particle/MCCoordsT.hpp
@@ -5,8 +5,7 @@
 // Copyright (c) 2022 QMCPACK developers.
 //
 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
-//                    Cody A. Melton, cmelton@sandia.gov, Sandia National
-//                    Laboratories
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
 //
 // File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
@@ -28,53 +27,43 @@ enum class CoordsType
   POS_SPIN
 };
 
-template <typename T, CoordsType MCT>
+template<typename T, CoordsType MCT>
 struct MCCoordsT;
 
-template <typename T>
+template<typename T>
 struct MCCoordsT<T, CoordsType::POS>
 {
-    using PosType = typename ParticleSetTraits<T>::PosType;
+  using PosType = typename ParticleSetTraits<T>::PosType;
 
-    MCCoordsT(const std::size_t size) : positions(size)
-    {
-    }
+  MCCoordsT(const std::size_t size) : positions(size) {}
 
-    MCCoordsT&
-    operator+=(const MCCoordsT& rhs);
+  MCCoordsT& operator+=(const MCCoordsT& rhs);
 
-    /** get subset of MCCoordsT
+  /** get subset of MCCoordsT
      * [param,out] out
      */
-    void
-    getSubset(const std::size_t offset, const std::size_t size,
-        MCCoordsT<T, CoordsType::POS>& out) const;
+  void getSubset(const std::size_t offset, const std::size_t size, MCCoordsT<T, CoordsType::POS>& out) const;
 
-    std::vector<PosType> positions;
+  std::vector<PosType> positions;
 };
 
-template <typename T>
+template<typename T>
 struct MCCoordsT<T, CoordsType::POS_SPIN>
 {
-    using PosType = typename ParticleSetTraits<T>::PosType;
-    using FullPrecRealType = typename ParticleSetTraits<T>::FullPrecRealType;
+  using PosType          = typename ParticleSetTraits<T>::PosType;
+  using FullPrecRealType = typename ParticleSetTraits<T>::FullPrecRealType;
 
-    MCCoordsT(const std::size_t size) : positions(size), spins(size)
-    {
-    }
+  MCCoordsT(const std::size_t size) : positions(size), spins(size) {}
 
-    MCCoordsT&
-    operator+=(const MCCoordsT& rhs);
+  MCCoordsT& operator+=(const MCCoordsT& rhs);
 
-    /** get subset of MCCoordsT
+  /** get subset of MCCoordsT
      * [param,out] out
      */
-    void
-    getSubset(const std::size_t offset, const std::size_t size,
-        MCCoordsT<T, CoordsType::POS_SPIN>& out) const;
+  void getSubset(const std::size_t offset, const std::size_t size, MCCoordsT<T, CoordsType::POS_SPIN>& out) const;
 
-    std::vector<PosType> positions;
-    std::vector<FullPrecRealType> spins;
+  std::vector<PosType> positions;
+  std::vector<FullPrecRealType> spins;
 };
 } // namespace qmcplusplus
 
diff --git a/src/Particle/MCWalkerConfigurationT.cpp b/src/Particle/MCWalkerConfigurationT.cpp
index 7867a9195e2..a30b728bc93 100644
--- a/src/Particle/MCWalkerConfigurationT.cpp
+++ b/src/Particle/MCWalkerConfigurationT.cpp
@@ -4,20 +4,16 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jordan E. Vincent, University of Illinois at
-// Urbana-Champaign
+// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign
 //                    Bryan Clark, bclark@Princeton.edu, Princeton University
-//                    Ken Esler, kpesler@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com,
-//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Cynthia Gu, zg1@ornl.gov, Oak Ridge
-//                    National Laboratory Ye Luo, yeluo@anl.gov, Argonne
-//                    National Laboratory Mark A. Berrill, berrillma@ornl.gov,
-//                    Oak Ridge National Laboratory
+//                    Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "MCWalkerConfigurationT.h"
@@ -37,67 +33,55 @@
 
 namespace qmcplusplus
 {
-template <typename T>
-MCWalkerConfigurationT<T>::MCWalkerConfigurationT(
-    const SimulationCellT<T>& simulation_cell,
-    const DynamicCoordinateKind kind) :
-    ParticleSetT<T>(simulation_cell, kind),
-    ReadyForPbyP(false),
-    UpdateMode(Update_Walker),
-    reptile(0),
-    Polymer(0)
-{
-}
+template<typename T>
+MCWalkerConfigurationT<T>::MCWalkerConfigurationT(const SimulationCellT<T>& simulation_cell,
+                                                  const DynamicCoordinateKind kind)
+    : ParticleSetT<T>(simulation_cell, kind), ReadyForPbyP(false), UpdateMode(Update_Walker), reptile(0), Polymer(0)
+{}
 
-template <typename T>
-MCWalkerConfigurationT<T>::MCWalkerConfigurationT(
-    const MCWalkerConfigurationT& mcw) :
-    ParticleSetT<T>(mcw),
-    ReadyForPbyP(false),
-    UpdateMode(Update_Walker),
-    Polymer(0)
+template<typename T>
+MCWalkerConfigurationT<T>::MCWalkerConfigurationT(const MCWalkerConfigurationT& mcw)
+    : ParticleSetT<T>(mcw), ReadyForPbyP(false), UpdateMode(Update_Walker), Polymer(0)
 {
-    samples.clearEnsemble();
-    samples.setMaxSamples(mcw.getMaxSamples());
-    this->setWalkerOffsets(mcw.getWalkerOffsets());
-    this->Properties = mcw.Properties;
+  samples.clearEnsemble();
+  samples.setMaxSamples(mcw.getMaxSamples());
+  this->setWalkerOffsets(mcw.getWalkerOffsets());
+  this->Properties = mcw.Properties;
 }
 
-template <typename T>
+template<typename T>
 MCWalkerConfigurationT<T>::~MCWalkerConfigurationT() = default;
 
-template <typename T>
-void
-MCWalkerConfigurationT<T>::createWalkers(int n)
+template<typename T>
+void MCWalkerConfigurationT<T>::createWalkers(int n)
 {
-    const int old_nw = this->getActiveWalkers();
-    WalkerConfigurationsT<T>::createWalkers(n, this->TotalNum);
-    // no pre-existing walkers, need to initialized based on particleset.
-    if (old_nw == 0)
-        for (auto& awalker : this->walker_list_) {
-            awalker->R = this->R;
-            awalker->spins = this->spins;
-        }
-    resizeWalkerHistories();
+  const int old_nw = this->getActiveWalkers();
+  WalkerConfigurationsT<T>::createWalkers(n, this->TotalNum);
+  // no pre-existing walkers, need to be initialized based on the particle set.
+  if (old_nw == 0)
+    for (auto& awalker : this->walker_list_)
+    {
+      awalker->R     = this->R;
+      awalker->spins = this->spins;
+    }
+  resizeWalkerHistories();
 }
 
-template <typename T>
-void
-MCWalkerConfigurationT<T>::resize(int numWalkers, int numPtcls)
+template<typename T>
+void MCWalkerConfigurationT<T>::resize(int numWalkers, int numPtcls)
 {
-    if (this->TotalNum && this->walker_list_.size())
-        app_warning()
-            << "MCWalkerConfiguration::resize cleans up the walker list."
-            << std::endl;
-    const int old_nw = this->getActiveWalkers();
-    ParticleSetT<T>::resize(unsigned(numPtcls));
-    WalkerConfigurationsT<T>::resize(numWalkers, this->TotalNum);
-    // no pre-existing walkers, need to initialized based on particleset.
-    if (old_nw == 0)
-        for (auto& awalker : this->walker_list_) {
-            awalker->R = this->R;
-            awalker->spins = this->spins;
-        }
+  if (this->TotalNum && this->walker_list_.size())
+    app_warning() << "MCWalkerConfiguration::resize cleans up the walker list." << std::endl;
+  const int old_nw = this->getActiveWalkers();
+  ParticleSetT<T>::resize(unsigned(numPtcls));
+  WalkerConfigurationsT<T>::resize(numWalkers, this->TotalNum);
+  // no pre-existing walkers, need to be initialized based on the particle set.
+  if (old_nw == 0)
+    for (auto& awalker : this->walker_list_)
+    {
+      awalker->R     = this->R;
+      awalker->spins = this->spins;
+    }
 }
 
 /** Make Metropolis move to the walkers and save in a temporary array.
@@ -106,203 +90,197 @@ MCWalkerConfigurationT<T>::resize(int numWalkers, int numPtcls)
  *
  * R + D + X
  */
-template <typename T>
-void
-MCWalkerConfigurationT<T>::sample(iterator it, RealType tauinv)
+template<typename T>
+void MCWalkerConfigurationT<T>::sample(iterator it, RealType tauinv)
 {
-    throw std::runtime_error("MCWalkerConfiguration::sample obsolete");
-    //  makeGaussRandom(R);
-    //  R *= tauinv;
-    //  R += (*it)->R + (*it)->Drift;
+  throw std::runtime_error("MCWalkerConfiguration::sample obsolete");
+  //  makeGaussRandom(R);
+  //  R *= tauinv;
+  //  R += (*it)->R + (*it)->Drift;
 }
 
 /** reset the Property container of all the walkers
  */
-template <typename T>
-void
-MCWalkerConfigurationT<T>::resetWalkerProperty(int ncopy)
+template<typename T>
+void MCWalkerConfigurationT<T>::resetWalkerProperty(int ncopy)
 {
-    int m(this->PropertyList.size());
-    app_log() << "  Resetting Properties of the walkers " << ncopy << " x " << m
-              << std::endl;
-    try {
-        this->Properties.resize(ncopy, m);
-    }
-    catch (std::domain_error& de) {
-        app_error() << de.what() << '\n'
-                    << "This is likely because some object has attempted to "
-                       "add walker properties\n"
-                    << " in excess of WALKER_MAX_PROPERTIES.\n"
-                    << "build with cmake ... "
-                       "-DWALKER_MAX_PROPERTIES=at_least_properties_required"
-                    << std::endl;
-        APP_ABORT("Fatal Exception");
-    }
+  int m(this->PropertyList.size());
+  app_log() << "  Resetting Properties of the walkers " << ncopy << " x " << m << std::endl;
+  try
+  {
+    this->Properties.resize(ncopy, m);
+  }
+  catch (std::domain_error& de)
+  {
+    app_error() << de.what() << '\n'
+                << "This is likely because some object has attempted to "
+                   "add walker properties\n"
+                << " in excess of WALKER_MAX_PROPERTIES.\n"
+                << "build with cmake ... "
+                   "-DWALKER_MAX_PROPERTIES=at_least_properties_required"
+                << std::endl;
+    APP_ABORT("Fatal Exception");
+  }
 
-    for (auto& walker : this->walker_list_) {
-        walker->resizeProperty(ncopy, m);
-        walker->Weight = 1.0;
-    }
-    resizeWalkerHistories();
+  for (auto& walker : this->walker_list_)
+  {
+    walker->resizeProperty(ncopy, m);
+    walker->Weight = 1.0;
+  }
+  resizeWalkerHistories();
 }
 
-template <typename T>
-void
-MCWalkerConfigurationT<T>::resizeWalkerHistories()
+template<typename T>
+void MCWalkerConfigurationT<T>::resizeWalkerHistories()
 {
-    // using std::vector<std::vector<RealType> > is too costly.
-    int np = this->PropertyHistory.size();
-    if (np)
-        for (int iw = 0; iw < this->walker_list_.size(); ++iw)
-            this->walker_list_[iw]->PropertyHistory = this->PropertyHistory;
-    np = this->PHindex.size();
-    if (np)
-        for (int iw = 0; iw < this->walker_list_.size(); ++iw)
-            this->walker_list_[iw]->PHindex = this->PHindex;
-    ;
+  // using std::vector<std::vector<RealType> > is too costly.
+  int np = this->PropertyHistory.size();
+  if (np)
+    for (int iw = 0; iw < this->walker_list_.size(); ++iw)
+      this->walker_list_[iw]->PropertyHistory = this->PropertyHistory;
+  np = this->PHindex.size();
+  if (np)
+    for (int iw = 0; iw < this->walker_list_.size(); ++iw)
+      this->walker_list_[iw]->PHindex = this->PHindex;
+  ;
 }
 
 /** allocate the SampleStack
  * @param n number of samples per thread
  */
-template <typename T>
-void
-MCWalkerConfigurationT<T>::setNumSamples(int n)
+template<typename T>
+void MCWalkerConfigurationT<T>::setNumSamples(int n)
 {
-    samples.clearEnsemble();
-    samples.setMaxSamples(n);
+  samples.clearEnsemble();
+  samples.setMaxSamples(n);
 }
 
 /** save the current walkers to SampleStack
  */
-template <typename T>
-void
-MCWalkerConfigurationT<T>::saveEnsemble()
+template<typename T>
+void MCWalkerConfigurationT<T>::saveEnsemble()
 {
-    saveEnsemble(this->walker_list_.begin(), this->walker_list_.end());
+  saveEnsemble(this->walker_list_.begin(), this->walker_list_.end());
 }
 
 /** save the [first,last) walkers to SampleStack
  */
-template <typename T>
-void
-MCWalkerConfigurationT<T>::saveEnsemble(iterator first, iterator last)
+template<typename T>
+void MCWalkerConfigurationT<T>::saveEnsemble(iterator first, iterator last)
 {
-    for (; first != last; first++) {
-        samples.appendSample(MCSample(**first));
-    }
+  for (; first != last; first++)
+  {
+    samples.appendSample(MCSample(**first));
+  }
 }
 /** load a single sample from SampleStack
  */
-template <typename T>
-void
-MCWalkerConfigurationT<T>::loadSample(ParticleSetT<T>& pset, size_t iw) const
+template<typename T>
+void MCWalkerConfigurationT<T>::loadSample(ParticleSetT<T>& pset, size_t iw) const
 {
-    samples.loadSample(pset, iw);
+  samples.loadSample(pset, iw);
 }
 
 /** load SampleStack to walker_list_
  */
-template <typename T>
-void
-MCWalkerConfigurationT<T>::loadEnsemble()
+template<typename T>
+void MCWalkerConfigurationT<T>::loadEnsemble()
 {
-    using WP = WalkerProperties::Indexes;
-    int nsamples = std::min(samples.getMaxSamples(), samples.getNumSamples());
-    if (samples.empty() || nsamples == 0)
-        return;
-    typename Walker_t::PropertyContainer_t prop(
-        1, this->PropertyList.size(), 1, WP::MAXPROPERTIES);
-    this->walker_list_.resize(nsamples);
-    for (int i = 0; i < nsamples; ++i) {
-        auto awalker = std::make_unique<Walker_t>(this->TotalNum);
-        awalker->Properties.copy(prop);
-        samples.getSample(i).convertToWalker(*awalker);
-        this->walker_list_[i] = std::move(awalker);
-    }
-    resizeWalkerHistories();
-    samples.clearEnsemble();
+  using WP     = WalkerProperties::Indexes;
+  int nsamples = std::min(samples.getMaxSamples(), samples.getNumSamples());
+  if (samples.empty() || nsamples == 0)
+    return;
+  typename Walker_t::PropertyContainer_t prop(1, this->PropertyList.size(), 1, WP::MAXPROPERTIES);
+  this->walker_list_.resize(nsamples);
+  for (int i = 0; i < nsamples; ++i)
+  {
+    auto awalker = std::make_unique<Walker_t>(this->TotalNum);
+    awalker->Properties.copy(prop);
+    samples.getSample(i).convertToWalker(*awalker);
+    this->walker_list_[i] = std::move(awalker);
+  }
+  resizeWalkerHistories();
+  samples.clearEnsemble();
 }
 
-template <typename T>
-bool
-MCWalkerConfigurationT<T>::dumpEnsemble(
-    std::vector<MCWalkerConfigurationT<T>*>& others, HDFWalkerOutput& out,
-    int np, int nBlock)
+template<typename T>
+bool MCWalkerConfigurationT<T>::dumpEnsemble(std::vector<MCWalkerConfigurationT<T>*>& others,
+                                             HDFWalkerOutput& out,
+                                             int np,
+                                             int nBlock)
 {
-    WalkerConfigurationsT<T> wctemp;
-    for (auto* mcwc : others) {
-        const auto& astack(mcwc->getSampleStack());
-        const size_t sample_size =
-            std::min(mcwc->getMaxSamples(), mcwc->numSamples());
-        for (int j = 0; j < sample_size; ++j) {
-            const auto& sample = astack.getSample(j);
-            const size_t num_ptcls = sample.getNumPtcls();
-            auto awalker = std::make_unique<Walker_t>(num_ptcls);
-            sample.convertToWalker(*awalker);
-            wctemp.push_back(std::move(awalker));
-        }
+  WalkerConfigurationsT<T> wctemp;
+  for (auto* mcwc : others)
+  {
+    const auto& astack(mcwc->getSampleStack());
+    const size_t sample_size = std::min(mcwc->getMaxSamples(), mcwc->numSamples());
+    for (int j = 0; j < sample_size; ++j)
+    {
+      const auto& sample     = astack.getSample(j);
+      const size_t num_ptcls = sample.getNumPtcls();
+      auto awalker           = std::make_unique<Walker_t>(num_ptcls);
+      sample.convertToWalker(*awalker);
+      wctemp.push_back(std::move(awalker));
     }
-    const int w = wctemp.getActiveWalkers();
-    if (w == 0)
-        return false;
+  }
+  const int w = wctemp.getActiveWalkers();
+  if (w == 0)
+    return false;
 
-    // The following code assumes the same amount of active walkers on all the
-    // MPI ranks
-    std::vector<int> nwoff(np + 1, 0);
-    for (int ip = 0; ip < np; ++ip)
-        nwoff[ip + 1] = nwoff[ip] + w;
-    wctemp.setWalkerOffsets(nwoff);
-    out.dump(wctemp, nBlock);
-    return true;
+  // The following code assumes the same number of active walkers on all the
+  // MPI ranks
+  std::vector<int> nwoff(np + 1, 0);
+  for (int ip = 0; ip < np; ++ip)
+    nwoff[ip + 1] = nwoff[ip] + w;
+  wctemp.setWalkerOffsets(nwoff);
+  out.dump(wctemp, nBlock);
+  return true;
 }
 
-template <typename T>
-int
-MCWalkerConfigurationT<T>::getMaxSamples() const
+template<typename T>
+int MCWalkerConfigurationT<T>::getMaxSamples() const
 {
-    return samples.getMaxSamples();
+  return samples.getMaxSamples();
 }
 
-template <typename T>
-void
-MCWalkerConfigurationT<T>::loadEnsemble(
-    std::vector<MCWalkerConfigurationT<T>*>& others, bool doclean)
+template<typename T>
+void MCWalkerConfigurationT<T>::loadEnsemble(std::vector<MCWalkerConfigurationT<T>*>& others, bool doclean)
 {
-    using WP = WalkerProperties::Indexes;
-    std::vector<int> off(others.size() + 1, 0);
-    for (int i = 0; i < others.size(); ++i) {
-        off[i + 1] = off[i] +
-            std::min(others[i]->getMaxSamples(), others[i]->numSamples());
-    }
-    int nw_tot = off.back();
-    if (nw_tot) {
-        typename Walker_t::PropertyContainer_t prop(
-            1, this->PropertyList.size(), 1, WP::MAXPROPERTIES);
-        while (this->walker_list_.size())
-            this->pop_back();
-        this->walker_list_.resize(nw_tot);
-        for (int i = 0; i < others.size(); ++i) {
-            SampleStackT<T>& astack(others[i]->getSampleStack());
-            for (int j = 0, iw = off[i]; iw < off[i + 1]; ++j, ++iw) {
-                auto awalker = std::make_unique<Walker_t>(this->TotalNum);
-                awalker->Properties.copy(prop);
-                astack.getSample(j).convertToWalker(*awalker);
-                this->walker_list_[iw] = std::move(awalker);
-            }
-            if (doclean)
-                others[i]->clearEnsemble();
-        }
+  using WP = WalkerProperties::Indexes;
+  std::vector<int> off(others.size() + 1, 0);
+  for (int i = 0; i < others.size(); ++i)
+  {
+    off[i + 1] = off[i] + std::min(others[i]->getMaxSamples(), others[i]->numSamples());
+  }
+  int nw_tot = off.back();
+  if (nw_tot)
+  {
+    typename Walker_t::PropertyContainer_t prop(1, this->PropertyList.size(), 1, WP::MAXPROPERTIES);
+    while (this->walker_list_.size())
+      this->pop_back();
+    this->walker_list_.resize(nw_tot);
+    for (int i = 0; i < others.size(); ++i)
+    {
+      SampleStackT<T>& astack(others[i]->getSampleStack());
+      for (int j = 0, iw = off[i]; iw < off[i + 1]; ++j, ++iw)
+      {
+        auto awalker = std::make_unique<Walker_t>(this->TotalNum);
+        awalker->Properties.copy(prop);
+        astack.getSample(j).convertToWalker(*awalker);
+        this->walker_list_[iw] = std::move(awalker);
+      }
+      if (doclean)
+        others[i]->clearEnsemble();
     }
-    if (doclean)
-        resizeWalkerHistories();
+  }
+  if (doclean)
+    resizeWalkerHistories();
 }
 
-template <typename T>
-void
-MCWalkerConfigurationT<T>::clearEnsemble()
+template<typename T>
+void MCWalkerConfigurationT<T>::clearEnsemble()
 {
-    samples.clearEnsemble();
+  samples.clearEnsemble();
 }
 
 #ifndef QMC_COMPLEX
diff --git a/src/Particle/MCWalkerConfigurationT.h b/src/Particle/MCWalkerConfigurationT.h
index 4de261c12ac..ff753346111 100644
--- a/src/Particle/MCWalkerConfigurationT.h
+++ b/src/Particle/MCWalkerConfigurationT.h
@@ -4,21 +4,16 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jordan E. Vincent, University of Illinois at
-// Urbana-Champaign
-//                    Ken Esler, kpesler@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com,
-//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Cynthia Gu, zg1@ornl.gov, Oak Ridge
-//                    National Laboratory Raymond Clay III,
-//                    j.k.rofling@gmail.com, Lawrence Livermore National
-//                    Laboratory Ye Luo, yeluo@anl.gov, Argonne National
-//                    Laboratory Mark A. Berrill, berrillma@ornl.gov, Oak Ridge
-//                    National Laboratory
+// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign
+//                    Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 /** @file MCWalkerConfiguration.h
@@ -37,7 +32,7 @@ namespace qmcplusplus
 // Forward declaration
 class MultiChain;
 class HDFWalkerOutput;
-template <typename T>
+template<typename T>
 class ReptileT;
 
 /** A set of walkers that are to be advanced by Metropolis Monte Carlo.
@@ -57,138 +52,96 @@ class ReptileT;
 
  *</ul>
  */
-template <typename T>
-class MCWalkerConfigurationT :
-    public ParticleSetT<T>,
-    public WalkerConfigurationsT<T>
+template<typename T>
+class MCWalkerConfigurationT : public ParticleSetT<T>, public WalkerConfigurationsT<T>
 {
 public:
-    /**enumeration for update*/
-    enum
-    {
-        Update_All = 0, /// move all the active walkers
-        Update_Walker, /// move a walker by walker
-        Update_Particle /// move a particle by particle
-    };
-
-    using Walker_t = typename WalkerConfigurationsT<T>::Walker_t;
-    /// container type of the Properties of a Walker
-    using PropertyContainer_t = typename Walker_t::PropertyContainer_t;
-    /// container type of Walkers
-    using WalkerList_t = std::vector<std::unique_ptr<Walker_t>>;
-    /// FIX: a type alias of iterator for an object should not be for just one
-    /// of many objects it holds.
-    using iterator = typename WalkerList_t::iterator;
-    /// const_iterator of Walker container
-    using const_iterator = typename WalkerList_t::const_iterator;
-
-    using ReptileList_t = UPtrVector<ReptileT<T>>;
-
-    using RealType = typename ParticleSetT<T>::RealType;
-
-    /// default constructor
-    MCWalkerConfigurationT(const SimulationCellT<T>& simulation_cell,
-        const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS);
-
-    /// default constructor: copy only ParticleSet
-    MCWalkerConfigurationT(const MCWalkerConfigurationT& mcw);
-    ~MCWalkerConfigurationT();
-    /** create numWalkers Walkers
+  /**enumeration for update*/
+  enum
+  {
+    Update_All = 0, /// move all the active walkers
+    Update_Walker,  /// move a walker by walker
+    Update_Particle /// move a particle by particle
+  };
+
+  using Walker_t = typename WalkerConfigurationsT<T>::Walker_t;
+  /// container type of the Properties of a Walker
+  using PropertyContainer_t = typename Walker_t::PropertyContainer_t;
+  /// container type of Walkers
+  using WalkerList_t = std::vector<std::unique_ptr<Walker_t>>;
+  /// FIX: a type alias of iterator for an object should not be for just one
+  /// of many objects it holds.
+  using iterator = typename WalkerList_t::iterator;
+  /// const_iterator of Walker container
+  using const_iterator = typename WalkerList_t::const_iterator;
+
+  using ReptileList_t = UPtrVector<ReptileT<T>>;
+
+  using RealType = typename ParticleSetT<T>::RealType;
+
+  /// constructor from a simulation cell and an optional dynamic coordinate kind
+  MCWalkerConfigurationT(const SimulationCellT<T>& simulation_cell,
+                         const DynamicCoordinateKind kind = DynamicCoordinateKind::DC_POS);
+
+  /// copy constructor: copy only ParticleSet
+  MCWalkerConfigurationT(const MCWalkerConfigurationT& mcw);
+  ~MCWalkerConfigurationT();
+  /** create numWalkers Walkers
      *
      * Append Walkers to WalkerList.
      */
-    void
-    createWalkers(int numWalkers);
-    /// clean up the walker list and make a new list
-    void
-    resize(int numWalkers, int numPtcls);
-
-    /// clean up the walker list
-    using WalkerConfigurationsT<T>::clear;
-    /// resize Walker::PropertyHistory and Walker::PHindex:
-    void
-    resizeWalkerHistories();
-
-    /// make random moves for all the walkers
-    // void sample(iterator first, iterator last, value_type tauinv);
-    /// make a random move for a walker
-    void
-    sample(iterator it, RealType tauinv);
-
-    /// return the number of particles per walker
-    inline int
-    getParticleNum() const
-    {
-        return this->R.size();
-    }
-    /**@}*/
-
-    /** set LocalEnergy
+  void createWalkers(int numWalkers);
+  /// clean up the walker list and make a new list
+  void resize(int numWalkers, int numPtcls);
+
+  /// clean up the walker list
+  using WalkerConfigurationsT<T>::clear;
+  /// resize Walker::PropertyHistory and Walker::PHindex:
+  void resizeWalkerHistories();
+
+  /// make random moves for all the walkers
+  // void sample(iterator first, iterator last, value_type tauinv);
+  /// make a random move for a walker
+  void sample(iterator it, RealType tauinv);
+
+  /// return the number of particles per walker
+  inline int getParticleNum() const { return this->R.size(); }
+  /**@}*/
+
+  /** set LocalEnergy
      * @param e current average Local Energy
      */
-    inline void
-    setLocalEnergy(RealType e)
-    {
-        LocalEnergy = e;
-    }
+  inline void setLocalEnergy(RealType e) { LocalEnergy = e; }
 
-    /** return LocalEnergy
+  /** return LocalEnergy
      */
-    inline RealType
-    getLocalEnergy() const
-    {
-        return LocalEnergy;
-    }
-
-    inline MultiChain*
-    getPolymer()
-    {
-        return Polymer;
-    }
-
-    inline void
-    setPolymer(MultiChain* chain)
-    {
-        Polymer = chain;
-    }
-
-    void
-    resetWalkerProperty(int ncopy = 1);
-
-    inline bool
-    updatePbyP() const
-    {
-        return ReadyForPbyP;
-    }
-
-    //@{save/load/clear function for optimization
-    //
-    int
-    numSamples() const
-    {
-        return samples.getNumSamples();
-    }
-    /// set the number of max samples
-    void
-    setNumSamples(int n);
-    /// save the position of current walkers to SampleStack
-    void
-    saveEnsemble();
-    /// save the position of current walkers
-    void
-    saveEnsemble(iterator first, iterator last);
-    /// load a single sample from SampleStack
-    void
-    loadSample(ParticleSetT<T>& pset, size_t iw) const;
-    /// load SampleStack data to the current list of walker configurations
-    void
-    loadEnsemble();
-    /// load the SampleStacks of others to the current list of walker
-    /// configurations
-    void
-    loadEnsemble(
-        std::vector<MCWalkerConfigurationT<T>*>& others, bool doclean = true);
-    /** dump Samples to a file
+  inline RealType getLocalEnergy() const { return LocalEnergy; }
+
+  inline MultiChain* getPolymer() { return Polymer; }
+
+  inline void setPolymer(MultiChain* chain) { Polymer = chain; }
+
+  void resetWalkerProperty(int ncopy = 1);
+
+  inline bool updatePbyP() const { return ReadyForPbyP; }
+
+  //@{save/load/clear function for optimization
+  //
+  int numSamples() const { return samples.getNumSamples(); }
+  /// set the number of max samples
+  void setNumSamples(int n);
+  /// save the position of current walkers to SampleStack
+  void saveEnsemble();
+  /// save the position of current walkers
+  void saveEnsemble(iterator first, iterator last);
+  /// load a single sample from SampleStack
+  void loadSample(ParticleSetT<T>& pset, size_t iw) const;
+  /// load SampleStack data to the current list of walker configurations
+  void loadEnsemble();
+  /// load the SampleStacks of others to the current list of walker
+  /// configurations
+  void loadEnsemble(std::vector<MCWalkerConfigurationT<T>*>& others, bool doclean = true);
+  /** dump Samples to a file
      * @param others MCWalkerConfigurations whose samples will be collected
      * @param out engine to write the samples to state_0/walkers
      * @param np number of processors
@@ -197,48 +150,36 @@ class MCWalkerConfigurationT :
      * CAUTION: The current implementation assumes the same amount of active
      * walkers on all the MPI ranks.
      */
-    static bool
-    dumpEnsemble(std::vector<MCWalkerConfigurationT<T>*>& others,
-        HDFWalkerOutput& out, int np, int nBlock);
-    /// clear the ensemble
-    void
-    clearEnsemble();
-
-    const SampleStackT<T>&
-    getSampleStack() const
-    {
-        return samples;
-    }
-    SampleStackT<T>&
-    getSampleStack()
-    {
-        return samples;
-    }
-
-    /// Transitional forwarding methods
-    int
-    getMaxSamples() const;
-    //@}
+  static bool dumpEnsemble(std::vector<MCWalkerConfigurationT<T>*>& others, HDFWalkerOutput& out, int np, int nBlock);
+  /// clear the ensemble
+  void clearEnsemble();
+
+  const SampleStackT<T>& getSampleStack() const { return samples; }
+  SampleStackT<T>& getSampleStack() { return samples; }
+
+  /// Transitional forwarding methods
+  int getMaxSamples() const;
+  //@}
 
 protected:
-    /// true if the buffer is ready for particle-by-particle updates
-    bool ReadyForPbyP;
-    /// update-mode index
-    int UpdateMode;
+  /// true if the buffer is ready for particle-by-particle updates
+  bool ReadyForPbyP;
+  /// update-mode index
+  int UpdateMode;
 
-    RealType LocalEnergy;
+  RealType LocalEnergy;
 
 public:
-    /// a collection of reptiles contained in MCWalkerConfiguration.
-    ReptileList_t ReptileList;
-    ReptileT<T>* reptile;
+  /// a collection of reptiles contained in MCWalkerConfiguration.
+  ReptileList_t ReptileList;
+  ReptileT<T>* reptile;
 
-    friend class MCPopulation;
+  friend class MCPopulation;
 
 private:
-    MultiChain* Polymer;
+  MultiChain* Polymer;
 
-    SampleStackT<T> samples;
+  SampleStackT<T> samples;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/ParticleSetPoolT.cpp b/src/Particle/ParticleSetPoolT.cpp
index 010bd5de266..4909b0a7833 100644
--- a/src/Particle/ParticleSetPoolT.cpp
+++ b/src/Particle/ParticleSetPoolT.cpp
@@ -4,16 +4,12 @@
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence
-//                    Livermore National Laboratory Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Mark A. Berrill, berrillma@ornl.gov, Oak
-//                    Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 /**@file ParticleSetPool.cpp
@@ -34,94 +30,89 @@
 
 namespace qmcplusplus
 {
-template <typename T>
-ParticleSetPoolT<T>::ParticleSetPoolT(Communicate* c, const char* aname) :
-    MPIObjectBase(c),
-    simulation_cell_(std::make_unique<SimulationCellT<T>>())
+template<typename T>
+ParticleSetPoolT<T>::ParticleSetPoolT(Communicate* c, const char* aname)
+    : MPIObjectBase(c), simulation_cell_(std::make_unique<SimulationCellT<T>>())
 {
-    ClassName = "ParticleSetPool";
-    myName = aname;
+  ClassName = "ParticleSetPool";
+  myName    = aname;
 }
 
-template <typename T>
-ParticleSetPoolT<T>::ParticleSetPoolT(ParticleSetPoolT&& other) noexcept :
-    MPIObjectBase(other.myComm),
-    simulation_cell_(std::move(other.simulation_cell_)),
-    myPool(std::move(other.myPool))
+template<typename T>
+ParticleSetPoolT<T>::ParticleSetPoolT(ParticleSetPoolT&& other) noexcept
+    : MPIObjectBase(other.myComm), simulation_cell_(std::move(other.simulation_cell_)), myPool(std::move(other.myPool))
 {
-    ClassName = other.ClassName;
-    myName = other.myName;
+  ClassName = other.ClassName;
+  myName    = other.myName;
 }
 
-template <typename T>
+template<typename T>
 ParticleSetPoolT<T>::~ParticleSetPoolT() = default;
 
-template <typename T>
-ParticleSetT<T>*
-ParticleSetPoolT<T>::getParticleSet(const std::string& pname)
+template<typename T>
+ParticleSetT<T>* ParticleSetPoolT<T>::getParticleSet(const std::string& pname)
 {
-    if (auto pit = myPool.find(pname); pit == myPool.end())
-        return nullptr;
-    else
-        return pit->second.get();
+  if (auto pit = myPool.find(pname); pit == myPool.end())
+    return nullptr;
+  else
+    return pit->second.get();
 }
 
-template <typename T>
-MCWalkerConfigurationT<T>*
-ParticleSetPoolT<T>::getWalkerSet(const std::string& pname)
+template<typename T>
+MCWalkerConfigurationT<T>* ParticleSetPoolT<T>::getWalkerSet(const std::string& pname)
 {
-    auto mc = dynamic_cast<MCWalkerConfigurationT<T>*>(getParticleSet(pname));
-    if (mc == nullptr) {
-        throw std::runtime_error(
-            "ParticleSePool::getWalkerSet missing " + pname);
-    }
-    return mc;
+  auto mc = dynamic_cast<MCWalkerConfigurationT<T>*>(getParticleSet(pname));
+  if (mc == nullptr)
+  {
+    throw std::runtime_error("ParticleSePool::getWalkerSet missing " + pname);
+  }
+  return mc;
 }
 
-template <typename T>
-void
-ParticleSetPoolT<T>::addParticleSet(std::unique_ptr<ParticleSetT<T>>&& p)
+template<typename T>
+void ParticleSetPoolT<T>::addParticleSet(std::unique_ptr<ParticleSetT<T>>&& p)
 {
-    const auto pit(myPool.find(p->getName()));
-    if (pit == myPool.end()) {
-        auto& pname = p->getName();
-        LOGMSG("  Adding " << pname << " ParticleSet to the pool")
-        if (&p->getSimulationCell() != simulation_cell_.get())
-            throw std::runtime_error(
-                "Bug detected! ParticleSetPool::addParticleSet requires p "
-                "created with the simulation "
-                "cell from ParticleSetPool.");
-        myPool.emplace(pname, std::move(p));
-    }
-    else
-        throw std::runtime_error(
-            p->getName() + " exists. Cannot be added again.");
+  const auto pit(myPool.find(p->getName()));
+  if (pit == myPool.end())
+  {
+    auto& pname = p->getName();
+    LOGMSG("  Adding " << pname << " ParticleSet to the pool")
+    if (&p->getSimulationCell() != simulation_cell_.get())
+      throw std::runtime_error("Bug detected! ParticleSetPool::addParticleSet requires p "
+                               "created with the simulation "
+                               "cell from ParticleSetPool.");
+    myPool.emplace(pname, std::move(p));
+  }
+  else
+    throw std::runtime_error(p->getName() + " exists. Cannot be added again.");
 }
 
-template <typename T>
-bool
-ParticleSetPoolT<T>::readSimulationCellXML(xmlNodePtr cur)
+template<typename T>
+bool ParticleSetPoolT<T>::readSimulationCellXML(xmlNodePtr cur)
 {
-    ReportEngine PRE("ParticleSetPool", "putLattice");
+  ReportEngine PRE("ParticleSetPool", "putLattice");
 
-    bool lattice_defined = false;
-    try {
-        LatticeParserT<T> a(simulation_cell_->lattice_);
-        lattice_defined = a.put(cur);
-    }
-    catch (const UniformCommunicateError& ue) {
-        myComm->barrier_and_abort(ue.what());
-    }
+  bool lattice_defined = false;
+  try
+  {
+    LatticeParserT<T> a(simulation_cell_->lattice_);
+    lattice_defined = a.put(cur);
+  }
+  catch (const UniformCommunicateError& ue)
+  {
+    myComm->barrier_and_abort(ue.what());
+  }
 
-    if (lattice_defined) {
-        app_log() << "  Overwriting global supercell " << std::endl;
-        simulation_cell_->resetLRBox();
-        if (outputManager.isHighActive())
-            simulation_cell_->lattice_.print(app_log(), 2);
-        else
-            simulation_cell_->lattice_.print(app_summary(), 1);
-    }
-    return lattice_defined;
+  if (lattice_defined)
+  {
+    app_log() << "  Overwriting global supercell " << std::endl;
+    simulation_cell_->resetLRBox();
+    if (outputManager.isHighActive())
+      simulation_cell_->lattice_.print(app_log(), 2);
+    else
+      simulation_cell_->lattice_.print(app_summary(), 1);
+  }
+  return lattice_defined;
 }
 
 /** process an xml element
@@ -131,142 +122,131 @@ ParticleSetPoolT<T>::readSimulationCellXML(xmlNodePtr cur)
  * Creating MCWalkerConfiguration for all the ParticleSet
  * objects.
  */
-template <typename T>
-bool
-ParticleSetPoolT<T>::put(xmlNodePtr cur)
+template<typename T>
+bool ParticleSetPoolT<T>::put(xmlNodePtr cur)
 {
-    ReportEngine PRE("ParticleSetPool", "put");
-    std::string id("e");
-    std::string role("none");
-    std::string randomR("no");
-    std::string randomsrc;
-    std::string useGPU;
-    std::string spinor;
-    OhmmsAttributeSet pAttrib;
-    pAttrib.add(id, "id");
-    pAttrib.add(id, "name");
-    pAttrib.add(role, "role");
-    pAttrib.add(randomR, "random");
-    pAttrib.add(randomsrc, "randomsrc");
-    pAttrib.add(randomsrc, "random_source");
-    pAttrib.add(spinor, "spinor", {"no", "yes"});
-    pAttrib.add(useGPU, "gpu", CPUOMPTargetSelector::candidate_values);
-    pAttrib.put(cur);
-    // backward compatibility
-    if (id == "e" && role == "none")
-        role = "MC";
-    ParticleSetT<T>* pTemp = getParticleSet(id);
-    if (pTemp == 0) {
-        const bool use_offload = CPUOMPTargetSelector::selectPlatform(useGPU) ==
-            PlatformKind::OMPTARGET;
-        app_summary() << std::endl;
-        app_summary() << " Particle Set" << std::endl;
-        app_summary() << " ------------" << std::endl;
-        app_summary() << "  Name: " << id
-                      << "   Offload : " << (use_offload ? "yes" : "no")
-                      << std::endl;
-        app_summary() << std::endl;
-
-        // select OpenMP offload implementation in ParticleSet.
-        if (use_offload)
-            pTemp = new MCWalkerConfigurationT<T>(
-                *simulation_cell_, DynamicCoordinateKind::DC_POS_OFFLOAD);
-        else
-            pTemp = new MCWalkerConfigurationT<T>(
-                *simulation_cell_, DynamicCoordinateKind::DC_POS);
+  ReportEngine PRE("ParticleSetPool", "put");
+  std::string id("e");
+  std::string role("none");
+  std::string randomR("no");
+  std::string randomsrc;
+  std::string useGPU;
+  std::string spinor;
+  OhmmsAttributeSet pAttrib;
+  pAttrib.add(id, "id");
+  pAttrib.add(id, "name");
+  pAttrib.add(role, "role");
+  pAttrib.add(randomR, "random");
+  pAttrib.add(randomsrc, "randomsrc");
+  pAttrib.add(randomsrc, "random_source");
+  pAttrib.add(spinor, "spinor", {"no", "yes"});
+  pAttrib.add(useGPU, "gpu", CPUOMPTargetSelector::candidate_values);
+  pAttrib.put(cur);
+  // backward compatibility
+  if (id == "e" && role == "none")
+    role = "MC";
+  ParticleSetT<T>* pTemp = getParticleSet(id);
+  if (pTemp == 0)
+  {
+    const bool use_offload = CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET;
+    app_summary() << std::endl;
+    app_summary() << " Particle Set" << std::endl;
+    app_summary() << " ------------" << std::endl;
+    app_summary() << "  Name: " << id << "   Offload : " << (use_offload ? "yes" : "no") << std::endl;
+    app_summary() << std::endl;
 
-        myPool.emplace(id, pTemp);
+    // select OpenMP offload implementation in ParticleSet.
+    if (use_offload)
+      pTemp = new MCWalkerConfigurationT<T>(*simulation_cell_, DynamicCoordinateKind::DC_POS_OFFLOAD);
+    else
+      pTemp = new MCWalkerConfigurationT<T>(*simulation_cell_, DynamicCoordinateKind::DC_POS);
 
-        try {
-            XMLParticleParserT<T> pread(*pTemp);
-            pread.readXML(cur);
-        }
-        catch (const UniformCommunicateError& ue) {
-            myComm->barrier_and_abort(ue.what());
-        }
+    myPool.emplace(id, pTemp);
 
-        // if random_source is given, create a node <init target="" soruce=""/>
-        if (randomR == "yes" && !randomsrc.empty()) {
-            xmlNodePtr anode = xmlNewNode(NULL, (const xmlChar*)"init");
-            xmlNewProp(anode, (const xmlChar*)"source",
-                (const xmlChar*)randomsrc.c_str());
-            xmlNewProp(
-                anode, (const xmlChar*)"target", (const xmlChar*)id.c_str());
-            randomize_nodes.push_back(anode);
-        }
-        pTemp->setName(id);
-        pTemp->setSpinor(spinor == "yes");
-        app_summary() << "  Particle set size: " << pTemp->getTotalNum()
-                      << "   Groups : " << pTemp->groups() << std::endl;
-        app_summary() << std::endl;
-        return true;
+    try
+    {
+      XMLParticleParserT<T> pread(*pTemp);
+      pread.readXML(cur);
     }
-    else {
-        app_warning() << "Particle set " << id
-                      << " is already created. Ignoring this section."
-                      << std::endl;
+    catch (const UniformCommunicateError& ue)
+    {
+      myComm->barrier_and_abort(ue.what());
+    }
+
+    // if random_source is given, create a node <init target="" source=""/>
+    if (randomR == "yes" && !randomsrc.empty())
+    {
+      xmlNodePtr anode = xmlNewNode(NULL, (const xmlChar*)"init");
+      xmlNewProp(anode, (const xmlChar*)"source", (const xmlChar*)randomsrc.c_str());
+      xmlNewProp(anode, (const xmlChar*)"target", (const xmlChar*)id.c_str());
+      randomize_nodes.push_back(anode);
     }
+    pTemp->setName(id);
+    pTemp->setSpinor(spinor == "yes");
+    app_summary() << "  Particle set size: " << pTemp->getTotalNum() << "   Groups : " << pTemp->groups() << std::endl;
     app_summary() << std::endl;
     return true;
+  }
+  else
+  {
+    app_warning() << "Particle set " << id << " is already created. Ignoring this section." << std::endl;
+  }
+  app_summary() << std::endl;
+  return true;
 }
 
-template <typename T>
-void
-ParticleSetPoolT<T>::randomize()
+template<typename T>
+void ParticleSetPoolT<T>::randomize()
 {
-    app_log() << "ParticleSetPool::randomize " << randomize_nodes.size()
-              << " ParticleSet" << (randomize_nodes.size() == 1 ? "" : "s")
-              << "." << std::endl;
-    bool success = true;
-    for (int i = 0; i < randomize_nodes.size(); ++i) {
-        InitMolecularSystemT<T> moinit(*this);
-        success &= moinit.put(randomize_nodes[i]);
-        xmlFreeNode(randomize_nodes[i]);
-    }
-    randomize_nodes.clear();
-    if (!success)
-        throw std::runtime_error(
-            "ParticleSePool::randomize failed to randomize some Particlesets!");
+  app_log() << "ParticleSetPool::randomize " << randomize_nodes.size() << " ParticleSet"
+            << (randomize_nodes.size() == 1 ? "" : "s") << "." << std::endl;
+  bool success = true;
+  for (int i = 0; i < randomize_nodes.size(); ++i)
+  {
+    InitMolecularSystemT<T> moinit(*this);
+    success &= moinit.put(randomize_nodes[i]);
+    xmlFreeNode(randomize_nodes[i]);
+  }
+  randomize_nodes.clear();
+  if (!success)
+    throw std::runtime_error("ParticleSePool::randomize failed to randomize some Particlesets!");
 }
 
-template <typename T>
-bool
-ParticleSetPoolT<T>::get(std::ostream& os) const
+template<typename T>
+bool ParticleSetPoolT<T>::get(std::ostream& os) const
 {
-    os << "ParticleSetPool has: " << std::endl << std::endl;
-    os.setf(std::ios::scientific, std::ios::floatfield);
-    os.precision(14);
-    for (const auto& [name, pset] : myPool)
-        if (outputManager.isDebugActive())
-            pset->print(os, 0);
-        else
-            pset->print(os, 10 /* maxParticlesToPrint */);
-    return true;
+  os << "ParticleSetPool has: " << std::endl << std::endl;
+  os.setf(std::ios::scientific, std::ios::floatfield);
+  os.precision(14);
+  for (const auto& [name, pset] : myPool)
+    if (outputManager.isDebugActive())
+      pset->print(os, 0);
+    else
+      pset->print(os, 10 /* maxParticlesToPrint */);
+  return true;
 }
 
-template <typename T>
-void
-ParticleSetPoolT<T>::output_particleset_info(
-    Libxml2Document& doc, xmlNodePtr root)
+template<typename T>
+void ParticleSetPoolT<T>::output_particleset_info(Libxml2Document& doc, xmlNodePtr root)
 {
-    xmlNodePtr particles_info = doc.addChild(root, "particles");
-    typename PoolType::const_iterator it(myPool.begin()), it_end(myPool.end());
-    while (it != it_end) {
-        xmlNodePtr particle = doc.addChild(particles_info, "particle");
-        doc.addChild(particle, "name", (*it).second->getName());
-        doc.addChild(particle, "size", (*it).second->getTotalNum());
-        ++it;
-    }
+  xmlNodePtr particles_info = doc.addChild(root, "particles");
+  typename PoolType::const_iterator it(myPool.begin()), it_end(myPool.end());
+  while (it != it_end)
+  {
+    xmlNodePtr particle = doc.addChild(particles_info, "particle");
+    doc.addChild(particle, "name", (*it).second->getName());
+    doc.addChild(particle, "size", (*it).second->getTotalNum());
+    ++it;
+  }
 }
 
 /** reset is used to initialize and evaluate the distance tables
  */
-template <typename T>
-void
-ParticleSetPoolT<T>::reset()
+template<typename T>
+void ParticleSetPoolT<T>::reset()
 {
-    for (const auto& [key, pset] : myPool)
-        pset->update();
+  for (const auto& [key, pset] : myPool)
+    pset->update();
 }
 
 // explicit instantiations
diff --git a/src/Particle/ParticleSetPoolT.h b/src/Particle/ParticleSetPoolT.h
index da72817dfcd..2d71c3e9977 100644
--- a/src/Particle/ParticleSetPoolT.h
+++ b/src/Particle/ParticleSetPoolT.h
@@ -4,14 +4,11 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_PARTICLESETPOOLT_H
@@ -31,125 +28,97 @@ namespace qmcplusplus
  * This object handles \<particleset\> elements and
  * functions as a builder class for ParticleSet objects.
  */
-template <typename T>
+template<typename T>
 class ParticleSetPoolT : public MPIObjectBase
 {
 public:
-    using PoolType =
-        std::map<std::string, const std::unique_ptr<ParticleSetT<T>>>;
+  using PoolType = std::map<std::string, const std::unique_ptr<ParticleSetT<T>>>;
 
-    /** constructor
+  /** constructor
      * @param aname xml tag
      */
-    ParticleSetPoolT(Communicate* c, const char* aname = "particleset");
-    ~ParticleSetPoolT();
-
-    ParticleSetPoolT(const ParticleSetPoolT&) = delete;
-    ParticleSetPoolT&
-    operator=(const ParticleSetPoolT&) = delete;
-    ParticleSetPoolT(ParticleSetPoolT&& pset) noexcept;
-    ParticleSetPoolT&
-    operator=(ParticleSetPoolT&&) = default;
-
-    bool
-    put(xmlNodePtr cur);
-    bool
-    get(std::ostream& os) const;
-    void
-    reset();
-
-    void
-    output_particleset_info(Libxml2Document& doc, xmlNodePtr root);
-
-    /** initialize the supercell shared by all the particle sets
+  ParticleSetPoolT(Communicate* c, const char* aname = "particleset");
+  ~ParticleSetPoolT();
+
+  ParticleSetPoolT(const ParticleSetPoolT&)            = delete;
+  ParticleSetPoolT& operator=(const ParticleSetPoolT&) = delete;
+  ParticleSetPoolT(ParticleSetPoolT&& pset) noexcept;
+  ParticleSetPoolT& operator=(ParticleSetPoolT&&) = default;
+
+  bool put(xmlNodePtr cur);
+  bool get(std::ostream& os) const;
+  void reset();
+
+  void output_particleset_info(Libxml2Document& doc, xmlNodePtr root);
+
+  /** initialize the supercell shared by all the particle sets
      *
      *  return value is never checked anywhere
      *  side effect simulation_cell_ UPtr<ParticleLayout> is set
      *  to particle layout created on heap.
      *  This is later directly assigned to pset member variable Lattice.
      */
-    bool
-    readSimulationCellXML(xmlNodePtr cur);
+  bool readSimulationCellXML(xmlNodePtr cur);
 
-    /// return true, if the pool is empty
-    inline bool
-    empty() const
-    {
-        return myPool.empty();
-    }
+  /// return true, if the pool is empty
+  inline bool empty() const { return myPool.empty(); }
 
-    /** add a ParticleSet* to the pool with its ownership transferred
+  /** add a ParticleSet* to the pool with its ownership transferred
      * ParticleSet built outside the ParticleSetPool must be constructed with
      * the simulation cell from this->simulation_cell_.
      */
-    void
-    addParticleSet(std::unique_ptr<ParticleSetT<T>>&& p);
+  void addParticleSet(std::unique_ptr<ParticleSetT<T>>&& p);
 
-    /** get a named ParticleSet
+  /** get a named ParticleSet
      * @param pname name of the ParticleSet
      * @return a MCWalkerConfiguration object with pname
      *
      * When the named ParticleSet is not in this object, return 0.
      */
-    ParticleSetT<T>*
-    getParticleSet(const std::string& pname);
+  ParticleSetT<T>* getParticleSet(const std::string& pname);
 
-    /** get a named MCWalkerConfiguration
+  /** get a named MCWalkerConfiguration
      * @param pname name of the MCWalkerConfiguration
      * @return a MCWalkerConfiguration object with pname
      *
      * When the named MCWalkerConfiguration is not in this object, return 0.
      */
-    MCWalkerConfigurationT<T>*
-    getWalkerSet(const std::string& pname);
+  MCWalkerConfigurationT<T>* getWalkerSet(const std::string& pname);
 
-    /** get the Pool object
+  /** get the Pool object
      */
-    inline const PoolType&
-    getPool() const
-    {
-        return myPool;
-    }
-
-    /// get simulation cell
-    const auto&
-    getSimulationCell() const
-    {
-        return *simulation_cell_;
-    }
-
-    /// set simulation cell
-    void
-    setSimulationCell(const SimulationCellT<T>& simulation_cell)
-    {
-        *simulation_cell_ = simulation_cell;
-    }
-
-    /** randomize a particleset particleset/@random='yes' &&
+  inline const PoolType& getPool() const { return myPool; }
+
+  /// get simulation cell
+  const auto& getSimulationCell() const { return *simulation_cell_; }
+
+  /// set simulation cell
+  void setSimulationCell(const SimulationCellT<T>& simulation_cell) { *simulation_cell_ = simulation_cell; }
+
+  /** randomize a particleset particleset/@random='yes' &&
      * particleset@random_source exists
      */
-    void
-    randomize();
+  void randomize();
 
 private:
-    /** global simulation cell
+  /** global simulation cell
      *
      * updated by
      * - readSimulationCellXML() parsing <simulationcell> element
      * - setSimulationCell()
      */
-    std::unique_ptr<SimulationCellT<T>> simulation_cell_;
-    /** List of ParticleSet owned
+  std::unique_ptr<SimulationCellT<T>> simulation_cell_;
+  /** List of ParticleSet owned
      *
      * Each ParticleSet has to have a unique name which is used as a key for the
      * map.
      */
-    PoolType myPool;
-    /** xml node for random initialization.
+  PoolType myPool;
+  /** xml node for random initialization.
      *
      * randomize() process initializations just before starting qmc sections
      */
-    std::vector<xmlNodePtr> randomize_nodes;
+  std::vector<xmlNodePtr> randomize_nodes;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/ParticleSetT.cpp b/src/Particle/ParticleSetT.cpp
index 2247ee14d20..095d21197e1 100644
--- a/src/Particle/ParticleSetT.cpp
+++ b/src/Particle/ParticleSetT.cpp
@@ -4,21 +4,16 @@
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
-// Urbana-Champaign
-//                    Luke Shulenburger, lshulen@sandia.gov, Sandia National
-//                    Laboratories Jeremy McMinnis, jmcminis@gmail.com,
-//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
-//                    Ridge National Laboratory Ye Luo, yeluo@anl.gov, Argonne
-//                    National Laboratory Mark A. Berrill, berrillma@ornl.gov,
-//                    Oak Ridge National Laboratory Mark Dewing,
-//                    markdewing@gmail.com, University of Illinois at
-//                    Urbana-Champaign
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "ParticleSetT.h"
@@ -45,593 +40,572 @@ using WP = WalkerProperties::Indexes;
 
 enum PSetTimers
 {
-    PS_newpos,
-    PS_donePbyP,
-    PS_accept,
-    PS_loadWalker,
-    PS_update,
-    PS_dt_move,
-    PS_mw_copy
+  PS_newpos,
+  PS_donePbyP,
+  PS_accept,
+  PS_loadWalker,
+  PS_update,
+  PS_dt_move,
+  PS_mw_copy
 };
 
-static const TimerNameList_t<PSetTimers>
-generatePSetTimerNames(std::string& obj_name)
-{
-    return {{PS_newpos, "ParticleSet:" + obj_name + "::computeNewPosDT"},
-        {PS_donePbyP, "ParticleSet:" + obj_name + "::donePbyP"},
-        {PS_accept, "ParticleSet:" + obj_name + "::acceptMove"},
-        {PS_loadWalker, "ParticleSet:" + obj_name + "::loadWalker"},
-        {PS_update, "ParticleSet:" + obj_name + "::update"},
-        {PS_dt_move, "ParticleSet:" + obj_name + "::dt_move"},
-        {PS_mw_copy, "ParticleSet:" + obj_name + "::mw_copy"}};
-}
-
-template <typename T>
-ParticleSetT<T>::ParticleSetT(const SimulationCellT<T>& simulation_cell,
-    const DynamicCoordinateKind kind) :
-    quantum_domain(classical),
-    Properties(0, 0, 1, WP::MAXPROPERTIES),
-    simulation_cell_(simulation_cell),
-    same_mass_(true),
-    is_spinor_(false),
-    active_ptcl_(-1),
-    active_spin_val_(0.0),
-    myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName),
-        timer_level_medium),
-    myTwist(0.0),
-    ParentName("0"),
-    TotalNum(0),
-    group_offsets_(std::make_shared<Vector<int, OMPallocator<int>>>()),
-    coordinates_(createDynamicCoordinatesT<T>(kind))
-{
-    initPropertyList();
-}
-
-template <typename T>
-ParticleSetT<T>::ParticleSetT(const ParticleSetT& p) :
-    Properties(p.Properties),
-    simulation_cell_(p.simulation_cell_),
-    same_mass_(true),
-    is_spinor_(false),
-    active_ptcl_(-1),
-    active_spin_val_(0.0),
-    my_species_(p.getSpeciesSet()),
-    myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName),
-        timer_level_medium),
-    myTwist(0.0),
-    ParentName(p.parentName()),
-    group_offsets_(p.group_offsets_),
-    coordinates_(p.coordinates_->makeClone())
-{
-    setQuantumDomain(p.quantum_domain);
-
-    resize(p.getTotalNum());
-    R.InUnit = p.R.InUnit;
-    R = p.R;
-    spins = p.spins;
-    GroupID = p.GroupID;
-    is_spinor_ = p.is_spinor_;
-
-    // need explicit copy:
-    Mass = p.Mass;
-    Z = p.Z;
-    // std::ostringstream o;
-    // o<<p.getName()<<ObjectTag;
-    // this->setName(o.str());
-    // app_log() << "  Copying a particle set " << p.getName() << " to " <<
-    // this->getName() << " groups=" << groups() << std::endl;
-    myName = p.getName();
-    PropertyList.Names = p.PropertyList.Names;
-    PropertyList.Values = p.PropertyList.Values;
-    PropertyHistory = p.PropertyHistory;
-    Collectables = p.Collectables;
-    // construct the distance tables with the same order
-    for (int i = 0; i < p.DistTables.size(); ++i)
-        addTable(p.DistTables[i]->get_origin(), p.DistTables[i]->getModes());
-
-    if (p.structure_factor_)
-        structure_factor_ =
-            std::make_unique<StructFactT<T>>(*p.structure_factor_);
-    myTwist = p.myTwist;
-
-    G = p.G;
-    L = p.L;
-}
-
-template <typename T>
+static const TimerNameList_t<PSetTimers> generatePSetTimerNames(std::string& obj_name)
+{
+  return {{PS_newpos, "ParticleSet:" + obj_name + "::computeNewPosDT"},
+          {PS_donePbyP, "ParticleSet:" + obj_name + "::donePbyP"},
+          {PS_accept, "ParticleSet:" + obj_name + "::acceptMove"},
+          {PS_loadWalker, "ParticleSet:" + obj_name + "::loadWalker"},
+          {PS_update, "ParticleSet:" + obj_name + "::update"},
+          {PS_dt_move, "ParticleSet:" + obj_name + "::dt_move"},
+          {PS_mw_copy, "ParticleSet:" + obj_name + "::mw_copy"}};
+}
+
+template<typename T>
+ParticleSetT<T>::ParticleSetT(const SimulationCellT<T>& simulation_cell, const DynamicCoordinateKind kind)
+    : quantum_domain(classical),
+      Properties(0, 0, 1, WP::MAXPROPERTIES),
+      simulation_cell_(simulation_cell),
+      same_mass_(true),
+      is_spinor_(false),
+      active_ptcl_(-1),
+      active_spin_val_(0.0),
+      myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName), timer_level_medium),
+      myTwist(0.0),
+      ParentName("0"),
+      TotalNum(0),
+      group_offsets_(std::make_shared<Vector<int, OMPallocator<int>>>()),
+      coordinates_(createDynamicCoordinatesT<T>(kind))
+{
+  initPropertyList();
+}
+
+template<typename T>
+ParticleSetT<T>::ParticleSetT(const ParticleSetT& p)
+    : Properties(p.Properties),
+      simulation_cell_(p.simulation_cell_),
+      same_mass_(true),
+      is_spinor_(false),
+      active_ptcl_(-1),
+      active_spin_val_(0.0),
+      my_species_(p.getSpeciesSet()),
+      myTimers(getGlobalTimerManager(), generatePSetTimerNames(myName), timer_level_medium),
+      myTwist(0.0),
+      ParentName(p.parentName()),
+      group_offsets_(p.group_offsets_),
+      coordinates_(p.coordinates_->makeClone())
+{
+  setQuantumDomain(p.quantum_domain);
+
+  resize(p.getTotalNum());
+  R.InUnit   = p.R.InUnit;
+  R          = p.R;
+  spins      = p.spins;
+  GroupID    = p.GroupID;
+  is_spinor_ = p.is_spinor_;
+
+  // need explicit copy:
+  Mass = p.Mass;
+  Z    = p.Z;
+  // std::ostringstream o;
+  // o<<p.getName()<<ObjectTag;
+  // this->setName(o.str());
+  // app_log() << "  Copying a particle set " << p.getName() << " to " <<
+  // this->getName() << " groups=" << groups() << std::endl;
+  myName              = p.getName();
+  PropertyList.Names  = p.PropertyList.Names;
+  PropertyList.Values = p.PropertyList.Values;
+  PropertyHistory     = p.PropertyHistory;
+  Collectables        = p.Collectables;
+  // construct the distance tables with the same order
+  for (int i = 0; i < p.DistTables.size(); ++i)
+    addTable(p.DistTables[i]->get_origin(), p.DistTables[i]->getModes());
+
+  if (p.structure_factor_)
+    structure_factor_ = std::make_unique<StructFactT<T>>(*p.structure_factor_);
+  myTwist = p.myTwist;
+
+  G = p.G;
+  L = p.L;
+}
+
+template<typename T>
 ParticleSetT<T>::~ParticleSetT() = default;
 
-template <typename T>
-void
-ParticleSetT<T>::create(const std::vector<int>& agroup)
-{
-    auto& group_offsets(*group_offsets_);
-    group_offsets.resize(agroup.size() + 1);
-    group_offsets[0] = 0;
-    for (int is = 0; is < agroup.size(); is++)
-        group_offsets[is + 1] = group_offsets[is] + agroup[is];
-    group_offsets.updateTo();
-    const size_t nsum = group_offsets[agroup.size()];
-    resize(nsum);
-    TotalNum = nsum;
-    int loc = 0;
-    for (int i = 0; i < agroup.size(); i++)
-        for (int j = 0; j < agroup[i]; j++, loc++)
-            GroupID[loc] = i;
-}
-
-template <typename T>
-void
-ParticleSetT<T>::setQuantumDomain(quantum_domains qdomain)
-{
-    if (quantumDomainValid(qdomain))
-        quantum_domain = qdomain;
-    else
-        throw std::runtime_error("ParticleSet::setQuantumDomain\n  input "
-                                 "quantum domain is not valid for particles");
-}
-
-template <typename T>
-void
-ParticleSetT<T>::resetGroups()
-{
-    const int nspecies = my_species_.getTotalNum();
-    // Usually an empty ParticleSet indicates an error in the input file,
-    // but in some cases it is useful.  Allow an empty ParticleSet if it
-    // has the special name "empty".
-    if (nspecies == 0 && getName() != "empty") {
-        throw std::runtime_error(
-            "ParticleSet::resetGroups() Failed. No species exisits");
-    }
-    int natt = my_species_.numAttributes();
-    int qind = my_species_.addAttribute("charge");
-    if (natt == qind) {
-        app_log() << " Missing charge attribute of the SpeciesSet " << myName
-                  << " particleset" << std::endl;
-        app_log() << " Assume neutral particles Z=0.0 " << std::endl;
-        for (int ig = 0; ig < nspecies; ig++)
-            my_species_(qind, ig) = 0.0;
-    }
-    for (int iat = 0; iat < Z.size(); iat++)
-        Z[iat] = my_species_(qind, GroupID[iat]);
-    natt = my_species_.numAttributes();
-    int massind = my_species_.addAttribute("mass");
-    if (massind == natt) {
-        for (int ig = 0; ig < nspecies; ig++)
-            my_species_(massind, ig) = 1.0;
-    }
-    same_mass_ = true;
-    double m0 = my_species_(massind, 0);
-    for (int ig = 1; ig < nspecies; ig++)
-        same_mass_ &= (my_species_(massind, ig) == m0);
-    if (same_mass_)
-        app_log() << "  All the species have the same mass " << m0 << std::endl;
-    else
-        app_log() << "  Distinctive masses for each species " << std::endl;
-    for (int iat = 0; iat < Mass.size(); iat++)
-        Mass[iat] = my_species_(massind, GroupID[iat]);
-
-    int membersize = my_species_.addAttribute("membersize");
-    for (int ig = 0; ig < nspecies; ++ig)
-        my_species_(membersize, ig) = groupsize(ig);
-
-    for (int iat = 0; iat < GroupID.size(); iat++)
-        assert(GroupID[iat] < nspecies);
-}
-
-template <typename T>
-void
-ParticleSetT<T>::randomizeFromSource(ParticleSetT& src)
-{
-    SpeciesSet& srcSpSet(src.getSpeciesSet());
-    SpeciesSet& spSet(getSpeciesSet());
-    int srcChargeIndx = srcSpSet.addAttribute("charge");
-    int srcMemberIndx = srcSpSet.addAttribute("membersize");
-    int ChargeIndex = spSet.addAttribute("charge");
-    int MemberIndx = spSet.addAttribute("membersize");
-    int Nsrc = src.getTotalNum();
-    int Nptcl = getTotalNum();
-    int NumSpecies = spSet.TotalNum;
-    int NumSrcSpecies = srcSpSet.TotalNum;
-    // Store information about charges and number of each species
-    std::vector<int> Zat, Zspec, NofSpecies, NofSrcSpecies, CurElec;
-    Zat.resize(Nsrc);
-    Zspec.resize(NumSrcSpecies);
-    NofSpecies.resize(NumSpecies);
-    CurElec.resize(NumSpecies);
-    NofSrcSpecies.resize(NumSrcSpecies);
-    for (int spec = 0; spec < NumSrcSpecies; spec++) {
-        Zspec[spec] = (int)round(srcSpSet(srcChargeIndx, spec));
-        NofSrcSpecies[spec] = (int)round(srcSpSet(srcMemberIndx, spec));
-    }
-    for (int spec = 0; spec < NumSpecies; spec++) {
-        NofSpecies[spec] = (int)round(spSet(MemberIndx, spec));
-        CurElec[spec] = first(spec);
-    }
-    int totQ = 0;
-    for (int iat = 0; iat < Nsrc; iat++)
-        totQ += Zat[iat] = Zspec[src.GroupID[iat]];
-    app_log() << "  Total ion charge    = " << totQ << std::endl;
-    totQ -= Nptcl;
-    app_log() << "  Total system charge = " << totQ << std::endl;
-    // Now, loop over ions, attaching electrons to them to neutralize
-    // charge
-    int spToken = 0;
-    // This is decremented when we run out of electrons in each species
-    int spLeft = NumSpecies;
-    std::vector<PosType> gaussRand(Nptcl);
-    makeGaussRandom(gaussRand);
-    for (int iat = 0; iat < Nsrc; iat++) {
-        // Loop over electrons to add, selecting round-robin from the
-        // electron species
-        int z = Zat[iat];
-        while (z > 0 && spLeft) {
-            int sp = spToken++ % NumSpecies;
-            if (NofSpecies[sp]) {
-                NofSpecies[sp]--;
-                z--;
-                int elec = CurElec[sp]++;
-                app_log() << "  Assigning " << (sp ? "down" : "up  ")
-                          << " electron " << elec << " to ion " << iat
-                          << " with charge " << z << std::endl;
-                double radius = 0.5 * std::sqrt((double)Zat[iat]);
-                R[elec] = src.R[iat] + radius * gaussRand[elec];
-            }
-            else
-                spLeft--;
-        }
+template<typename T>
+void ParticleSetT<T>::create(const std::vector<int>& agroup)
+{
+  auto& group_offsets(*group_offsets_);
+  group_offsets.resize(agroup.size() + 1);
+  group_offsets[0] = 0;
+  for (int is = 0; is < agroup.size(); is++)
+    group_offsets[is + 1] = group_offsets[is] + agroup[is];
+  group_offsets.updateTo();
+  const size_t nsum = group_offsets[agroup.size()];
+  resize(nsum);
+  TotalNum = nsum;
+  int loc  = 0;
+  for (int i = 0; i < agroup.size(); i++)
+    for (int j = 0; j < agroup[i]; j++, loc++)
+      GroupID[loc] = i;
+}
+
+template<typename T>
+void ParticleSetT<T>::setQuantumDomain(quantum_domains qdomain)
+{
+  if (quantumDomainValid(qdomain))
+    quantum_domain = qdomain;
+  else
+    throw std::runtime_error("ParticleSet::setQuantumDomain\n  input "
+                             "quantum domain is not valid for particles");
+}
+
+template<typename T>
+void ParticleSetT<T>::resetGroups()
+{
+  const int nspecies = my_species_.getTotalNum();
+  // Usually an empty ParticleSet indicates an error in the input file,
+  // but in some cases it is useful.  Allow an empty ParticleSet if it
+  // has the special name "empty".
+  if (nspecies == 0 && getName() != "empty")
+  {
+    throw std::runtime_error("ParticleSet::resetGroups() Failed. No species exisits");
+  }
+  int natt = my_species_.numAttributes();
+  int qind = my_species_.addAttribute("charge");
+  if (natt == qind)
+  {
+    app_log() << " Missing charge attribute of the SpeciesSet " << myName << " particleset" << std::endl;
+    app_log() << " Assume neutral particles Z=0.0 " << std::endl;
+    for (int ig = 0; ig < nspecies; ig++)
+      my_species_(qind, ig) = 0.0;
+  }
+  for (int iat = 0; iat < Z.size(); iat++)
+    Z[iat] = my_species_(qind, GroupID[iat]);
+  natt        = my_species_.numAttributes();
+  int massind = my_species_.addAttribute("mass");
+  if (massind == natt)
+  {
+    for (int ig = 0; ig < nspecies; ig++)
+      my_species_(massind, ig) = 1.0;
+  }
+  same_mass_ = true;
+  double m0  = my_species_(massind, 0);
+  for (int ig = 1; ig < nspecies; ig++)
+    same_mass_ &= (my_species_(massind, ig) == m0);
+  if (same_mass_)
+    app_log() << "  All the species have the same mass " << m0 << std::endl;
+  else
+    app_log() << "  Distinctive masses for each species " << std::endl;
+  for (int iat = 0; iat < Mass.size(); iat++)
+    Mass[iat] = my_species_(massind, GroupID[iat]);
+
+  int membersize = my_species_.addAttribute("membersize");
+  for (int ig = 0; ig < nspecies; ++ig)
+    my_species_(membersize, ig) = groupsize(ig);
+
+  for (int iat = 0; iat < GroupID.size(); iat++)
+    assert(GroupID[iat] < nspecies);
+}
+
+template<typename T>
+void ParticleSetT<T>::randomizeFromSource(ParticleSetT& src)
+{
+  SpeciesSet& srcSpSet(src.getSpeciesSet());
+  SpeciesSet& spSet(getSpeciesSet());
+  int srcChargeIndx = srcSpSet.addAttribute("charge");
+  int srcMemberIndx = srcSpSet.addAttribute("membersize");
+  int ChargeIndex   = spSet.addAttribute("charge");
+  int MemberIndx    = spSet.addAttribute("membersize");
+  int Nsrc          = src.getTotalNum();
+  int Nptcl         = getTotalNum();
+  int NumSpecies    = spSet.TotalNum;
+  int NumSrcSpecies = srcSpSet.TotalNum;
+  // Store information about charges and number of each species
+  std::vector<int> Zat, Zspec, NofSpecies, NofSrcSpecies, CurElec;
+  Zat.resize(Nsrc);
+  Zspec.resize(NumSrcSpecies);
+  NofSpecies.resize(NumSpecies);
+  CurElec.resize(NumSpecies);
+  NofSrcSpecies.resize(NumSrcSpecies);
+  for (int spec = 0; spec < NumSrcSpecies; spec++)
+  {
+    Zspec[spec]         = (int)round(srcSpSet(srcChargeIndx, spec));
+    NofSrcSpecies[spec] = (int)round(srcSpSet(srcMemberIndx, spec));
+  }
+  for (int spec = 0; spec < NumSpecies; spec++)
+  {
+    NofSpecies[spec] = (int)round(spSet(MemberIndx, spec));
+    CurElec[spec]    = first(spec);
+  }
+  int totQ = 0;
+  for (int iat = 0; iat < Nsrc; iat++)
+    totQ += Zat[iat] = Zspec[src.GroupID[iat]];
+  app_log() << "  Total ion charge    = " << totQ << std::endl;
+  totQ -= Nptcl;
+  app_log() << "  Total system charge = " << totQ << std::endl;
+  // Now, loop over ions, attaching electrons to them to neutralize
+  // charge
+  int spToken = 0;
+  // This is decremented when we run out of electrons in each species
+  int spLeft = NumSpecies;
+  std::vector<PosType> gaussRand(Nptcl);
+  makeGaussRandom(gaussRand);
+  for (int iat = 0; iat < Nsrc; iat++)
+  {
+    // Loop over electrons to add, selecting round-robin from the
+    // electron species
+    int z = Zat[iat];
+    while (z > 0 && spLeft)
+    {
+      int sp = spToken++ % NumSpecies;
+      if (NofSpecies[sp])
+      {
+        NofSpecies[sp]--;
+        z--;
+        int elec = CurElec[sp]++;
+        app_log() << "  Assigning " << (sp ? "down" : "up  ") << " electron " << elec << " to ion " << iat
+                  << " with charge " << z << std::endl;
+        double radius = 0.5 * std::sqrt((double)Zat[iat]);
+        R[elec]       = src.R[iat] + radius * gaussRand[elec];
+      }
+      else
+        spLeft--;
     }
-    // Assign remaining electrons
-    int ion = 0;
-    for (int sp = 0; sp < NumSpecies; sp++) {
-        for (int ie = 0; ie < NofSpecies[sp]; ie++) {
-            int iat = ion++ % Nsrc;
-            double radius = std::sqrt((double)Zat[iat]);
-            int elec = CurElec[sp]++;
-            R[elec] = src.R[iat] + radius * gaussRand[elec];
-        }
+  }
+  // Assign remaining electrons
+  int ion = 0;
+  for (int sp = 0; sp < NumSpecies; sp++)
+  {
+    for (int ie = 0; ie < NofSpecies[sp]; ie++)
+    {
+      int iat       = ion++ % Nsrc;
+      double radius = std::sqrt((double)Zat[iat]);
+      int elec      = CurElec[sp]++;
+      R[elec]       = src.R[iat] + radius * gaussRand[elec];
     }
+  }
 }
 
-template <typename T>
-void
-ParticleSetT<T>::print(std::ostream& os, const size_t maxParticlesToPrint) const
+template<typename T>
+void ParticleSetT<T>::print(std::ostream& os, const size_t maxParticlesToPrint) const
 {
-    os << "  ParticleSet '" << getName() << "' contains " << TotalNum
-       << " particles : ";
-    if (auto& group_offsets(*group_offsets_); group_offsets.size() > 0)
-        for (int i = 0; i < group_offsets.size() - 1; i++)
-            os << " " << my_species_.speciesName[i] << "("
-               << group_offsets[i + 1] - group_offsets[i] << ")";
-    os << std::endl << std::endl;
+  os << "  ParticleSet '" << getName() << "' contains " << TotalNum << " particles : ";
+  if (auto& group_offsets(*group_offsets_); group_offsets.size() > 0)
+    for (int i = 0; i < group_offsets.size() - 1; i++)
+      os << " " << my_species_.speciesName[i] << "(" << group_offsets[i + 1] - group_offsets[i] << ")";
+  os << std::endl << std::endl;
 
-    const size_t numToPrint = maxParticlesToPrint == 0 ?
-        TotalNum :
-        std::min(TotalNum, maxParticlesToPrint);
+  const size_t numToPrint = maxParticlesToPrint == 0 ? TotalNum : std::min(TotalNum, maxParticlesToPrint);
 
-    for (int i = 0; i < numToPrint; i++) {
-        os << "    " << my_species_.speciesName[GroupID[i]] << R[i]
-           << std::endl;
-    }
-    if (numToPrint < TotalNum) {
-        os << "    (... and " << (TotalNum - numToPrint)
-           << " more particle positions ...)" << std::endl;
-    }
-    os << std::endl;
+  for (int i = 0; i < numToPrint; i++)
+  {
+    os << "    " << my_species_.speciesName[GroupID[i]] << R[i] << std::endl;
+  }
+  if (numToPrint < TotalNum)
+  {
+    os << "    (... and " << (TotalNum - numToPrint) << " more particle positions ...)" << std::endl;
+  }
+  os << std::endl;
 
-    for (const std::string& description : distTableDescriptions)
-        os << description;
-    os << std::endl;
+  for (const std::string& description : distTableDescriptions)
+    os << description;
+  os << std::endl;
 }
 
-template <typename T>
-bool
-ParticleSetT<T>::get(std::ostream& is) const
+template<typename T>
+bool ParticleSetT<T>::get(std::ostream& is) const
 {
-    return true;
+  return true;
 }
 
-template <typename T>
-bool
-ParticleSetT<T>::put(std::istream& is)
+template<typename T>
+bool ParticleSetT<T>::put(std::istream& is)
 {
-    return true;
+  return true;
 }
 
-template <typename T>
-void
-ParticleSetT<T>::reset()
+template<typename T>
+void ParticleSetT<T>::reset()
 {
-    app_log() << "<<<< going to set properties >>>> " << std::endl;
+  app_log() << "<<<< going to set properties >>>> " << std::endl;
 }
 
 /// read the particleset
-template <typename T>
-bool
-ParticleSetT<T>::put(xmlNodePtr cur)
-{
-    return true;
-}
-
-template <typename T>
-int
-ParticleSetT<T>::addTable(const ParticleSetT& psrc, DTModes modes)
-{
-    if (myName == "none" || psrc.getName() == "none")
-        throw std::runtime_error("ParticleSet::addTable needs proper names for "
-                                 "both source and target particle sets.");
-
-    int tid;
-    std::map<std::string, int>::iterator tit(
-        myDistTableMap.find(psrc.getName()));
-    if (tit == myDistTableMap.end()) {
-        std::ostringstream description;
-        tid = DistTables.size();
-        if (myName == psrc.getName())
-            DistTables.push_back(createDistanceTableT(*this, description));
-        else
-            DistTables.push_back(
-                createDistanceTableT(psrc, *this, description));
-        distTableDescriptions.push_back(description.str());
-        myDistTableMap[psrc.getName()] = tid;
-        app_debug() << "  ... ParticleSet::addTable Create Table #" << tid
-                    << " " << DistTables[tid]->getName() << std::endl;
-    }
-    else {
-        tid = (*tit).second;
-        app_debug() << "  ... ParticleSet::addTable Reuse Table #" << tid << " "
-                    << DistTables[tid]->getName() << std::endl;
-    }
+template<typename T>
+bool ParticleSetT<T>::put(xmlNodePtr cur)
+{
+  return true;
+}
+
+template<typename T>
+int ParticleSetT<T>::addTable(const ParticleSetT& psrc, DTModes modes)
+{
+  if (myName == "none" || psrc.getName() == "none")
+    throw std::runtime_error("ParticleSet::addTable needs proper names for "
+                             "both source and target particle sets.");
 
-    DistTables[tid]->setModes(DistTables[tid]->getModes() | modes);
+  int tid;
+  std::map<std::string, int>::iterator tit(myDistTableMap.find(psrc.getName()));
+  if (tit == myDistTableMap.end())
+  {
+    std::ostringstream description;
+    tid = DistTables.size();
+    if (myName == psrc.getName())
+      DistTables.push_back(createDistanceTableT(*this, description));
+    else
+      DistTables.push_back(createDistanceTableT(psrc, *this, description));
+    distTableDescriptions.push_back(description.str());
+    myDistTableMap[psrc.getName()] = tid;
+    app_debug() << "  ... ParticleSet::addTable Create Table #" << tid << " " << DistTables[tid]->getName()
+                << std::endl;
+  }
+  else
+  {
+    tid = (*tit).second;
+    app_debug() << "  ... ParticleSet::addTable Reuse Table #" << tid << " " << DistTables[tid]->getName() << std::endl;
+  }
+
+  DistTables[tid]->setModes(DistTables[tid]->getModes() | modes);
 
-    app_log().flush();
-    return tid;
+  app_log().flush();
+  return tid;
 }
 
-template <typename T>
-const DistanceTableAAT<T>&
-ParticleSetT<T>::getDistTableAA(int table_ID) const
+template<typename T>
+const DistanceTableAAT<T>& ParticleSetT<T>::getDistTableAA(int table_ID) const
 {
-    return dynamic_cast<DistanceTableAAT<T>&>(*DistTables[table_ID]);
+  return dynamic_cast<DistanceTableAAT<T>&>(*DistTables[table_ID]);
 }
 
-template <typename T>
-const DistanceTableABT<T>&
-ParticleSetT<T>::getDistTableAB(int table_ID) const
+template<typename T>
+const DistanceTableABT<T>& ParticleSetT<T>::getDistTableAB(int table_ID) const
 {
-    return dynamic_cast<DistanceTableABT<T>&>(*DistTables[table_ID]);
+  return dynamic_cast<DistanceTableABT<T>&>(*DistTables[table_ID]);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::update(bool skipSK)
+template<typename T>
+void ParticleSetT<T>::update(bool skipSK)
 {
-    ScopedTimer update_scope(myTimers[PS_update]);
+  ScopedTimer update_scope(myTimers[PS_update]);
 
-    coordinates_->setAllParticlePos(R);
-    for (int i = 0; i < DistTables.size(); i++)
-        DistTables[i]->evaluate(*this);
-    if (!skipSK && structure_factor_)
-        structure_factor_->updateAllPart(*this);
+  coordinates_->setAllParticlePos(R);
+  for (int i = 0; i < DistTables.size(); i++)
+    DistTables[i]->evaluate(*this);
+  if (!skipSK && structure_factor_)
+    structure_factor_->updateAllPart(*this);
 
-    active_ptcl_ = -1;
+  active_ptcl_ = -1;
 }
 
-template <typename T>
-void
-ParticleSetT<T>::mw_update(
-    const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK)
+template<typename T>
+void ParticleSetT<T>::mw_update(const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK)
 {
-    auto& p_leader = p_list.getLeader();
-    ScopedTimer update_scope(p_leader.myTimers[PS_update]);
+  auto& p_leader = p_list.getLeader();
+  ScopedTimer update_scope(p_leader.myTimers[PS_update]);
 
-    for (ParticleSetT& pset : p_list)
-        pset.coordinates_->setAllParticlePos(pset.R);
+  for (ParticleSetT& pset : p_list)
+    pset.coordinates_->setAllParticlePos(pset.R);
 
-    auto& dts = p_leader.DistTables;
-    for (int i = 0; i < dts.size(); ++i) {
-        const auto dt_list(extractDTRefList(p_list, i));
-        dts[i]->mw_evaluate(dt_list, p_list);
-    }
+  auto& dts = p_leader.DistTables;
+  for (int i = 0; i < dts.size(); ++i)
+  {
+    const auto dt_list(extractDTRefList(p_list, i));
+    dts[i]->mw_evaluate(dt_list, p_list);
+  }
 
-    if (!skipSK && p_leader.structure_factor_)
-        for (int iw = 0; iw < p_list.size(); iw++)
-            p_list[iw].structure_factor_->updateAllPart(p_list[iw]);
+  if (!skipSK && p_leader.structure_factor_)
+    for (int iw = 0; iw < p_list.size(); iw++)
+      p_list[iw].structure_factor_->updateAllPart(p_list[iw]);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::makeMove(
-    Index_t iat, const SingleParticlePos& displ, bool maybe_accept)
+template<typename T>
+void ParticleSetT<T>::makeMove(Index_t iat, const SingleParticlePos& displ, bool maybe_accept)
 {
-    active_ptcl_ = iat;
-    active_pos_ = R[iat] + displ;
-    active_spin_val_ = spins[iat];
-    computeNewPosDistTables(iat, active_pos_, maybe_accept);
+  active_ptcl_     = iat;
+  active_pos_      = R[iat] + displ;
+  active_spin_val_ = spins[iat];
+  computeNewPosDistTables(iat, active_pos_, maybe_accept);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::makeMoveWithSpin(
-    Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl)
+template<typename T>
+void ParticleSetT<T>::makeMoveWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl)
 {
-    makeMove(iat, displ);
-    active_spin_val_ += sdispl;
+  makeMove(iat, displ);
+  active_spin_val_ += sdispl;
 }
 
-template <typename T>
-template <CoordsType CT>
-void
-ParticleSetT<T>::mw_makeMove(const RefVectorWithLeader<ParticleSetT>& p_list,
-    Index_t iat, const MCCoordsT<T, CT>& displs)
+template<typename T>
+template<CoordsType CT>
+void ParticleSetT<T>::mw_makeMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                  Index_t iat,
+                                  const MCCoordsT<T, CT>& displs)
 {
-    mw_makeMove(p_list, iat, displs.positions);
-    if constexpr (CT == CoordsType::POS_SPIN)
-        mw_makeSpinMove(p_list, iat, displs.spins);
+  mw_makeMove(p_list, iat, displs.positions);
+  if constexpr (CT == CoordsType::POS_SPIN)
+    mw_makeSpinMove(p_list, iat, displs.spins);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::mw_makeMove(const RefVectorWithLeader<ParticleSetT>& p_list,
-    Index_t iat, const std::vector<SingleParticlePos>& displs)
+template<typename T>
+void ParticleSetT<T>::mw_makeMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                  Index_t iat,
+                                  const std::vector<SingleParticlePos>& displs)
 {
-    std::vector<SingleParticlePos> new_positions;
-    new_positions.reserve(displs.size());
+  std::vector<SingleParticlePos> new_positions;
+  new_positions.reserve(displs.size());
 
-    for (int iw = 0; iw < p_list.size(); iw++) {
-        p_list[iw].active_ptcl_ = iat;
-        p_list[iw].active_pos_ = p_list[iw].R[iat] + displs[iw];
-        new_positions.push_back(p_list[iw].active_pos_);
-    }
+  for (int iw = 0; iw < p_list.size(); iw++)
+  {
+    p_list[iw].active_ptcl_ = iat;
+    p_list[iw].active_pos_  = p_list[iw].R[iat] + displs[iw];
+    new_positions.push_back(p_list[iw].active_pos_);
+  }
 
-    mw_computeNewPosDistTables(p_list, iat, new_positions);
+  mw_computeNewPosDistTables(p_list, iat, new_positions);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::mw_makeSpinMove(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<Scalar_t>& sdispls)
+template<typename T>
+void ParticleSetT<T>::mw_makeSpinMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                      Index_t iat,
+                                      const std::vector<Scalar_t>& sdispls)
 {
-    for (int iw = 0; iw < p_list.size(); iw++)
-        p_list[iw].active_spin_val_ = p_list[iw].spins[iat] + sdispls[iw];
-}
-
-template <typename T>
-bool
-ParticleSetT<T>::makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ)
-{
-    active_ptcl_ = iat;
-    active_pos_ = R[iat] + displ;
-    active_spin_val_ = spins[iat];
-    bool is_valid = true;
-    auto& Lattice = simulation_cell_.getLattice();
-    if (Lattice.explicitly_defined) {
-        if (Lattice.outOfBound(Lattice.toUnit(displ)))
-            is_valid = false;
-        else {
-            SingleParticlePos newRedPos = Lattice.toUnit(active_pos_);
-            if (!Lattice.isValid(newRedPos))
-                is_valid = false;
-        }
+  for (int iw = 0; iw < p_list.size(); iw++)
+    p_list[iw].active_spin_val_ = p_list[iw].spins[iat] + sdispls[iw];
+}
+
+template<typename T>
+bool ParticleSetT<T>::makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ)
+{
+  active_ptcl_     = iat;
+  active_pos_      = R[iat] + displ;
+  active_spin_val_ = spins[iat];
+  bool is_valid    = true;
+  auto& Lattice    = simulation_cell_.getLattice();
+  if (Lattice.explicitly_defined)
+  {
+    if (Lattice.outOfBound(Lattice.toUnit(displ)))
+      is_valid = false;
+    else
+    {
+      SingleParticlePos newRedPos = Lattice.toUnit(active_pos_);
+      if (!Lattice.isValid(newRedPos))
+        is_valid = false;
     }
-    computeNewPosDistTables(iat, active_pos_, true);
-    return is_valid;
+  }
+  computeNewPosDistTables(iat, active_pos_, true);
+  return is_valid;
 }
 
-template <typename T>
-bool
-ParticleSetT<T>::makeMoveAndCheckWithSpin(
-    Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl)
+template<typename T>
+bool ParticleSetT<T>::makeMoveAndCheckWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl)
 {
-    bool is_valid = makeMoveAndCheck(iat, displ);
-    active_spin_val_ += sdispl;
-    return is_valid;
+  bool is_valid = makeMoveAndCheck(iat, displ);
+  active_spin_val_ += sdispl;
+  return is_valid;
 }
 
-template <typename T>
-void
-ParticleSetT<T>::computeNewPosDistTables(
-    Index_t iat, const SingleParticlePos& newpos, bool maybe_accept)
+template<typename T>
+void ParticleSetT<T>::computeNewPosDistTables(Index_t iat, const SingleParticlePos& newpos, bool maybe_accept)
 {
-    ScopedTimer compute_newpos_scope(myTimers[PS_newpos]);
+  ScopedTimer compute_newpos_scope(myTimers[PS_newpos]);
 
-    for (int i = 0; i < DistTables.size(); ++i)
-        DistTables[i]->move(*this, newpos, iat, maybe_accept);
+  for (int i = 0; i < DistTables.size(); ++i)
+    DistTables[i]->move(*this, newpos, iat, maybe_accept);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::mw_computeNewPosDistTables(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<SingleParticlePos>& new_positions, bool maybe_accept)
+template<typename T>
+void ParticleSetT<T>::mw_computeNewPosDistTables(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                                 Index_t iat,
+                                                 const std::vector<SingleParticlePos>& new_positions,
+                                                 bool maybe_accept)
 {
-    ParticleSetT& p_leader = p_list.getLeader();
-    ScopedTimer compute_newpos_scope(p_leader.myTimers[PS_newpos]);
+  ParticleSetT& p_leader = p_list.getLeader();
+  ScopedTimer compute_newpos_scope(p_leader.myTimers[PS_newpos]);
 
-    {
-        ScopedTimer copy_scope(p_leader.myTimers[PS_mw_copy]);
-        const auto coords_list(extractCoordsRefList(p_list));
-        p_leader.coordinates_->mw_copyActivePos(
-            coords_list, iat, new_positions);
-    }
+  {
+    ScopedTimer copy_scope(p_leader.myTimers[PS_mw_copy]);
+    const auto coords_list(extractCoordsRefList(p_list));
+    p_leader.coordinates_->mw_copyActivePos(coords_list, iat, new_positions);
+  }
 
+  {
+    ScopedTimer dt_scope(p_leader.myTimers[PS_dt_move]);
+    const int dist_tables_size = p_leader.DistTables.size();
+    for (int i = 0; i < dist_tables_size; ++i)
     {
-        ScopedTimer dt_scope(p_leader.myTimers[PS_dt_move]);
-        const int dist_tables_size = p_leader.DistTables.size();
-        for (int i = 0; i < dist_tables_size; ++i) {
-            const auto dt_list(extractDTRefList(p_list, i));
-            p_leader.DistTables[i]->mw_move(
-                dt_list, p_list, new_positions, iat, maybe_accept);
-        }
-
-        // DistTables mw_move calls are asynchronous. Wait for them before
-        // return.
-        PRAGMA_OFFLOAD("omp taskwait")
+      const auto dt_list(extractDTRefList(p_list, i));
+      p_leader.DistTables[i]->mw_move(dt_list, p_list, new_positions, iat, maybe_accept);
     }
+
+    // DistTables mw_move calls are asynchronous. Wait for them before
+    // return.
+    PRAGMA_OFFLOAD("omp taskwait")
+  }
 }
 
-template <typename T>
-bool
-ParticleSetT<T>::makeMoveAllParticles(
-    const Walker_t& awalker, const ParticlePos& deltaR, RealType dt)
-{
-    active_ptcl_ = -1;
-    auto& Lattice = simulation_cell_.getLattice();
-    if (Lattice.explicitly_defined) {
-        for (int iat = 0; iat < deltaR.size(); ++iat) {
-            SingleParticlePos displ(dt * deltaR[iat]);
-            if (Lattice.outOfBound(Lattice.toUnit(displ)))
-                return false;
-            SingleParticlePos newpos(awalker.R[iat] + displ);
-            if (!Lattice.isValid(Lattice.toUnit(newpos)))
-                return false;
-            R[iat] = newpos;
-        }
-    }
-    else {
-        for (int iat = 0; iat < deltaR.size(); ++iat)
-            R[iat] = awalker.R[iat] + dt * deltaR[iat];
-    }
-    coordinates_->setAllParticlePos(R);
-    for (int i = 0; i < DistTables.size(); i++)
-        DistTables[i]->evaluate(*this);
-    if (structure_factor_)
-        structure_factor_->updateAllPart(*this);
-    // every move is valid
-    return true;
-}
-
-template <typename T>
-bool
-ParticleSetT<T>::makeMoveAllParticles(const Walker_t& awalker,
-    const ParticlePos& deltaR, const std::vector<RealType>& dt)
-{
-    active_ptcl_ = -1;
-    auto& Lattice = simulation_cell_.getLattice();
-    if (Lattice.explicitly_defined) {
-        for (int iat = 0; iat < deltaR.size(); ++iat) {
-            SingleParticlePos displ(dt[iat] * deltaR[iat]);
-            if (Lattice.outOfBound(Lattice.toUnit(displ)))
-                return false;
-            SingleParticlePos newpos(awalker.R[iat] + displ);
-            if (!Lattice.isValid(Lattice.toUnit(newpos)))
-                return false;
-            R[iat] = newpos;
-        }
+template<typename T>
+bool ParticleSetT<T>::makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, RealType dt)
+{
+  active_ptcl_  = -1;
+  auto& Lattice = simulation_cell_.getLattice();
+  if (Lattice.explicitly_defined)
+  {
+    for (int iat = 0; iat < deltaR.size(); ++iat)
+    {
+      SingleParticlePos displ(dt * deltaR[iat]);
+      if (Lattice.outOfBound(Lattice.toUnit(displ)))
+        return false;
+      SingleParticlePos newpos(awalker.R[iat] + displ);
+      if (!Lattice.isValid(Lattice.toUnit(newpos)))
+        return false;
+      R[iat] = newpos;
     }
-    else {
-        for (int iat = 0; iat < deltaR.size(); ++iat)
-            R[iat] = awalker.R[iat] + dt[iat] * deltaR[iat];
+  }
+  else
+  {
+    for (int iat = 0; iat < deltaR.size(); ++iat)
+      R[iat] = awalker.R[iat] + dt * deltaR[iat];
+  }
+  coordinates_->setAllParticlePos(R);
+  for (int i = 0; i < DistTables.size(); i++)
+    DistTables[i]->evaluate(*this);
+  if (structure_factor_)
+    structure_factor_->updateAllPart(*this);
+  // every move is valid
+  return true;
+}
+
+template<typename T>
+bool ParticleSetT<T>::makeMoveAllParticles(const Walker_t& awalker,
+                                           const ParticlePos& deltaR,
+                                           const std::vector<RealType>& dt)
+{
+  active_ptcl_  = -1;
+  auto& Lattice = simulation_cell_.getLattice();
+  if (Lattice.explicitly_defined)
+  {
+    for (int iat = 0; iat < deltaR.size(); ++iat)
+    {
+      SingleParticlePos displ(dt[iat] * deltaR[iat]);
+      if (Lattice.outOfBound(Lattice.toUnit(displ)))
+        return false;
+      SingleParticlePos newpos(awalker.R[iat] + displ);
+      if (!Lattice.isValid(Lattice.toUnit(newpos)))
+        return false;
+      R[iat] = newpos;
     }
-    coordinates_->setAllParticlePos(R);
-    for (int i = 0; i < DistTables.size(); i++)
-        DistTables[i]->evaluate(*this);
-    if (structure_factor_)
-        structure_factor_->updateAllPart(*this);
-    // every move is valid
-    return true;
+  }
+  else
+  {
+    for (int iat = 0; iat < deltaR.size(); ++iat)
+      R[iat] = awalker.R[iat] + dt[iat] * deltaR[iat];
+  }
+  coordinates_->setAllParticlePos(R);
+  for (int i = 0; i < DistTables.size(); i++)
+    DistTables[i]->evaluate(*this);
+  if (structure_factor_)
+    structure_factor_->updateAllPart(*this);
+  // every move is valid
+  return true;
 }
 
 /** move a walker by dt*deltaR + drift
@@ -642,68 +616,75 @@ ParticleSetT<T>::makeMoveAllParticles(const Walker_t& awalker,
  * @return true, if all the particle moves are legal under the boundary
  * conditions
  */
-template <typename T>
-bool
-ParticleSetT<T>::makeMoveAllParticlesWithDrift(const Walker_t& awalker,
-    const ParticlePos& drift, const ParticlePos& deltaR, RealType dt)
-{
-    active_ptcl_ = -1;
-    auto& Lattice = simulation_cell_.getLattice();
-    if (Lattice.explicitly_defined) {
-        for (int iat = 0; iat < deltaR.size(); ++iat) {
-            SingleParticlePos displ(dt * deltaR[iat] + drift[iat]);
-            if (Lattice.outOfBound(Lattice.toUnit(displ)))
-                return false;
-            SingleParticlePos newpos(awalker.R[iat] + displ);
-            if (!Lattice.isValid(Lattice.toUnit(newpos)))
-                return false;
-            R[iat] = newpos;
-        }
-    }
-    else {
-        for (int iat = 0; iat < deltaR.size(); ++iat)
-            R[iat] = awalker.R[iat] + dt * deltaR[iat] + drift[iat];
-    }
-    coordinates_->setAllParticlePos(R);
-    for (int i = 0; i < DistTables.size(); i++)
-        DistTables[i]->evaluate(*this);
-    if (structure_factor_)
-        structure_factor_->updateAllPart(*this);
-    // every move is valid
-    return true;
-}
-
-template <typename T>
-bool
-ParticleSetT<T>::makeMoveAllParticlesWithDrift(const Walker_t& awalker,
-    const ParticlePos& drift, const ParticlePos& deltaR,
-    const std::vector<RealType>& dt)
-{
-    active_ptcl_ = -1;
-    auto& Lattice = simulation_cell_.getLattice();
-    if (Lattice.explicitly_defined) {
-        for (int iat = 0; iat < deltaR.size(); ++iat) {
-            SingleParticlePos displ(dt[iat] * deltaR[iat] + drift[iat]);
-            if (Lattice.outOfBound(Lattice.toUnit(displ)))
-                return false;
-            SingleParticlePos newpos(awalker.R[iat] + displ);
-            if (!Lattice.isValid(Lattice.toUnit(newpos)))
-                return false;
-            R[iat] = newpos;
-        }
+template<typename T>
+bool ParticleSetT<T>::makeMoveAllParticlesWithDrift(const Walker_t& awalker,
+                                                    const ParticlePos& drift,
+                                                    const ParticlePos& deltaR,
+                                                    RealType dt)
+{
+  active_ptcl_  = -1;
+  auto& Lattice = simulation_cell_.getLattice();
+  if (Lattice.explicitly_defined)
+  {
+    for (int iat = 0; iat < deltaR.size(); ++iat)
+    {
+      SingleParticlePos displ(dt * deltaR[iat] + drift[iat]);
+      if (Lattice.outOfBound(Lattice.toUnit(displ)))
+        return false;
+      SingleParticlePos newpos(awalker.R[iat] + displ);
+      if (!Lattice.isValid(Lattice.toUnit(newpos)))
+        return false;
+      R[iat] = newpos;
     }
-    else {
-        for (int iat = 0; iat < deltaR.size(); ++iat)
-            R[iat] = awalker.R[iat] + dt[iat] * deltaR[iat] + drift[iat];
+  }
+  else
+  {
+    for (int iat = 0; iat < deltaR.size(); ++iat)
+      R[iat] = awalker.R[iat] + dt * deltaR[iat] + drift[iat];
+  }
+  coordinates_->setAllParticlePos(R);
+  for (int i = 0; i < DistTables.size(); i++)
+    DistTables[i]->evaluate(*this);
+  if (structure_factor_)
+    structure_factor_->updateAllPart(*this);
+  // every move is valid
+  return true;
+}
+
+template<typename T>
+bool ParticleSetT<T>::makeMoveAllParticlesWithDrift(const Walker_t& awalker,
+                                                    const ParticlePos& drift,
+                                                    const ParticlePos& deltaR,
+                                                    const std::vector<RealType>& dt)
+{
+  active_ptcl_  = -1;
+  auto& Lattice = simulation_cell_.getLattice();
+  if (Lattice.explicitly_defined)
+  {
+    for (int iat = 0; iat < deltaR.size(); ++iat)
+    {
+      SingleParticlePos displ(dt[iat] * deltaR[iat] + drift[iat]);
+      if (Lattice.outOfBound(Lattice.toUnit(displ)))
+        return false;
+      SingleParticlePos newpos(awalker.R[iat] + displ);
+      if (!Lattice.isValid(Lattice.toUnit(newpos)))
+        return false;
+      R[iat] = newpos;
     }
-    coordinates_->setAllParticlePos(R);
-
-    for (int i = 0; i < DistTables.size(); i++)
-        DistTables[i]->evaluate(*this);
-    if (structure_factor_)
-        structure_factor_->updateAllPart(*this);
-    // every move is valid
-    return true;
+  }
+  else
+  {
+    for (int iat = 0; iat < deltaR.size(); ++iat)
+      R[iat] = awalker.R[iat] + dt[iat] * deltaR[iat] + drift[iat];
+  }
+  coordinates_->setAllParticlePos(R);
+
+  for (int i = 0; i < DistTables.size(); i++)
+    DistTables[i]->evaluate(*this);
+  if (structure_factor_)
+    structure_factor_->updateAllPart(*this);
+  // every move is valid
+  return true;
 }
 
 /** update the particle attribute by the proposed move
@@ -711,303 +692,291 @@ ParticleSetT<T>::makeMoveAllParticlesWithDrift(const Walker_t& awalker,
  * When the active_ptcl_ is equal to iat, overwrite the position and update the
  * content of the distance tables.
  */
-template <typename T>
-void
-ParticleSetT<T>::acceptMove(Index_t iat)
+template<typename T>
+void ParticleSetT<T>::acceptMove(Index_t iat)
 {
 #ifndef NDEBUG
-    if (iat != active_ptcl_)
-        throw std::runtime_error(
-            "Bug detected by acceptMove! Request electron is not active!");
+  if (iat != active_ptcl_)
+    throw std::runtime_error("Bug detected by acceptMove! Request electron is not active!");
 #endif
-    ScopedTimer update_scope(myTimers[PS_accept]);
-    // Update position + distance-table
-    coordinates_->setOneParticlePos(active_pos_, iat);
-    for (int i = 0; i < DistTables.size(); i++)
-        DistTables[i]->update(iat);
+  ScopedTimer update_scope(myTimers[PS_accept]);
+  // Update position + distance-table
+  coordinates_->setOneParticlePos(active_pos_, iat);
+  for (int i = 0; i < DistTables.size(); i++)
+    DistTables[i]->update(iat);
 
-    R[iat] = active_pos_;
-    spins[iat] = active_spin_val_;
-    active_ptcl_ = -1;
+  R[iat]       = active_pos_;
+  spins[iat]   = active_spin_val_;
+  active_ptcl_ = -1;
 }
 
-template <typename T>
-void
-ParticleSetT<T>::acceptMoveForwardMode(Index_t iat)
+template<typename T>
+void ParticleSetT<T>::acceptMoveForwardMode(Index_t iat)
 {
-    assert(iat == active_ptcl_);
-    ScopedTimer update_scope(myTimers[PS_accept]);
-    // Update position + distance-table
-    coordinates_->setOneParticlePos(active_pos_, iat);
-    for (int i = 0; i < DistTables.size(); i++)
-        DistTables[i]->updatePartial(iat, true);
+  assert(iat == active_ptcl_);
+  ScopedTimer update_scope(myTimers[PS_accept]);
+  // Update position + distance-table
+  coordinates_->setOneParticlePos(active_pos_, iat);
+  for (int i = 0; i < DistTables.size(); i++)
+    DistTables[i]->updatePartial(iat, true);
 
-    R[iat] = active_pos_;
-    spins[iat] = active_spin_val_;
-    active_ptcl_ = -1;
+  R[iat]       = active_pos_;
+  spins[iat]   = active_spin_val_;
+  active_ptcl_ = -1;
 }
 
-template <typename T>
-void
-ParticleSetT<T>::accept_rejectMove(
-    Index_t iat, bool accepted, bool forward_mode)
+template<typename T>
+void ParticleSetT<T>::accept_rejectMove(Index_t iat, bool accepted, bool forward_mode)
 {
-    if (forward_mode)
-        if (accepted)
-            acceptMoveForwardMode(iat);
-        else
-            rejectMoveForwardMode(iat);
-    else if (accepted)
-        acceptMove(iat);
+  if (forward_mode)
+    if (accepted)
+      acceptMoveForwardMode(iat);
     else
-        rejectMove(iat);
+      rejectMoveForwardMode(iat);
+  else if (accepted)
+    acceptMove(iat);
+  else
+    rejectMove(iat);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::rejectMove(Index_t iat)
+template<typename T>
+void ParticleSetT<T>::rejectMove(Index_t iat)
 {
 #ifndef NDEBUG
-    if (iat != active_ptcl_)
-        throw std::runtime_error(
-            "Bug detected by rejectMove! Request electron is not active!");
+  if (iat != active_ptcl_)
+    throw std::runtime_error("Bug detected by rejectMove! Request electron is not active!");
 #endif
-    active_ptcl_ = -1;
+  active_ptcl_ = -1;
 }
 
-template <typename T>
-void
-ParticleSetT<T>::rejectMoveForwardMode(Index_t iat)
+template<typename T>
+void ParticleSetT<T>::rejectMoveForwardMode(Index_t iat)
 {
-    assert(iat == active_ptcl_);
-    // Update distance-table
-    for (int i = 0; i < DistTables.size(); i++)
-        DistTables[i]->updatePartial(iat, false);
-    active_ptcl_ = -1;
-}
-
-template <typename T>
-template <CoordsType CT>
-void
-ParticleSetT<T>::mw_accept_rejectMoveT(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode)
-{
-    if constexpr (CT == CoordsType::POS_SPIN)
-        mw_accept_rejectSpinMove(p_list, iat, isAccepted);
-    mw_accept_rejectMove(p_list, iat, isAccepted, forward_mode);
-}
-
-template <typename T>
-void
-ParticleSetT<T>::mw_accept_rejectMove(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode)
-{
-    if (forward_mode) {
-        ParticleSetT& p_leader = p_list.getLeader();
-        ScopedTimer update_scope(p_leader.myTimers[PS_accept]);
-
-        const auto coords_list(extractCoordsRefList(p_list));
-        std::vector<SingleParticlePos> new_positions;
-        new_positions.reserve(p_list.size());
-        for (const ParticleSetT& pset : p_list)
-            new_positions.push_back(pset.active_pos_);
-        p_leader.coordinates_->mw_acceptParticlePos(
-            coords_list, iat, new_positions, isAccepted);
-
-        auto& dts = p_leader.DistTables;
-        for (int i = 0; i < dts.size(); ++i) {
-            const auto dt_list(extractDTRefList(p_list, i));
-            dts[i]->mw_updatePartial(dt_list, iat, isAccepted);
-        }
-
-        for (int iw = 0; iw < p_list.size(); iw++) {
-            assert(iat == p_list[iw].active_ptcl_);
-            if (isAccepted[iw])
-                p_list[iw].R[iat] = p_list[iw].active_pos_;
-            p_list[iw].active_ptcl_ = -1;
-            assert(p_list[iw].R[iat] ==
-                p_list[iw].coordinates_->getAllParticlePos()[iat]);
-        }
-    }
-    else {
-        // loop over single walker acceptMove/rejectMove doesn't work safely.
-        // need to code carefully for both coordinate and distance table updates
-        // disable non-forward mode cases
-        if (!forward_mode)
-            throw std::runtime_error(
-                "BUG calling mw_accept_rejectMove in non-forward mode");
-    }
+  assert(iat == active_ptcl_);
+  // Update distance-table
+  for (int i = 0; i < DistTables.size(); i++)
+    DistTables[i]->updatePartial(iat, false);
+  active_ptcl_ = -1;
 }
 
-template <typename T>
-void
-ParticleSetT<T>::mw_accept_rejectSpinMove(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted)
+template<typename T>
+template<CoordsType CT>
+void ParticleSetT<T>::mw_accept_rejectMoveT(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                            Index_t iat,
+                                            const std::vector<bool>& isAccepted,
+                                            bool forward_mode)
 {
-    for (int iw = 0; iw < p_list.size(); iw++) {
-        assert(iat == p_list[iw].active_ptcl_);
-        if (isAccepted[iw])
-            p_list[iw].spins[iat] = p_list[iw].active_spin_val_;
-    }
+  if constexpr (CT == CoordsType::POS_SPIN)
+    mw_accept_rejectSpinMove(p_list, iat, isAccepted);
+  mw_accept_rejectMove(p_list, iat, isAccepted, forward_mode);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::donePbyP(bool skipSK)
-{
-    ScopedTimer donePbyP_scope(myTimers[PS_donePbyP]);
-    coordinates_->donePbyP();
-    if (!skipSK && structure_factor_)
-        structure_factor_->updateAllPart(*this);
-    for (size_t i = 0; i < DistTables.size(); ++i)
-        DistTables[i]->finalizePbyP(*this);
-    active_ptcl_ = -1;
-}
-
-template <typename T>
-void
-ParticleSetT<T>::mw_donePbyP(
-    const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK)
+template<typename T>
+void ParticleSetT<T>::mw_accept_rejectMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                           Index_t iat,
+                                           const std::vector<bool>& isAccepted,
+                                           bool forward_mode)
 {
+  if (forward_mode)
+  {
     ParticleSetT& p_leader = p_list.getLeader();
-    ScopedTimer donePbyP_scope(p_leader.myTimers[PS_donePbyP]);
+    ScopedTimer update_scope(p_leader.myTimers[PS_accept]);
 
-    for (ParticleSetT& pset : p_list) {
-        pset.coordinates_->donePbyP();
-        pset.active_ptcl_ = -1;
-    }
+    const auto coords_list(extractCoordsRefList(p_list));
+    std::vector<SingleParticlePos> new_positions;
+    new_positions.reserve(p_list.size());
+    for (const ParticleSetT& pset : p_list)
+      new_positions.push_back(pset.active_pos_);
+    p_leader.coordinates_->mw_acceptParticlePos(coords_list, iat, new_positions, isAccepted);
 
-    if (!skipSK && p_leader.structure_factor_) {
-        auto sk_list = extractSKRefList(p_list);
-        StructFactT<T>::mw_updateAllPart(
-            sk_list, p_list, p_leader.mw_structure_factor_data_handle_);
+    auto& dts = p_leader.DistTables;
+    for (int i = 0; i < dts.size(); ++i)
+    {
+      const auto dt_list(extractDTRefList(p_list, i));
+      dts[i]->mw_updatePartial(dt_list, iat, isAccepted);
     }
 
-    auto& dts = p_leader.DistTables;
-    for (int i = 0; i < dts.size(); ++i) {
-        const auto dt_list(extractDTRefList(p_list, i));
-        dts[i]->mw_finalizePbyP(dt_list, p_list);
+    for (int iw = 0; iw < p_list.size(); iw++)
+    {
+      assert(iat == p_list[iw].active_ptcl_);
+      if (isAccepted[iw])
+        p_list[iw].R[iat] = p_list[iw].active_pos_;
+      p_list[iw].active_ptcl_ = -1;
+      assert(p_list[iw].R[iat] == p_list[iw].coordinates_->getAllParticlePos()[iat]);
     }
-}
+  }
+  else
+  {
+    // loop over single walker acceptMove/rejectMove doesn't work safely.
+    // need to code carefully for both coordinate and distance table updates
+    // disable non-forward mode cases
+    if (!forward_mode)
+      throw std::runtime_error("BUG calling mw_accept_rejectMove in non-forward mode");
+  }
+}
+
+template<typename T>
+void ParticleSetT<T>::mw_accept_rejectSpinMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                               Index_t iat,
+                                               const std::vector<bool>& isAccepted)
+{
+  for (int iw = 0; iw < p_list.size(); iw++)
+  {
+    assert(iat == p_list[iw].active_ptcl_);
+    if (isAccepted[iw])
+      p_list[iw].spins[iat] = p_list[iw].active_spin_val_;
+  }
+}
+
+template<typename T>
+void ParticleSetT<T>::donePbyP(bool skipSK)
+{
+  ScopedTimer donePbyP_scope(myTimers[PS_donePbyP]);
+  coordinates_->donePbyP();
+  if (!skipSK && structure_factor_)
+    structure_factor_->updateAllPart(*this);
+  for (size_t i = 0; i < DistTables.size(); ++i)
+    DistTables[i]->finalizePbyP(*this);
+  active_ptcl_ = -1;
+}
+
+template<typename T>
+void ParticleSetT<T>::mw_donePbyP(const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK)
+{
+  ParticleSetT& p_leader = p_list.getLeader();
+  ScopedTimer donePbyP_scope(p_leader.myTimers[PS_donePbyP]);
+
+  for (ParticleSetT& pset : p_list)
+  {
+    pset.coordinates_->donePbyP();
+    pset.active_ptcl_ = -1;
+  }
+
+  if (!skipSK && p_leader.structure_factor_)
+  {
+    auto sk_list = extractSKRefList(p_list);
+    StructFactT<T>::mw_updateAllPart(sk_list, p_list, p_leader.mw_structure_factor_data_handle_);
+  }
+
+  auto& dts = p_leader.DistTables;
+  for (int i = 0; i < dts.size(); ++i)
+  {
+    const auto dt_list(extractDTRefList(p_list, i));
+    dts[i]->mw_finalizePbyP(dt_list, p_list);
+  }
+}
+
+template<typename T>
+void ParticleSetT<T>::makeVirtualMoves(const SingleParticlePos& newpos)
+{
+  active_ptcl_ = -1;
+  active_pos_  = newpos;
+  for (size_t i = 0; i < DistTables.size(); ++i)
+    DistTables[i]->move(*this, newpos, active_ptcl_, false);
+}
+
+template<typename T>
+void ParticleSetT<T>::loadWalker(Walker_t& awalker, bool pbyp)
+{
+  ScopedTimer update_scope(myTimers[PS_loadWalker]);
+  R     = awalker.R;
+  spins = awalker.spins;
+  coordinates_->setAllParticlePos(R);
+#if !defined(SOA_MEMORY_OPTIMIZED)
+  G = awalker.G;
+  L = awalker.L;
+#endif
+  if (pbyp)
+  {
+    // in certain cases, full tables must be ready
+    for (int i = 0; i < DistTables.size(); i++)
+      if (DistTables[i]->getModes() & DTModes::NEED_FULL_TABLE_ANYTIME)
+        DistTables[i]->evaluate(*this);
+  }
 
-template <typename T>
-void
-ParticleSetT<T>::makeVirtualMoves(const SingleParticlePos& newpos)
-{
-    active_ptcl_ = -1;
-    active_pos_ = newpos;
-    for (size_t i = 0; i < DistTables.size(); ++i)
-        DistTables[i]->move(*this, newpos, active_ptcl_, false);
+  active_ptcl_ = -1;
 }
 
-template <typename T>
-void
-ParticleSetT<T>::loadWalker(Walker_t& awalker, bool pbyp)
+template<typename T>
+void ParticleSetT<T>::mw_loadWalker(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                    const RefVector<Walker_t>& walkers,
+                                    const std::vector<bool>& recompute,
+                                    bool pbyp)
 {
-    ScopedTimer update_scope(myTimers[PS_loadWalker]);
-    R = awalker.R;
-    spins = awalker.spins;
-    coordinates_->setAllParticlePos(R);
-#if !defined(SOA_MEMORY_OPTIMIZED)
-    G = awalker.G;
-    L = awalker.L;
-#endif
-    if (pbyp) {
-        // in certain cases, full tables must be ready
-        for (int i = 0; i < DistTables.size(); i++)
-            if (DistTables[i]->getModes() & DTModes::NEED_FULL_TABLE_ANYTIME)
-                DistTables[i]->evaluate(*this);
-    }
+  auto& p_leader = p_list.getLeader();
+  ScopedTimer load_scope(p_leader.myTimers[PS_loadWalker]);
 
-    active_ptcl_ = -1;
-}
-
-template <typename T>
-void
-ParticleSetT<T>::mw_loadWalker(const RefVectorWithLeader<ParticleSetT>& p_list,
-    const RefVector<Walker_t>& walkers, const std::vector<bool>& recompute,
-    bool pbyp)
-{
-    auto& p_leader = p_list.getLeader();
-    ScopedTimer load_scope(p_leader.myTimers[PS_loadWalker]);
-
-    auto loadWalkerConfig = [](ParticleSetT& pset, Walker_t& awalker) {
-        pset.R = awalker.R;
-        pset.spins = awalker.spins;
-        pset.coordinates_->setAllParticlePos(pset.R);
-    };
-    for (int iw = 0; iw < p_list.size(); ++iw)
-        if (recompute[iw])
-            loadWalkerConfig(p_list[iw], walkers[iw]);
-
-    if (pbyp) {
-        auto& dts = p_leader.DistTables;
-        for (int i = 0; i < dts.size(); ++i) {
-            const auto dt_list(extractDTRefList(p_list, i));
-            dts[i]->mw_recompute(dt_list, p_list, recompute);
-        }
+  auto loadWalkerConfig = [](ParticleSetT& pset, Walker_t& awalker) {
+    pset.R     = awalker.R;
+    pset.spins = awalker.spins;
+    pset.coordinates_->setAllParticlePos(pset.R);
+  };
+  for (int iw = 0; iw < p_list.size(); ++iw)
+    if (recompute[iw])
+      loadWalkerConfig(p_list[iw], walkers[iw]);
+
+  if (pbyp)
+  {
+    auto& dts = p_leader.DistTables;
+    for (int i = 0; i < dts.size(); ++i)
+    {
+      const auto dt_list(extractDTRefList(p_list, i));
+      dts[i]->mw_recompute(dt_list, p_list, recompute);
     }
+  }
 }
 
-template <typename T>
-void
-ParticleSetT<T>::saveWalker(Walker_t& awalker)
+template<typename T>
+void ParticleSetT<T>::saveWalker(Walker_t& awalker)
 {
-    awalker.R = R;
-    awalker.spins = spins;
+  awalker.R     = R;
+  awalker.spins = spins;
 #if !defined(SOA_MEMORY_OPTIMIZED)
-    awalker.G = G;
-    awalker.L = L;
+  awalker.G = G;
+  awalker.L = L;
 #endif
 }
 
-template <typename T>
-void
-ParticleSetT<T>::mw_saveWalker(const RefVectorWithLeader<ParticleSetT>& psets,
-    const RefVector<Walker_t>& walkers)
+template<typename T>
+void ParticleSetT<T>::mw_saveWalker(const RefVectorWithLeader<ParticleSetT>& psets, const RefVector<Walker_t>& walkers)
 {
-    for (int iw = 0; iw < psets.size(); ++iw)
-        psets[iw].saveWalker(walkers[iw]);
+  for (int iw = 0; iw < psets.size(); ++iw)
+    psets[iw].saveWalker(walkers[iw]);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::initPropertyList()
+template<typename T>
+void ParticleSetT<T>::initPropertyList()
 {
-    PropertyList.clear();
-    // Need to add the default Properties according to the enumeration
-    PropertyList.add("LogPsi");
-    PropertyList.add("SignPsi");
-    PropertyList.add("UmbrellaWeight");
-    PropertyList.add("R2Accepted");
-    PropertyList.add("R2Proposed");
-    PropertyList.add("DriftScale");
-    PropertyList.add("AltEnergy");
-    PropertyList.add("LocalEnergy");
-    PropertyList.add("LocalPotential");
+  PropertyList.clear();
+  // Need to add the default Properties according to the enumeration
+  PropertyList.add("LogPsi");
+  PropertyList.add("SignPsi");
+  PropertyList.add("UmbrellaWeight");
+  PropertyList.add("R2Accepted");
+  PropertyList.add("R2Proposed");
+  PropertyList.add("DriftScale");
+  PropertyList.add("AltEnergy");
+  PropertyList.add("LocalEnergy");
+  PropertyList.add("LocalPotential");
 
-    // There is no point in checking this, its quickly not consistent as other
-    // objects update property list. if (PropertyList.size() !=
-    // WP::NUMPROPERTIES)
-    // {
-    //   app_error() << "The number of default properties for walkers  is not
-    //   consistent." << std::endl; app_error() << "NUMPROPERTIES " <<
-    //   WP::NUMPROPERTIES << " size of PropertyList " << PropertyList.size() <<
-    //   std::endl; throw std::runtime_error("ParticleSet::initPropertyList");
-    // }
+  // There is no point in checking this, its quickly not consistent as other
+  // objects update property list. if (PropertyList.size() !=
+  // WP::NUMPROPERTIES)
+  // {
+  //   app_error() << "The number of default properties for walkers  is not
+  //   consistent." << std::endl; app_error() << "NUMPROPERTIES " <<
+  //   WP::NUMPROPERTIES << " size of PropertyList " << PropertyList.size() <<
+  //   std::endl; throw std::runtime_error("ParticleSet::initPropertyList");
+  // }
 }
 
-template <typename T>
-int
-ParticleSetT<T>::addPropertyHistory(int leng)
+template<typename T>
+int ParticleSetT<T>::addPropertyHistory(int leng)
 {
-    int newL = PropertyHistory.size();
-    PropertyHistory.push_back(std::vector<FullPrecRealType>(leng, 0.0));
-    PHindex.push_back(0);
-    return newL;
+  int newL = PropertyHistory.size();
+  PropertyHistory.push_back(std::vector<FullPrecRealType>(leng, 0.0));
+  PHindex.push_back(0);
+  return newL;
 }
 
 //      void ParticleSet::resetPropertyHistory( )
@@ -1044,298 +1013,260 @@ ParticleSetT<T>::addPropertyHistory(int leng)
 //       }
 //     }
 
-template <typename T>
-void
-ParticleSetT<T>::createResource(ResourceCollection& collection) const
+template<typename T>
+void ParticleSetT<T>::createResource(ResourceCollection& collection) const
 {
-    coordinates_->createResource(collection);
-    for (int i = 0; i < DistTables.size(); i++)
-        DistTables[i]->createResource(collection);
-    if (structure_factor_)
-        collection.addResource(std::make_unique<SKMultiWalkerMemT<T>>());
+  coordinates_->createResource(collection);
+  for (int i = 0; i < DistTables.size(); i++)
+    DistTables[i]->createResource(collection);
+  if (structure_factor_)
+    collection.addResource(std::make_unique<SKMultiWalkerMemT<T>>());
 }
 
-template <typename T>
-void
-ParticleSetT<T>::acquireResource(ResourceCollection& collection,
-    const RefVectorWithLeader<ParticleSetT>& p_list)
+template<typename T>
+void ParticleSetT<T>::acquireResource(ResourceCollection& collection, const RefVectorWithLeader<ParticleSetT>& p_list)
 {
-    auto& ps_leader = p_list.getLeader();
-    ps_leader.coordinates_->acquireResource(
-        collection, extractCoordsRefList(p_list));
-    for (int i = 0; i < ps_leader.DistTables.size(); i++)
-        ps_leader.DistTables[i]->acquireResource(
-            collection, extractDTRefList(p_list, i));
+  auto& ps_leader = p_list.getLeader();
+  ps_leader.coordinates_->acquireResource(collection, extractCoordsRefList(p_list));
+  for (int i = 0; i < ps_leader.DistTables.size(); i++)
+    ps_leader.DistTables[i]->acquireResource(collection, extractDTRefList(p_list, i));
 
-    if (ps_leader.structure_factor_)
-        p_list.getLeader().mw_structure_factor_data_handle_ =
-            collection.lendResource<SKMultiWalkerMemT<T>>();
+  if (ps_leader.structure_factor_)
+    p_list.getLeader().mw_structure_factor_data_handle_ = collection.lendResource<SKMultiWalkerMemT<T>>();
 }
 
-template <typename T>
-void
-ParticleSetT<T>::releaseResource(ResourceCollection& collection,
-    const RefVectorWithLeader<ParticleSetT>& p_list)
+template<typename T>
+void ParticleSetT<T>::releaseResource(ResourceCollection& collection, const RefVectorWithLeader<ParticleSetT>& p_list)
 {
-    auto& ps_leader = p_list.getLeader();
-    ps_leader.coordinates_->releaseResource(
-        collection, extractCoordsRefList(p_list));
-    for (int i = 0; i < ps_leader.DistTables.size(); i++)
-        ps_leader.DistTables[i]->releaseResource(
-            collection, extractDTRefList(p_list, i));
+  auto& ps_leader = p_list.getLeader();
+  ps_leader.coordinates_->releaseResource(collection, extractCoordsRefList(p_list));
+  for (int i = 0; i < ps_leader.DistTables.size(); i++)
+    ps_leader.DistTables[i]->releaseResource(collection, extractDTRefList(p_list, i));
 
-    if (ps_leader.structure_factor_)
-        collection.takebackResource(
-            p_list.getLeader().mw_structure_factor_data_handle_);
+  if (ps_leader.structure_factor_)
+    collection.takebackResource(p_list.getLeader().mw_structure_factor_data_handle_);
 }
 
-template <typename T>
-RefVectorWithLeader<DistanceTableT<T>>
-ParticleSetT<T>::extractDTRefList(
-    const RefVectorWithLeader<ParticleSetT>& p_list, int id)
+template<typename T>
+RefVectorWithLeader<DistanceTableT<T>> ParticleSetT<T>::extractDTRefList(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    int id)
 {
-    RefVectorWithLeader<DistanceTableT<T>> dt_list(
-        *p_list.getLeader().DistTables[id]);
-    dt_list.reserve(p_list.size());
-    for (ParticleSetT& p : p_list)
-        dt_list.push_back(*p.DistTables[id]);
-    return dt_list;
+  RefVectorWithLeader<DistanceTableT<T>> dt_list(*p_list.getLeader().DistTables[id]);
+  dt_list.reserve(p_list.size());
+  for (ParticleSetT& p : p_list)
+    dt_list.push_back(*p.DistTables[id]);
+  return dt_list;
 }
 
-template <typename T>
-RefVectorWithLeader<DynamicCoordinatesT<T>>
-ParticleSetT<T>::extractCoordsRefList(
+template<typename T>
+RefVectorWithLeader<DynamicCoordinatesT<T>> ParticleSetT<T>::extractCoordsRefList(
     const RefVectorWithLeader<ParticleSetT>& p_list)
 {
-    RefVectorWithLeader<DynamicCoordinatesT<T>> coords_list(
-        *p_list.getLeader().coordinates_);
-    coords_list.reserve(p_list.size());
-    for (ParticleSetT& p : p_list)
-        coords_list.push_back(*p.coordinates_);
-    return coords_list;
+  RefVectorWithLeader<DynamicCoordinatesT<T>> coords_list(*p_list.getLeader().coordinates_);
+  coords_list.reserve(p_list.size());
+  for (ParticleSetT& p : p_list)
+    coords_list.push_back(*p.coordinates_);
+  return coords_list;
 }
 
-template <typename T>
-RefVectorWithLeader<StructFactT<T>>
-ParticleSetT<T>::extractSKRefList(
-    const RefVectorWithLeader<ParticleSetT>& p_list)
+template<typename T>
+RefVectorWithLeader<StructFactT<T>> ParticleSetT<T>::extractSKRefList(const RefVectorWithLeader<ParticleSetT>& p_list)
 {
-    RefVectorWithLeader<StructFactT<T>> sk_list(
-        *p_list.getLeader().structure_factor_);
-    sk_list.reserve(p_list.size());
-    for (ParticleSetT& p : p_list)
-        sk_list.push_back(*p.structure_factor_);
-    return sk_list;
+  RefVectorWithLeader<StructFactT<T>> sk_list(*p_list.getLeader().structure_factor_);
+  sk_list.reserve(p_list.size());
+  for (ParticleSetT& p : p_list)
+    sk_list.push_back(*p.structure_factor_);
+  return sk_list;
 }
 
 /** Creating StructureFactor
  *
  * Currently testing only 1 component for PBCs.
  */
-template <typename T>
-void
-ParticleSetT<T>::createSK()
-{
-    if (structure_factor_)
-        throw std::runtime_error("Report bug! structure_factor_ has already "
-                                 "been created. Unexpected call sequence.");
-
-    auto& Lattice = getLattice();
-    auto& LRBox = getLRBox();
-    if (Lattice.explicitly_defined)
-        convert2Cart(R); // make sure that R is in Cartesian coordinates
-
-    if (Lattice.SuperCellEnum != SUPERCELL_OPEN) {
-        app_log() << "\n  Creating Structure Factor for periodic systems "
-                  << LRBox.LR_kc << std::endl;
-        structure_factor_ = std::make_unique<StructFactT<T>>(
-            LRBox, simulation_cell_.getKLists());
-    }
-
-    // set the mass array
-    int beforemass = my_species_.numAttributes();
-    int massind = my_species_.addAttribute("mass");
-    if (beforemass == massind) {
-        app_log() << "  ParticleSet::createSK setting mass of  " << getName()
-                  << " to 1.0" << std::endl;
-        for (int ig = 0; ig < my_species_.getTotalNum(); ++ig)
-            my_species_(massind, ig) = 1.0;
-    }
-    for (int iat = 0; iat < GroupID.size(); iat++)
-        Mass[iat] = my_species_(massind, GroupID[iat]);
+template<typename T>
+void ParticleSetT<T>::createSK()
+{
+  if (structure_factor_)
+    throw std::runtime_error("Report bug! structure_factor_ has already "
+                             "been created. Unexpected call sequence.");
+
+  auto& Lattice = getLattice();
+  auto& LRBox   = getLRBox();
+  if (Lattice.explicitly_defined)
+    convert2Cart(R); // make sure that R is in Cartesian coordinates
+
+  if (Lattice.SuperCellEnum != SUPERCELL_OPEN)
+  {
+    app_log() << "\n  Creating Structure Factor for periodic systems " << LRBox.LR_kc << std::endl;
+    structure_factor_ = std::make_unique<StructFactT<T>>(LRBox, simulation_cell_.getKLists());
+  }
+
+  // set the mass array
+  int beforemass = my_species_.numAttributes();
+  int massind    = my_species_.addAttribute("mass");
+  if (beforemass == massind)
+  {
+    app_log() << "  ParticleSet::createSK setting mass of  " << getName() << " to 1.0" << std::endl;
+    for (int ig = 0; ig < my_species_.getTotalNum(); ++ig)
+      my_species_(massind, ig) = 1.0;
+  }
+  for (int iat = 0; iat < GroupID.size(); iat++)
+    Mass[iat] = my_species_(massind, GroupID[iat]);
 
-    coordinates_->setAllParticlePos(R);
-}
-
-template <typename T>
-void
-ParticleSetT<T>::turnOnPerParticleSK()
-{
-    if (structure_factor_)
-        structure_factor_->turnOnStorePerParticle(*this);
-    else
-        throw std::runtime_error(
-            "ParticleSet::turnOnPerParticleSK trying to turn on per particle "
-            "storage in "
-            "structure_factor_ but structure_factor_ has not been created.");
+  coordinates_->setAllParticlePos(R);
+}
+
+template<typename T>
+void ParticleSetT<T>::turnOnPerParticleSK()
+{
+  if (structure_factor_)
+    structure_factor_->turnOnStorePerParticle(*this);
+  else
+    throw std::runtime_error("ParticleSet::turnOnPerParticleSK trying to turn on per particle "
+                             "storage in "
+                             "structure_factor_ but structure_factor_ has not been created.");
+}
+
+template<typename T>
+bool ParticleSetT<T>::getPerParticleSKState() const
+{
+  bool isPerParticleOn = false;
+  if (structure_factor_)
+    isPerParticleOn = structure_factor_->isStorePerParticle();
+  return isPerParticleOn;
+}
+
+template<typename T>
+void ParticleSetT<T>::convert(const ParticlePos& pin, ParticlePos& pout)
+{
+  if (pin.getUnit() == pout.getUnit())
+  {
+    pout = pin;
+    return;
+  }
+  if (pin.getUnit() == PosUnit::Lattice)
+  // convert to CartesianUnit
+  {
+    ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(pin, getLattice().R, pout, 0, pin.size());
+  }
+  else
+  // convert to LatticeUnit
+  {
+    ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(pin, getLattice().G, pout, 0, pin.size());
+  }
+}
+
+template<typename T>
+void ParticleSetT<T>::convert2Unit(const ParticlePos& pin, ParticlePos& pout)
+{
+  pout.setUnit(PosUnit::Lattice);
+  if (pin.getUnit() == PosUnit::Lattice)
+    pout = pin;
+  else
+    ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(pin, getLattice().G, pout, 0, pin.size());
+}
+
+template<typename T>
+void ParticleSetT<T>::convert2Cart(const ParticlePos& pin, ParticlePos& pout)
+{
+  pout.setUnit(PosUnit::Cartesian);
+  if (pin.getUnit() == PosUnit::Cartesian)
+    pout = pin;
+  else
+    ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(pin, getLattice().R, pout, 0, pin.size());
+}
+
+template<typename T>
+void ParticleSetT<T>::convert2Unit(ParticlePos& pinout)
+{
+  if (pinout.getUnit() == PosUnit::Lattice)
+    return;
+  else
+  {
+    pinout.setUnit(PosUnit::Lattice);
+    ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(pinout, getLattice().G, 0, pinout.size());
+  }
+}
+
+template<typename T>
+void ParticleSetT<T>::convert2Cart(ParticlePos& pinout)
+{
+  if (pinout.getUnit() == PosUnit::Cartesian)
+    return;
+  else
+  {
+    pinout.setUnit(PosUnit::Cartesian);
+    ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(pinout, getLattice().R, 0, pinout.size());
+  }
 }
 
-template <typename T>
-bool
-ParticleSetT<T>::getPerParticleSKState() const
+template<typename T>
+void ParticleSetT<T>::applyBC(const ParticlePos& pin, ParticlePos& pout)
 {
-    bool isPerParticleOn = false;
-    if (structure_factor_)
-        isPerParticleOn = structure_factor_->isStorePerParticle();
-    return isPerParticleOn;
+  applyBC(pin, pout, 0, pin.size());
 }
 
-template <typename T>
-void
-ParticleSetT<T>::convert(const ParticlePos& pin, ParticlePos& pout)
+template<typename T>
+void ParticleSetT<T>::applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last)
 {
-    if (pin.getUnit() == pout.getUnit()) {
-        pout = pin;
-        return;
-    }
-    if (pin.getUnit() == PosUnit::Lattice)
-    // convert to CartesianUnit
-    {
-        ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(
-            pin, getLattice().R, pout, 0, pin.size());
-    }
-    else
-    // convert to getLattice()Unit
-    {
-        ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(
-            pin, getLattice().G, pout, 0, pin.size());
-    }
-}
-
-template <typename T>
-void
-ParticleSetT<T>::convert2Unit(const ParticlePos& pin, ParticlePos& pout)
-{
-    pout.setUnit(PosUnit::Lattice);
-    if (pin.getUnit() == PosUnit::Lattice)
-        pout = pin;
+  if (pin.getUnit() == PosUnit::Cartesian)
+  {
+    if (pout.getUnit() == PosUnit::Cartesian)
+      ApplyBConds<ParticlePos, Tensor_t, DIM>::Cart2Cart(pin, getLattice().G, getLattice().R, pout, first, last);
+    else if (pout.getUnit() == PosUnit::Lattice)
+      ApplyBConds<ParticlePos, Tensor_t, DIM>::Cart2Unit(pin, getLattice().G, pout, first, last);
     else
-        ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(
-            pin, getLattice().G, pout, 0, pin.size());
-}
-
-template <typename T>
-void
-ParticleSetT<T>::convert2Cart(const ParticlePos& pin, ParticlePos& pout)
-{
-    pout.setUnit(PosUnit::Cartesian);
-    if (pin.getUnit() == PosUnit::Cartesian)
-        pout = pin;
+      throw std::runtime_error("Unknown unit conversion");
+  }
+  else if (pin.getUnit() == PosUnit::Lattice)
+  {
+    if (pout.getUnit() == PosUnit::Cartesian)
+      ApplyBConds<ParticlePos, Tensor_t, DIM>::Unit2Cart(pin, getLattice().R, pout, first, last);
+    else if (pout.getUnit() == PosUnit::Lattice)
+      ApplyBConds<ParticlePos, Tensor_t, DIM>::Unit2Unit(pin, pout, first, last);
     else
-        ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(
-            pin, getLattice().R, pout, 0, pin.size());
-}
-
-template <typename T>
-void
-ParticleSetT<T>::convert2Unit(ParticlePos& pinout)
-{
-    if (pinout.getUnit() == PosUnit::Lattice)
-        return;
-    else {
-        pinout.setUnit(PosUnit::Lattice);
-        ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(
-            pinout, getLattice().G, 0, pinout.size());
-    }
+      throw std::runtime_error("Unknown unit conversion");
+  }
+  else
+    throw std::runtime_error("Unknown unit conversion");
 }
 
-template <typename T>
-void
-ParticleSetT<T>::convert2Cart(ParticlePos& pinout)
+template<typename T>
+void ParticleSetT<T>::applyBC(ParticlePos& pos)
 {
-    if (pinout.getUnit() == PosUnit::Cartesian)
-        return;
-    else {
-        pinout.setUnit(PosUnit::Cartesian);
-        ConvertPosUnit<ParticlePos, Tensor_t, DIM>::apply(
-            pinout, getLattice().R, 0, pinout.size());
-    }
-}
-
-template <typename T>
-void
-ParticleSetT<T>::applyBC(const ParticlePos& pin, ParticlePos& pout)
-{
-    applyBC(pin, pout, 0, pin.size());
-}
-
-template <typename T>
-void
-ParticleSetT<T>::applyBC(
-    const ParticlePos& pin, ParticlePos& pout, int first, int last)
-{
-    if (pin.getUnit() == PosUnit::Cartesian) {
-        if (pout.getUnit() == PosUnit::Cartesian)
-            ApplyBConds<ParticlePos, Tensor_t, DIM>::Cart2Cart(
-                pin, getLattice().G, getLattice().R, pout, first, last);
-        else if (pout.getUnit() == PosUnit::Lattice)
-            ApplyBConds<ParticlePos, Tensor_t, DIM>::Cart2Unit(
-                pin, getLattice().G, pout, first, last);
-        else
-            throw std::runtime_error("Unknown unit conversion");
-    }
-    else if (pin.getUnit() == PosUnit::Lattice) {
-        if (pout.getUnit() == PosUnit::Cartesian)
-            ApplyBConds<ParticlePos, Tensor_t, DIM>::Unit2Cart(
-                pin, getLattice().R, pout, first, last);
-        else if (pout.getUnit() == PosUnit::Lattice)
-            ApplyBConds<ParticlePos, Tensor_t, DIM>::Unit2Unit(
-                pin, pout, first, last);
-        else
-            throw std::runtime_error("Unknown unit conversion");
-    }
-    else
-        throw std::runtime_error("Unknown unit conversion");
-}
-
-template <typename T>
-void
-ParticleSetT<T>::applyBC(ParticlePos& pos)
-{
-    if (pos.getUnit() == PosUnit::Lattice) {
-        ApplyBConds<ParticlePos, Tensor_t, DIM>::Unit2Unit(pos, 0, TotalNum);
-    }
-    else {
-        ApplyBConds<ParticlePos, Tensor_t, DIM>::Cart2Cart(
-            pos, getLattice().G, getLattice().R, 0, TotalNum);
-    }
+  if (pos.getUnit() == PosUnit::Lattice)
+  {
+    ApplyBConds<ParticlePos, Tensor_t, DIM>::Unit2Unit(pos, 0, TotalNum);
+  }
+  else
+  {
+    ApplyBConds<ParticlePos, Tensor_t, DIM>::Cart2Cart(pos, getLattice().G, getLattice().R, 0, TotalNum);
+  }
 }
 
-template <typename T>
-void
-ParticleSetT<T>::applyMinimumImage(ParticlePos& pinout)
+template<typename T>
+void ParticleSetT<T>::applyMinimumImage(ParticlePos& pinout)
 {
-    if (getLattice().SuperCellEnum == SUPERCELL_OPEN)
-        return;
-    for (int i = 0; i < pinout.size(); ++i)
-        getLattice().applyMinimumImage(pinout[i]);
+  if (getLattice().SuperCellEnum == SUPERCELL_OPEN)
+    return;
+  for (int i = 0; i < pinout.size(); ++i)
+    getLattice().applyMinimumImage(pinout[i]);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::convert2UnitInBox(const ParticlePos& pin, ParticlePos& pout)
+template<typename T>
+void ParticleSetT<T>::convert2UnitInBox(const ParticlePos& pin, ParticlePos& pout)
 {
-    pout.setUnit(PosUnit::Lattice);
-    convert2Unit(pin, pout); // convert to crystalline unit
-    put2box(pout);
+  pout.setUnit(PosUnit::Lattice);
+  convert2Unit(pin, pout); // convert to crystalline unit
+  put2box(pout);
 }
 
-template <typename T>
-void
-ParticleSetT<T>::convert2CartInBox(const ParticlePos& pin, ParticlePos& pout)
+template<typename T>
+void ParticleSetT<T>::convert2CartInBox(const ParticlePos& pin, ParticlePos& pout)
 {
-    convert2UnitInBox(pin, pout); // convert to crystalline unit
-    convert2Cart(pout);
+  convert2UnitInBox(pin, pout); // convert to crystalline unit
+  convert2Cart(pout);
 }
 
 // explicit instantiations
@@ -1347,71 +1278,77 @@ template class ParticleSetT<std::complex<double>>;
 template class ParticleSetT<std::complex<float>>;
 #endif
 
-template void
-ParticleSetT<double>::mw_makeMove<CoordsType::POS>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const MCCoordsT<double, CoordsType::POS>& displs);
-template void
-ParticleSetT<double>::mw_makeMove<CoordsType::POS_SPIN>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+template void ParticleSetT<double>::mw_makeMove<CoordsType::POS>(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                                                 Index_t iat,
+                                                                 const MCCoordsT<double, CoordsType::POS>& displs);
+template void ParticleSetT<double>::mw_makeMove<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
     const MCCoordsT<double, CoordsType::POS_SPIN>& displs);
-template void
-ParticleSetT<double>::mw_accept_rejectMoveT<CoordsType::POS>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode);
-template void
-ParticleSetT<double>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode);
-
-template void
-ParticleSetT<float>::mw_makeMove<CoordsType::POS>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const MCCoordsT<float, CoordsType::POS>& displs);
-template void
-ParticleSetT<float>::mw_makeMove<CoordsType::POS_SPIN>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+template void ParticleSetT<double>::mw_accept_rejectMoveT<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
+    const std::vector<bool>& isAccepted,
+    bool forward_mode);
+template void ParticleSetT<double>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
+    const std::vector<bool>& isAccepted,
+    bool forward_mode);
+
+template void ParticleSetT<float>::mw_makeMove<CoordsType::POS>(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                                                Index_t iat,
+                                                                const MCCoordsT<float, CoordsType::POS>& displs);
+template void ParticleSetT<float>::mw_makeMove<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
     const MCCoordsT<float, CoordsType::POS_SPIN>& displs);
-template void
-ParticleSetT<float>::mw_accept_rejectMoveT<CoordsType::POS>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode);
-template void
-ParticleSetT<float>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode);
-
-template void
-ParticleSetT<std::complex<double>>::mw_makeMove<CoordsType::POS>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+template void ParticleSetT<float>::mw_accept_rejectMoveT<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
+    const std::vector<bool>& isAccepted,
+    bool forward_mode);
+template void ParticleSetT<float>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
+    const std::vector<bool>& isAccepted,
+    bool forward_mode);
+
+template void ParticleSetT<std::complex<double>>::mw_makeMove<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
     const MCCoordsT<std::complex<double>, CoordsType::POS>& displs);
-template void
-ParticleSetT<std::complex<double>>::mw_makeMove<CoordsType::POS_SPIN>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+template void ParticleSetT<std::complex<double>>::mw_makeMove<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
     const MCCoordsT<std::complex<double>, CoordsType::POS_SPIN>& displs);
-template void
-ParticleSetT<std::complex<double>>::mw_accept_rejectMoveT<CoordsType::POS>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode);
-template void
-ParticleSetT<std::complex<double>>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode);
-
-template void
-ParticleSetT<std::complex<float>>::mw_makeMove<CoordsType::POS>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+template void ParticleSetT<std::complex<double>>::mw_accept_rejectMoveT<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
+    const std::vector<bool>& isAccepted,
+    bool forward_mode);
+template void ParticleSetT<std::complex<double>>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
+    const std::vector<bool>& isAccepted,
+    bool forward_mode);
+
+template void ParticleSetT<std::complex<float>>::mw_makeMove<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
     const MCCoordsT<std::complex<float>, CoordsType::POS>& displs);
-template void
-ParticleSetT<std::complex<float>>::mw_makeMove<CoordsType::POS_SPIN>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
+template void ParticleSetT<std::complex<float>>::mw_makeMove<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
     const MCCoordsT<std::complex<float>, CoordsType::POS_SPIN>& displs);
-template void
-ParticleSetT<std::complex<float>>::mw_accept_rejectMoveT<CoordsType::POS>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode);
-template void
-ParticleSetT<std::complex<float>>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
-    const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-    const std::vector<bool>& isAccepted, bool forward_mode);
+template void ParticleSetT<std::complex<float>>::mw_accept_rejectMoveT<CoordsType::POS>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
+    const std::vector<bool>& isAccepted,
+    bool forward_mode);
+template void ParticleSetT<std::complex<float>>::mw_accept_rejectMoveT<CoordsType::POS_SPIN>(
+    const RefVectorWithLeader<ParticleSetT>& p_list,
+    Index_t iat,
+    const std::vector<bool>& isAccepted,
+    bool forward_mode);
 } // namespace qmcplusplus
diff --git a/src/Particle/ParticleSetT.h b/src/Particle/ParticleSetT.h
index c5b984cbbe9..53759ae56f9 100644
--- a/src/Particle/ParticleSetT.h
+++ b/src/Particle/ParticleSetT.h
@@ -6,16 +6,13 @@
 //
 // File developed by: D. Das, University of Illinois at Urbana-Champaign
 //                    Bryan Clark, bclark@Princeton.edu, Princeton University
-//                    Ken Esler, kpesler@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com,
-//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
-//                    Ridge National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_PARTICLESETT_H
@@ -41,16 +38,16 @@
 namespace qmcplusplus
 {
 /// forward declarations
-template <typename T>
+template<typename T>
 class DistanceTableT;
-template <typename T>
+template<typename T>
 class DistanceTableAAT;
-template <typename T>
+template<typename T>
 class DistanceTableABT;
 class ResourceCollection;
-template <typename T>
+template<typename T>
 class StructFactT;
-template <typename T>
+template<typename T>
 struct SKMultiWalkerMemT;
 
 /** Specialized paritlce class for atomistic simulations
@@ -61,88 +58,85 @@ struct SKMultiWalkerMemT;
  * takes care of spatial decompositions for efficient evaluations for the
  * interactions with a finite cutoff.
  */
-template <typename T>
+template<typename T>
 class ParticleSetT : public OhmmsElementBase
 {
 public:
-    using RealType = typename ParticleSetTraits<T>::RealType;
-    using ValueType        = typename ParticleSetTraits<T>::ValueType;
-    using GradType         = typename ParticleSetTraits<T>::GradType;
-    using FullPrecRealType = typename ParticleSetTraits<T>::FullPrecRealType;
-    using ComplexType = typename ParticleSetTraits<T>::ComplexType;
-    using PosType = typename ParticleSetTraits<T>::PosType;
-    using TensorType       = typename ParticleSetTraits<T>::TensorType;
-
-    using PropertySetType = typename ParticleSetTraits<T>::PropertySetType;
-
-    using Index_t = typename LatticeParticleTraits<T>::Index_t;
-    using Scalar_t = typename LatticeParticleTraits<T>::Scalar_t;
-    using Tensor_t = typename LatticeParticleTraits<T>::Tensor_t;
-    using ParticleLayout = typename LatticeParticleTraits<T>::ParticleLayout;
-    using SingleParticlePos =
-        typename LatticeParticleTraits<T>::SingleParticlePos;
-    using ParticleIndex = typename LatticeParticleTraits<T>::ParticleIndex;
-    using ParticlePos = typename LatticeParticleTraits<T>::ParticlePos;
-    using ParticleScalar = typename LatticeParticleTraits<T>::ParticleScalar;
-    using ParticleGradient =
-        typename LatticeParticleTraits<T>::ParticleGradient;
-    using ParticleLaplacian =
-        typename LatticeParticleTraits<T>::ParticleLaplacian;
-    using ParticleTensor = typename LatticeParticleTraits<T>::ParticleTensor;
-
-    /// walker type
-    using Walker_t = Walker<ParticleSetTraits<T>, LatticeParticleTraits<T>>;
-    /// container type to store the property
-    using PropertyContainer_t = typename Walker_t::PropertyContainer_t;
-    /// buffer type for a serialized buffer
-    using Buffer_t = PooledData<RealType>;
-
-    using SingleParticleValue = typename LatticeParticleTraits<T>::SingleParticleValue;
-
-    enum quantum_domains
-    {
-        no_quantum_domain = 0,
-        classical,
-        quantum
-    };
-
-    static constexpr auto DIM = ParticleSetTraits<T>::DIM;
-
-    /// quantum_domain of the particles, default = classical
-    quantum_domains quantum_domain;
-
-    //@{ public data members
-    /// Species ID
-    ParticleIndex GroupID;
-    /// Position
-    ParticlePos R;
-    /// internal spin variables for dynamical spin calculations
-    ParticleScalar spins;
-    /// gradients of the particles
-    ParticleGradient G;
-    /// laplacians of the particles
-    ParticleLaplacian L;
-    /// mass of each particle
-    ParticleScalar Mass;
-    /// charge of each particle
-    ParticleScalar Z;
-
-    /// the index of the active bead for particle-by-particle moves
-    Index_t activeBead;
-    /// the direction reptile traveling
-    Index_t direction;
-
-    /// Particle density in G-space for MPC interaction
-    std::vector<TinyVector<int, DIM>> DensityReducedGvecs;
-    std::vector<ComplexType> Density_G;
-    Array<RealType, DIM> Density_r;
-
-    /// DFT potential
-    std::vector<TinyVector<int, DIM>> VHXCReducedGvecs;
-    std::vector<ComplexType> VHXC_G[2];
-    Array<RealType, DIM> VHXC_r[2];
-
-    /** name-value map of Walker Properties
+  using RealType         = typename ParticleSetTraits<T>::RealType;
+  using ValueType        = typename ParticleSetTraits<T>::ValueType;
+  using GradType         = typename ParticleSetTraits<T>::GradType;
+  using FullPrecRealType = typename ParticleSetTraits<T>::FullPrecRealType;
+  using ComplexType      = typename ParticleSetTraits<T>::ComplexType;
+  using PosType          = typename ParticleSetTraits<T>::PosType;
+  using TensorType       = typename ParticleSetTraits<T>::TensorType;
+
+  using PropertySetType = typename ParticleSetTraits<T>::PropertySetType;
+
+  using Index_t           = typename LatticeParticleTraits<T>::Index_t;
+  using Scalar_t          = typename LatticeParticleTraits<T>::Scalar_t;
+  using Tensor_t          = typename LatticeParticleTraits<T>::Tensor_t;
+  using ParticleLayout    = typename LatticeParticleTraits<T>::ParticleLayout;
+  using SingleParticlePos = typename LatticeParticleTraits<T>::SingleParticlePos;
+  using ParticleIndex     = typename LatticeParticleTraits<T>::ParticleIndex;
+  using ParticlePos       = typename LatticeParticleTraits<T>::ParticlePos;
+  using ParticleScalar    = typename LatticeParticleTraits<T>::ParticleScalar;
+  using ParticleGradient  = typename LatticeParticleTraits<T>::ParticleGradient;
+  using ParticleLaplacian = typename LatticeParticleTraits<T>::ParticleLaplacian;
+  using ParticleTensor    = typename LatticeParticleTraits<T>::ParticleTensor;
+
+  /// walker type
+  using Walker_t = Walker<ParticleSetTraits<T>, LatticeParticleTraits<T>>;
+  /// container type to store the property
+  using PropertyContainer_t = typename Walker_t::PropertyContainer_t;
+  /// buffer type for a serialized buffer
+  using Buffer_t = PooledData<RealType>;
+
+  using SingleParticleValue = typename LatticeParticleTraits<T>::SingleParticleValue;
+
+  enum quantum_domains
+  {
+    no_quantum_domain = 0,
+    classical,
+    quantum
+  };
+
+  static constexpr auto DIM = ParticleSetTraits<T>::DIM;
+
+  /// quantum_domain of the particles, default = classical
+  quantum_domains quantum_domain;
+
+  //@{ public data members
+  /// Species ID
+  ParticleIndex GroupID;
+  /// Position
+  ParticlePos R;
+  /// internal spin variables for dynamical spin calculations
+  ParticleScalar spins;
+  /// gradients of the particles
+  ParticleGradient G;
+  /// laplacians of the particles
+  ParticleLaplacian L;
+  /// mass of each particle
+  ParticleScalar Mass;
+  /// charge of each particle
+  ParticleScalar Z;
+
+  /// the index of the active bead for particle-by-particle moves
+  Index_t activeBead;
+  /// the direction reptile traveling
+  Index_t direction;
+
+  /// Particle density in G-space for MPC interaction
+  std::vector<TinyVector<int, DIM>> DensityReducedGvecs;
+  std::vector<ComplexType> Density_G;
+  Array<RealType, DIM> Density_r;
+
+  /// DFT potential
+  std::vector<TinyVector<int, DIM>> VHXCReducedGvecs;
+  std::vector<ComplexType> VHXC_G[2];
+  Array<RealType, DIM> VHXC_r[2];
+
+  /** name-value map of Walker Properties
      *
      * PropertyMap is used to keep the name-value mapping of
      * Walker_t::Properties.  PropertyList::Values are not
@@ -258,128 +252,67 @@ class ParticleSetT : public OhmmsElementBase
   {
     assert(structure_factor_);
     return *structure_factor_;
-    };
+  };
 
-    /** Turn on per particle storage in Structure Factor
+  /** Turn on per particle storage in Structure Factor
      */
-    void
-    turnOnPerParticleSK();
+  void turnOnPerParticleSK();
 
-    /** Get state (on/off) of per particle storage in Structure Factor
+  /** Get state (on/off) of per particle storage in Structure Factor
      */
-    bool
-    getPerParticleSKState() const;
+  bool getPerParticleSKState() const;
 
-    /// retrun the SpeciesSet of this particle set
-    inline SpeciesSet&
-    getSpeciesSet()
-    {
-        return my_species_;
-    }
-    /// retrun the const SpeciesSet of this particle set
-    inline const SpeciesSet&
-    getSpeciesSet() const
-    {
-        return my_species_;
-    }
+  /// return the SpeciesSet of this particle set
+  inline SpeciesSet& getSpeciesSet() { return my_species_; }
+  /// return the const SpeciesSet of this particle set
+  inline const SpeciesSet& getSpeciesSet() const { return my_species_; }
 
-    /// return parent's name
-    inline const std::string&
-    parentName() const
-    {
-        return ParentName;
-    }
-    inline void
-    setName(const std::string& aname)
+  /// return parent's name
+  inline const std::string& parentName() const { return ParentName; }
+  inline void setName(const std::string& aname)
+  {
+    myName = aname;
+    if (ParentName == "0")
     {
-        myName = aname;
-        if (ParentName == "0") {
-            ParentName = aname;
-        }
+      ParentName = aname;
     }
+  }
 
-    inline const DynamicCoordinatesT<T>&
-    getCoordinates() const
-    {
-        return *coordinates_;
-    }
+  inline const DynamicCoordinatesT<T>& getCoordinates() const { return *coordinates_; }
 
-    void
-    resetGroups();
+  void resetGroups();
 
-    const auto&
-    getSimulationCell() const
-    {
-        return simulation_cell_;
-    }
-    const auto&
-    getLattice() const
-    {
-        return simulation_cell_.getLattice();
-    }
-    auto&
-    getPrimitiveLattice() const
-    {
-        return const_cast<ParticleLayout&>(simulation_cell_.getPrimLattice());
-    }
-    const auto&
-    getLRBox() const
-    {
-        return simulation_cell_.getLRBox();
-    }
+  const auto& getSimulationCell() const { return simulation_cell_; }
+  const auto& getLattice() const { return simulation_cell_.getLattice(); }
+  auto& getPrimitiveLattice() const { return const_cast<ParticleLayout&>(simulation_cell_.getPrimLattice()); }
+  const auto& getLRBox() const { return simulation_cell_.getLRBox(); }
 
-    inline bool
-    isSameMass() const
-    {
-        return same_mass_;
-    }
-    inline bool
-    isSpinor() const
-    {
-        return is_spinor_;
-    }
-    inline void
-    setSpinor(bool is_spinor)
-    {
-        is_spinor_ = is_spinor;
-    }
+  inline bool isSameMass() const { return same_mass_; }
+  inline bool isSpinor() const { return is_spinor_; }
+  inline void setSpinor(bool is_spinor) { is_spinor_ = is_spinor; }
 
-    /// return active particle id
-    inline Index_t
-    getActivePtcl() const
-    {
-        return active_ptcl_;
-    }
-    inline const PosType&
-    getActivePos() const
-    {
-        return active_pos_;
-    }
-    inline Scalar_t
-    getActiveSpinVal() const
-    {
-        return active_spin_val_;
-    }
+  /// return active particle id
+  inline Index_t getActivePtcl() const { return active_ptcl_; }
+  inline const PosType& getActivePos() const { return active_pos_; }
+  inline Scalar_t getActiveSpinVal() const { return active_spin_val_; }
 
-    /// return the active position if the particle is active or the return
-    /// current position if not
-    inline const PosType&
-    activeR(int iat) const
-    {
-        // When active_ptcl_ == iat, a move has been proposed.
-        return (active_ptcl_ == iat) ? active_pos_ : R[iat];
-    }
+  /// return the active position if the particle is active or return the
+  /// current position if not
+  inline const PosType& activeR(int iat) const
+  {
+    // When active_ptcl_ == iat, a move has been proposed.
+    return (active_ptcl_ == iat) ? active_pos_ : R[iat];
+  }
 
-    /// return the active spin value if the particle is active or return the
-    /// current spin value if not
-    inline const Scalar_t&
-    activeSpin(int iat) const
-    {
-        // When active_ptcl_ == iat, a move has been proposed.
-        return (active_ptcl_ == iat) ? active_spin_val_ : spins[iat];
-    }
+  /// return the active spin value if the particle is active or return the
+  /// current spin value if not
+  inline const Scalar_t& activeSpin(int iat) const
+  {
+    // When active_ptcl_ == iat, a move has been proposed.
+    return (active_ptcl_ == iat) ? active_spin_val_ : spins[iat];
+  }
 
-    /** move the iat-th particle to active_pos_
+  /** move the iat-th particle to active_pos_
      * @param iat the index of the particle to be moved
      * @param displ the displacement of the iat-th particle position
      * @param maybe_accept if false, the caller guarantees that the proposed
@@ -390,30 +323,26 @@ class ParticleSetT : public OhmmsElementBase
      * DistanceTable::Temp. If maybe_accept = false, certain operations for
      * accepting moves will be skipped for optimal performance.
      */
-    void
-    makeMove(
-        Index_t iat, const SingleParticlePos& displ, bool maybe_accept = true);
-    /// makeMove, but now includes an update to the spin variable
-    void
-    makeMoveWithSpin(
-        Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl);
-
-    /// batched version of makeMove
-    template <CoordsType CT>
-    static void
-    mw_makeMove(const RefVectorWithLeader<ParticleSetT<T>>& p_list, Index_t iat,
-        const MCCoordsT<T, CT>& displs);
-
-    static void
-    mw_makeMove(const RefVectorWithLeader<ParticleSetT>& p_list, Index_t iat,
-        const std::vector<SingleParticlePos>& displs);
-
-    /// batched version makeMove for spin variable only
-    static void
-    mw_makeSpinMove(const RefVectorWithLeader<ParticleSetT>& p_list,
-        Index_t iat, const std::vector<Scalar_t>& sdispls);
-
-    /** move the iat-th particle to active_pos_
+  void makeMove(Index_t iat, const SingleParticlePos& displ, bool maybe_accept = true);
+  /// makeMove, but now includes an update to the spin variable
+  void makeMoveWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl);
+
+  /// batched version of makeMove
+  template<CoordsType CT>
+  static void mw_makeMove(const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                          Index_t iat,
+                          const MCCoordsT<T, CT>& displs);
+
+  static void mw_makeMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+                          Index_t iat,
+                          const std::vector<SingleParticlePos>& displs);
+
+  /// batched version of makeMove for spin variable only
+  static void mw_makeSpinMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+                              Index_t iat,
+                              const std::vector<Scalar_t>& sdispls);
+
+  /** move the iat-th particle to active_pos_
      * @param iat the index of the particle to be moved
      * @param displ random displacement of the iat-th particle
      * @return true, if the move is valid
@@ -429,23 +358,19 @@ class ParticleSetT : public OhmmsElementBase
      * open BC. Note: active_pos_ and distances tables are always evaluated no
      * matter the move is valid or not.
      */
-    bool
-    makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ);
-    /// makeMoveAndCheck, but now includes an update to the spin variable
-    bool
-    makeMoveAndCheckWithSpin(
-        Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl);
+  bool makeMoveAndCheck(Index_t iat, const SingleParticlePos& displ);
+  /// makeMoveAndCheck, but now includes an update to the spin variable
+  bool makeMoveAndCheckWithSpin(Index_t iat, const SingleParticlePos& displ, const Scalar_t& sdispl);
 
-    /** Handles virtual moves for all the particles to a single newpos.
+  /** Handles virtual moves for all the particles to a single newpos.
      *
      * The state active_ptcl_ remains -1 and rejectMove is not needed.
      * acceptMove can not be used.
      * See QMCHamiltonians::MomentumEstimator as an example
      */
-    void
-    makeVirtualMoves(const SingleParticlePos& newpos);
+  void makeVirtualMoves(const SingleParticlePos& newpos);
 
-    /** move all the particles of a walker
+  /** move all the particles of a walker
      * @param awalker the walker to operate
      * @param deltaR proposed displacement
      * @param dt  factor of deltaR
@@ -454,28 +379,25 @@ class ParticleSetT : public OhmmsElementBase
      * If big displacements or illegal positions are detected, return false.
      * If all good, R = awalker.R + dt* deltaR
      */
-    bool
-    makeMoveAllParticles(
-        const Walker_t& awalker, const ParticlePos& deltaR, RealType dt);
+  bool makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, RealType dt);
 
-    bool
-    makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR,
-        const std::vector<RealType>& dt);
+  bool makeMoveAllParticles(const Walker_t& awalker, const ParticlePos& deltaR, const std::vector<RealType>& dt);
 
-    /** move all the particles including the drift
+  /** move all the particles including the drift
      *
      * Otherwise, everything is the same as makeMove for a walker
      */
-    bool
-    makeMoveAllParticlesWithDrift(const Walker_t& awalker,
-        const ParticlePos& drift, const ParticlePos& deltaR, RealType dt);
+  bool makeMoveAllParticlesWithDrift(const Walker_t& awalker,
+                                     const ParticlePos& drift,
+                                     const ParticlePos& deltaR,
+                                     RealType dt);
 
-    bool
-    makeMoveAllParticlesWithDrift(const Walker_t& awalker,
-        const ParticlePos& drift, const ParticlePos& deltaR,
-        const std::vector<RealType>& dt);
+  bool makeMoveAllParticlesWithDrift(const Walker_t& awalker,
+                                     const ParticlePos& drift,
+                                     const ParticlePos& deltaR,
+                                     const std::vector<RealType>& dt);
 
-    /** accept or reject a proposed move
+  /** accept or reject a proposed move
      *  Two operation modes:
      *  The using and updating distance tables via `ParticleSet` operate in two
      * modes, regular and forward modes.
@@ -500,117 +422,93 @@ class ParticleSetT : public OhmmsElementBase
      *  `ParticleSet::accept_rejectMove` involves writing the distances with
      * respect to the old particle position.
      */
-    void
-    accept_rejectMove(Index_t iat, bool accepted, bool forward_mode = true);
+  void accept_rejectMove(Index_t iat, bool accepted, bool forward_mode = true);
 
-    /** accept the move and update the particle attribute by the proposed move
+  /** accept the move and update the particle attribute by the proposed move
      *in regular mode
      *@param iat the index of the particle whose position and other attributes
      *to be updated
      */
-    void
-    acceptMove(Index_t iat);
+  void acceptMove(Index_t iat);
 
-    /** reject a proposed move in regular mode
+  /** reject a proposed move in regular mode
      * @param iat the electron whose proposed move gets rejected.
      */
-    void
-    rejectMove(Index_t iat);
+  void rejectMove(Index_t iat);
 
-    /// batched version of acceptMove and rejectMove fused, templated on
-    /// CoordsType
-    template <CoordsType CT>
-    static void
-    mw_accept_rejectMoveT(const RefVectorWithLeader<ParticleSetT>& p_list,
-        Index_t iat, const std::vector<bool>& isAccepted,
-        bool forward_mode = true);
+  /// batched version of acceptMove and rejectMove fused, templated on
+  /// CoordsType
+  template<CoordsType CT>
+  static void mw_accept_rejectMoveT(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                    Index_t iat,
+                                    const std::vector<bool>& isAccepted,
+                                    bool forward_mode = true);
 
-    /// batched version of acceptMove and rejectMove fused
-    static void
-    mw_accept_rejectMove(const RefVectorWithLeader<ParticleSetT>& p_list,
-        Index_t iat, const std::vector<bool>& isAccepted,
-        bool forward_mode = true);
+  /// batched version of acceptMove and rejectMove fused
+  static void mw_accept_rejectMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                   Index_t iat,
+                                   const std::vector<bool>& isAccepted,
+                                   bool forward_mode = true);
 
-    /** batched version  of acceptMove and reject Move fused, but only for spins
+  /** batched version of acceptMove and rejectMove fused, but only for spins
      *
      * note: should be called BEFORE mw_accept_rejectMove since the active_ptcl_
      * gets reset to -1 This would cause the assertion that we have the right
      * particle index to fail if done in the wrong order
      */
-    static void
-    mw_accept_rejectSpinMove(const RefVectorWithLeader<ParticleSetT>& p_list,
-        Index_t iat, const std::vector<bool>& isAccepted);
+  static void mw_accept_rejectSpinMove(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                       Index_t iat,
+                                       const std::vector<bool>& isAccepted);
 
-    void
-    initPropertyList();
-    inline int
-    addProperty(const std::string& pname)
-    {
-        return PropertyList.add(pname.c_str());
-    }
+  void initPropertyList();
+  inline int addProperty(const std::string& pname) { return PropertyList.add(pname.c_str()); }
+
+  int addPropertyHistory(int leng);
+  //        void rejectedMove();
+  //        void resetPropertyHistory( );
+  //        void addPropertyHistoryPoint(int index, RealType data);
+
+  void convert(const ParticlePos& pin, ParticlePos& pout);
+  void convert2Unit(const ParticlePos& pin, ParticlePos& pout);
+  void convert2Cart(const ParticlePos& pin, ParticlePos& pout);
+  void convert2Unit(ParticlePos& pout);
+  void convert2Cart(ParticlePos& pout);
+  void convert2UnitInBox(const ParticlePos& pint, ParticlePos& pout);
+  void convert2CartInBox(const ParticlePos& pint, ParticlePos& pout);
 
-    int
-    addPropertyHistory(int leng);
-    //        void rejectedMove();
-    //        void resetPropertyHistory( );
-    //        void addPropertyHistoryPoint(int index, RealType data);
-
-    void
-    convert(const ParticlePos& pin, ParticlePos& pout);
-    void
-    convert2Unit(const ParticlePos& pin, ParticlePos& pout);
-    void
-    convert2Cart(const ParticlePos& pin, ParticlePos& pout);
-    void
-    convert2Unit(ParticlePos& pout);
-    void
-    convert2Cart(ParticlePos& pout);
-    void
-    convert2UnitInBox(const ParticlePos& pint, ParticlePos& pout);
-    void
-    convert2CartInBox(const ParticlePos& pint, ParticlePos& pout);
-
-    void
-    applyBC(const ParticlePos& pin, ParticlePos& pout);
-    void
-    applyBC(ParticlePos& pos);
-    void
-    applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last);
-    void
-    applyMinimumImage(ParticlePos& pinout);
-
-    /** load a Walker_t to the current ParticleSet
+  void applyBC(const ParticlePos& pin, ParticlePos& pout);
+  void applyBC(ParticlePos& pos);
+  void applyBC(const ParticlePos& pin, ParticlePos& pout, int first, int last);
+  void applyMinimumImage(ParticlePos& pinout);
+
+  /** load a Walker_t to the current ParticleSet
      * @param awalker the reference to the walker to be loaded
      * @param pbyp true if it is used by PbyP update
      *
      * PbyP requires the distance tables and Sk with awalker.R
      */
-    void
-    loadWalker(Walker_t& awalker, bool pbyp);
-    /** batched version of loadWalker */
-    static void
-    mw_loadWalker(const RefVectorWithLeader<ParticleSetT>& p_list,
-        const RefVector<Walker_t>& walkers, const std::vector<bool>& recompute,
-        bool pbyp);
+  void loadWalker(Walker_t& awalker, bool pbyp);
+  /** batched version of loadWalker */
+  static void mw_loadWalker(const RefVectorWithLeader<ParticleSetT>& p_list,
+                            const RefVector<Walker_t>& walkers,
+                            const std::vector<bool>& recompute,
+                            bool pbyp);
 
-    /** save this to awalker
+  /** save this to awalker
      *
      *  just the R, G, and L
      *  More duplicate data that makes code difficult to reason about should be
      * removed.
      */
-    void
-    saveWalker(Walker_t& awalker);
+  void saveWalker(Walker_t& awalker);
 
-    /** batched version of saveWalker
+  /** batched version of saveWalker
      *
      *  just the R, G, and L
      */
-    static void
-    mw_saveWalker(const RefVectorWithLeader<ParticleSetT>& psets,
-        const RefVector<Walker_t>& walkers);
+  static void mw_saveWalker(const RefVectorWithLeader<ParticleSetT>& psets, const RefVector<Walker_t>& walkers);
 
-    /** update structure factor and unmark active_ptcl_
+  /** update structure factor and unmark active_ptcl_
      *@param skip SK update if skipSK is true
      *
      * The Coulomb interaction evaluation needs the structure factor.
@@ -618,281 +516,206 @@ class ParticleSetT : public OhmmsElementBase
      * electron moves before evaluating the Hamiltonian. Unmark
      * active_ptcl_ is more of a safety measure probably not needed.
      */
-    void
-    donePbyP(bool skipSK = false);
-    /// batched version of donePbyP
-    static void
-    mw_donePbyP(
-        const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK = false);
+  void donePbyP(bool skipSK = false);
+  /// batched version of donePbyP
+  static void mw_donePbyP(const RefVectorWithLeader<ParticleSetT>& p_list, bool skipSK = false);
 
-    /// return the address of the values of Hamiltonian terms
-    inline FullPrecRealType* restrict getPropertyBase()
-    {
-        return Properties.data();
-    }
+  /// return the address of the values of Hamiltonian terms
+  inline FullPrecRealType* restrict getPropertyBase() { return Properties.data(); }
 
-    /// return the address of the values of Hamiltonian terms
-    inline const FullPrecRealType* restrict getPropertyBase() const
-    {
-        return Properties.data();
-    }
+  /// return the address of the values of Hamiltonian terms
+  inline const FullPrecRealType* restrict getPropertyBase() const { return Properties.data(); }
 
-    /// return the address of the i-th properties
-    inline FullPrecRealType* restrict getPropertyBase(int i)
-    {
-        return Properties[i];
-    }
+  /// return the address of the i-th properties
+  inline FullPrecRealType* restrict getPropertyBase(int i) { return Properties[i]; }
 
-    /// return the address of the i-th properties
-    inline const FullPrecRealType* restrict getPropertyBase(int i) const
-    {
-        return Properties[i];
-    }
+  /// return the address of the i-th properties
+  inline const FullPrecRealType* restrict getPropertyBase(int i) const { return Properties[i]; }
 
-    inline void
-    setTwist(const SingleParticlePos& t)
-    {
-        myTwist = t;
-    }
+  inline void setTwist(const SingleParticlePos& t) { myTwist = t; }
 
-    inline const SingleParticlePos&
-    getTwist() const
-    {
-        return myTwist;
-    }
+  inline const SingleParticlePos& getTwist() const { return myTwist; }
 
-    /** Initialize particles around another ParticleSet
+  /** Initialize particles around another ParticleSet
      * Used to initialize an electron ParticleSet by an ion ParticleSet
      */
-    void
-    randomizeFromSource(ParticleSetT& src);
+  void randomizeFromSource(ParticleSetT& src);
 
-    /** get species name of particle i
+  /** get species name of particle i
      */
-    inline const std::string&
-    species_from_index(int i)
-    {
-        return my_species_.speciesName[GroupID[i]];
-    }
+  inline const std::string& species_from_index(int i) { return my_species_.speciesName[GroupID[i]]; }
 
-    inline size_t
-    getTotalNum() const
-    {
-        return TotalNum;
-    }
-
-    inline void
-    clear()
-    {
-        TotalNum = 0;
-
-        R.clear();
-        spins.clear();
-        GroupID.clear();
-        G.clear();
-        L.clear();
-        Mass.clear();
-        Z.clear();
-
-        coordinates_->resize(0);
-    }
-
-    /// return the number of groups
-    inline int
-    groups() const
-    {
-        return group_offsets_->size() - 1;
-    }
+  inline size_t getTotalNum() const { return TotalNum; }
 
-    /// return the first index of a group i
-    inline int
-    first(int igroup) const
-    {
-        return (*group_offsets_)[igroup];
-    }
+  inline void clear()
+  {
+    TotalNum = 0;
 
-    /// return the last index of a group i
-    inline int
-    last(int igroup) const
-    {
-        return (*group_offsets_)[igroup + 1];
-    }
+    R.clear();
+    spins.clear();
+    GroupID.clear();
+    G.clear();
+    L.clear();
+    Mass.clear();
+    Z.clear();
 
-    /// return the group id of a given particle in the particle set.
-    inline int
-    getGroupID(int iat) const
-    {
-        assert(iat >= 0 && iat < TotalNum);
-        return GroupID[iat];
-    }
+    coordinates_->resize(0);
+  }
 
-    /// return the size of a group
-    inline int
-    groupsize(int igroup) const
-    {
-        return (*group_offsets_)[igroup + 1] - (*group_offsets_)[igroup];
-    }
+  /// return the number of groups
+  inline int groups() const { return group_offsets_->size() - 1; }
 
-    /// add attributes to list for IO
-    template <typename ATList>
-    inline void
-    createAttributeList(ATList& AttribList)
-    {
-        R.setTypeName(ParticleTags::postype_tag);
-        R.setObjName(ParticleTags::position_tag);
-        spins.setTypeName(ParticleTags::scalartype_tag);
-        spins.setObjName(ParticleTags::spins_tag);
-        GroupID.setTypeName(ParticleTags::indextype_tag);
-        GroupID.setObjName(ParticleTags::ionid_tag);
-        // add basic attributes
-        AttribList.add(R);
-        AttribList.add(spins);
-        AttribList.add(GroupID);
-
-        G.setTypeName(ParticleTags::gradtype_tag);
-        L.setTypeName(ParticleTags::laptype_tag);
-
-        G.setObjName("grad");
-        L.setObjName("lap");
-
-        AttribList.add(G);
-        AttribList.add(L);
-
-        // more particle attributes
-        Mass.setTypeName(ParticleTags::scalartype_tag);
-        Mass.setObjName("mass");
-        AttribList.add(Mass);
-
-        Z.setTypeName(ParticleTags::scalartype_tag);
-        Z.setObjName("charge");
-        AttribList.add(Z);
-    }
+  /// return the first index of a group i
+  inline int first(int igroup) const { return (*group_offsets_)[igroup]; }
 
-    inline void
-    setMapStorageToInput(const std::vector<int>& mapping)
-    {
-        map_storage_to_input_ = mapping;
-    }
-    inline const std::vector<int>&
-    get_map_storage_to_input() const
-    {
-        return map_storage_to_input_;
-    }
+  /// return the last index of a group i
+  inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; }
 
-    inline int
-    getNumDistTables() const
-    {
-        return DistTables.size();
-    }
+  /// return the group id of a given particle in the particle set.
+  inline int getGroupID(int iat) const
+  {
+    assert(iat >= 0 && iat < TotalNum);
+    return GroupID[iat];
+  }
 
-    inline auto&
-    get_group_offsets() const
-    {
-        return *group_offsets_;
-    }
+  /// return the size of a group
+  inline int groupsize(int igroup) const { return (*group_offsets_)[igroup + 1] - (*group_offsets_)[igroup]; }
 
-    /// initialize a shared resource and hand it to a collection
-    void
-    createResource(ResourceCollection& collection) const;
-    /** acquire external resource and assocaite it with the list of ParticleSet
+  /// add attributes to list for IO
+  template<typename ATList>
+  inline void createAttributeList(ATList& AttribList)
+  {
+    R.setTypeName(ParticleTags::postype_tag);
+    R.setObjName(ParticleTags::position_tag);
+    spins.setTypeName(ParticleTags::scalartype_tag);
+    spins.setObjName(ParticleTags::spins_tag);
+    GroupID.setTypeName(ParticleTags::indextype_tag);
+    GroupID.setObjName(ParticleTags::ionid_tag);
+    // add basic attributes
+    AttribList.add(R);
+    AttribList.add(spins);
+    AttribList.add(GroupID);
+
+    G.setTypeName(ParticleTags::gradtype_tag);
+    L.setTypeName(ParticleTags::laptype_tag);
+
+    G.setObjName("grad");
+    L.setObjName("lap");
+
+    AttribList.add(G);
+    AttribList.add(L);
+
+    // more particle attributes
+    Mass.setTypeName(ParticleTags::scalartype_tag);
+    Mass.setObjName("mass");
+    AttribList.add(Mass);
+
+    Z.setTypeName(ParticleTags::scalartype_tag);
+    Z.setObjName("charge");
+    AttribList.add(Z);
+  }
+
+  inline void setMapStorageToInput(const std::vector<int>& mapping) { map_storage_to_input_ = mapping; }
+  inline const std::vector<int>& get_map_storage_to_input() const { return map_storage_to_input_; }
+
+  inline int getNumDistTables() const { return DistTables.size(); }
+
+  inline auto& get_group_offsets() const { return *group_offsets_; }
+
+  /// initialize a shared resource and hand it to a collection
+  void createResource(ResourceCollection& collection) const;
+  /** acquire external resource and associate it with the list of ParticleSet
      * Note: use RAII ResourceCollectionTeamLock whenever possible
      */
-    static void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<ParticleSetT>& p_list);
-    /** release external resource
+  static void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<ParticleSetT>& p_list);
+  /** release external resource
      * Note: use RAII ResourceCollectionTeamLock whenever possible
      */
-    static void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<ParticleSetT>& p_list);
+  static void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<ParticleSetT>& p_list);
 
-    static RefVectorWithLeader<DistanceTableT<T>>
-    extractDTRefList(const RefVectorWithLeader<ParticleSetT>& p_list, int id);
-    static RefVectorWithLeader<DynamicCoordinatesT<T>>
-    extractCoordsRefList(const RefVectorWithLeader<ParticleSetT>& p_list);
-    static RefVectorWithLeader<StructFactT<T>>
-    extractSKRefList(const RefVectorWithLeader<ParticleSetT>& p_list);
+  static RefVectorWithLeader<DistanceTableT<T>> extractDTRefList(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                                                 int id);
+  static RefVectorWithLeader<DynamicCoordinatesT<T>> extractCoordsRefList(
+      const RefVectorWithLeader<ParticleSetT>& p_list);
+  static RefVectorWithLeader<StructFactT<T>> extractSKRefList(const RefVectorWithLeader<ParticleSetT>& p_list);
 
 protected:
-    /// reference to global simulation cell
-    const SimulationCellT<T>& simulation_cell_;
-
-    /// true if the particles have the same mass
-    bool same_mass_;
-    /// true is a dynamic spin calculation
-    bool is_spinor_;
-    /** the index of the active particle during particle-by-particle moves
+  /// reference to global simulation cell
+  const SimulationCellT<T>& simulation_cell_;
+
+  /// true if the particles have the same mass
+  bool same_mass_;
+  /// true if this is a dynamic spin calculation
+  bool is_spinor_;
+  /** the index of the active particle during particle-by-particle moves
      *
      * when a single particle move is proposed, the particle id is assigned to
      * active_ptcl_ No matter the move is accepted or rejected, active_ptcl_ is
      * marked back to -1. This state flag is used for picking coordinates and
      * distances for SPO evaluation.
      */
-    Index_t active_ptcl_;
-    /// the proposed position of active_ptcl_ during particle-by-particle moves
-    SingleParticlePos active_pos_;
-    /// the proposed spin of active_ptcl_ during particle-by-particle moves
-    Scalar_t active_spin_val_;
+  Index_t active_ptcl_;
+  /// the proposed position of active_ptcl_ during particle-by-particle moves
+  SingleParticlePos active_pos_;
+  /// the proposed spin of active_ptcl_ during particle-by-particle moves
+  Scalar_t active_spin_val_;
 
-    /** Map storage index to the input index.
+  /** Map storage index to the input index.
      * If not empty, particles were reordered by groups when being loaded from
      * XML input. When other input data are affected by reordering, its builder
      * should query this mapping. map_storage_to_input_[5] = 2 means the index
      * 5(6th) particle in this ParticleSet was read from the index 2(3th)
      * particle in the XML input
      */
-    std::vector<int> map_storage_to_input_;
+  std::vector<int> map_storage_to_input_;
 
-    /// SpeciesSet of particles
-    SpeciesSet my_species_;
+  /// SpeciesSet of particles
+  SpeciesSet my_species_;
 
-    /// Structure factor
-    std::unique_ptr<StructFactT<T>> structure_factor_;
+  /// Structure factor
+  std::unique_ptr<StructFactT<T>> structure_factor_;
 
-    /// multi walker structure factor data
-    ResourceHandle<SKMultiWalkerMemT<T>> mw_structure_factor_data_handle_;
+  /// multi walker structure factor data
+  ResourceHandle<SKMultiWalkerMemT<T>> mw_structure_factor_data_handle_;
 
-    /** map to handle distance tables
+  /** map to handle distance tables
      *
      * myDistTableMap[source-particle-tag]= locator in the distance table
      * myDistTableMap[ObjectTag] === 0
      */
-    std::map<std::string, int> myDistTableMap;
+  std::map<std::string, int> myDistTableMap;
 
-    /// distance tables that need to be updated by moving this ParticleSet
-    std::vector<std::unique_ptr<DistanceTableT<T>>> DistTables;
+  /// distance tables that need to be updated by moving this ParticleSet
+  std::vector<std::unique_ptr<DistanceTableT<T>>> DistTables;
 
-    /// Descriptions from distance table creation.  Same order as DistTables.
-    std::vector<std::string> distTableDescriptions;
+  /// Descriptions from distance table creation.  Same order as DistTables.
+  std::vector<std::string> distTableDescriptions;
 
-    TimerList_t myTimers;
+  TimerList_t myTimers;
 
-    SingleParticlePos myTwist;
+  SingleParticlePos myTwist;
 
-    std::string ParentName;
+  std::string ParentName;
 
-    /// total number of particles
-    size_t TotalNum;
+  /// total number of particles
+  size_t TotalNum;
 
-    /// array to handle a group of distinct particles per species
-    std::shared_ptr<Vector<int, OMPallocator<int>>> group_offsets_;
+  /// array to handle a group of distinct particles per species
+  std::shared_ptr<Vector<int, OMPallocator<int>>> group_offsets_;
 
-    /// internal representation of R. It can be an SoA copy of R
-    std::unique_ptr<DynamicCoordinatesT<T>> coordinates_;
+  /// internal representation of R. It can be an SoA copy of R
+  std::unique_ptr<DynamicCoordinatesT<T>> coordinates_;
 
-    /** compute temporal DistTables and SK for a new particle position
+  /** compute temporal DistTables and SK for a new particle position
      *
      * @param iat the particle that is moved on a sphere
      * @param newpos a new particle position
      * @param maybe_accept if false, the caller guarantees that the proposed
      * move will not be accepted.
      */
-    void
-    computeNewPosDistTables(
-        Index_t iat, const SingleParticlePos& newpos, bool maybe_accept = true);
+  void computeNewPosDistTables(Index_t iat, const SingleParticlePos& newpos, bool maybe_accept = true);
 
-    /** compute temporal DistTables and SK for a new particle position for each
+  /** compute temporal DistTables and SK for a new particle position for each
      * walker in a batch
      *
      * @param p_list the list of wrapped ParticleSet references in a walker
@@ -902,41 +725,38 @@ class ParticleSetT : public OhmmsElementBase
      * @param maybe_accept if false, the caller guarantees that the proposed
      * move will not be accepted.
      */
-    static void
-    mw_computeNewPosDistTables(const RefVectorWithLeader<ParticleSetT>& p_list,
-        Index_t iat, const std::vector<SingleParticlePos>& new_positions,
-        bool maybe_accept = true);
+  static void mw_computeNewPosDistTables(const RefVectorWithLeader<ParticleSetT>& p_list,
+                                         Index_t iat,
+                                         const std::vector<SingleParticlePos>& new_positions,
+                                         bool maybe_accept = true);
 
-    /** actual implemenation for accepting a proposed move in forward mode
+  /** actual implementation for accepting a proposed move in forward mode
      *
      * @param iat the index of the particle whose position and other attributes
      * to be updated
      */
-    void
-    acceptMoveForwardMode(Index_t iat);
+  void acceptMoveForwardMode(Index_t iat);
 
-    /** reject a proposed move in forward mode
+  /** reject a proposed move in forward mode
      * @param iat the electron whose proposed move gets rejected.
      */
-    void
-    rejectMoveForwardMode(Index_t iat);
-
-    /// resize internal storage
-    inline void
-    resize(size_t numPtcl)
-    {
-        TotalNum = numPtcl;
+  void rejectMoveForwardMode(Index_t iat);
 
-        R.resize(numPtcl);
-        spins.resize(numPtcl);
-        GroupID.resize(numPtcl);
-        G.resize(numPtcl);
-        L.resize(numPtcl);
-        Mass.resize(numPtcl);
-        Z.resize(numPtcl);
-
-        coordinates_->resize(numPtcl);
-    }
+  /// resize internal storage: record numPtcl in TotalNum and resize every per-particle container to it
+  inline void resize(size_t numPtcl)
+  {
+    TotalNum = numPtcl;
+    // every attribute container below is resized to numPtcl entries
+    R.resize(numPtcl);
+    spins.resize(numPtcl);
+    GroupID.resize(numPtcl);
+    G.resize(numPtcl);
+    L.resize(numPtcl);
+    Mass.resize(numPtcl);
+    Z.resize(numPtcl);
+    // coordinates_ is dereferenced unconditionally, so it must be non-null when resize() is called
+    coordinates_->resize(numPtcl);
+  }
 };
 
 } // namespace qmcplusplus
diff --git a/src/Particle/ParticleSetTraits.h b/src/Particle/ParticleSetTraits.h
index 7a54a08f1bc..ec02fa4d586 100644
--- a/src/Particle/ParticleSetTraits.h
+++ b/src/Particle/ParticleSetTraits.h
@@ -1,3 +1,14 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Philip Fackler, facklerpw@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Philip Fackler, facklerpw@ornl.gov, Oak Ridge National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
 #ifndef QMCPLUSPLUS_PARTICLESETTRAITS_H
 #define QMCPLUSPLUS_PARTICLESETTRAITS_H
 
@@ -12,75 +23,73 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 struct ParticleSetTraits
 {
-    enum
-    {
-        DIM = OHMMS_DIM
-    };
-    using RealType = RealAlias<T>;
-    using ComplexType = std::complex<RealType>;
-    using ValueType = T;
-    using IndexType = int;
-    using PosType = TinyVector<RealType, DIM>;
-    using GradType = TinyVector<ValueType, DIM>;
-    // using HessType = Tensor<ValueType, DIM>;
-    using TensorType = Tensor<ValueType, DIM>;
-    // using GradHessType = TinyVector<Tensor<ValueType, DIM>, DIM>;
-    // using IndexVector = Vector<IndexType>;
-    // using ValueVector = Vector<ValueType>;
-    // using ValueMatrix = Matrix<ValueType>;
-    // using GradVector = Vector<GradType>;
-    // using GradMatrix = Matrix<GradType>;
-    // using HessVector = Vector<HessType>;
-    // using HessMatrix = Matrix<HessType>;
-    // using GradHessVector = Vector<GradHessType>;
-    // using GradHessMatrix = Matrix<GradHessType>;
-    // using VGLVector = VectorSoaContainer<ValueType, DIM + 2>;
+  enum
+  {
+    DIM = OHMMS_DIM
+  };
+  using RealType    = RealAlias<T>;
+  using ComplexType = std::complex<RealType>;
+  using ValueType   = T;
+  using IndexType   = int;
+  using PosType     = TinyVector<RealType, DIM>;
+  using GradType    = TinyVector<ValueType, DIM>;
+  // using HessType = Tensor<ValueType, DIM>;
+  using TensorType = Tensor<ValueType, DIM>;
+  // using GradHessType = TinyVector<Tensor<ValueType, DIM>, DIM>;
+  // using IndexVector = Vector<IndexType>;
+  // using ValueVector = Vector<ValueType>;
+  // using ValueMatrix = Matrix<ValueType>;
+  // using GradVector = Vector<GradType>;
+  // using GradMatrix = Matrix<GradType>;
+  // using HessVector = Vector<HessType>;
+  // using HessMatrix = Matrix<HessType>;
+  // using GradHessVector = Vector<GradHessType>;
+  // using GradHessMatrix = Matrix<GradHessType>;
+  // using VGLVector = VectorSoaContainer<ValueType, DIM + 2>;
 
-    using FullPrecRealType = double;
-    using FullPrecComplexType = std::complex<double>;
-    using FullPrecValueType = std::conditional_t<IsComplex_t<T>::value,
-        FullPrecComplexType, FullPrecRealType>;
+  using FullPrecRealType    = double;
+  using FullPrecComplexType = std::complex<double>;
+  using FullPrecValueType   = std::conditional_t<IsComplex_t<T>::value, FullPrecComplexType, FullPrecRealType>;
 
-    using PropertySetType = RecordNamedProperty<FullPrecRealType>;
+  using PropertySetType = RecordNamedProperty<FullPrecRealType>;
 };
 
-template <typename T>
+template<typename T>
 struct LatticeParticleTraits
 {
-    enum
-    {
-        DIM = OHMMS_DIM
-    };
-    using RealType = typename ParticleSetTraits<T>::RealType;
+  enum
+  {
+    DIM = OHMMS_DIM
+  };
+  using RealType = typename ParticleSetTraits<T>::RealType;
 
-    using ParticleLayout = CrystalLattice<RealType, DIM>;
-    using SingleParticleIndex = typename ParticleLayout::SingleParticleIndex;
-    using SingleParticlePos = typename ParticleLayout::SingleParticlePos;
-    using ParticleTensorType = typename ParticleLayout::Tensor_t;
+  using ParticleLayout      = CrystalLattice<RealType, DIM>;
+  using SingleParticleIndex = typename ParticleLayout::SingleParticleIndex;
+  using SingleParticlePos   = typename ParticleLayout::SingleParticlePos;
+  using ParticleTensorType  = typename ParticleLayout::Tensor_t;
 
-    using FullPrecRealType = typename ParticleSetTraits<T>::FullPrecRealType;
-    using FullPrecComplexType =
-        typename ParticleSetTraits<T>::FullPrecComplexType;
-    using FullPrecValueType = typename ParticleSetTraits<T>::FullPrecValueType;
+  using FullPrecRealType    = typename ParticleSetTraits<T>::FullPrecRealType;
+  using FullPrecComplexType = typename ParticleSetTraits<T>::FullPrecComplexType;
+  using FullPrecValueType   = typename ParticleSetTraits<T>::FullPrecValueType;
 
-    using FullPrecGradType = TinyVector<FullPrecValueType, DIM>;
+  using FullPrecGradType = TinyVector<FullPrecValueType, DIM>;
 
-    using Index_t = int;
-    using Scalar_t = FullPrecRealType;
-    using Complex_t = FullPrecComplexType;
-    using Tensor_t  = Tensor<RealType, OHMMS_DIM>;
+  using Index_t   = int;
+  using Scalar_t  = FullPrecRealType;
+  using Complex_t = FullPrecComplexType;
+  using Tensor_t  = Tensor<RealType, OHMMS_DIM>;
 
-    using ParticleIndex = ParticleAttrib<Index_t>;
-    using ParticleScalar = ParticleAttrib<Scalar_t>;
-    using ParticlePos = ParticleAttrib<SingleParticlePos>;
-    using ParticleTensor = ParticleAttrib<ParticleTensorType>;
+  using ParticleIndex  = ParticleAttrib<Index_t>;
+  using ParticleScalar = ParticleAttrib<Scalar_t>;
+  using ParticlePos    = ParticleAttrib<SingleParticlePos>;
+  using ParticleTensor = ParticleAttrib<ParticleTensorType>;
 
-    using ParticleGradient = ParticleAttrib<FullPrecGradType>;
-    using ParticleLaplacian = ParticleAttrib<FullPrecValueType>;
-    using SingleParticleValue = FullPrecValueType;
+  using ParticleGradient    = ParticleAttrib<FullPrecGradType>;
+  using ParticleLaplacian   = ParticleAttrib<FullPrecValueType>;
+  using SingleParticleValue = FullPrecValueType;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/RealSpacePositionsT.h b/src/Particle/RealSpacePositionsT.h
index 7cd81723b63..562756ee962 100644
--- a/src/Particle/RealSpacePositionsT.h
+++ b/src/Particle/RealSpacePositionsT.h
@@ -22,75 +22,46 @@ namespace qmcplusplus
 /** Introduced to handle virtual moves and ratio computations, e.g. for
  * non-local PP evaluations.
  */
-template <typename T>
+template<typename T>
 class RealSpacePositionsT : public DynamicCoordinatesT<T>
 {
 public:
-    using ParticlePos = typename LatticeParticleTraits<T>::ParticlePos;
-    using RealType = typename DynamicCoordinatesT<T>::RealType;
-    using PosType = typename DynamicCoordinatesT<T>::PosType;
-    using PosVectorSoa = typename DynamicCoordinatesT<T>::PosVectorSoa;
+  using ParticlePos  = typename LatticeParticleTraits<T>::ParticlePos;
+  using RealType     = typename DynamicCoordinatesT<T>::RealType;
+  using PosType      = typename DynamicCoordinatesT<T>::PosType;
+  using PosVectorSoa = typename DynamicCoordinatesT<T>::PosVectorSoa;
 
-    RealSpacePositionsT() :
-        DynamicCoordinatesT<T>(DynamicCoordinateKind::DC_POS)
-    {
-    }
+  RealSpacePositionsT() : DynamicCoordinatesT<T>(DynamicCoordinateKind::DC_POS) {}
 
-    std::unique_ptr<DynamicCoordinatesT<T>>
-    makeClone() override
-    {
-        return std::make_unique<RealSpacePositionsT>(*this);
-    }
+  std::unique_ptr<DynamicCoordinatesT<T>> makeClone() override { return std::make_unique<RealSpacePositionsT>(*this); }
 
-    void
-    resize(size_t n) override
-    {
-        RSoA.resize(n);
-    }
-    size_t
-    size() const override
-    {
-        return RSoA.size();
-    }
+  void resize(size_t n) override { RSoA.resize(n); }
+  size_t size() const override { return RSoA.size(); }
 
-    void
-    setAllParticlePos(const ParticlePos& R) override
-    {
-        resize(R.size());
-        RSoA.copyIn(R);
-    }
-    void
-    setOneParticlePos(const PosType& pos, size_t iat) override
-    {
-        RSoA(iat) = pos;
-    }
+  void setAllParticlePos(const ParticlePos& R) override
+  {
+    resize(R.size());
+    RSoA.copyIn(R);
+  }
+  void setOneParticlePos(const PosType& pos, size_t iat) override { RSoA(iat) = pos; }
 
-    void
-    mw_acceptParticlePos(
-        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list,
-        size_t iat, const std::vector<PosType>& new_positions,
-        const std::vector<bool>& isAccepted) const override
-    {
-        assert(this == &coords_list.getLeader());
-        for (size_t iw = 0; iw < isAccepted.size(); iw++)
-            if (isAccepted[iw])
-                coords_list[iw].setOneParticlePos(new_positions[iw], iat);
-    }
+  void mw_acceptParticlePos(const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list,
+                            size_t iat,
+                            const std::vector<PosType>& new_positions,
+                            const std::vector<bool>& isAccepted) const override
+  {
+    assert(this == &coords_list.getLeader());
+    for (size_t iw = 0; iw < isAccepted.size(); iw++)
+      if (isAccepted[iw])
+        coords_list[iw].setOneParticlePos(new_positions[iw], iat);
+  }
 
-    const PosVectorSoa&
-    getAllParticlePos() const override
-    {
-        return RSoA;
-    }
-    PosType
-    getOneParticlePos(size_t iat) const override
-    {
-        return RSoA[iat];
-    }
+  const PosVectorSoa& getAllParticlePos() const override { return RSoA; }
+  PosType getOneParticlePos(size_t iat) const override { return RSoA[iat]; }
 
 private:
-    /// particle positions in SoA layout
-    PosVectorSoa RSoA;
+  /// particle positions in SoA layout
+  PosVectorSoa RSoA;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/RealSpacePositionsTOMPTarget.h b/src/Particle/RealSpacePositionsTOMPTarget.h
index 7f0f52b16b6..fc4be1f073d 100644
--- a/src/Particle/RealSpacePositionsTOMPTarget.h
+++ b/src/Particle/RealSpacePositionsTOMPTarget.h
@@ -26,67 +26,55 @@ namespace qmcplusplus
 /** Introduced to handle virtual moves and ratio computations, e.g. for
  * non-local PP evaluations.
  */
-template <typename T>
+template<typename T>
 class RealSpacePositionsTOMPTarget : public DynamicCoordinatesT<T>
 {
 public:
-    using ParticlePos = typename LatticeParticleTraits<T>::ParticlePos;
-    using RealType = typename DynamicCoordinatesT<T>::RealType;
-    using PosType = typename DynamicCoordinatesT<T>::PosType;
-    using PosVectorSoa = typename DynamicCoordinatesT<T>::PosVectorSoa;
-    static constexpr auto DIM = ParticleSetTraits<T>::DIM;
-
-    RealSpacePositionsTOMPTarget() :
-        DynamicCoordinatesT<T>(DynamicCoordinateKind::DC_POS_OFFLOAD),
-        is_host_position_changed_(false)
+  using ParticlePos         = typename LatticeParticleTraits<T>::ParticlePos;
+  using RealType            = typename DynamicCoordinatesT<T>::RealType;
+  using PosType             = typename DynamicCoordinatesT<T>::PosType;
+  using PosVectorSoa        = typename DynamicCoordinatesT<T>::PosVectorSoa;
+  static constexpr auto DIM = ParticleSetTraits<T>::DIM;
+
+  RealSpacePositionsTOMPTarget()
+      : DynamicCoordinatesT<T>(DynamicCoordinateKind::DC_POS_OFFLOAD), is_host_position_changed_(false)
+  {}
+  RealSpacePositionsTOMPTarget(const RealSpacePositionsTOMPTarget& in)
+      : DynamicCoordinatesT<T>(DynamicCoordinateKind::DC_POS_OFFLOAD), RSoA(in.RSoA)
+  {
+    RSoA_hostview.attachReference(RSoA.size(), RSoA.capacity(), RSoA.data());
+    updateH2D();
+  }
+
+  std::unique_ptr<DynamicCoordinatesT<T>> makeClone() override
+  {
+    return std::make_unique<RealSpacePositionsTOMPTarget>(*this);
+  }
+
+  void resize(size_t n) override
+  {
+    if (RSoA.size() != n)
     {
+      RSoA.resize(n);
+      RSoA_hostview.attachReference(RSoA.size(), RSoA.capacity(), RSoA.data());
     }
-    RealSpacePositionsTOMPTarget(const RealSpacePositionsTOMPTarget& in) :
-        DynamicCoordinatesT<T>(DynamicCoordinateKind::DC_POS_OFFLOAD),
-        RSoA(in.RSoA)
-    {
-        RSoA_hostview.attachReference(
-            RSoA.size(), RSoA.capacity(), RSoA.data());
-        updateH2D();
-    }
-
-    std::unique_ptr<DynamicCoordinatesT<T>>
-    makeClone() override
-    {
-        return std::make_unique<RealSpacePositionsTOMPTarget>(*this);
-    }
-
-    void
-    resize(size_t n) override
-    {
-        if (RSoA.size() != n) {
-            RSoA.resize(n);
-            RSoA_hostview.attachReference(
-                RSoA.size(), RSoA.capacity(), RSoA.data());
-        }
-    }
-
-    size_t
-    size() const override
-    {
-        return RSoA_hostview.size();
-    }
-
-    void
-    setAllParticlePos(const ParticlePos& R) override
-    {
-        resize(R.size());
-        RSoA_hostview.copyIn(R);
-        updateH2D();
-        is_nw_new_pos_prepared = false;
-    }
-
-    void
-    setOneParticlePos(const PosType& pos, size_t iat) override
-    {
-        RSoA_hostview(iat) = pos;
-        is_host_position_changed_ = true;
-        /* This was too slow due to overhead.
+  }
+
+  size_t size() const override { return RSoA_hostview.size(); }
+
+  void setAllParticlePos(const ParticlePos& R) override
+  {
+    resize(R.size());
+    RSoA_hostview.copyIn(R);
+    updateH2D();
+    is_nw_new_pos_prepared = false;
+  }
+
+  void setOneParticlePos(const PosType& pos, size_t iat) override
+  {
+    RSoA_hostview(iat)        = pos;
+    is_host_position_changed_ = true;
+    /* This was too slow due to overhead.
         RealType x     = pos[0];
         RealType y     = pos[1];
         RealType z     = pos[2];
@@ -100,231 +88,177 @@ class RealSpacePositionsTOMPTarget : public DynamicCoordinatesT<T>
           data[iat + offset * 2] = z;
         }
         */
-    }
+  }
 
-    void
-    mw_copyActivePos(
-        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list,
-        size_t iat, const std::vector<PosType>& new_positions) const override
-    {
-        assert(this == &coords_list.getLeader());
-        auto& coords_leader =
-            coords_list
-                .template getCastedLeader<RealSpacePositionsTOMPTarget>();
+  void mw_copyActivePos(const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list,
+                        size_t iat,
+                        const std::vector<PosType>& new_positions) const override
+  {
+    assert(this == &coords_list.getLeader());
+    auto& coords_leader = coords_list.template getCastedLeader<RealSpacePositionsTOMPTarget>();
 
-        const auto nw = coords_list.size();
-        auto& mw_new_pos =
-            coords_leader.mw_mem_handle_.getResource().mw_new_pos;
-        mw_new_pos.resize(nw);
+    const auto nw    = coords_list.size();
+    auto& mw_new_pos = coords_leader.mw_mem_handle_.getResource().mw_new_pos;
+    mw_new_pos.resize(nw);
 
-        for (int iw = 0; iw < nw; iw++)
-            mw_new_pos(iw) = new_positions[iw];
+    for (int iw = 0; iw < nw; iw++)
+      mw_new_pos(iw) = new_positions[iw];
 
-        auto* mw_pos_ptr = mw_new_pos.data();
-        PRAGMA_OFFLOAD("omp target update to(\
+    auto* mw_pos_ptr = mw_new_pos.data();
+    PRAGMA_OFFLOAD("omp target update to(\
             mw_pos_ptr[DIM * mw_new_pos.capacity()])")
 
-        coords_leader.is_nw_new_pos_prepared = true;
-    }
-
-    void
-    mw_acceptParticlePos(
-        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list,
-        size_t iat, const std::vector<PosType>& new_positions,
-        const std::vector<bool>& isAccepted) const override
+    coords_leader.is_nw_new_pos_prepared = true;
+  }
+
+  void mw_acceptParticlePos(const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list,
+                            size_t iat,
+                            const std::vector<PosType>& new_positions,
+                            const std::vector<bool>& isAccepted) const override
+  {
+    assert(this == &coords_list.getLeader());
+    const size_t nw         = coords_list.size();
+    auto& coords_leader     = coords_list.template getCastedLeader<RealSpacePositionsTOMPTarget>();
+    MultiWalkerMem& mw_mem  = coords_leader.mw_mem_handle_;
+    auto& mw_new_pos        = mw_mem.mw_new_pos;
+    auto& mw_rsoa_ptrs      = mw_mem.mw_rsoa_ptrs;
+    auto& mw_accept_indices = mw_mem.mw_accept_indices;
+
+    if (!is_nw_new_pos_prepared)
     {
-        assert(this == &coords_list.getLeader());
-        const size_t nw = coords_list.size();
-        auto& coords_leader =
-            coords_list
-                .template getCastedLeader<RealSpacePositionsTOMPTarget>();
-        MultiWalkerMem& mw_mem = coords_leader.mw_mem_handle_;
-        auto& mw_new_pos = mw_mem.mw_new_pos;
-        auto& mw_rsoa_ptrs = mw_mem.mw_rsoa_ptrs;
-        auto& mw_accept_indices = mw_mem.mw_accept_indices;
-
-        if (!is_nw_new_pos_prepared) {
-            mw_copyActivePos(coords_list, iat, new_positions);
-            app_warning() << "This message only appear in unit tests. Report a "
-                             "bug if seen in production code."
-                          << std::endl;
-        }
+      mw_copyActivePos(coords_list, iat, new_positions);
+      app_warning() << "This message only appear in unit tests. Report a "
+                       "bug if seen in production code."
+                    << std::endl;
+    }
 
-        coords_leader.is_nw_new_pos_prepared = false;
-
-        mw_accept_indices.resize(nw);
-        auto* restrict id_array = mw_accept_indices.data();
-
-        size_t num_accepted = 0;
-        for (int iw = 0; iw < nw; iw++)
-            if (isAccepted[iw]) {
-                auto& coords = coords_list.template getCastedElement<
-                    RealSpacePositionsTOMPTarget>(iw);
-                id_array[num_accepted] = iw;
-                // save new coordinates on host copy
-                coords.RSoA_hostview(iat) = mw_new_pos[iw];
-                num_accepted++;
-            }
-
-        // early return to avoid OpenMP runtime mishandling of size 0 in
-        // transfer/compute.
-        if (num_accepted == 0)
-            return;
-
-        // offload to GPU
-        auto* restrict mw_pos_ptr = mw_new_pos.data();
-        auto* restrict mw_rosa_ptr = mw_rsoa_ptrs.data();
-        const size_t rsoa_stride = RSoA.capacity();
-        const size_t mw_pos_stride = mw_new_pos.capacity();
-
-        PRAGMA_OFFLOAD("omp target teams distribute parallel for \
+    coords_leader.is_nw_new_pos_prepared = false;
+
+    mw_accept_indices.resize(nw);
+    auto* restrict id_array = mw_accept_indices.data();
+
+    size_t num_accepted = 0;
+    for (int iw = 0; iw < nw; iw++)
+      if (isAccepted[iw])
+      {
+        auto& coords           = coords_list.template getCastedElement<RealSpacePositionsTOMPTarget>(iw);
+        id_array[num_accepted] = iw;
+        // save new coordinates on host copy
+        coords.RSoA_hostview(iat) = mw_new_pos[iw];
+        num_accepted++;
+      }
+
+    // early return to avoid OpenMP runtime mishandling of size 0 in
+    // transfer/compute.
+    if (num_accepted == 0)
+      return;
+
+    // offload to GPU
+    auto* restrict mw_pos_ptr  = mw_new_pos.data();
+    auto* restrict mw_rosa_ptr = mw_rsoa_ptrs.data();
+    const size_t rsoa_stride   = RSoA.capacity();
+    const size_t mw_pos_stride = mw_new_pos.capacity();
+
+    PRAGMA_OFFLOAD("omp target teams distribute parallel for \
                     is_device_ptr(mw_pos_ptr, mw_rosa_ptr) \
                     map(always, to : id_array[:num_accepted])")
-        for (int i = 0; i < num_accepted; i++) {
-            const int iw = id_array[i];
-            RealType* RSoA_dev_ptr = mw_rosa_ptr[iw];
-            for (int id = 0; id < QMCTraits::DIM; id++)
-                RSoA_dev_ptr[iat + rsoa_stride * id] =
-                    mw_pos_ptr[iw + mw_pos_stride * id];
-        }
-    }
-
-    const PosVectorSoa&
-    getAllParticlePos() const override
+    for (int i = 0; i < num_accepted; i++)
     {
-        return RSoA_hostview;
-    }
-    PosType
-    getOneParticlePos(size_t iat) const override
-    {
-        return RSoA_hostview[iat];
+      const int iw           = id_array[i];
+      RealType* RSoA_dev_ptr = mw_rosa_ptr[iw];
+      for (int id = 0; id < QMCTraits::DIM; id++)
+        RSoA_dev_ptr[iat + rsoa_stride * id] = mw_pos_ptr[iw + mw_pos_stride * id];
     }
+  }
 
-    void
-    donePbyP() override
-    {
-        is_nw_new_pos_prepared = false;
-        if (is_host_position_changed_) {
-            updateH2D();
-            is_host_position_changed_ = false;
-        }
-    }
+  const PosVectorSoa& getAllParticlePos() const override { return RSoA_hostview; }
+  PosType getOneParticlePos(size_t iat) const override { return RSoA_hostview[iat]; }
 
-    const RealType*
-    getDevicePtr() const
+  void donePbyP() override
+  {
+    is_nw_new_pos_prepared = false;
+    if (is_host_position_changed_)
     {
-        return RSoA.device_data();
+      updateH2D();
+      is_host_position_changed_ = false;
     }
+  }
 
-    const auto&
-    getFusedNewPosBuffer() const
-    {
-        return mw_mem_handle_.getResource().mw_new_pos;
-    }
+  const RealType* getDevicePtr() const { return RSoA.device_data(); }
 
-    void
-    createResource(ResourceCollection& collection) const override
-    {
-        auto resource_index =
-            collection.addResource(std::make_unique<MultiWalkerMem>());
-    }
+  const auto& getFusedNewPosBuffer() const { return mw_mem_handle_.getResource().mw_new_pos; }
 
-    void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list)
-        const override
-    {
-        MultiWalkerMem& mw_mem =
-            coords_list.template getCastedLeader<RealSpacePositionsTOMPTarget>()
-                .mw_mem_handle_ = collection.lendResource<MultiWalkerMem>();
-
-        auto& mw_rsoa_ptrs(mw_mem.mw_rsoa_ptrs);
-        const auto nw = coords_list.size();
-        mw_rsoa_ptrs.resize(nw);
-        for (int iw = 0; iw < nw; iw++) {
-            auto& coords =
-                coords_list
-                    .template getCastedElement<RealSpacePositionsTOMPTarget>(
-                        iw);
-            mw_rsoa_ptrs[iw] = coords.RSoA.device_data();
-        }
-        mw_rsoa_ptrs.updateTo();
-    }
+  void createResource(ResourceCollection& collection) const override
+  {
+    auto resource_index = collection.addResource(std::make_unique<MultiWalkerMem>());
+  }
 
-    void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list)
-        const override
-    {
-        collection.takebackResource(
-            coords_list.template getCastedLeader<RealSpacePositionsTOMPTarget>()
-                .mw_mem_handle_);
-    }
+  void acquireResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list) const override
+  {
+    MultiWalkerMem& mw_mem = coords_list.template getCastedLeader<RealSpacePositionsTOMPTarget>().mw_mem_handle_ =
+        collection.lendResource<MultiWalkerMem>();
 
-    const auto&
-    getMultiWalkerRSoADevicePtrs() const
+    auto& mw_rsoa_ptrs(mw_mem.mw_rsoa_ptrs);
+    const auto nw = coords_list.size();
+    mw_rsoa_ptrs.resize(nw);
+    for (int iw = 0; iw < nw; iw++)
     {
-        return mw_mem_handle_.getResource().mw_rsoa_ptrs;
+      auto& coords     = coords_list.template getCastedElement<RealSpacePositionsTOMPTarget>(iw);
+      mw_rsoa_ptrs[iw] = coords.RSoA.device_data();
     }
+    mw_rsoa_ptrs.updateTo();
+  }
+
+  void releaseResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<DynamicCoordinatesT<T>>& coords_list) const override
+  {
+    collection.takebackResource(coords_list.template getCastedLeader<RealSpacePositionsTOMPTarget>().mw_mem_handle_);
+  }
+
+  const auto& getMultiWalkerRSoADevicePtrs() const { return mw_mem_handle_.getResource().mw_rsoa_ptrs; }
 
 private:
-    /// particle positions in SoA layout
-    VectorSoaContainer<RealType, QMCTraits::DIM,
-        OMPallocator<RealType, PinnedAlignedAllocator<RealType>>>
-        RSoA;
+  /// particle positions in SoA layout
+  VectorSoaContainer<RealType, QMCTraits::DIM, OMPallocator<RealType, PinnedAlignedAllocator<RealType>>> RSoA;
 
-    /// multi walker shared memory buffer
-    struct MultiWalkerMem : public Resource
-    {
-        /// one particle new/old positions in SoA layout
-        VectorSoaContainer<RealType, QMCTraits::DIM,
-            OMPallocator<RealType, PinnedAlignedAllocator<RealType>>>
-            mw_new_pos;
+  /// multi walker shared memory buffer
+  struct MultiWalkerMem : public Resource
+  {
+    /// one particle new/old positions in SoA layout
+    VectorSoaContainer<RealType, QMCTraits::DIM, OMPallocator<RealType, PinnedAlignedAllocator<RealType>>> mw_new_pos;
 
-        /// accept list
-        Vector<int, OMPallocator<int, PinnedAlignedAllocator<int>>>
-            mw_accept_indices;
+    /// accept list
+    Vector<int, OMPallocator<int, PinnedAlignedAllocator<int>>> mw_accept_indices;
 
-        /// RSoA device ptr list
-        Vector<RealType*,
-            OMPallocator<RealType*, PinnedAlignedAllocator<RealType*>>>
-            mw_rsoa_ptrs;
+    /// RSoA device ptr list
+    Vector<RealType*, OMPallocator<RealType*, PinnedAlignedAllocator<RealType*>>> mw_rsoa_ptrs;
 
-        MultiWalkerMem() : Resource("MultiWalkerMem")
-        {
-        }
+    MultiWalkerMem() : Resource("MultiWalkerMem") {}
 
-        MultiWalkerMem(const MultiWalkerMem&) : MultiWalkerMem()
-        {
-        }
+    MultiWalkerMem(const MultiWalkerMem&) : MultiWalkerMem() {}
 
-        std::unique_ptr<Resource>
-        makeClone() const override
-        {
-            return std::make_unique<MultiWalkerMem>(*this);
-        }
-    };
+    std::unique_ptr<Resource> makeClone() const override { return std::make_unique<MultiWalkerMem>(*this); }
+  };
 
-    ResourceHandle<MultiWalkerMem> mw_mem_handle_;
+  ResourceHandle<MultiWalkerMem> mw_mem_handle_;
 
-    /// host view of RSoA
-    PosVectorSoa RSoA_hostview;
+  /// host view of RSoA
+  PosVectorSoa RSoA_hostview;
 
-    /// if true, host position has been changed while the device copy has not
-    /// been updated.
-    bool is_host_position_changed_;
+  /// if true, host position has been changed while the device copy has not
+  /// been updated.
+  bool is_host_position_changed_;
 
-    /// if true, mw_new_pos has been updated with active positions.
-    bool is_nw_new_pos_prepared;
+  /// if true, mw_new_pos has been updated with active positions. Starts false so no constructor leaves it unset.
+  bool is_nw_new_pos_prepared = false;
 
-    void
-    updateH2D()
-    {
-        RealType* data = RSoA.data();
-        PRAGMA_OFFLOAD(
-            "omp target update to(data[0:RSoA.capacity()*QMCTraits::DIM])")
-        is_host_position_changed_ = false;
-    }
+  void updateH2D()
+  {
+    RealType* data = RSoA.data();
+    PRAGMA_OFFLOAD("omp target update to(data[0:RSoA.capacity()*QMCTraits::DIM])")
+    is_host_position_changed_ = false;
+  }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/ReptileT.h b/src/Particle/ReptileT.h
index ada42b2712f..d36ec5ecd3a 100644
--- a/src/Particle/ReptileT.h
+++ b/src/Particle/ReptileT.h
@@ -4,14 +4,11 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence
-//                    Livermore National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_REPTILET_H
@@ -25,325 +22,254 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class MCWalkerConfigurationT;
 
-template <typename T>
+template<typename T>
 class ReptileT
 {
 public:
-    using WP = WalkerProperties::Indexes;
-    using Walker_t = typename MCWalkerConfigurationT<T>::Walker_t;
-    // using Buffer_t = Walker_t::Buffer_t             ;
-    //     using Walker_t = MCWalkerConfiguration::Walker_t;
-    using WalkerIter_t = typename MCWalkerConfigurationT<T>::iterator;
-    using ReptileConfig_t = std::vector<typename Walker_t::ParticlePos>;
-    using IndexType = typename ParticleSetTraits<T>::IndexType;
-    using RealType = typename ParticleSetTraits<T>::RealType;
-
-    std::vector<IndexType> Action;
-    std::vector<IndexType> TransProb;
-
-    RealType forwardprob;
-    RealType backwardprob;
-    RealType forwardaction;
-    RealType backwardaction;
-
-    RealType tau;
-
-    MCWalkerConfigurationT<T>& w;
-    WalkerIter_t repstart, repend;
-    IndexType direction, headindex, nbeads;
-    Walker_t* prophead;
-
-    inline ReptileT(
-        MCWalkerConfigurationT<T>& W, WalkerIter_t start, WalkerIter_t end) :
-        w(W),
+  using WP       = WalkerProperties::Indexes;
+  using Walker_t = typename MCWalkerConfigurationT<T>::Walker_t;
+  // using Buffer_t = Walker_t::Buffer_t             ;
+  //     using Walker_t = MCWalkerConfiguration::Walker_t;
+  using WalkerIter_t    = typename MCWalkerConfigurationT<T>::iterator;
+  using ReptileConfig_t = std::vector<typename Walker_t::ParticlePos>;
+  using IndexType       = typename ParticleSetTraits<T>::IndexType;
+  using RealType        = typename ParticleSetTraits<T>::RealType;
+
+  std::vector<IndexType> Action;
+  std::vector<IndexType> TransProb;
+
+  RealType forwardprob;
+  RealType backwardprob;
+  RealType forwardaction;
+  RealType backwardaction;
+
+  RealType tau;
+
+  MCWalkerConfigurationT<T>& w;
+  WalkerIter_t repstart, repend;
+  IndexType direction, headindex, nbeads;
+  Walker_t* prophead;
+
+  inline ReptileT(MCWalkerConfigurationT<T>& W, WalkerIter_t start, WalkerIter_t end)
+      : w(W),
         repstart(start),
         repend(end),
         direction(1),
         headindex(0),
         prophead(0) //, r2prop(0.0), r2accept(0.0),tau(0.0)
+  {
+    Action.resize(3);
+    Action[0] = w.addProperty("ActionBackward");
+    Action[1] = w.addProperty("ActionForward");
+    Action[2] = w.addProperty("ActionLocal");
+    TransProb.resize(2);
+    TransProb[0] = w.addProperty("TransProbBackward");
+    TransProb[1] = w.addProperty("TransProbForward");
+
+    nbeads = repend - repstart;
+  }
+
+  ~ReptileT() {}
+
+  inline IndexType size() { return nbeads; }
+
+  inline Walker_t& operator[](IndexType i) { return getWalker(getBeadIndex(i)); }
+
+  inline IndexType wrapIndex(IndexType repindex) { return (repindex % nbeads + nbeads) % nbeads; }
+
+  inline Walker_t& getWalker(IndexType i)
+  {
+    WalkerIter_t bead = repstart + wrapIndex(i);
+    return **bead;
+  }
+
+  inline IndexType getBeadIndex(IndexType i) { return wrapIndex(headindex + direction * i); }
+  inline Walker_t& getBead(IndexType i) { return getWalker(getBeadIndex(i)); }
+  inline Walker_t& getHead() { return getWalker(getBeadIndex(0)); }
+  inline Walker_t& getTail() { return getWalker(getBeadIndex(nbeads - 1)); }
+  inline Walker_t& getNext() { return getWalker(getBeadIndex(nbeads - 2)); }
+  inline Walker_t& getCenter() { return getWalker(getBeadIndex((nbeads - 1) / 2)); }
+  // inline void setProposedHead(){
+
+  inline void flip()
+  {
+    // Reverse the reptile: the new head is the slot one step behind the current head, which is the current
+    // tail because getBeadIndex(nbeads - 1) == wrapIndex(headindex - direction); then negate direction.
+    headindex = wrapIndex(headindex - direction);
+    direction *= -1;
+  }
+
+  inline void setDirection(IndexType dir) { direction = dir; }
+
+  inline void setBead(Walker_t& walker, IndexType i)
+  {
+    IndexType index = getBeadIndex(i);   // storage index of bead i, counted from the head along direction
+    Walker_t& newbead(getWalker(index)); // reference to the walker stored in that slot
+    newbead = walker;                    // assigns into the stored walker; intended to be a deep ("hard") copy
+  }
+
+  inline void setHead(Walker_t& overwrite)
+  {
+    // Reuse the current tail slot as the new head, then assign `overwrite` into it.
+    headindex = getBeadIndex(nbeads - 1); // head now points at the slot that was the tail.
+    Walker_t& newhead(getBead(0));        // bead 0 is resolved against the updated headindex
+    newhead = overwrite;
+  }
+  // Advances the reptile one step by making the current tail slot the head, and returns a reference
+  // to the walker in that slot (presumably for the caller to overwrite); nothing is copied here.
+  inline Walker_t& getNewHead()
+  {
+    // The old tail slot is reused as the head.
+    headindex = getBeadIndex(nbeads - 1); // sets head to the position of the tail.
+    return getWalker(headindex);
+  }
+
+  void saveAction(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0)
+  {
+    // Store val in walker's action slot: d == 0 -> Action[2] ("ActionLocal"); else slot (1 - d * direction) / 2.
+    IndexType actionindex = 2;
+    if (d != 0) // test d (not direction) so d == 0 writes the same slot getDirectionalAction() reads for d == 0
+      actionindex = (1 - d * direction) / 2;
+    walker.Properties(nPsi, Action[actionindex]) = val;
+  }
+
+  RealType getDirectionalAction(Walker_t& walker, IndexType d, IndexType nPsi = 0)
+  {
+    // Read walker's action property: d == 0 -> Action[2] ("ActionLocal"); else slot (1 - direction * d) / 2.
+    IndexType actionindex = 2;
+    if (d != 0)
+      actionindex = (1 - direction * d) / 2; // product +1 -> Action[0], product -1 -> Action[1]
+
+    return walker.Properties(nPsi, Action[actionindex]);
+  }
+
+  RealType getLinkAction(Walker_t& new_walker, Walker_t& old_walker, IndexType d, IndexType nPsi = 0)
+  {
+    RealType af = getDirectionalAction(old_walker, +1, nPsi); // old_walker's d = +1 slot
+    RealType ab = getDirectionalAction(new_walker, -1, nPsi); // new_walker's d = -1 slot
+    RealType a0 = getDirectionalAction(old_walker, 0, nPsi) + getDirectionalAction(new_walker, 0, nPsi);
+    return af + ab + a0; // NOTE(review): parameter d is never read in this body -- confirm that is intended
+  }
+
+  void saveTransProb(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0)
+  {
+    // Store val in walker's TransProb slot: d * direction == +1 -> TransProb[0], == -1 -> TransProb[1].
+    IndexType transindex                           = (1 - d * direction) / 2;
+    walker.Properties(nPsi, TransProb[transindex]) = val;
+  }
+
+  void saveTransProb(ParticleSetT<T>& W, IndexType d, RealType val, IndexType nPsi = 0)
+  {
+    // Same slot selection as the Walker_t overload, but writes into the particle set's own Properties.
+    IndexType transindex                      = (1 - d * direction) / 2;
+    W.Properties(nPsi, TransProb[transindex]) = val;
+  }
+  RealType getTransProb(Walker_t& walker, IndexType d, IndexType nPsi = 0)
+  {
+    // Reads the slot saveTransProb(walker, d, ...) writes; nPsi is an index, so it is IndexType as it is there.
+    IndexType transindex = (1 - d * direction) / 2;
+    return walker.Properties(nPsi, TransProb[transindex]);
+  }
+  RealType getTransProb(ParticleSetT<T>& W, IndexType d, IndexType nPsi = 0)
+  {
+    // Reads the slot saveTransProb(W, d, ...) writes; nPsi is an index, so it is IndexType as it is there.
+    IndexType transindex = (1 - d * direction) / 2;
+    return W.Properties(nPsi, TransProb[transindex]);
+  }
+
+  inline void printState()
+  {
+    app_log() << "********PRINT REPTILE STATE*********\n";
+    app_log() << "Direction=" << direction << "  Headindex=" << headindex << "  tail=" << getBeadIndex(nbeads - 1)
+              << "\n  next=" << getBeadIndex(nbeads - 2) << "  nbeads=" << nbeads << std::endl;
+    app_log() << "BeadIndex\tWrapIndex\tEnergy\tAction[0]\tAction[1]"
+                 "\tAction[2]\t\n";
+    for (int i = 0; i < nbeads; i++)
     {
-        Action.resize(3);
-        Action[0] = w.addProperty("ActionBackward");
-        Action[1] = w.addProperty("ActionForward");
-        Action[2] = w.addProperty("ActionLocal");
-        TransProb.resize(2);
-        TransProb[0] = w.addProperty("TransProbBackward");
-        TransProb[1] = w.addProperty("TransProbForward");
-
-        nbeads = repend - repstart;
+      app_log() << i << "\t" << getBeadIndex(i) << "\t" << getBead(i).Properties(WP::LOCALENERGY) << "\t"
+                << getBead(i).Properties(Action[0]) << "\t" << getBead(i).Properties(Action[1]) << "\t"
+                << getBead(i).Properties(Action[2]) << "\n";
     }
-
-    ~ReptileT()
+    app_log() << "POSITIONS===============:\n";
+    for (int i = 0; i < nbeads; i++)
     {
+      //  app_log()<<i<<"\t1"<<1<<"\t"<<getBead(i).R[0]<<"\n";
+      //  app_log()<<i<<"\t2"<<2<<"\t"<<getBead(i).R[1]<<"\n";
+      app_log() << "BEAD #" << i << " tau = " << tau * i << std::endl;
+      app_log() << getBead(i).R << std::endl;
     }
-
-    inline IndexType
-    size()
+    app_log() << "GVECS===============:\n";
+    for (int i = 0; i < nbeads; i++)
     {
-        return nbeads;
+      //      app_log()<<i<<"\t1"<<1<<"\t"<<getBead(i).G[0]<<"\n";
+      //      app_log()<<i<<"\t2"<<2<<"\t"<<getBead(i).G[1]<<"\n";
+      app_log() << "BEAD #" << i << " tau = " << tau * i << std::endl;
+      app_log() << getBead(i).G << std::endl;
     }
-
-    inline Walker_t&
-    operator[](IndexType i)
+    app_log() << "************************************\n";
+  }
+  inline RealType getTau() { return tau; }
+  inline void setTau(RealType t) { tau = t; }
+
+  // Takes a value of imaginary time "t" and returns a 3N particle position
+  // vector for that time slice, linearly interpolated between neighbouring beads
+  //  of the current reptile.  For tau > 0: if t >= (nbeads - 1) * tau, return the
+  //  tail bead; if t < tau (including t < 0), return the head bead unchanged.
+  inline typename Walker_t::ParticlePos linearInterp(RealType t)
+  {
+    IndexType nbead = IndexType(t / tau); // Calculate the lower bound on the timeslice.
+                                          // t is between binnum*Tau and (binnum+1)Tau
+    RealType beadfrac = t / tau - nbead;  // the fractional coordinate between n and n+1 bead
+    if (nbead <= 0)
     {
-        return getWalker(getBeadIndex(i));
+      typename ParticleSetT<T>::ParticlePos result = getHead().R;
+      return result;
     }
-
-    inline IndexType
-    wrapIndex(IndexType repindex)
+    else if (nbead >= nbeads - 1)
     {
-        return (repindex % nbeads + nbeads) % nbeads;
+      typename ParticleSetT<T>::ParticlePos result = getTail().R;
+      return result;
     }
 
-    inline Walker_t&
-    getWalker(IndexType i)
+    else
     {
-        WalkerIter_t bead = repstart + wrapIndex(i);
-        return **bead;
-    }
+      typename Walker_t::ParticlePos dR(getBead(nbead + 1).R), interpR(getBead(nbead).R);
+      dR = dR - getBead(nbead).R;
 
-    inline IndexType
-    getBeadIndex(IndexType i)
-    {
-        return wrapIndex(headindex + direction * i);
-    }
-    inline Walker_t&
-    getBead(IndexType i)
-    {
-        return getWalker(getBeadIndex(i));
+      interpR = getBead(nbead).R + beadfrac * dR;
+      return interpR;
     }
-    inline Walker_t&
-    getHead()
-    {
-        return getWalker(getBeadIndex(0));
-    }
-    inline Walker_t&
-    getTail()
-    {
-        return getWalker(getBeadIndex(nbeads - 1));
-    }
-    inline Walker_t&
-    getNext()
-    {
-        return getWalker(getBeadIndex(nbeads - 2));
-    }
-    inline Walker_t&
-    getCenter()
-    {
-        return getWalker(getBeadIndex((nbeads - 1) / 2));
-    }
-    // inline void setProposedHead(){
+  }
+  inline ReptileConfig_t getReptileSlicePositions(RealType tau, RealType beta)
+  {
+    IndexType nbeads_new = IndexType(beta / tau);
+    ReptileConfig_t new_reptile_coords(0);
 
-    inline void
-    flip()
-    {
-        // direction*=-1;
-        // headindex = getBeadIndex(nbeads-1);
-        headindex = wrapIndex(headindex - direction);
-        direction *= -1;
-    }
-
-    inline void
-    setDirection(IndexType dir)
-    {
-        direction = dir;
-    }
-
-    inline void
-    setBead(Walker_t& walker, IndexType i)
-    {
-        IndexType index = getBeadIndex(i);
-        Walker_t& newbead(getWalker(index));
-        newbead = walker; // This should be a hard copy
-    }
-
-    inline void
-    setHead(Walker_t& overwrite)
-    {
-        // overwrite last element.
-        headindex = getBeadIndex(nbeads - 1); // sets to position of tail.
-        Walker_t& newhead(getBead(0));
-        newhead = overwrite;
-    }
-    // This function does two things:  1.)  Moves the reptile forward 1
-    // step.  2.) Returns the new head.
-    inline Walker_t&
-    getNewHead()
-    {
-        // overwrite last element.
-        headindex = getBeadIndex(nbeads - 1); // sets to position of tail.
-        return getWalker(headindex);
-    }
+    for (IndexType i = 0; i < nbeads_new; i++)
+      new_reptile_coords.push_back(linearInterp(tau * i));
 
-    void
-    saveAction(Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0)
-    {
-        // IndexType repdirection=circbuffer.get_direction();
-        IndexType actionindex = 2;
-        if (direction != 0)
-            actionindex = (1 - d * direction) / 2;
-        walker.Properties(nPsi, Action[actionindex]) = val;
-    }
-
-    RealType
-    getDirectionalAction(Walker_t& walker, IndexType d, IndexType nPsi = 0)
-    {
-        // IndexType repdirection=circbuffer.get_direction();
-        IndexType actionindex = 2;
-        if (d != 0)
-            actionindex = (1 - direction * d) / 2;
-
-        return walker.Properties(nPsi, Action[actionindex]);
-    }
-
-    RealType
-    getLinkAction(Walker_t& new_walker, Walker_t& old_walker, IndexType d,
-        IndexType nPsi = 0)
-    {
-        RealType af = getDirectionalAction(old_walker, +1, nPsi);
-        RealType ab = getDirectionalAction(new_walker, -1, nPsi);
-        RealType a0 = getDirectionalAction(old_walker, 0, nPsi) +
-            getDirectionalAction(new_walker, 0, nPsi);
-        return af + ab + a0;
-    }
-
-    void
-    saveTransProb(
-        Walker_t& walker, IndexType d, RealType val, IndexType nPsi = 0)
-    {
-        // IndexType repdirection=circbuffer.get_direction();
-        IndexType transindex = (1 - d * direction) / 2;
-        walker.Properties(nPsi, TransProb[transindex]) = val;
-    }
-
-    void
-    saveTransProb(ParticleSetT<T>& W, IndexType d, RealType val, IndexType nPsi = 0)
-    {
-        // IndexType repdirection=circbuffer.get_direction();
-        IndexType transindex = (1 - d * direction) / 2;
-        W.Properties(nPsi, TransProb[transindex]) = val;
-    }
-    RealType
-    getTransProb(Walker_t& walker, IndexType d, RealType nPsi = 0)
-    {
-        // IndexType repdirection=circbuffer.get_direction();
-        IndexType transindex = (1 - d * direction) / 2;
-        return walker.Properties(nPsi, TransProb[transindex]);
-    }
-    RealType
-    getTransProb(ParticleSetT<T>& W, IndexType d, RealType nPsi = 0)
-    {
-        // IndexType repdirection=circbuffer.get_direction();
-        IndexType transindex = (1 - d * direction) / 2;
-        return W.Properties(nPsi, TransProb[transindex]);
-    }
-
-    inline void
-    printState()
-    {
-        app_log() << "********PRINT REPTILE STATE*********\n";
-        app_log() << "Direction=" << direction << "  Headindex=" << headindex
-                  << "  tail=" << getBeadIndex(nbeads - 1)
-                  << "\n  next=" << getBeadIndex(nbeads - 2)
-                  << "  nbeads=" << nbeads << std::endl;
-        app_log() << "BeadIndex\tWrapIndex\tEnergy\tAction[0]\tAction[1]"
-                     "\tAction[2]\t\n";
-        for (int i = 0; i < nbeads; i++) {
-            app_log() << i << "\t" << getBeadIndex(i) << "\t"
-                      << getBead(i).Properties(WP::LOCALENERGY) << "\t"
-                      << getBead(i).Properties(Action[0]) << "\t"
-                      << getBead(i).Properties(Action[1]) << "\t"
-                      << getBead(i).Properties(Action[2]) << "\n";
-        }
-        app_log() << "POSITIONS===============:\n";
-        for (int i = 0; i < nbeads; i++) {
-            //  app_log()<<i<<"\t1"<<1<<"\t"<<getBead(i).R[0]<<"\n";
-            //  app_log()<<i<<"\t2"<<2<<"\t"<<getBead(i).R[1]<<"\n";
-            app_log() << "BEAD #" << i << " tau = " << tau * i << std::endl;
-            app_log() << getBead(i).R << std::endl;
-        }
-        app_log() << "GVECS===============:\n";
-        for (int i = 0; i < nbeads; i++) {
-            //      app_log()<<i<<"\t1"<<1<<"\t"<<getBead(i).G[0]<<"\n";
-            //      app_log()<<i<<"\t2"<<2<<"\t"<<getBead(i).G[1]<<"\n";
-            app_log() << "BEAD #" << i << " tau = " << tau * i << std::endl;
-            app_log() << getBead(i).G << std::endl;
-        }
-        app_log() << "************************************\n";
-    }
-    inline RealType
-    getTau()
-    {
-        return tau;
-    }
-    inline void
-    setTau(RealType t)
-    {
-        tau = t;
-    }
-
-    // This takes a value of imaginary time "t" and returns a 3N particle
-    // position vector, corresponding to a time slice extrapolated
-    //  from the current reptile.  If t>length of reptile, then return the last
-    //  bead.  if t<0; return the first bead.
-    inline typename Walker_t::ParticlePos
-    linearInterp(RealType t)
-    {
-        IndexType nbead =
-            IndexType(t / tau); // Calculate the lower bound on the timeslice.
-                                // t is between binnum*Tau and (binnum+1)Tau
-        RealType beadfrac =
-            t / tau - nbead; // the fractional coordinate between n and n+1 bead
-        if (nbead <= 0) {
-            typename ParticleSetT<T>::ParticlePos result = getHead().R;
-            return result;
-        }
-        else if (nbead >= nbeads - 1) {
-            typename ParticleSetT<T>::ParticlePos result = getTail().R;
-            return result;
-        }
-
-        else {
-            typename Walker_t::ParticlePos dR(getBead(nbead + 1).R),
-                interpR(getBead(nbead).R);
-            dR = dR - getBead(nbead).R;
-
-            interpR = getBead(nbead).R + beadfrac * dR;
-            return interpR;
-        }
-    }
-    inline ReptileConfig_t
-    getReptileSlicePositions(RealType tau, RealType beta)
-    {
-        IndexType nbeads_new = IndexType(beta / tau);
-        ReptileConfig_t new_reptile_coords(0);
-
-        for (IndexType i = 0; i < nbeads_new; i++)
-            new_reptile_coords.push_back(linearInterp(tau * i));
-
-        return new_reptile_coords;
-    }
-
-    inline void
-    setReptileSlicePositions(ReptileConfig_t& rept)
-    {
-        if (rept.size() == nbeads) {
-            for (int i = 0; i < nbeads; i++)
-                getBead(i).R = rept[i];
-        }
-        else
-            ;
-    }
+    return new_reptile_coords;
+  }
 
-    inline void
-    setReptileSlicePositions(typename Walker_t::ParticlePos R)
+  inline void setReptileSlicePositions(ReptileConfig_t& rept)
+  {
+    if (rept.size() == nbeads)
     {
-        for (int i = 0; i < nbeads; i++)
-            getBead(i).R = R;
+      for (int i = 0; i < nbeads; i++)
+        getBead(i).R = rept[i];
     }
+    else
+      ;
+  }
+
+  inline void setReptileSlicePositions(typename Walker_t::ParticlePos R)
+  {
+    for (int i = 0; i < nbeads; i++)
+      getBead(i).R = R;
+  }
 };
 
 } // namespace qmcplusplus
diff --git a/src/Particle/SampleStackT.cpp b/src/Particle/SampleStackT.cpp
index a40acd9bb93..92c36f542d1 100644
--- a/src/Particle/SampleStackT.cpp
+++ b/src/Particle/SampleStackT.cpp
@@ -20,57 +20,52 @@ namespace qmcplusplus
  * @param n number of samples per rank
  * @param num_ranks number of ranks. Used to set global number of samples.
  */
-template <typename T>
-void
-SampleStackT<T>::setMaxSamples(size_t n, size_t num_ranks)
+template<typename T>
+void SampleStackT<T>::setMaxSamples(size_t n, size_t num_ranks)
 {
-    max_samples_ = n;
-    global_num_samples_ = n * num_ranks;
-    current_sample_count_ = std::min(current_sample_count_, max_samples_);
-    sample_vector_.resize(n, MCSample(0));
+  max_samples_          = n;
+  global_num_samples_   = n * num_ranks;
+  current_sample_count_ = std::min(current_sample_count_, max_samples_);
+  sample_vector_.resize(n, MCSample(0));
 }
 
-template <typename T>
-const MCSample&
-SampleStackT<T>::getSample(size_t i) const
+template<typename T>
+const MCSample& SampleStackT<T>::getSample(size_t i) const
 {
-    return sample_vector_[i];
+  return sample_vector_[i];
 }
 
-template <typename T>
-void
-SampleStackT<T>::appendSample(MCSample&& sample)
+template<typename T>
+void SampleStackT<T>::appendSample(MCSample&& sample)
 {
-    // Ignore samples in excess of the expected number of samples
-    if (current_sample_count_ < max_samples_) {
-        sample_vector_[current_sample_count_] = std::move(sample);
-        current_sample_count_++;
-    }
+  // Ignore samples in excess of the expected number of samples
+  if (current_sample_count_ < max_samples_)
+  {
+    sample_vector_[current_sample_count_] = std::move(sample);
+    current_sample_count_++;
+  }
 }
 
 /** load a single sample from SampleStack
  */
-template <typename T>
-void
-SampleStackT<T>::loadSample(ParticleSetT<T>& pset, size_t iw) const
+template<typename T>
+void SampleStackT<T>::loadSample(ParticleSetT<T>& pset, size_t iw) const
 {
-    pset.R = sample_vector_[iw].R;
-    pset.spins = sample_vector_[iw].spins;
+  pset.R     = sample_vector_[iw].R;
+  pset.spins = sample_vector_[iw].spins;
 }
 
-template <typename T>
-void
-SampleStackT<T>::clearEnsemble()
+template<typename T>
+void SampleStackT<T>::clearEnsemble()
 {
-    sample_vector_.clear();
-    current_sample_count_ = 0;
+  sample_vector_.clear();
+  current_sample_count_ = 0;
 }
 
-template <typename T>
-void
-SampleStackT<T>::resetSampleCount()
+template<typename T>
+void SampleStackT<T>::resetSampleCount()
 {
-    current_sample_count_ = 0;
+  current_sample_count_ = 0;
 }
 
 template class SampleStackT<double>;
diff --git a/src/Particle/SampleStackT.h b/src/Particle/SampleStackT.h
index 228a26e8744..25165911fce 100644
--- a/src/Particle/SampleStackT.h
+++ b/src/Particle/SampleStackT.h
@@ -21,63 +21,41 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class SampleStackT
 {
 public:
-    using PropertySetType = typename ParticleSetTraits<T>::PropertySetType;
+  using PropertySetType = typename ParticleSetTraits<T>::PropertySetType;
 
-    size_t
-    getMaxSamples() const
-    {
-        return max_samples_;
-    }
+  size_t getMaxSamples() const { return max_samples_; }
 
-    bool
-    empty() const
-    {
-        return sample_vector_.empty();
-    }
+  bool empty() const { return sample_vector_.empty(); }
 
-    const MCSample&
-    getSample(size_t i) const;
+  const MCSample& getSample(size_t i) const;
 
-    //@{save/load/clear function for optimization
-    inline size_t
-    getNumSamples() const
-    {
-        return current_sample_count_;
-    }
-    /// set the number of max samples per rank.
-    void
-    setMaxSamples(size_t n, size_t number_of_ranks = 1);
-    /// Global number of samples is number of samples per rank * number of ranks
-    size_t
-    getGlobalNumSamples() const
-    {
-        return global_num_samples_;
-    }
-    /// load a single sample from SampleStack
-    void
-    loadSample(ParticleSetT<T>& pset, size_t iw) const;
+  //@{save/load/clear function for optimization
+  inline size_t getNumSamples() const { return current_sample_count_; }
+  /// set the number of max samples per rank.
+  void setMaxSamples(size_t n, size_t number_of_ranks = 1);
+  /// Global number of samples is number of samples per rank * number of ranks
+  size_t getGlobalNumSamples() const { return global_num_samples_; }
+  /// load a single sample from SampleStack
+  void loadSample(ParticleSetT<T>& pset, size_t iw) const;
 
-    void
-    appendSample(MCSample&& sample);
+  void appendSample(MCSample&& sample);
 
-    /// clear the ensemble
-    void
-    clearEnsemble();
-    //@}
-    ///  Set the sample count to zero but preserve the storage
-    void
-    resetSampleCount();
+  /// clear the ensemble
+  void clearEnsemble();
+  //@}
+  ///  Set the sample count to zero but preserve the storage
+  void resetSampleCount();
 
 private:
-    size_t max_samples_{10};
-    size_t current_sample_count_{0};
-    size_t global_num_samples_{max_samples_};
+  size_t max_samples_{10};
+  size_t current_sample_count_{0};
+  size_t global_num_samples_{max_samples_};
 
-    std::vector<MCSample> sample_vector_;
+  std::vector<MCSample> sample_vector_;
 };
 
 } // namespace qmcplusplus
diff --git a/src/Particle/SimulationCellT.cpp b/src/Particle/SimulationCellT.cpp
index 8ad7295bb65..df847762860 100644
--- a/src/Particle/SimulationCellT.cpp
+++ b/src/Particle/SimulationCellT.cpp
@@ -14,57 +14,54 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 SimulationCellT<T>::SimulationCellT() = default;
 
-template <typename T>
+template<typename T>
 SimulationCellT<T>::SimulationCellT(const Lattice& lattice) : lattice_(lattice)
 {
-    resetLRBox();
+  resetLRBox();
 }
 
-template <typename T>
-void
-SimulationCellT<T>::resetLRBox()
+template<typename T>
+void SimulationCellT<T>::resetLRBox()
 {
-    if (lattice_.SuperCellEnum != SUPERCELL_OPEN) {
-        lattice_.SetLRCutoffs(lattice_.Rv);
-        LRBox_ = lattice_;
-        bool changed = false;
-        if (lattice_.SuperCellEnum == SUPERCELL_SLAB &&
-            lattice_.VacuumScale != 1.0) {
-            LRBox_.R(2, 0) *= lattice_.VacuumScale;
-            LRBox_.R(2, 1) *= lattice_.VacuumScale;
-            LRBox_.R(2, 2) *= lattice_.VacuumScale;
-            changed = true;
-        }
-        else if (lattice_.SuperCellEnum == SUPERCELL_WIRE &&
-            lattice_.VacuumScale != 1.0) {
-            LRBox_.R(1, 0) *= lattice_.VacuumScale;
-            LRBox_.R(1, 1) *= lattice_.VacuumScale;
-            LRBox_.R(1, 2) *= lattice_.VacuumScale;
-            LRBox_.R(2, 0) *= lattice_.VacuumScale;
-            LRBox_.R(2, 1) *= lattice_.VacuumScale;
-            LRBox_.R(2, 2) *= lattice_.VacuumScale;
-            changed = true;
-        }
-        LRBox_.reset();
-        LRBox_.SetLRCutoffs(LRBox_.Rv);
-        LRBox_.printCutoffs(app_log());
-
-        if (changed) {
-            app_summary()
-                << "  Simulation box changed by vacuum supercell conditions"
-                << std::endl;
-            app_log() << "--------------------------------------- "
-                      << std::endl;
-            LRBox_.print(app_log());
-            app_log() << "--------------------------------------- "
-                      << std::endl;
-        }
+  if (lattice_.SuperCellEnum != SUPERCELL_OPEN)
+  {
+    lattice_.SetLRCutoffs(lattice_.Rv);
+    LRBox_       = lattice_;
+    bool changed = false;
+    if (lattice_.SuperCellEnum == SUPERCELL_SLAB && lattice_.VacuumScale != 1.0)
+    {
+      LRBox_.R(2, 0) *= lattice_.VacuumScale;
+      LRBox_.R(2, 1) *= lattice_.VacuumScale;
+      LRBox_.R(2, 2) *= lattice_.VacuumScale;
+      changed = true;
+    }
+    else if (lattice_.SuperCellEnum == SUPERCELL_WIRE && lattice_.VacuumScale != 1.0)
+    {
+      LRBox_.R(1, 0) *= lattice_.VacuumScale;
+      LRBox_.R(1, 1) *= lattice_.VacuumScale;
+      LRBox_.R(1, 2) *= lattice_.VacuumScale;
+      LRBox_.R(2, 0) *= lattice_.VacuumScale;
+      LRBox_.R(2, 1) *= lattice_.VacuumScale;
+      LRBox_.R(2, 2) *= lattice_.VacuumScale;
+      changed = true;
+    }
+    LRBox_.reset();
+    LRBox_.SetLRCutoffs(LRBox_.Rv);
+    LRBox_.printCutoffs(app_log());
 
-        k_lists_.updateKLists(LRBox_, LRBox_.LR_kc, LRBox_.ndim);
+    if (changed)
+    {
+      app_summary() << "  Simulation box changed by vacuum supercell conditions" << std::endl;
+      app_log() << "--------------------------------------- " << std::endl;
+      LRBox_.print(app_log());
+      app_log() << "--------------------------------------- " << std::endl;
     }
+
+    k_lists_.updateKLists(LRBox_, LRBox_.LR_kc, LRBox_.ndim);
+  }
 }
 
 template class SimulationCellT<double>;
diff --git a/src/Particle/SimulationCellT.h b/src/Particle/SimulationCellT.h
index 7eb03fc3a4f..b1956d15da3 100644
--- a/src/Particle/SimulationCellT.h
+++ b/src/Particle/SimulationCellT.h
@@ -17,56 +17,39 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class ParticleSetPoolT;
 
-template <typename T>
+template<typename T>
 class SimulationCellT
 {
 public:
-    using Lattice = typename LatticeParticleTraits<T>::ParticleLayout;
+  using Lattice = typename LatticeParticleTraits<T>::ParticleLayout;
 
-    SimulationCellT();
-    SimulationCellT(const Lattice& lattice);
+  SimulationCellT();
+  SimulationCellT(const Lattice& lattice);
 
-    const Lattice&
-    getLattice() const
-    {
-        return lattice_;
-    }
-    const Lattice&
-    getPrimLattice() const
-    {
-        return primative_lattice_;
-    }
-    const Lattice&
-    getLRBox() const
-    {
-        return LRBox_;
-    }
+  const Lattice& getLattice() const { return lattice_; }
+  const Lattice& getPrimLattice() const { return primative_lattice_; }
+  const Lattice& getLRBox() const { return LRBox_; }
 
-    void
-    resetLRBox();
+  void resetLRBox();
 
-    /// access k_lists_ read only
-    const KContainerT<T>&
-    getKLists() const
-    {
-        return k_lists_;
-    }
+  /// access k_lists_ read only
+  const KContainerT<T>& getKLists() const { return k_lists_; }
 
 private:
-    /// simulation cell lattice
-    Lattice lattice_;
-    /// Primative cell lattice
-    Lattice primative_lattice_;
-    /// long-range box
-    Lattice LRBox_;
+  /// simulation cell lattice
+  Lattice lattice_;
+  /// Primitive cell lattice
+  Lattice primative_lattice_;
+  /// long-range box
+  Lattice LRBox_;
 
-    /// K-Vector List.
-    KContainerT<T> k_lists_;
+  /// K-Vector List.
+  KContainerT<T> k_lists_;
 
-    friend class ParticleSetPoolT<T>;
+  friend class ParticleSetPoolT<T>;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/SoaDistanceTableAAT.h b/src/Particle/SoaDistanceTableAAT.h
index 289bcc22a84..3ccb43be7ad 100644
--- a/src/Particle/SoaDistanceTableAAT.h
+++ b/src/Particle/SoaDistanceTableAAT.h
@@ -22,216 +22,197 @@ namespace qmcplusplus
 /**@ingroup nnlist
  * @brief A derived classe from DistacneTableData, specialized for dense case
  */
-template <typename T, unsigned D, int SC>
-struct SoaDistanceTableAAT :
-    public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
-    public DistanceTableAAT<T>
+template<typename T, unsigned D, int SC>
+struct SoaDistanceTableAAT : public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
+                             public DistanceTableAAT<T>
 {
-    using RealType = typename DistanceTableAAT<T>::RealType;
-    using PosType = typename DistanceTableAAT<T>::PosType;
-    using IndexType = typename DistanceTableAAT<T>::IndexType;
+  using RealType  = typename DistanceTableAAT<T>::RealType;
+  using PosType   = typename DistanceTableAAT<T>::PosType;
+  using IndexType = typename DistanceTableAAT<T>::IndexType;
 
-    /// actual memory for dist and displacements_
-    aligned_vector<RealType> memory_pool_;
+  /// actual memory for dist and displacements_
+  aligned_vector<RealType> memory_pool_;
 
-    SoaDistanceTableAAT(ParticleSetT<T>& target) :
-        DTD_BConds<RealType, D, SC>(target.getLattice()),
+  SoaDistanceTableAAT(ParticleSetT<T>& target)
+      : DTD_BConds<RealType, D, SC>(target.getLattice()),
         DistanceTableAAT<T>(target, DTModes::ALL_OFF),
         num_targets_padded_(getAlignedSize<RealType>(this->num_targets_)),
 #if !defined(NDEBUG)
         old_prepared_elec_id_(-1),
 #endif
-        evaluate_timer_(createGlobalTimer(std::string("DTAA::evaluate_") +
-                target.getName() + "_" + target.getName(),
-            timer_level_fine)),
-        move_timer_(createGlobalTimer(std::string("DTAA::move_") +
-                target.getName() + "_" + target.getName(),
-            timer_level_fine)),
-        update_timer_(createGlobalTimer(std::string("DTAA::update_") +
-                target.getName() + "_" + target.getName(),
-            timer_level_fine))
-    {
-        resize();
-    }
+        evaluate_timer_(createGlobalTimer(std::string("DTAA::evaluate_") + target.getName() + "_" + target.getName(),
+                                          timer_level_fine)),
+        move_timer_(createGlobalTimer(std::string("DTAA::move_") + target.getName() + "_" + target.getName(),
+                                      timer_level_fine)),
+        update_timer_(createGlobalTimer(std::string("DTAA::update_") + target.getName() + "_" + target.getName(),
+                                        timer_level_fine))
+  {
+    resize();
+  }
 
-    SoaDistanceTableAAT() = delete;
-    SoaDistanceTableAAT(const SoaDistanceTableAAT&) = delete;
-    ~SoaDistanceTableAAT() override
-    {
-    }
+  SoaDistanceTableAAT()                           = delete;
+  SoaDistanceTableAAT(const SoaDistanceTableAAT&) = delete;
+  ~SoaDistanceTableAAT() override {}
+
+  size_t compute_size(int N) const
+  {
+    const size_t num_padded = getAlignedSize<RealType>(N);
+    const size_t Alignment  = getAlignment<RealType>();
+    return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2;
+  }
 
-    size_t
-    compute_size(int N) const
+  void resize()
+  {
+    // initialize memory containers and views
+    const size_t total_size = compute_size(this->num_targets_);
+    memory_pool_.resize(total_size * (1 + D));
+    this->distances_.resize(this->num_targets_);
+    this->displacements_.resize(this->num_targets_);
+    for (int i = 0; i < this->num_targets_; ++i)
     {
-        const size_t num_padded = getAlignedSize<RealType>(N);
-        const size_t Alignment = getAlignment<RealType>();
-        return (num_padded * (2 * N - num_padded + 1) +
-                   (Alignment - 1) * num_padded) /
-            2;
+      this->distances_[i].attachReference(memory_pool_.data() + compute_size(i), i);
+      this->displacements_[i].attachReference(i, total_size, memory_pool_.data() + total_size + compute_size(i));
     }
 
-    void
-    resize()
-    {
-        // initialize memory containers and views
-        const size_t total_size = compute_size(this->num_targets_);
-        memory_pool_.resize(total_size * (1 + D));
-        this->distances_.resize(this->num_targets_);
-        this->displacements_.resize(this->num_targets_);
-        for (int i = 0; i < this->num_targets_; ++i) {
-            this->distances_[i].attachReference(
-                memory_pool_.data() + compute_size(i), i);
-            this->displacements_[i].attachReference(i, total_size,
-                memory_pool_.data() + total_size + compute_size(i));
-        }
+    this->old_r_.resize(this->num_targets_);
+    this->old_dr_.resize(this->num_targets_);
+    this->temp_r_.resize(this->num_targets_);
+    this->temp_dr_.resize(this->num_targets_);
+  }
 
-        this->old_r_.resize(this->num_targets_);
-        this->old_dr_.resize(this->num_targets_);
-        this->temp_r_.resize(this->num_targets_);
-        this->temp_dr_.resize(this->num_targets_);
-    }
+  inline void evaluate(ParticleSetT<T>& P) override
+  {
+    ScopedTimer local_timer(evaluate_timer_);
+    constexpr RealType BigR = std::numeric_limits<RealType>::max();
+    for (int iat = 1; iat < this->num_targets_; ++iat)
+      DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(),
+                                                    this->distances_[iat].data(), this->displacements_[iat], 0, iat,
+                                                    iat);
+  }
 
-    inline void
-    evaluate(ParticleSetT<T>& P) override
+  /// evaluate the temporary pair relations
+  inline void move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat, bool prepare_old) override
+  {
+    ScopedTimer local_timer(move_timer_);
+
+#if !defined(NDEBUG)
+    old_prepared_elec_id_ = prepare_old ? iat : -1;
+#endif
+    DTD_BConds<RealType, D, SC>::computeDistances(rnew, P.getCoordinates().getAllParticlePos(), this->temp_r_.data(),
+                                                  this->temp_dr_, 0, this->num_targets_, iat);
+    // set up old_r_ and old_dr_ for moves that may get accepted.
+    if (prepare_old)
     {
-        ScopedTimer local_timer(evaluate_timer_);
-        constexpr RealType BigR = std::numeric_limits<RealType>::max();
-        for (int iat = 1; iat < this->num_targets_; ++iat)
-            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
-                P.getCoordinates().getAllParticlePos(),
-                this->distances_[iat].data(), this->displacements_[iat], 0, iat,
-                iat);
+      // recompute from scratch
+      DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(),
+                                                    this->old_r_.data(), this->old_dr_, 0, this->num_targets_, iat);
+      this->old_r_[iat] = std::numeric_limits<RealType>::max(); // assign a big number
     }
+  }
 
-    /// evaluate the temporary pair relations
-    inline void
-    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
-        bool prepare_old) override
+  int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+  {
+    // ensure there are neighbors
+    assert(this->num_targets_ > 1);
+    RealType min_dist = std::numeric_limits<RealType>::max();
+    int index         = -1;
+    if (newpos)
     {
-        ScopedTimer local_timer(move_timer_);
-
-#if !defined(NDEBUG)
-        old_prepared_elec_id_ = prepare_old ? iat : -1;
-#endif
-        DTD_BConds<RealType, D, SC>::computeDistances(rnew,
-            P.getCoordinates().getAllParticlePos(), this->temp_r_.data(),
-            this->temp_dr_, 0, this->num_targets_, iat);
-        // set up old_r_ and old_dr_ for moves may get accepted.
-        if (prepare_old) {
-            // recompute from scratch
-            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
-                P.getCoordinates().getAllParticlePos(), this->old_r_.data(),
-                this->old_dr_, 0, this->num_targets_, iat);
-            this->old_r_[iat] =
-                std::numeric_limits<RealType>::max(); // assign a big number
+      for (int jat = 0; jat < this->num_targets_; ++jat)
+        if (this->temp_r_[jat] < min_dist && jat != iat)
+        {
+          min_dist = this->temp_r_[jat];
+          index    = jat;
         }
+      assert(index >= 0);
+      dr = this->temp_dr_[index];
     }
-
-    int
-    get_first_neighbor(
-        IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+    else
     {
-        // ensure there are neighbors
-        assert(this->num_targets_ > 1);
-        RealType min_dist = std::numeric_limits<RealType>::max();
-        int index = -1;
-        if (newpos) {
-            for (int jat = 0; jat < this->num_targets_; ++jat)
-                if (this->temp_r_[jat] < min_dist && jat != iat) {
-                    min_dist = this->temp_r_[jat];
-                    index = jat;
-                }
-            assert(index >= 0);
-            dr = this->temp_dr_[index];
+      for (int jat = 0; jat < iat; ++jat)
+        if (this->distances_[iat][jat] < min_dist)
+        {
+          min_dist = this->distances_[iat][jat];
+          index    = jat;
         }
-        else {
-            for (int jat = 0; jat < iat; ++jat)
-                if (this->distances_[iat][jat] < min_dist) {
-                    min_dist = this->distances_[iat][jat];
-                    index = jat;
-                }
-            for (int jat = iat + 1; jat < this->num_targets_; ++jat)
-                if (this->distances_[jat][iat] < min_dist) {
-                    min_dist = this->distances_[jat][iat];
-                    index = jat;
-                }
-            assert(index != iat && index >= 0);
-            if (index < iat)
-                dr = this->displacements_[iat][index];
-            else
-                dr = this->displacements_[index][iat];
+      for (int jat = iat + 1; jat < this->num_targets_; ++jat)
+        if (this->distances_[jat][iat] < min_dist)
+        {
+          min_dist = this->distances_[jat][iat];
+          index    = jat;
         }
-        r = min_dist;
-        return index;
+      assert(index != iat && index >= 0);
+      if (index < iat)
+        dr = this->displacements_[iat][index];
+      else
+        dr = this->displacements_[index][iat];
     }
+    r = min_dist;
+    return index;
+  }
 
-    /** After accepting the iat-th particle, update the iat-th row of distances_
+  /** After accepting the iat-th particle, update the iat-th row of distances_
      * and displacements_. Upper triangle is not needed in the later computation
      * and thus not updated
      */
-    inline void
-    update(IndexType iat) override
+  inline void update(IndexType iat) override
+  {
+    ScopedTimer local_timer(update_timer_);
+    // update [0, iat)
+    const int nupdate = iat;
+    // copy row
+    assert(nupdate <= this->temp_r_.size());
+    std::copy_n(this->temp_r_.data(), nupdate, this->distances_[iat].data());
+    for (int idim = 0; idim < D; ++idim)
+      std::copy_n(this->temp_dr_.data(idim), nupdate, this->displacements_[iat].data(idim));
+    // copy column
+    for (size_t i = iat + 1; i < this->num_targets_; ++i)
     {
-        ScopedTimer local_timer(update_timer_);
-        // update [0, iat)
-        const int nupdate = iat;
-        // copy row
-        assert(nupdate <= this->temp_r_.size());
-        std::copy_n(
-            this->temp_r_.data(), nupdate, this->distances_[iat].data());
-        for (int idim = 0; idim < D; ++idim)
-            std::copy_n(this->temp_dr_.data(idim), nupdate,
-                this->displacements_[iat].data(idim));
-        // copy column
-        for (size_t i = iat + 1; i < this->num_targets_; ++i) {
-            this->distances_[i][iat] = this->temp_r_[i];
-            this->displacements_[i](iat) = -this->temp_dr_[i];
-        }
+      this->distances_[i][iat]     = this->temp_r_[i];
+      this->displacements_[i](iat) = -this->temp_dr_[i];
     }
+  }
 
-    void
-    updatePartial(IndexType jat, bool from_temp) override
+  void updatePartial(IndexType jat, bool from_temp) override
+  {
+    ScopedTimer local_timer(update_timer_);
+    // update [0, jat)
+    const int nupdate = jat;
+    if (from_temp)
     {
-        ScopedTimer local_timer(update_timer_);
-        // update [0, jat)
-        const int nupdate = jat;
-        if (from_temp) {
-            // copy row
-            assert(nupdate <= this->temp_r_.size());
-            std::copy_n(
-                this->temp_r_.data(), nupdate, this->distances_[jat].data());
-            for (int idim = 0; idim < D; ++idim)
-                std::copy_n(this->temp_dr_.data(idim), nupdate,
-                    this->displacements_[jat].data(idim));
-        }
-        else {
-            assert(old_prepared_elec_id_ == jat);
-            // copy row
-            assert(nupdate <= this->old_r_.size());
-            std::copy_n(
-                this->old_r_.data(), nupdate, this->distances_[jat].data());
-            for (int idim = 0; idim < D; ++idim)
-                std::copy_n(this->old_dr_.data(idim), nupdate,
-                    this->displacements_[jat].data(idim));
-        }
+      // copy row
+      assert(nupdate <= this->temp_r_.size());
+      std::copy_n(this->temp_r_.data(), nupdate, this->distances_[jat].data());
+      for (int idim = 0; idim < D; ++idim)
+        std::copy_n(this->temp_dr_.data(idim), nupdate, this->displacements_[jat].data(idim));
+    }
+    else
+    {
+      assert(old_prepared_elec_id_ == jat);
+      // copy row
+      assert(nupdate <= this->old_r_.size());
+      std::copy_n(this->old_r_.data(), nupdate, this->distances_[jat].data());
+      for (int idim = 0; idim < D; ++idim)
+        std::copy_n(this->old_dr_.data(idim), nupdate, this->displacements_[jat].data(idim));
     }
+  }
 
 private:
-    /// number of targets with padding
-    const size_t num_targets_padded_;
+  /// number of targets with padding
+  const size_t num_targets_padded_;
 #if !defined(NDEBUG)
-    /** set to particle id after move() with prepare_old = true. -1 means not
+  /** set to particle id after move() with prepare_old = true. -1 means not
      * prepared. It is intended only for safety checks, not for codepath
      * selection.
      */
-    int old_prepared_elec_id_;
+  int old_prepared_elec_id_;
 #endif
-    /// timer for evaluate()
-    NewTimer& evaluate_timer_;
-    /// timer for move()
-    NewTimer& move_timer_;
-    /// timer for update()
-    NewTimer& update_timer_;
+  /// timer for evaluate()
+  NewTimer& evaluate_timer_;
+  /// timer for move()
+  NewTimer& move_timer_;
+  /// timer for update()
+  NewTimer& update_timer_;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/SoaDistanceTableAATOMPTarget.h b/src/Particle/SoaDistanceTableAATOMPTarget.h
index dededc19e66..cb20b2dd516 100644
--- a/src/Particle/SoaDistanceTableAATOMPTarget.h
+++ b/src/Particle/SoaDistanceTableAATOMPTarget.h
@@ -28,596 +28,505 @@ namespace qmcplusplus
 /**@ingroup nnlist
  * @brief A derived classe from DistacneTableData, specialized for dense case
  */
-template <typename T, unsigned D, int SC>
-struct SoaDistanceTableAATOMPTarget :
-    public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
-    public DistanceTableAAT<T>
+template<typename T, unsigned D, int SC>
+struct SoaDistanceTableAATOMPTarget : public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
+                                      public DistanceTableAAT<T>
 {
-    using RealType = typename DistanceTableAAT<T>::RealType;
-    using PosType = typename DistanceTableAAT<T>::PosType;
-    using IndexType = typename DistanceTableAAT<T>::IndexType;
-    using DistRow = typename DistanceTableAAT<T>::DistRow;
-    using DisplRow = typename DistanceTableAAT<T>::DisplRow;
-
-    /// actual memory for dist and displacements_
-    aligned_vector<RealType> memory_pool_;
-
-    /// actual memory for temp_r_
-    DistRow temp_r_mem_;
-    /// actual memory for temp_dr_
-    DisplRow temp_dr_mem_;
-    /// actual memory for old_r_
-    DistRow old_r_mem_;
-    /// actual memory for old_dr_
-    DisplRow old_dr_mem_;
-
-    /// multi walker shared memory buffer
-    struct DTAAMultiWalkerMem : public Resource
-    {
-        /// dist displ for temporary and old pairs
-        Vector<RealType,
-            OMPallocator<RealType, PinnedAlignedAllocator<RealType>>>
-            mw_new_old_dist_displ;
-
-        /** distances from a range of indics to the source.
+  using RealType  = typename DistanceTableAAT<T>::RealType;
+  using PosType   = typename DistanceTableAAT<T>::PosType;
+  using IndexType = typename DistanceTableAAT<T>::IndexType;
+  using DistRow   = typename DistanceTableAAT<T>::DistRow;
+  using DisplRow  = typename DistanceTableAAT<T>::DisplRow;
+
+  /// actual memory for dist and displacements_
+  aligned_vector<RealType> memory_pool_;
+
+  /// actual memory for temp_r_
+  DistRow temp_r_mem_;
+  /// actual memory for temp_dr_
+  DisplRow temp_dr_mem_;
+  /// actual memory for old_r_
+  DistRow old_r_mem_;
+  /// actual memory for old_dr_
+  DisplRow old_dr_mem_;
+
+  /// multi walker shared memory buffer
+  struct DTAAMultiWalkerMem : public Resource
+  {
+    /// dist displ for temporary and old pairs
+    Vector<RealType, OMPallocator<RealType, PinnedAlignedAllocator<RealType>>> mw_new_old_dist_displ;
+
+    /** distances from a range of indices to the source.
          * for original particle index i (row) and source particle id j (col)
          * j < i,  the element data is dist(r_i - r_j)
          * j > i,  the element data is dist(r_(n - 1 - i) - r_(n - 1 - j))
          */
-        Vector<RealType,
-            OMPallocator<RealType, PinnedAlignedAllocator<RealType>>>
-            mw_distances_subset;
+    Vector<RealType, OMPallocator<RealType, PinnedAlignedAllocator<RealType>>> mw_distances_subset;
 
-        DTAAMultiWalkerMem() : Resource("DTAAMultiWalkerMem")
-        {
-        }
+    DTAAMultiWalkerMem() : Resource("DTAAMultiWalkerMem") {}
 
-        DTAAMultiWalkerMem(const DTAAMultiWalkerMem&) : DTAAMultiWalkerMem()
-        {
-        }
+    DTAAMultiWalkerMem(const DTAAMultiWalkerMem&) : DTAAMultiWalkerMem() {}
 
-        std::unique_ptr<Resource>
-        makeClone() const override
-        {
-            return std::make_unique<DTAAMultiWalkerMem>(*this);
-        }
-    };
+    std::unique_ptr<Resource> makeClone() const override { return std::make_unique<DTAAMultiWalkerMem>(*this); }
+  };
 
-    ResourceHandle<DTAAMultiWalkerMem> mw_mem_handle_;
+  ResourceHandle<DTAAMultiWalkerMem> mw_mem_handle_;
 
-    SoaDistanceTableAATOMPTarget(ParticleSetT<T>& target) :
-        DTD_BConds<RealType, D, SC>(target.getLattice()),
+  SoaDistanceTableAATOMPTarget(ParticleSetT<T>& target)
+      : DTD_BConds<RealType, D, SC>(target.getLattice()),
         DistanceTableAAT<T>(target, DTModes::ALL_OFF),
         num_targets_padded_(getAlignedSize<RealType>(this->num_targets_)),
 #if !defined(NDEBUG)
         old_prepared_elec_id_(-1),
 #endif
-        offload_timer_(createGlobalTimer(
-            std::string("DTAAOMPTarget::offload_") + this->name_,
-            timer_level_fine)),
-        evaluate_timer_(createGlobalTimer(
-            std::string("DTAAOMPTarget::evaluate_") + this->name_,
-            timer_level_fine)),
-        move_timer_(
-            createGlobalTimer(std::string("DTAAOMPTarget::move_") + this->name_,
-                timer_level_fine)),
-        update_timer_(createGlobalTimer(
-            std::string("DTAAOMPTarget::update_") + this->name_,
-            timer_level_fine))
-
+        offload_timer_(createGlobalTimer(std::string("DTAAOMPTarget::offload_") + this->name_, timer_level_fine)),
+        evaluate_timer_(createGlobalTimer(std::string("DTAAOMPTarget::evaluate_") + this->name_, timer_level_fine)),
+        move_timer_(createGlobalTimer(std::string("DTAAOMPTarget::move_") + this->name_, timer_level_fine)),
+        update_timer_(createGlobalTimer(std::string("DTAAOMPTarget::update_") + this->name_, timer_level_fine))
+
+  {
+    auto* coordinates_soa = dynamic_cast<const RealSpacePositionsTOMPTarget<T>*>(&target.getCoordinates());
+    if (!coordinates_soa)
+      throw std::runtime_error("Source particle set doesn't have OpenMP "
+                               "offload. Contact developers!");
+    resize();
+    PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])")
+  }
+
+  SoaDistanceTableAATOMPTarget()                                    = delete;
+  SoaDistanceTableAATOMPTarget(const SoaDistanceTableAATOMPTarget&) = delete;
+  ~SoaDistanceTableAATOMPTarget(){PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")}
+
+  size_t compute_size(int N) const
+  {
+    const size_t num_padded = getAlignedSize<RealType>(N);
+    const size_t Alignment  = getAlignment<RealType>();
+    return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2;
+  }
+
+  void resize()
+  {
+    // initialize memory containers and views
+    const size_t total_size = compute_size(this->num_targets_);
+    memory_pool_.resize(total_size * (1 + D));
+    this->distances_.resize(this->num_targets_);
+    this->displacements_.resize(this->num_targets_);
+    for (int i = 0; i < this->num_targets_; ++i)
     {
-        auto* coordinates_soa =
-            dynamic_cast<const RealSpacePositionsTOMPTarget<T>*>(
-                &target.getCoordinates());
-        if (!coordinates_soa)
-            throw std::runtime_error("Source particle set doesn't have OpenMP "
-                                     "offload. Contact developers!");
-        resize();
-        PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])")
+      this->distances_[i].attachReference(memory_pool_.data() + compute_size(i), i);
+      this->displacements_[i].attachReference(i, total_size, memory_pool_.data() + total_size + compute_size(i));
     }
 
-    SoaDistanceTableAATOMPTarget() = delete;
-    SoaDistanceTableAATOMPTarget(const SoaDistanceTableAATOMPTarget&) = delete;
-    ~SoaDistanceTableAATOMPTarget(){
-        PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")}
-
-    size_t compute_size(int N) const
+    old_r_mem_.resize(this->num_targets_);
+    old_dr_mem_.resize(this->num_targets_);
+    temp_r_mem_.resize(this->num_targets_);
+    temp_dr_mem_.resize(this->num_targets_);
+  }
+
+  const RealType* getMultiWalkerTempDataPtr() const override
+  {
+    return mw_mem_handle_.getResource().mw_new_old_dist_displ.data();
+  }
+
+  void createResource(ResourceCollection& collection) const override
+  {
+    auto resource_index = collection.addResource(std::make_unique<DTAAMultiWalkerMem>());
+  }
+
+  void acquireResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
+  {
+    assert(this == &dt_list.getLeader());
+    auto& dt_leader          = dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>();
+    dt_leader.mw_mem_handle_ = collection.lendResource<DTAAMultiWalkerMem>();
+    const size_t nw          = dt_list.size();
+    const size_t stride_size = num_targets_padded_ * (D + 1);
+
+    for (int iw = 0; iw < nw; iw++)
     {
-      const size_t num_padded = getAlignedSize<RealType>(N);
-      const size_t Alignment  = getAlignment<RealType>();
-      return (num_padded * (2 * N - num_padded + 1) + (Alignment - 1) * num_padded) / 2;
+      auto& dt = dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(iw);
+      dt.temp_r_.free();
+      dt.temp_dr_.free();
+      dt.old_r_.free();
+      dt.old_dr_.free();
     }
 
-    void
-    resize()
+    auto& mw_new_old_dist_displ = dt_leader.mw_mem_handle_.getResource().mw_new_old_dist_displ;
+    mw_new_old_dist_displ.resize(nw * 2 * stride_size);
+    for (int iw = 0; iw < nw; iw++)
     {
-        // initialize memory containers and views
-        const size_t total_size = compute_size(this->num_targets_);
-        memory_pool_.resize(total_size * (1 + D));
-        this->distances_.resize(this->num_targets_);
-        this->displacements_.resize(this->num_targets_);
-        for (int i = 0; i < this->num_targets_; ++i) {
-            this->distances_[i].attachReference(
-                memory_pool_.data() + compute_size(i), i);
-            this->displacements_[i].attachReference(i, total_size,
-                memory_pool_.data() + total_size + compute_size(i));
-        }
-
-        old_r_mem_.resize(this->num_targets_);
-        old_dr_mem_.resize(this->num_targets_);
-        temp_r_mem_.resize(this->num_targets_);
-        temp_dr_mem_.resize(this->num_targets_);
+      auto& dt = dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(iw);
+      dt.temp_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * iw, num_targets_padded_);
+      dt.temp_dr_.attachReference(this->num_targets_, num_targets_padded_,
+                                  mw_new_old_dist_displ.data() + stride_size * iw + num_targets_padded_);
+      dt.old_r_.attachReference(mw_new_old_dist_displ.data() + stride_size * (iw + nw), num_targets_padded_);
+      dt.old_dr_.attachReference(this->num_targets_, num_targets_padded_,
+                                 mw_new_old_dist_displ.data() + stride_size * (iw + nw) + num_targets_padded_);
     }
-
-    const RealType*
-    getMultiWalkerTempDataPtr() const override
+  }
+
+  void releaseResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
+  {
+    collection.takebackResource(dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>().mw_mem_handle_);
+    const size_t nw = dt_list.size();
+    for (int iw = 0; iw < nw; iw++)
     {
-        return mw_mem_handle_.getResource().mw_new_old_dist_displ.data();
+      auto& dt = dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(iw);
+      dt.temp_r_.free();
+      dt.temp_dr_.free();
+      dt.old_r_.free();
+      dt.old_dr_.free();
     }
+  }
 
-    void
-    createResource(ResourceCollection& collection) const override
-    {
-        auto resource_index =
-            collection.addResource(std::make_unique<DTAAMultiWalkerMem>());
-    }
+  inline void evaluate(ParticleSetT<T>& P) override
+  {
+    ScopedTimer local_timer(evaluate_timer_);
 
-    void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
-    {
-        assert(this == &dt_list.getLeader());
-        auto& dt_leader =
-            dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>();
-        dt_leader.mw_mem_handle_ =
-            collection.lendResource<DTAAMultiWalkerMem>();
-        const size_t nw = dt_list.size();
-        const size_t stride_size = num_targets_padded_ * (D + 1);
-
-        for (int iw = 0; iw < nw; iw++) {
-            auto& dt =
-                dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(
-                    iw);
-            dt.temp_r_.free();
-            dt.temp_dr_.free();
-            dt.old_r_.free();
-            dt.old_dr_.free();
-        }
-
-        auto& mw_new_old_dist_displ =
-            dt_leader.mw_mem_handle_.getResource().mw_new_old_dist_displ;
-        mw_new_old_dist_displ.resize(nw * 2 * stride_size);
-        for (int iw = 0; iw < nw; iw++) {
-            auto& dt =
-                dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(
-                    iw);
-            dt.temp_r_.attachReference(
-                mw_new_old_dist_displ.data() + stride_size * iw,
-                num_targets_padded_);
-            dt.temp_dr_.attachReference(this->num_targets_, num_targets_padded_,
-                mw_new_old_dist_displ.data() + stride_size * iw +
-                    num_targets_padded_);
-            dt.old_r_.attachReference(
-                mw_new_old_dist_displ.data() + stride_size * (iw + nw),
-                num_targets_padded_);
-            dt.old_dr_.attachReference(this->num_targets_, num_targets_padded_,
-                mw_new_old_dist_displ.data() + stride_size * (iw + nw) +
-                    num_targets_padded_);
-        }
-    }
-
-    void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
-    {
-        collection.takebackResource(
-            dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>()
-                .mw_mem_handle_);
-        const size_t nw = dt_list.size();
-        for (int iw = 0; iw < nw; iw++) {
-            auto& dt =
-                dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(
-                    iw);
-            dt.temp_r_.free();
-            dt.temp_dr_.free();
-            dt.old_r_.free();
-            dt.old_dr_.free();
-        }
-    }
+    constexpr T BigR = std::numeric_limits<T>::max();
+    for (int iat = 1; iat < this->num_targets_; ++iat)
+      DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(),
+                                                    this->distances_[iat].data(), this->displacements_[iat], 0, iat,
+                                                    iat);
+  }
 
-    inline void
-    evaluate(ParticleSetT<T>& P) override
-    {
-        ScopedTimer local_timer(evaluate_timer_);
-
-        constexpr T BigR = std::numeric_limits<T>::max();
-        for (int iat = 1; iat < this->num_targets_; ++iat)
-            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
-                P.getCoordinates().getAllParticlePos(),
-                this->distances_[iat].data(), this->displacements_[iat], 0, iat,
-                iat);
-    }
-
-    /** compute distances from particles in [range_begin, range_end) to all the
+  /** compute distances from particles in [range_begin, range_end) to all the
      * particles. Although [range_begin, range_end) and be any particle [0,
      * num_sources), it is only necessary to compute half of the table due to
      * the symmetry of AA table. See note of the output data object
      * mw_distances_subset To keep resident memory minimal on the device,
      * range_end - range_begin < num_particls_stored is required.
      */
-    const RealType*
-    mw_evalDistsInRange(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list, size_t range_begin,
-        size_t range_end) const override
-    {
-        auto& dt_leader =
-            dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>();
-        const size_t subset_size = range_end - range_begin;
-        if (subset_size > dt_leader.num_particls_stored)
-            throw std::runtime_error("not enough internal buffer");
+  const RealType* mw_evalDistsInRange(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+                                      const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                                      size_t range_begin,
+                                      size_t range_end) const override
+  {
+    auto& dt_leader          = dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>();
+    const size_t subset_size = range_end - range_begin;
+    if (subset_size > dt_leader.num_particls_stored) // inclusive check: subset_size == num_particls_stored passes
+      throw std::runtime_error("not enough internal buffer");
 
-        ScopedTimer local_timer(dt_leader.evaluate_timer_);
+    ScopedTimer local_timer(dt_leader.evaluate_timer_);
 
-        DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
-        auto& pset_leader = p_list.getLeader();
+    DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
+    auto& pset_leader          = p_list.getLeader();
 
-        const size_t nw = dt_list.size();
-        const auto num_sources_local = dt_leader.num_targets_;
-        const auto num_padded = dt_leader.num_targets_padded_;
-        mw_mem.mw_distances_subset.resize(nw * subset_size * num_padded);
+    const size_t nw              = dt_list.size();
+    const auto num_sources_local = dt_leader.num_targets_;
+    const auto num_padded        = dt_leader.num_targets_padded_;
+    mw_mem.mw_distances_subset.resize(nw * subset_size * num_padded); // nw * subset_size rows of num_padded entries
 
-        const int ChunkSizePerTeam = 512;
-        const size_t num_teams =
-            (num_sources_local + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+    const int ChunkSizePerTeam = 512;
+    const size_t num_teams     = (num_sources_local + ChunkSizePerTeam - 1) / ChunkSizePerTeam; // chunks, rounded up
 
-        auto& coordinates_leader =
-            static_cast<const RealSpacePositionsTOMPTarget<T>&>(
-                pset_leader.getCoordinates());
+    auto& coordinates_leader = static_cast<const RealSpacePositionsTOMPTarget<T>&>(pset_leader.getCoordinates());
 
-        auto* rsoa_dev_list_ptr =
-            coordinates_leader.getMultiWalkerRSoADevicePtrs().data();
-        auto* dist_ranged = mw_mem.mw_distances_subset.data();
-        {
-            ScopedTimer offload(dt_leader.offload_timer_);
-            PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+    auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data();
+    auto* dist_ranged       = mw_mem.mw_distances_subset.data();
+    {
+      ScopedTimer offload(dt_leader.offload_timer_);
+      PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
                            num_teams(nw * num_teams)")
-            for (int iw = 0; iw < nw; ++iw)
-                for (int team_id = 0; team_id < num_teams; team_id++) {
-                    auto* source_pos_ptr = rsoa_dev_list_ptr[iw];
-                    const size_t first = ChunkSizePerTeam * team_id;
-                    const size_t last = omptarget::min(
-                        first + ChunkSizePerTeam, num_sources_local);
-
-                    PRAGMA_OFFLOAD("omp parallel for")
-                    for (int iel = first; iel < last; iel++) {
-                        for (int irow = 0; irow < subset_size; irow++) {
-                            RealType* dist = dist_ranged +
-                                (irow + subset_size * iw) * num_padded;
-                            size_t id_target = irow + range_begin;
-
-                            RealType dx, dy, dz;
-                            if (id_target < iel) {
-                                dx = source_pos_ptr[id_target] -
-                                    source_pos_ptr[iel];
-                                dy = source_pos_ptr[id_target + num_padded] -
-                                    source_pos_ptr[iel + num_padded];
-                                dz =
-                                    source_pos_ptr[id_target + num_padded * 2] -
-                                    source_pos_ptr[iel + num_padded * 2];
-                            }
-                            else {
-                                const size_t id_target_reverse =
-                                    num_sources_local - 1 - id_target;
-                                const size_t iel_reverse =
-                                    num_sources_local - 1 - iel;
-                                dx = source_pos_ptr[id_target_reverse] -
-                                    source_pos_ptr[iel_reverse];
-                                dy = source_pos_ptr[id_target_reverse +
-                                         num_padded] -
-                                    source_pos_ptr[iel_reverse + num_padded];
-                                dz = source_pos_ptr[id_target_reverse +
-                                         num_padded * 2] -
-                                    source_pos_ptr[iel_reverse +
-                                        num_padded * 2];
-                            }
-
-                            dist[iel] =
-                                DTD_BConds<RealType, D, SC>::computeDist(
-                                    dx, dy, dz);
-                        }
-                    }
-                }
+      for (int iw = 0; iw < nw; ++iw)
+        for (int team_id = 0; team_id < num_teams; team_id++)
+        {
+          auto* source_pos_ptr = rsoa_dev_list_ptr[iw];
+          const size_t first   = ChunkSizePerTeam * team_id;
+          const size_t last    = omptarget::min(first + ChunkSizePerTeam, num_sources_local);
+
+          PRAGMA_OFFLOAD("omp parallel for")
+          for (int iel = first; iel < last; iel++)
+          {
+            for (int irow = 0; irow < subset_size; irow++)
+            {
+              RealType* dist   = dist_ranged + (irow + subset_size * iw) * num_padded;
+              size_t id_target = irow + range_begin;
+
+              RealType dx, dy, dz;
+              if (id_target < iel)
+              {
+                dx = source_pos_ptr[id_target] - source_pos_ptr[iel];
+                dy = source_pos_ptr[id_target + num_padded] - source_pos_ptr[iel + num_padded];
+                dz = source_pos_ptr[id_target + num_padded * 2] - source_pos_ptr[iel + num_padded * 2];
+              }
+              else // id_target >= iel: same difference taken at the mirrored indices (num_sources_local - 1 - index)
+              {
+                const size_t id_target_reverse = num_sources_local - 1 - id_target;
+                const size_t iel_reverse       = num_sources_local - 1 - iel;
+                dx                             = source_pos_ptr[id_target_reverse] - source_pos_ptr[iel_reverse];
+                dy = source_pos_ptr[id_target_reverse + num_padded] - source_pos_ptr[iel_reverse + num_padded];
+                dz = source_pos_ptr[id_target_reverse + num_padded * 2] - source_pos_ptr[iel_reverse + num_padded * 2];
+              }
+
+              dist[iel] = DTD_BConds<RealType, D, SC>::computeDist(dx, dy, dz);
+            }
+          }
         }
-        return mw_mem.mw_distances_subset.data();
     }
+    return mw_mem.mw_distances_subset.data();
+  }
 
-    /// evaluate the temporary pair relations
-    inline void
-    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
-        bool prepare_old) override
-    {
-        ScopedTimer local_timer(move_timer_);
+  /// evaluate the temporary pair relations for the proposed position rnew; with prepare_old also those of P.R[iat]
+  inline void move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat, bool prepare_old) override
+  {
+    ScopedTimer local_timer(move_timer_);
 
 #if !defined(NDEBUG)
-        old_prepared_elec_id_ = prepare_old ? iat : -1;
+    old_prepared_elec_id_ = prepare_old ? iat : -1; // debug-only marker, checked later by updatePartial()
 #endif
-        this->temp_r_.attachReference(temp_r_mem_.data(), temp_r_mem_.size());
-        this->temp_dr_.attachReference(
-            temp_dr_mem_.size(), temp_dr_mem_.capacity(), temp_dr_mem_.data());
-
-        assert((prepare_old && iat >= 0 && iat < this->num_targets_) ||
-            !prepare_old);
-        DTD_BConds<RealType, D, SC>::computeDistances(rnew,
-            P.getCoordinates().getAllParticlePos(), this->temp_r_.data(),
-            this->temp_dr_, 0, this->num_targets_, iat);
-        // set up old_r_ and old_dr_ for moves may get accepted.
-        if (prepare_old) {
-            this->old_r_.attachReference(old_r_mem_.data(), old_r_mem_.size());
-            this->old_dr_.attachReference(
-                old_dr_mem_.size(), old_dr_mem_.capacity(), old_dr_mem_.data());
-            // recompute from scratch
-            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
-                P.getCoordinates().getAllParticlePos(), this->old_r_.data(),
-                this->old_dr_, 0, this->num_targets_, iat);
-            this->old_r_[iat] =
-                std::numeric_limits<RealType>::max(); // assign a big number
-        }
+    this->temp_r_.attachReference(temp_r_mem_.data(), temp_r_mem_.size());
+    this->temp_dr_.attachReference(temp_dr_mem_.size(), temp_dr_mem_.capacity(), temp_dr_mem_.data());
+
+    assert((prepare_old && iat >= 0 && iat < this->num_targets_) || !prepare_old);
+    DTD_BConds<RealType, D, SC>::computeDistances(rnew, P.getCoordinates().getAllParticlePos(), this->temp_r_.data(),
+                                                  this->temp_dr_, 0, this->num_targets_, iat);
+    // set up old_r_ and old_dr_ for moves that may get accepted.
+    if (prepare_old)
+    {
+      this->old_r_.attachReference(old_r_mem_.data(), old_r_mem_.size());
+      this->old_dr_.attachReference(old_dr_mem_.size(), old_dr_mem_.capacity(), old_dr_mem_.data());
+      // recompute from scratch, using the current position P.R[iat]
+      DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat], P.getCoordinates().getAllParticlePos(),
+                                                    this->old_r_.data(), this->old_dr_, 0, this->num_targets_, iat);
+      this->old_r_[iat] = std::numeric_limits<RealType>::max(); // assign a big number
     }
+  }
 
-    /** evaluate the temporary pair relations when a move is proposed
+  /** evaluate the temporary pair relations when a move is proposed
      * this implementation is asynchronous and the synchronization is managed at
      * ParticleSet. Transferring results to host depends on
      * DTModes::NEED_TEMP_DATA_ON_HOST. If the temporary pair distance are
      * consumed on the device directly, the device to host data transfer can be
      * skipped as an optimization.
      */
-    void
-    mw_move(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
-        const std::vector<PosType>& rnew_list, const IndexType iat,
-        bool prepare_old = true) const override
+  void mw_move(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+               const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+               const std::vector<PosType>& rnew_list,
+               const IndexType iat,
+               bool prepare_old = true) const override
+  {
+    assert(this == &dt_list.getLeader());
+    auto& dt_leader            = dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>();
+    DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
+    auto& pset_leader          = p_list.getLeader();
+
+    ScopedTimer local_timer(move_timer_);
+    const size_t nw          = dt_list.size();
+    const size_t stride_size = num_targets_padded_ * (D + 1); // per-walker block: distances first, then displacements
+
+    auto& mw_new_old_dist_displ = mw_mem.mw_new_old_dist_displ;
+
+    for (int iw = 0; iw < nw; iw++)
     {
-        assert(this == &dt_list.getLeader());
-        auto& dt_leader =
-            dt_list.template getCastedLeader<SoaDistanceTableAATOMPTarget>();
-        DTAAMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
-        auto& pset_leader = p_list.getLeader();
-
-        ScopedTimer local_timer(move_timer_);
-        const size_t nw = dt_list.size();
-        const size_t stride_size = num_targets_padded_ * (D + 1);
-
-        auto& mw_new_old_dist_displ = mw_mem.mw_new_old_dist_displ;
-
-        for (int iw = 0; iw < nw; iw++) {
-            auto& dt =
-                dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(
-                    iw);
+      auto& dt = dt_list.template getCastedElement<SoaDistanceTableAATOMPTarget>(iw);
 #if !defined(NDEBUG)
-            dt.old_prepared_elec_id_ = prepare_old ? iat : -1;
+      dt.old_prepared_elec_id_ = prepare_old ? iat : -1;
 #endif
-            auto& coordinates_soa =
-                static_cast<const RealSpacePositionsTOMPTarget<T>&>(
-                    p_list[iw].getCoordinates());
-        }
+      auto& coordinates_soa = static_cast<const RealSpacePositionsTOMPTarget<T>&>(p_list[iw].getCoordinates()); // NOTE(review): not used afterwards
+    }
 
-        const int ChunkSizePerTeam = 512;
-        const size_t num_teams =
-            (this->num_targets_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+    const int ChunkSizePerTeam = 512;
+    const size_t num_teams     = (this->num_targets_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam; // chunks, rounded up
 
-        auto& coordinates_leader =
-            static_cast<const RealSpacePositionsTOMPTarget<T>&>(
-                pset_leader.getCoordinates());
+    auto& coordinates_leader = static_cast<const RealSpacePositionsTOMPTarget<T>&>(pset_leader.getCoordinates());
 
-        const auto num_sources_local = this->num_targets_;
-        const auto num_padded = num_targets_padded_;
-        const auto* rsoa_dev_list_ptr =
-            coordinates_leader.getMultiWalkerRSoADevicePtrs().data();
-        auto* r_dr_ptr = mw_new_old_dist_displ.data();
-        const auto* new_pos_ptr = coordinates_leader.getFusedNewPosBuffer().data();
-        const size_t new_pos_stride =
-            coordinates_leader.getFusedNewPosBuffer().capacity();
+    const auto num_sources_local  = this->num_targets_;
+    const auto num_padded         = num_targets_padded_;
+    const auto* rsoa_dev_list_ptr = coordinates_leader.getMultiWalkerRSoADevicePtrs().data();
+    auto* r_dr_ptr                = mw_new_old_dist_displ.data();
+    const auto* new_pos_ptr       = coordinates_leader.getFusedNewPosBuffer().data();
+    const size_t new_pos_stride   = coordinates_leader.getFusedNewPosBuffer().capacity();
 
-        {
-            ScopedTimer offload(offload_timer_);
-            PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+    {
+      ScopedTimer offload(offload_timer_);
+      PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
                 num_teams(nw * num_teams) nowait \
                 is_device_ptr(new_pos_ptr,rsoa_dev_list_ptr) \
                 depend(out: r_dr_ptr[:mw_new_old_dist_displ.size()])")
-            for (int iw = 0; iw < nw; ++iw)
-                for (int team_id = 0; team_id < num_teams; team_id++) {
-                    const auto* source_pos_ptr = rsoa_dev_list_ptr[iw];          
-                    const size_t first = ChunkSizePerTeam * team_id;
-                    const size_t last = omptarget::min(
-                        first + ChunkSizePerTeam, num_sources_local);
-
-                    { // temp
-                        auto* r_iw_ptr = r_dr_ptr + iw * stride_size;
-                        auto* dr_iw_ptr =
-                            r_dr_ptr + iw * stride_size + num_padded;
-
-                        RealType pos[D];
-                        for (int idim = 0; idim < D; idim++)
-                            pos[idim] = new_pos_ptr[idim * new_pos_stride + iw];
-
-                        PRAGMA_OFFLOAD("omp parallel for")
-                        for (int iel = first; iel < last; iel++)
-                            DTD_BConds<RealType, D,
-                                SC>::computeDistancesOffload(pos,
-                                source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr,
-                                num_padded, iel, iat);
-                    }
-
-                    if (prepare_old) { // old
-                        auto* r_iw_ptr = r_dr_ptr + (iw + nw) * stride_size;
-                        auto* dr_iw_ptr =
-                            r_dr_ptr + (iw + nw) * stride_size + num_padded;
-
-                        RealType pos[D];
-                        for (int idim = 0; idim < D; idim++)
-                            pos[idim] = source_pos_ptr[idim * num_padded + iat];
-
-                        PRAGMA_OFFLOAD("omp parallel for")
-                        for (int iel = first; iel < last; iel++)
-                            DTD_BConds<RealType, D,
-                                SC>::computeDistancesOffload(pos,
-                                source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr,
-                                num_padded, iel, iat);
-                        r_iw_ptr[iat] =
-                            std::numeric_limits<RealType>::max(); // assign a
+      for (int iw = 0; iw < nw; ++iw)
+        for (int team_id = 0; team_id < num_teams; team_id++)
+        {
+          const auto* source_pos_ptr = rsoa_dev_list_ptr[iw];
+          const size_t first         = ChunkSizePerTeam * team_id;
+          const size_t last          = omptarget::min(first + ChunkSizePerTeam, num_sources_local);
+
+          { // temp
+            auto* r_iw_ptr  = r_dr_ptr + iw * stride_size;
+            auto* dr_iw_ptr = r_dr_ptr + iw * stride_size + num_padded;
+
+            RealType pos[D];
+            for (int idim = 0; idim < D; idim++)
+              pos[idim] = new_pos_ptr[idim * new_pos_stride + iw];
+
+            PRAGMA_OFFLOAD("omp parallel for")
+            for (int iel = first; iel < last; iel++)
+              DTD_BConds<RealType, D, SC>::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr,
+                                                                   num_padded, iel, iat);
+          }
+
+          if (prepare_old)
+          { // old
+            auto* r_iw_ptr  = r_dr_ptr + (iw + nw) * stride_size;
+            auto* dr_iw_ptr = r_dr_ptr + (iw + nw) * stride_size + num_padded;
+
+            RealType pos[D];
+            for (int idim = 0; idim < D; idim++)
+              pos[idim] = source_pos_ptr[idim * num_padded + iat];
+
+            PRAGMA_OFFLOAD("omp parallel for")
+            for (int iel = first; iel < last; iel++)
+              DTD_BConds<RealType, D, SC>::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iw_ptr, dr_iw_ptr,
+                                                                   num_padded, iel, iat);
+            r_iw_ptr[iat] = std::numeric_limits<RealType>::max(); // assign a
                                                                   // big number
-                    }
-                }
+          }
         }
+    }
 
-        if (this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) {
-            PRAGMA_OFFLOAD("omp target update nowait \
+    if (this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST)
+    {
+      PRAGMA_OFFLOAD("omp target update nowait \
                 depend(inout: r_dr_ptr[:mw_new_old_dist_displ.size()]) \
                       from(r_dr_ptr[:mw_new_old_dist_displ.size()])")
+    }
+  }
+
+  int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+  {
+    // find the particle closest to iat: returns its index; r receives the distance and dr the displacement
+    assert(this->num_targets_ > 1); // ensure there are neighbors
+    RealType min_dist = std::numeric_limits<RealType>::max();
+    int index         = -1;
+    if (newpos) // search the temporary data (temp_r_/temp_dr_) instead of the stored table
+    {
+      for (int jat = 0; jat < this->num_targets_; ++jat)
+        if (this->temp_r_[jat] < min_dist && jat != iat)
+        {
+          min_dist = this->temp_r_[jat];
+          index    = jat;
         }
+      assert(index >= 0);
+      dr = this->temp_dr_[index];
     }
-
-    int
-    get_first_neighbor(
-        IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+    else
     {
-        // ensure there are neighbors
-        assert(this->num_targets_ > 1);
-        RealType min_dist = std::numeric_limits<RealType>::max();
-        int index = -1;
-        if (newpos) {
-            for (int jat = 0; jat < this->num_targets_; ++jat)
-                if (this->temp_r_[jat] < min_dist && jat != iat) {
-                    min_dist = this->temp_r_[jat];
-                    index = jat;
-                }
-            assert(index >= 0);
-            dr = this->temp_dr_[index];
+      for (int jat = 0; jat < iat; ++jat) // pairs with jat < iat are read from row iat
+        if (this->distances_[iat][jat] < min_dist)
+        {
+          min_dist = this->distances_[iat][jat];
+          index    = jat;
         }
-        else {
-            for (int jat = 0; jat < iat; ++jat)
-                if (this->distances_[iat][jat] < min_dist) {
-                    min_dist = this->distances_[iat][jat];
-                    index = jat;
-                }
-            for (int jat = iat + 1; jat < this->num_targets_; ++jat)
-                if (this->distances_[jat][iat] < min_dist) {
-                    min_dist = this->distances_[jat][iat];
-                    index = jat;
-                }
-            assert(index != iat && index >= 0);
-            if (index < iat)
-                dr = this->displacements_[iat][index];
-            else
-                dr = this->displacements_[index][iat];
+      for (int jat = iat + 1; jat < this->num_targets_; ++jat) // pairs with jat > iat are read from row jat
+        if (this->distances_[jat][iat] < min_dist)
+        {
+          min_dist = this->distances_[jat][iat];
+          index    = jat;
         }
-        r = min_dist;
-        return index;
+      assert(index != iat && index >= 0);
+      if (index < iat)
+        dr = this->displacements_[iat][index];
+      else
+        dr = this->displacements_[index][iat];
     }
+    r = min_dist;
+    return index;
+  }
 
-    /** After accepting the iat-th particle, update the iat-th row of distances_
+  /** After accepting the iat-th particle, update the iat-th row of distances_
      * and displacements_. Upper triangle is not needed in the later computation
      * and thus not updated
      */
-    inline void
-    update(IndexType iat) override
-    {
-        ScopedTimer local_timer(update_timer_);
-        // update [0, iat) columns
-        const int nupdate = iat;
-        // copy row
-        assert(nupdate <= this->temp_r_.size());
-        std::copy_n(
-            this->temp_r_.data(), nupdate, this->distances_[iat].data());
-        for (int idim = 0; idim < D; ++idim)
-            std::copy_n(this->temp_dr_.data(idim), nupdate,
-                this->displacements_[iat].data(idim));
-        // copy column
-        for (size_t i = iat + 1; i < this->num_targets_; ++i) {
-            this->distances_[i][iat] = this->temp_r_[i];
-            this->displacements_[i](iat) = -this->temp_dr_[i];
-        }
-    }
-
-    void
-    updatePartial(IndexType jat, bool from_temp) override
+  inline void update(IndexType iat) override
+  {
+    ScopedTimer local_timer(update_timer_);
+    // update [0, iat) columns
+    const int nupdate = iat;
+    // copy row
+    assert(nupdate <= this->temp_r_.size());
+    std::copy_n(this->temp_r_.data(), nupdate, this->distances_[iat].data());
+    for (int idim = 0; idim < D; ++idim)
+      std::copy_n(this->temp_dr_.data(idim), nupdate, this->displacements_[iat].data(idim));
+    // copy column: entries [i][iat] for i > iat, with the displacement negated
+    for (size_t i = iat + 1; i < this->num_targets_; ++i)
     {
-        ScopedTimer local_timer(update_timer_);
-
-        // update [0, jat)
-        const int nupdate = jat;
-        if (from_temp) {
-            // copy row
-            assert(nupdate <= this->temp_r_.size());
-            std::copy_n(
-                this->temp_r_.data(), nupdate, this->distances_[jat].data());
-            for (int idim = 0; idim < D; ++idim)
-                std::copy_n(this->temp_dr_.data(idim), nupdate,
-                    this->displacements_[jat].data(idim));
-        }
-        else {
-            assert(old_prepared_elec_id_ == jat);
-            // copy row
-            assert(nupdate <= this->old_r_.size());
-            std::copy_n(
-                this->old_r_.data(), nupdate, this->distances_[jat].data());
-            for (int idim = 0; idim < D; ++idim)
-                std::copy_n(this->old_dr_.data(idim), nupdate,
-                    this->displacements_[jat].data(idim));
-        }
+      this->distances_[i][iat]     = this->temp_r_[i];
+      this->displacements_[i](iat) = -this->temp_dr_[i];
     }
+  }
 
-    void
-    mw_updatePartial(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
-        IndexType jat, const std::vector<bool>& from_temp) override
-    {
-        // if temp data on host is not updated by mw_move during p-by-p moves,
-        // there is no need to update distance table
-        if (!(this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST))
-            return;
+  void updatePartial(IndexType jat, bool from_temp) override
+  {
+    ScopedTimer local_timer(update_timer_);
 
-        for (int iw = 0; iw < dt_list.size(); iw++)
-            dt_list[iw].updatePartial(jat, from_temp[iw]);
-    }
-
-    void
-    mw_finalizePbyP(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list) const override
+    // update columns [0, jat) of row jat from temp_r_/temp_dr_ (from_temp) or from old_r_/old_dr_ (otherwise)
+    const int nupdate = jat;
+    if (from_temp)
     {
-        // if the distance table is not updated by mw_move during p-by-p, needs
-        // to recompute the whole table before being used by Hamiltonian if
-        // requested
-        if (!(this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) &&
-            (this->modes_ & DTModes::NEED_FULL_TABLE_ON_HOST_AFTER_DONEPBYP))
-            this->mw_evaluate(dt_list, p_list);
+      // copy row
+      assert(nupdate <= this->temp_r_.size());
+      std::copy_n(this->temp_r_.data(), nupdate, this->distances_[jat].data());
+      for (int idim = 0; idim < D; ++idim)
+        std::copy_n(this->temp_dr_.data(idim), nupdate, this->displacements_[jat].data(idim));
     }
-
-    size_t
-    get_num_particls_stored() const override
+    else
     {
-        return num_particls_stored;
+      assert(old_prepared_elec_id_ == jat); // old data must have been prepared for this same particle
+      // copy row
+      assert(nupdate <= this->old_r_.size());
+      std::copy_n(this->old_r_.data(), nupdate, this->distances_[jat].data());
+      for (int idim = 0; idim < D; ++idim)
+        std::copy_n(this->old_dr_.data(idim), nupdate, this->displacements_[jat].data(idim));
     }
+  }
+
+  void mw_updatePartial(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+                        IndexType jat,
+                        const std::vector<bool>& from_temp) override
+  {
+    // if temp data on host is not updated by mw_move during p-by-p moves,
+    // there is no need to update the distance table
+    if (!(this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST))
+      return;
+
+    for (int iw = 0; iw < dt_list.size(); iw++) // forward to each table in the list with its own from_temp flag
+      dt_list[iw].updatePartial(jat, from_temp[iw]);
+  }
+
+  void mw_finalizePbyP(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+                       const RefVectorWithLeader<ParticleSetT<T>>& p_list) const override
+  {
+    // if the distance table is not updated by mw_move during p-by-p, the
+    // whole table needs to be recomputed before being used by the
+    // Hamiltonian, if requested
+    if (!(this->modes_ & DTModes::NEED_TEMP_DATA_ON_HOST) &&
+        (this->modes_ & DTModes::NEED_FULL_TABLE_ON_HOST_AFTER_DONEPBYP))
+      this->mw_evaluate(dt_list, p_list);
+  }
+
+  size_t get_num_particls_stored() const override { return num_particls_stored; } // largest range mw_evalDistsInRange() accepts
 
 private:
-    /// number of targets with padding
-    const size_t num_targets_padded_;
+  /// number of targets with padding
+  const size_t num_targets_padded_;
 #if !defined(NDEBUG)
-    /** set to particle id after move() with prepare_old = true. -1 means not
+  /** set to particle id after move() with prepare_old = true. -1 means not
      * prepared. It is intended only for safety checks, not for codepath
      * selection.
      */
-    int old_prepared_elec_id_;
+  int old_prepared_elec_id_;
 #endif
-    /// timer for offload portion
-    NewTimer& offload_timer_;
-    /// timer for evaluate()
-    NewTimer& evaluate_timer_;
-    /// timer for move()
-    NewTimer& move_timer_;
-    /// timer for update()
-    NewTimer& update_timer_;
-    /// the particle count of the internal stored distances.
-    const size_t num_particls_stored = 64;
+  /// timer for offload portion
+  NewTimer& offload_timer_;
+  /// timer for evaluate()
+  NewTimer& evaluate_timer_;
+  /// timer for move()
+  NewTimer& move_timer_;
+  /// timer for update()
+  NewTimer& update_timer_;
+  /// maximum number of particles (range_end - range_begin) accepted by a single mw_evalDistsInRange() call.
+  const size_t num_particls_stored = 64;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/SoaDistanceTableABT.h b/src/Particle/SoaDistanceTableABT.h
index e2eb2709bf2..56dce296876 100644
--- a/src/Particle/SoaDistanceTableABT.h
+++ b/src/Particle/SoaDistanceTableABT.h
@@ -24,147 +24,137 @@ namespace qmcplusplus
  * @brief A derived classe from DistacneTableData, specialized for AB using a
  * transposed form
  */
-template <typename T, unsigned D, int SC>
-struct SoaDistanceTableABT :
-    public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
-    public DistanceTableABT<T>
+template<typename T, unsigned D, int SC>
+struct SoaDistanceTableABT : public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
+                             public DistanceTableABT<T>
 {
-    using RealType = typename DistanceTableABT<T>::RealType;
-    using PosType = typename DistanceTableABT<T>::PosType;
-    using IndexType = typename DistanceTableABT<T>::IndexType;
+  using RealType  = typename DistanceTableABT<T>::RealType;
+  using PosType   = typename DistanceTableABT<T>::PosType;
+  using IndexType = typename DistanceTableABT<T>::IndexType;
 
-    SoaDistanceTableABT(
-        const ParticleSetT<T>& source, ParticleSetT<T>& target) :
-        DTD_BConds<RealType, D, SC>(source.getLattice()),
+  SoaDistanceTableABT(const ParticleSetT<T>& source, ParticleSetT<T>& target)
+      : DTD_BConds<RealType, D, SC>(source.getLattice()),
         DistanceTableABT<T>(source, target, DTModes::ALL_OFF),
-        evaluate_timer_(createGlobalTimer(std::string("DTAB::evaluate_") +
-                target.getName() + "_" + source.getName(),
-            timer_level_fine)),
-        move_timer_(createGlobalTimer(std::string("DTAB::move_") +
-                target.getName() + "_" + source.getName(),
-            timer_level_fine)),
-        update_timer_(createGlobalTimer(std::string("DTAB::update_") +
-                target.getName() + "_" + source.getName(),
-            timer_level_fine))
-    {
-        resize();
-    }
+        evaluate_timer_(createGlobalTimer(std::string("DTAB::evaluate_") + target.getName() + "_" + source.getName(),
+                                          timer_level_fine)),
+        move_timer_(createGlobalTimer(std::string("DTAB::move_") + target.getName() + "_" + source.getName(),
+                                      timer_level_fine)),
+        update_timer_(createGlobalTimer(std::string("DTAB::update_") + target.getName() + "_" + source.getName(),
+                                        timer_level_fine))
+  {
+    resize();
+  }
 
-    void
-    resize()
-    {
-        if (this->num_sources_ * this->num_targets_ == 0)
-            return;
-
-        // initialize memory containers and views
-        const int num_sources_padded = getAlignedSize<RealType>(this->num_sources_);
-        this->distances_.resize(this->num_targets_);
-        this->displacements_.resize(this->num_targets_);
-        for (int i = 0; i < this->num_targets_; ++i) {
-            this->distances_[i].resize(num_sources_padded);
-            this->displacements_[i].resize(num_sources_padded);
-        }
+  void resize()
+  {
+    if (this->num_sources_ * this->num_targets_ == 0)
+      return;
 
-        // The padding of temp_r_ and temp_dr_ is necessary for the memory copy
-        // in the update function temp_r_ is padded explicitly while temp_dr_ is
-        // padded internally
-        this->temp_r_.resize(num_sources_padded);
-        this->temp_dr_.resize(this->num_sources_);
+    // initialize memory containers and views
+    const int num_sources_padded = getAlignedSize<RealType>(this->num_sources_);
+    this->distances_.resize(this->num_targets_);
+    this->displacements_.resize(this->num_targets_);
+    for (int i = 0; i < this->num_targets_; ++i)
+    {
+      this->distances_[i].resize(num_sources_padded);
+      this->displacements_[i].resize(num_sources_padded);
     }
 
-    SoaDistanceTableABT() = delete;
-    SoaDistanceTableABT(const SoaDistanceTableABT&) = delete;
+    // The padding of temp_r_ and temp_dr_ is necessary for the memory copy
+    // in the update function. temp_r_ is padded explicitly while temp_dr_ is
+    // padded internally.
+    this->temp_r_.resize(num_sources_padded);
+    this->temp_dr_.resize(this->num_sources_);
+  }
 
-    /** evaluate the full table */
-    inline void
-    evaluate(ParticleSetT<T>& P) override
-    {
-        ScopedTimer local_timer(evaluate_timer_);
+  SoaDistanceTableABT()                           = delete;
+  SoaDistanceTableABT(const SoaDistanceTableABT&) = delete;
+
+  /** evaluate the full table */
+  inline void evaluate(ParticleSetT<T>& P) override
+  {
+    ScopedTimer local_timer(evaluate_timer_);
 #pragma omp parallel
-        {
-            int first, last;
-            FairDivideAligned(this->num_sources_, getAlignment<RealType>(),
-                omp_get_num_threads(), omp_get_thread_num(), first, last);
+    {
+      int first, last;
+      FairDivideAligned(this->num_sources_, getAlignment<RealType>(), omp_get_num_threads(), omp_get_thread_num(),
+                        first, last);
 
-            // be aware of the sign of Displacement
-            for (int iat = 0; iat < this->num_targets_; ++iat)
-                DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
-                    this->origin_.getCoordinates().getAllParticlePos(),
-                    this->distances_[iat].data(), this->displacements_[iat],
-                    first, last);
-        }
+      // be aware of the sign of Displacement
+      for (int iat = 0; iat < this->num_targets_; ++iat)
+        DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat], this->origin_.getCoordinates().getAllParticlePos(),
+                                                      this->distances_[iat].data(), this->displacements_[iat], first,
+                                                      last);
     }
+  }
 
-    /// evaluate the temporary pair relations
-    inline void
-    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
-        bool prepare_old) override
-    {
-        ScopedTimer local_timer(move_timer_);
-        DTD_BConds<RealType, D, SC>::computeDistances(rnew,
-            this->origin_.getCoordinates().getAllParticlePos(), this->temp_r_.data(),
-            this->temp_dr_, 0, this->num_sources_);
-        // If the full table is not ready all the time, overwrite the current
-        // value. If this step is missing, DT values can be undefined in case a
-        // move is rejected.
-        if (!(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old)
-            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
-                this->origin_.getCoordinates().getAllParticlePos(),
-                this->distances_[iat].data(), this->displacements_[iat], 0,
-                this->num_sources_);
-    }
+  /// evaluate the temporary pair relations
+  inline void move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat, bool prepare_old) override
+  {
+    ScopedTimer local_timer(move_timer_);
+    DTD_BConds<RealType, D, SC>::computeDistances(rnew, this->origin_.getCoordinates().getAllParticlePos(),
+                                                  this->temp_r_.data(), this->temp_dr_, 0, this->num_sources_);
+    // If the full table is not ready all the time, overwrite the current
+    // value. If this step is missing, DT values can be undefined in case a
+    // move is rejected.
+    if (!(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old)
+      DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat], this->origin_.getCoordinates().getAllParticlePos(),
+                                                    this->distances_[iat].data(), this->displacements_[iat], 0,
+                                                    this->num_sources_);
+  }
 
-    /// update the stripe for jat-th particle
-    inline void
-    update(IndexType iat) override
-    {
-        ScopedTimer local_timer(update_timer_);
-        std::copy_n(this->temp_r_.data(), this->num_sources_,
-            this->distances_[iat].data());
-        for (int idim = 0; idim < D; ++idim)
-            std::copy_n(this->temp_dr_.data(idim), this->num_sources_,
-                this->displacements_[iat].data(idim));
-    }
+  /// update the stripe for the iat-th particle
+  inline void update(IndexType iat) override
+  {
+    ScopedTimer local_timer(update_timer_);
+    std::copy_n(this->temp_r_.data(), this->num_sources_, this->distances_[iat].data());
+    for (int idim = 0; idim < D; ++idim)
+      std::copy_n(this->temp_dr_.data(idim), this->num_sources_, this->displacements_[iat].data(idim));
+  }
 
-    int
-    get_first_neighbor(
-        IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+  int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+  {
+    RealType min_dist = std::numeric_limits<RealType>::max();
+    int index         = -1;
+    if (newpos)
     {
-        RealType min_dist = std::numeric_limits<RealType>::max();
-        int index = -1;
-        if (newpos) {
-            for (int jat = 0; jat < this->num_sources_; ++jat)
-                if (this->temp_r_[jat] < min_dist) {
-                    min_dist = this->temp_r_[jat];
-                    index = jat;
-                }
-            if (index >= 0) {
-                r = min_dist;
-                dr = this->temp_dr_[index];
-            }
+      for (int jat = 0; jat < this->num_sources_; ++jat)
+        if (this->temp_r_[jat] < min_dist)
+        {
+          min_dist = this->temp_r_[jat];
+          index    = jat;
         }
-        else {
-            for (int jat = 0; jat < this->num_sources_; ++jat)
-                if (this->distances_[iat][jat] < min_dist) {
-                    min_dist = this->distances_[iat][jat];
-                    index = jat;
-                }
-            if (index >= 0) {
-                r = min_dist;
-                dr = this->displacements_[iat][index];
-            }
+      if (index >= 0)
+      {
+        r  = min_dist;
+        dr = this->temp_dr_[index];
+      }
+    }
+    else
+    {
+      for (int jat = 0; jat < this->num_sources_; ++jat)
+        if (this->distances_[iat][jat] < min_dist)
+        {
+          min_dist = this->distances_[iat][jat];
+          index    = jat;
         }
-        assert(index >= 0 && index < this->num_sources_);
-        return index;
+      if (index >= 0)
+      {
+        r  = min_dist;
+        dr = this->displacements_[iat][index];
+      }
     }
+    assert(index >= 0 && index < this->num_sources_);
+    return index;
+  }
 
 private:
-    /// timer for evaluate()
-    NewTimer& evaluate_timer_;
-    /// timer for move()
-    NewTimer& move_timer_;
-    /// timer for update()
-    NewTimer& update_timer_;
+  /// timer for evaluate()
+  NewTimer& evaluate_timer_;
+  /// timer for move()
+  NewTimer& move_timer_;
+  /// timer for update()
+  NewTimer& update_timer_;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/SoaDistanceTableABTOMPTarget.h b/src/Particle/SoaDistanceTableABTOMPTarget.h
index 452100cb25b..3f0072a10ac 100644
--- a/src/Particle/SoaDistanceTableABTOMPTarget.h
+++ b/src/Particle/SoaDistanceTableABTOMPTarget.h
@@ -28,486 +28,409 @@ namespace qmcplusplus
  * @brief A derived classe from DistacneTableData, specialized for AB using a
  * transposed form
  */
-template <typename T, unsigned D, int SC>
-class SoaDistanceTableABTOMPTarget :
-    public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
-    public DistanceTableABT<T>
+template<typename T, unsigned D, int SC>
+class SoaDistanceTableABTOMPTarget : public DTD_BConds<typename ParticleSetTraits<T>::RealType, D, SC>,
+                                     public DistanceTableABT<T>
 {
 private:
-    template <typename DT>
-    using OffloadPinnedVector =
-        Vector<DT, OMPallocator<DT, PinnedAlignedAllocator<DT>>>;
-
-    using RealType = typename DistanceTableABT<T>::RealType;
-    using PosType = typename DistanceTableABT<T>::PosType;
-    using IndexType = typename DistanceTableABT<T>::IndexType;
-
-    /// accelerator output buffer for r and dr
-    OffloadPinnedVector<RealType> r_dr_memorypool_;
-    /// accelerator input array for a list of target particle positions,
-    /// num_targets_ x D
-    OffloadPinnedVector<RealType> target_pos;
-
-    /// multi walker shared memory buffer
-    struct DTABMultiWalkerMem : public Resource
+  template<typename DT>
+  using OffloadPinnedVector = Vector<DT, OMPallocator<DT, PinnedAlignedAllocator<DT>>>;
+
+  using RealType  = typename DistanceTableABT<T>::RealType;
+  using PosType   = typename DistanceTableABT<T>::PosType;
+  using IndexType = typename DistanceTableABT<T>::IndexType;
+
+  /// accelerator output buffer for r and dr
+  OffloadPinnedVector<RealType> r_dr_memorypool_;
+  /// accelerator input array for a list of target particle positions,
+  /// num_targets_ x D
+  OffloadPinnedVector<RealType> target_pos;
+
+  /// multi walker shared memory buffer
+  struct DTABMultiWalkerMem : public Resource
+  {
+    /// accelerator output array for multiple walkers, stored per target as
+    /// [total_targets][1+D][num_padded] (distances, then displacements)
+    OffloadPinnedVector<RealType> mw_r_dr;
+    /// accelerator input buffer for multiple data set
+    OffloadPinnedVector<char> offload_input;
+
+    DTABMultiWalkerMem() : Resource("DTABMultiWalkerMem") {}
+
+    DTABMultiWalkerMem(const DTABMultiWalkerMem&) : DTABMultiWalkerMem() {}
+
+    std::unique_ptr<Resource> makeClone() const override { return std::make_unique<DTABMultiWalkerMem>(*this); }
+  };
+
+  ResourceHandle<DTABMultiWalkerMem> mw_mem_handle_;
+
+  void resize()
+  {
+    if (this->num_sources_ * this->num_targets_ == 0)
+      return;
+    if (this->distances_.size())
+      return;
+
+    // initialize memory containers and views
+    const size_t num_padded  = getAlignedSize<RealType>(this->num_sources_);
+    const size_t stride_size = getPerTargetPctlStrideSize();
+    r_dr_memorypool_.resize(stride_size * this->num_targets_);
+
+    this->distances_.resize(this->num_targets_);
+    this->displacements_.resize(this->num_targets_);
+    for (int i = 0; i < this->num_targets_; ++i)
     {
-        /// accelerator output array for multiple walkers,
-        /// [1+D][num_targets_][num_padded] (distances, displacements)
-        OffloadPinnedVector<RealType> mw_r_dr;
-        /// accelerator input buffer for multiple data set
-        OffloadPinnedVector<char> offload_input;
-
-        DTABMultiWalkerMem() : Resource("DTABMultiWalkerMem")
-        {
-        }
-
-        DTABMultiWalkerMem(const DTABMultiWalkerMem&) : DTABMultiWalkerMem()
-        {
-        }
-
-        std::unique_ptr<Resource>
-        makeClone() const override
-        {
-            return std::make_unique<DTABMultiWalkerMem>(*this);
-        }
-    };
+      this->distances_[i].attachReference(r_dr_memorypool_.data() + i * stride_size, this->num_sources_);
+      this->displacements_[i].attachReference(this->num_sources_, num_padded,
+                                              r_dr_memorypool_.data() + i * stride_size + num_padded);
+    }
+  }
 
-    ResourceHandle<DTABMultiWalkerMem> mw_mem_handle_;
+  static void associateResource(const RefVectorWithLeader<DistanceTableT<T>>& dt_list)
+  {
+    auto& dt_leader = dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>();
 
-    void
-    resize()
+    // initialize memory containers and views
+    size_t count_targets = 0;
+    for (size_t iw = 0; iw < dt_list.size(); iw++)
     {
-        if (this->num_sources_ * this->num_targets_ == 0)
-            return;
-        if (this->distances_.size())
-            return;
-
-        // initialize memory containers and views
-        const size_t num_padded = getAlignedSize<RealType>(this->num_sources_);
-        const size_t stride_size = getPerTargetPctlStrideSize();
-        r_dr_memorypool_.resize(stride_size * this->num_targets_);
-
-        this->distances_.resize(this->num_targets_);
-        this->displacements_.resize(this->num_targets_);
-        for (int i = 0; i < this->num_targets_; ++i) {
-            this->distances_[i].attachReference(
-                r_dr_memorypool_.data() + i * stride_size, this->num_sources_);
-            this->displacements_[i].attachReference(this->num_sources_,
-                num_padded,
-                r_dr_memorypool_.data() + i * stride_size + num_padded);
-        }
+      auto& dt = dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(iw);
+      count_targets += dt.targets();
+      dt.r_dr_memorypool_.free();
     }
 
-    static void
-    associateResource(const RefVectorWithLeader<DistanceTableT<T>>& dt_list)
-    {
-        auto& dt_leader =
-            dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>();
-
-        // initialize memory containers and views
-        size_t count_targets = 0;
-        for (size_t iw = 0; iw < dt_list.size(); iw++) {
-            auto& dt =
-                dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(
-                    iw);
-            count_targets += dt.targets();
-            dt.r_dr_memorypool_.free();
-        }
+    const size_t num_sources   = dt_leader.num_sources_;
+    const size_t num_padded    = getAlignedSize<RealType>(dt_leader.num_sources_);
+    const size_t stride_size   = num_padded * (D + 1);
+    const size_t total_targets = count_targets;
+    auto& mw_r_dr              = dt_leader.mw_mem_handle_.getResource().mw_r_dr;
+    mw_r_dr.resize(total_targets * stride_size);
 
-        const size_t num_sources = dt_leader.num_sources_;
-        const size_t num_padded =
-            getAlignedSize<RealType>(dt_leader.num_sources_);
-        const size_t stride_size = num_padded * (D + 1);
-        const size_t total_targets = count_targets;
-        auto& mw_r_dr = dt_leader.mw_mem_handle_.getResource().mw_r_dr;
-        mw_r_dr.resize(total_targets * stride_size);
-
-        count_targets = 0;
-        for (size_t iw = 0; iw < dt_list.size(); iw++) {
-            auto& dt =
-                dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(
-                    iw);
-            assert(num_sources == dt.num_sources_);
-
-            dt.distances_.resize(dt.targets());
-            dt.displacements_.resize(dt.targets());
-
-            for (int i = 0; i < dt.targets(); ++i) {
-                dt.distances_[i].attachReference(
-                    mw_r_dr.data() + (i + count_targets) * stride_size,
-                    num_sources);
-                dt.displacements_[i].attachReference(num_sources, num_padded,
-                    mw_r_dr.data() + (i + count_targets) * stride_size +
-                        num_padded);
-            }
-            count_targets += dt.targets();
-        }
+    count_targets = 0;
+    for (size_t iw = 0; iw < dt_list.size(); iw++)
+    {
+      auto& dt = dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(iw);
+      assert(num_sources == dt.num_sources_);
+
+      dt.distances_.resize(dt.targets());
+      dt.displacements_.resize(dt.targets());
+
+      for (int i = 0; i < dt.targets(); ++i)
+      {
+        dt.distances_[i].attachReference(mw_r_dr.data() + (i + count_targets) * stride_size, num_sources);
+        dt.displacements_[i].attachReference(num_sources, num_padded,
+                                             mw_r_dr.data() + (i + count_targets) * stride_size + num_padded);
+      }
+      count_targets += dt.targets();
     }
+  }
 
 public:
-    SoaDistanceTableABTOMPTarget(
-        const ParticleSetT<T>& source, ParticleSetT<T>& target) :
-        DTD_BConds<RealType, D, SC>(source.getLattice()),
+  SoaDistanceTableABTOMPTarget(const ParticleSetT<T>& source, ParticleSetT<T>& target)
+      : DTD_BConds<RealType, D, SC>(source.getLattice()),
         DistanceTableABT<T>(source, target, DTModes::ALL_OFF),
-        offload_timer_(createGlobalTimer(
-            std::string("DTABOMPTarget::offload_") + this->name_,
-            timer_level_fine)),
-        evaluate_timer_(createGlobalTimer(
-            std::string("DTABOMPTarget::evaluate_") + this->name_,
-            timer_level_fine)),
-        move_timer_(
-            createGlobalTimer(std::string("DTABOMPTarget::move_") + this->name_,
-                timer_level_fine)),
-        update_timer_(createGlobalTimer(
-            std::string("DTABOMPTarget::update_") + this->name_,
-            timer_level_fine))
-
+        offload_timer_(createGlobalTimer(std::string("DTABOMPTarget::offload_") + this->name_, timer_level_fine)),
+        evaluate_timer_(createGlobalTimer(std::string("DTABOMPTarget::evaluate_") + this->name_, timer_level_fine)),
+        move_timer_(createGlobalTimer(std::string("DTABOMPTarget::move_") + this->name_, timer_level_fine)),
+        update_timer_(createGlobalTimer(std::string("DTABOMPTarget::update_") + this->name_, timer_level_fine))
+
+  {
+    auto* coordinates_soa = dynamic_cast<const RealSpacePositionsTOMPTarget<T>*>(&source.getCoordinates());
+    if (!coordinates_soa)
+      throw std::runtime_error("Source particle set doesn't have OpenMP "
+                               "offload. Contact developers!");
+    PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])")
+
+    // The padding of temp_r_ and temp_dr_ is necessary for the memory copy
+    // in the update function. temp_r_ is padded explicitly while temp_dr_ is
+    // padded internally.
+    const int num_padded = getAlignedSize<RealType>(this->num_sources_);
+    this->temp_r_.resize(num_padded);
+    this->temp_dr_.resize(this->num_sources_);
+  }
+
+  SoaDistanceTableABTOMPTarget()                                    = delete;
+  SoaDistanceTableABTOMPTarget(const SoaDistanceTableABTOMPTarget&) = delete;
+
+  ~SoaDistanceTableABTOMPTarget() { PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])") }
+
+  void createResource(ResourceCollection& collection) const override
+  {
+    auto resource_index = collection.addResource(std::make_unique<DTABMultiWalkerMem>());
+  }
+
+  void acquireResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
+  {
+    auto& dt_leader          = dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>();
+    dt_leader.mw_mem_handle_ = collection.lendResource<DTABMultiWalkerMem>();
+    associateResource(dt_list);
+  }
+
+  void releaseResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
+  {
+    collection.takebackResource(dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>().mw_mem_handle_);
+    for (size_t iw = 0; iw < dt_list.size(); iw++)
     {
-        auto* coordinates_soa =
-            dynamic_cast<const RealSpacePositionsTOMPTarget<T>*>(
-                &source.getCoordinates());
-        if (!coordinates_soa)
-            throw std::runtime_error("Source particle set doesn't have OpenMP "
-                                     "offload. Contact developers!");
-        PRAGMA_OFFLOAD("omp target enter data map(to : this[:1])")
-
-        // The padding of temp_r_ and temp_dr_ is necessary for the memory copy
-        // in the update function temp_r_ is padded explicitly while temp_dr_ is
-        // padded internally
-        const int num_padded = getAlignedSize<RealType>(this->num_sources_);
-        this->temp_r_.resize(num_padded);
-        this->temp_dr_.resize(this->num_sources_);
+      auto& dt = dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(iw);
+      dt.distances_.clear();
+      dt.displacements_.clear();
     }
+  }
 
-    SoaDistanceTableABTOMPTarget() = delete;
-    SoaDistanceTableABTOMPTarget(const SoaDistanceTableABTOMPTarget&) = delete;
+  const RealType* getMultiWalkerDataPtr() const override { return mw_mem_handle_.getResource().mw_r_dr.data(); }
 
-    ~SoaDistanceTableABTOMPTarget()
-    {
-        PRAGMA_OFFLOAD("omp target exit data map(delete : this[:1])")
-    }
+  size_t getPerTargetPctlStrideSize() const override { return getAlignedSize<RealType>(this->num_sources_) * (D + 1); }
 
-    void
-    createResource(ResourceCollection& collection) const override
-    {
-        auto resource_index =
-            collection.addResource(std::make_unique<DTABMultiWalkerMem>());
-    }
+  /** evaluate the full table */
+  inline void evaluate(ParticleSetT<T>& P) override
+  {
+    resize();
 
-    void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
-    {
-        auto& dt_leader =
-            dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>();
-        dt_leader.mw_mem_handle_ =
-            collection.lendResource<DTABMultiWalkerMem>();
-        associateResource(dt_list);
-    }
+    ScopedTimer local_timer(evaluate_timer_);
+    // be aware of the sign of Displacement
+    const int num_targets_local = this->num_targets_;
+    const int num_sources_local = this->num_sources_;
+    const int num_padded        = getAlignedSize<RealType>(this->num_sources_);
 
-    void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<DistanceTableT<T>>& dt_list) const override
-    {
-        collection.takebackResource(
-            dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>()
-                .mw_mem_handle_);
-        for (size_t iw = 0; iw < dt_list.size(); iw++) {
-            auto& dt =
-                dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(
-                    iw);
-            dt.distances_.clear();
-            dt.displacements_.clear();
-        }
-    }
+    target_pos.resize(this->num_targets_ * D);
+    for (size_t iat = 0; iat < this->num_targets_; iat++)
+      for (size_t idim = 0; idim < D; idim++)
+        target_pos[iat * D + idim] = P.R[iat][idim];
 
-    const RealType*
-    getMultiWalkerDataPtr() const override
-    {
-        return mw_mem_handle_.getResource().mw_r_dr.data();
-    }
+    auto* target_pos_ptr = target_pos.data();
+    auto* source_pos_ptr = this->origin_.getCoordinates().getAllParticlePos().data();
+    auto* r_dr_ptr       = this->distances_[0].data();
+    assert(this->distances_[0].data() + num_padded == this->displacements_[0].data());
 
-    size_t
-    getPerTargetPctlStrideSize() const override
-    {
-        return getAlignedSize<RealType>(this->num_sources_) * (D + 1);
-    }
+    // To maximize thread usage, the loop over source particles is chunked.
+    // Each chunk is sent to an OpenMP offload thread team.
+    const int ChunkSizePerTeam = 512;
+    const size_t num_teams     = (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+    const size_t stride_size   = getPerTargetPctlStrideSize();
 
-    /** evaluate the full table */
-    inline void
-    evaluate(ParticleSetT<T>& P) override
     {
-        resize();
-
-        ScopedTimer local_timer(evaluate_timer_);
-        // be aware of the sign of Displacement
-        const int num_targets_local = this->num_targets_;
-        const int num_sources_local = this->num_sources_;
-        const int num_padded = getAlignedSize<RealType>(this->num_sources_);
-
-        target_pos.resize(this->num_targets_ * D);
-        for (size_t iat = 0; iat < this->num_targets_; iat++)
-            for (size_t idim = 0; idim < D; idim++)
-                target_pos[iat * D + idim] = P.R[iat][idim];
-
-        auto* target_pos_ptr = target_pos.data();
-        auto* source_pos_ptr =
-            this->origin_.getCoordinates().getAllParticlePos().data();
-        auto* r_dr_ptr = this->distances_[0].data();
-        assert(this->distances_[0].data() + num_padded ==
-            this->displacements_[0].data());
-
-        // To maximize thread usage, the loop over electrons is chunked. Each
-        // chunk is sent to an OpenMP offload thread team.
-        const int ChunkSizePerTeam = 512;
-        const size_t num_teams =
-            (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-        const size_t stride_size = getPerTargetPctlStrideSize();
-
-        {
-            ScopedTimer offload(offload_timer_);
-            PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+      ScopedTimer offload(offload_timer_);
+      PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
                 num_teams(this->num_targets_*num_teams) \
                 map(to: source_pos_ptr[:num_padded*D]) \
                 map(always, to: target_pos_ptr[:this->num_targets_*D]) \
                 map(always, from: r_dr_ptr[:this->num_targets_*stride_size])")
-            for (int iat = 0; iat < num_targets_local; ++iat)
-                for (int team_id = 0; team_id < num_teams; team_id++) {
-                    const int first = ChunkSizePerTeam * team_id;
-                    const int last = omptarget::min(
-                        first + ChunkSizePerTeam, num_sources_local);
-
-                    RealType pos[D];
-                    for (int idim = 0; idim < D; idim++)
-                        pos[idim] = target_pos_ptr[iat * D + idim];
-
-                    auto* r_iat_ptr = r_dr_ptr + iat * stride_size;
-                    auto* dr_iat_ptr = r_iat_ptr + num_padded;
-
-                    PRAGMA_OFFLOAD("omp parallel for")
-                    for (int iel = first; iel < last; iel++)
-                        DTD_BConds<RealType, D, SC>::computeDistancesOffload(
-                            pos, source_pos_ptr, num_padded, r_iat_ptr,
-                            dr_iat_ptr, num_padded, iel);
-                }
+      for (int iat = 0; iat < num_targets_local; ++iat)
+        for (int team_id = 0; team_id < num_teams; team_id++)
+        {
+          const int first = ChunkSizePerTeam * team_id;
+          const int last  = omptarget::min(first + ChunkSizePerTeam, num_sources_local);
+
+          RealType pos[D];
+          for (int idim = 0; idim < D; idim++)
+            pos[idim] = target_pos_ptr[iat * D + idim];
+
+          auto* r_iat_ptr  = r_dr_ptr + iat * stride_size;
+          auto* dr_iat_ptr = r_iat_ptr + num_padded;
+
+          PRAGMA_OFFLOAD("omp parallel for")
+          for (int iel = first; iel < last; iel++)
+            DTD_BConds<RealType, D, SC>::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr,
+                                                                 num_padded, iel);
         }
     }
+  }
 
-    inline void
-    mw_evaluate(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list) const override
-    {
-        assert(this == &dt_list.getLeader());
-        auto& dt_leader =
-            dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>();
+  inline void mw_evaluate(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+                          const RefVectorWithLeader<ParticleSetT<T>>& p_list) const override
+  {
+    assert(this == &dt_list.getLeader());
+    auto& dt_leader = dt_list.template getCastedLeader<SoaDistanceTableABTOMPTarget>();
 
-        ScopedTimer local_timer(evaluate_timer_);
+    ScopedTimer local_timer(evaluate_timer_);
 
-        const size_t nw = dt_list.size();
-        DTABMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
-        auto& mw_r_dr = mw_mem.mw_r_dr;
+    const size_t nw            = dt_list.size();
+    DTABMultiWalkerMem& mw_mem = dt_leader.mw_mem_handle_;
+    auto& mw_r_dr              = mw_mem.mw_r_dr;
 
-        size_t count_targets = 0;
-        for (ParticleSetT<T>& p : p_list)
-            count_targets += p.getTotalNum();
-        const size_t total_targets = count_targets;
+    size_t count_targets = 0;
+    for (ParticleSetT<T>& p : p_list)
+      count_targets += p.getTotalNum();
+    const size_t total_targets = count_targets;
 
-        const int num_padded = getAlignedSize<RealType>(this->num_sources_);
+    const int num_padded = getAlignedSize<RealType>(this->num_sources_);
 
 #ifndef NDEBUG
-        const int stride_size = getPerTargetPctlStrideSize();
-        count_targets = 0;
-        for (size_t iw = 0; iw < dt_list.size(); iw++) {
-            auto& dt =
-                dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(
-                    iw);
-
-            for (int i = 0; i < dt.targets(); ++i) {
-                assert(dt.distances_[i].data() ==
-                    mw_r_dr.data() + (i + count_targets) * stride_size);
-                assert(dt.displacements_[i].data() ==
-                    mw_r_dr.data() + (i + count_targets) * stride_size +
-                        num_padded);
-            }
-            count_targets += dt.targets();
-        }
+    const int stride_size = getPerTargetPctlStrideSize();
+    count_targets         = 0;
+    for (size_t iw = 0; iw < dt_list.size(); iw++)
+    {
+      auto& dt = dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(iw);
+
+      for (int i = 0; i < dt.targets(); ++i)
+      {
+        assert(dt.distances_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size);
+        assert(dt.displacements_[i].data() == mw_r_dr.data() + (i + count_targets) * stride_size + num_padded);
+      }
+      count_targets += dt.targets();
+    }
 #endif
 
-        // This is horrible optimization putting different data types in a
-        // single buffer but allows a single H2D transfer
-        const size_t realtype_size = sizeof(RealType);
-        const size_t int_size = sizeof(int);
-        const size_t ptr_size = sizeof(RealType*);
-        auto& offload_input = mw_mem.offload_input;
-        offload_input.resize(total_targets * D * realtype_size +
-            total_targets * int_size + nw * ptr_size);
-        auto source_ptrs = reinterpret_cast<RealType**>(offload_input.data());
-        auto target_positions =
-            reinterpret_cast<RealType*>(offload_input.data() + ptr_size * nw);
-        auto walker_id_ptr = reinterpret_cast<int*>(offload_input.data() +
-            ptr_size * nw + total_targets * D * realtype_size);
-
-        count_targets = 0;
-        for (size_t iw = 0; iw < nw; iw++) {
-            auto& dt =
-                dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(
-                    iw);
-            ParticleSetT<T>& pset(p_list[iw]);
-
-            assert(dt.targets() == pset.getTotalNum());
-            assert(this->num_sources_ == dt.num_sources_);
-
-            auto& RSoA_OMPTarget =
-                static_cast<const RealSpacePositionsTOMPTarget<T>&>(
-                    dt.origin_.getCoordinates());
-            source_ptrs[iw] =
-                const_cast<RealType*>(RSoA_OMPTarget.getDevicePtr());
-
-            for (size_t iat = 0; iat < pset.getTotalNum();
-                 ++iat, ++count_targets) {
-                walker_id_ptr[count_targets] = iw;
-                for (size_t idim = 0; idim < D; idim++)
-                    target_positions[count_targets * D + idim] =
-                        pset.R[iat][idim];
-            }
-        }
+    // This is a horrible optimization: it packs different data types into a
+    // single buffer, but it allows a single H2D transfer.
+    const size_t realtype_size = sizeof(RealType);
+    const size_t int_size      = sizeof(int);
+    const size_t ptr_size      = sizeof(RealType*);
+    auto& offload_input        = mw_mem.offload_input;
+    offload_input.resize(total_targets * D * realtype_size + total_targets * int_size + nw * ptr_size);
+    auto source_ptrs      = reinterpret_cast<RealType**>(offload_input.data());
+    auto target_positions = reinterpret_cast<RealType*>(offload_input.data() + ptr_size * nw);
+    auto walker_id_ptr =
+        reinterpret_cast<int*>(offload_input.data() + ptr_size * nw + total_targets * D * realtype_size);
+
+    count_targets = 0;
+    for (size_t iw = 0; iw < nw; iw++)
+    {
+      auto& dt = dt_list.template getCastedElement<SoaDistanceTableABTOMPTarget>(iw);
+      ParticleSetT<T>& pset(p_list[iw]);
 
-        // To maximize thread usage, the loop over electrons is chunked. Each
-        // chunk is sent to an OpenMP offload thread team.
-        const int ChunkSizePerTeam = 512;
-        const size_t num_teams =
-            (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+      assert(dt.targets() == pset.getTotalNum());
+      assert(this->num_sources_ == dt.num_sources_);
 
-        auto* r_dr_ptr = mw_r_dr.data();
-        auto* input_ptr = offload_input.data();
-        const int num_sources_local = this->num_sources_;
+      auto& RSoA_OMPTarget = static_cast<const RealSpacePositionsTOMPTarget<T>&>(dt.origin_.getCoordinates());
+      source_ptrs[iw]      = const_cast<RealType*>(RSoA_OMPTarget.getDevicePtr());
 
-        {
-            ScopedTimer offload(dt_leader.offload_timer_);
-            PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
+      for (size_t iat = 0; iat < pset.getTotalNum(); ++iat, ++count_targets)
+      {
+        walker_id_ptr[count_targets] = iw;
+        for (size_t idim = 0; idim < D; idim++)
+          target_positions[count_targets * D + idim] = pset.R[iat][idim];
+      }
+    }
+
+    // To maximize thread usage, the loop over sources (iel) is chunked.
+    // Each chunk is sent to an OpenMP offload thread team.
+    const int ChunkSizePerTeam = 512;
+    const size_t num_teams     = (this->num_sources_ + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+
+    auto* r_dr_ptr              = mw_r_dr.data();
+    auto* input_ptr             = offload_input.data();
+    const int num_sources_local = this->num_sources_;
+
+    {
+      ScopedTimer offload(dt_leader.offload_timer_);
+      PRAGMA_OFFLOAD("omp target teams distribute collapse(2) \
                 num_teams(total_targets*num_teams) \
                 map(always, to: input_ptr[:offload_input.size()]) \
                 depend(out:r_dr_ptr[:mw_r_dr.size()]) nowait")
-            for (int iat = 0; iat < total_targets; ++iat)
-                for (int team_id = 0; team_id < num_teams; team_id++) {
-                    auto* target_pos_ptr =
-                        reinterpret_cast<RealType*>(input_ptr + ptr_size * nw);
-                    const int walker_id = reinterpret_cast<int*>(input_ptr +
-                        ptr_size * nw + total_targets * D * realtype_size)[iat];
-                    auto* source_pos_ptr =
-                        reinterpret_cast<RealType**>(input_ptr)[walker_id];
-                    auto* r_iat_ptr = r_dr_ptr + iat * num_padded * (D + 1);
-                    auto* dr_iat_ptr =
-                        r_dr_ptr + iat * num_padded * (D + 1) + num_padded;
-
-                    const int first = ChunkSizePerTeam * team_id;
-                    const int last = omptarget::min(
-                        first + ChunkSizePerTeam, num_sources_local);
-
-                    RealType pos[D];
-                    for (int idim = 0; idim < D; idim++)
-                        pos[idim] = target_pos_ptr[iat * D + idim];
-
-                    PRAGMA_OFFLOAD("omp parallel for")
-                    for (int iel = first; iel < last; iel++)
-                        DTD_BConds<RealType, D, SC>::computeDistancesOffload(
-                            pos, source_pos_ptr, num_padded, r_iat_ptr,
-                            dr_iat_ptr, num_padded, iel);
-                }
-
-            if (!(this->modes_ &
-                    DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST)) {
-                PRAGMA_OFFLOAD(
-                    "omp target update from(r_dr_ptr[:mw_r_dr.size()]) \
-                    depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait")
-            }
-            // wait for computing and (optional) transferring back to host.
-            // It can potentially be moved to ParticleSet to fuse multiple
-            // similar taskwait
-            PRAGMA_OFFLOAD("omp taskwait")
+      for (int iat = 0; iat < total_targets; ++iat)
+        for (int team_id = 0; team_id < num_teams; team_id++)
+        {
+          auto* target_pos_ptr = reinterpret_cast<RealType*>(input_ptr + ptr_size * nw);
+          const int walker_id =
+              reinterpret_cast<int*>(input_ptr + ptr_size * nw + total_targets * D * realtype_size)[iat];
+          auto* source_pos_ptr = reinterpret_cast<RealType**>(input_ptr)[walker_id];
+          auto* r_iat_ptr      = r_dr_ptr + iat * num_padded * (D + 1);
+          auto* dr_iat_ptr     = r_dr_ptr + iat * num_padded * (D + 1) + num_padded;
+
+          const int first = ChunkSizePerTeam * team_id;
+          const int last  = omptarget::min(first + ChunkSizePerTeam, num_sources_local);
+
+          RealType pos[D];
+          for (int idim = 0; idim < D; idim++)
+            pos[idim] = target_pos_ptr[iat * D + idim];
+
+          PRAGMA_OFFLOAD("omp parallel for")
+          for (int iel = first; iel < last; iel++)
+            DTD_BConds<RealType, D, SC>::computeDistancesOffload(pos, source_pos_ptr, num_padded, r_iat_ptr, dr_iat_ptr,
+                                                                 num_padded, iel);
         }
-    }
 
-    inline void
-    mw_recompute(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
-        const std::vector<bool>& recompute) const override
-    {
-        mw_evaluate(dt_list, p_list);
-    }
-
-    /// evaluate the temporary pair relations
-    inline void
-    move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat,
-        bool prepare_old) override
-    {
-        ScopedTimer local_timer(move_timer_);
-        DTD_BConds<RealType, D, SC>::computeDistances(rnew,
-            this->origin_.getCoordinates().getAllParticlePos(),
-            this->temp_r_.data(), this->temp_dr_, 0, this->num_sources_);
-        // If the full table is not ready all the time, overwrite the current
-        // value. If this step is missing, DT values can be undefined in case a
-        // move is rejected.
-        if (!(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old)
-            DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat],
-                this->origin_.getCoordinates().getAllParticlePos(),
-                this->distances_[iat].data(), this->displacements_[iat], 0,
-                this->num_sources_);
+      if (!(this->modes_ & DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST))
+      {
+        PRAGMA_OFFLOAD("omp target update from(r_dr_ptr[:mw_r_dr.size()]) \
+                    depend(inout:r_dr_ptr[:mw_r_dr.size()]) nowait")
+      }
+      // Wait for the computation and the (optional) transfer back to host.
+      // This could potentially be moved to ParticleSet to fuse multiple
+      // similar taskwaits.
+      PRAGMA_OFFLOAD("omp taskwait")
     }
-
-    /// update the stripe for jat-th particle
-    inline void
-    update(IndexType iat) override
+  }
+
+  inline void mw_recompute(const RefVectorWithLeader<DistanceTableT<T>>& dt_list,
+                           const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                           const std::vector<bool>& recompute) const override
+  {
+    mw_evaluate(dt_list, p_list); // 'recompute' is not consulted; the full walker list is always re-evaluated
+  }
+
+  /// evaluate the temporary pair relations of position rnew against all sources (results go to temp_r_/temp_dr_)
+  inline void move(const ParticleSetT<T>& P, const PosType& rnew, const IndexType iat, bool prepare_old) override
+  {
+    ScopedTimer local_timer(move_timer_);
+    DTD_BConds<RealType, D, SC>::computeDistances(rnew, this->origin_.getCoordinates().getAllParticlePos(),
+                                                  this->temp_r_.data(), this->temp_dr_, 0, this->num_sources_);
+    // If the full table is not kept ready all the time, recompute the stored
+    // row for iat from P.R[iat]. If this step is missing, DT values can be
+    // undefined in case a move is rejected.
+    if (!(this->modes_ & DTModes::NEED_FULL_TABLE_ANYTIME) && prepare_old)
+      DTD_BConds<RealType, D, SC>::computeDistances(P.R[iat], this->origin_.getCoordinates().getAllParticlePos(),
+                                                    this->distances_[iat].data(), this->displacements_[iat], 0,
+                                                    this->num_sources_);
+  }
+
+  /// update the stripe for the iat-th particle: copy temp_r_/temp_dr_ into distances_[iat]/displacements_[iat]
+  inline void update(IndexType iat) override
+  {
+    ScopedTimer local_timer(update_timer_);
+    std::copy_n(this->temp_r_.data(), this->num_sources_, this->distances_[iat].data());
+    for (int idim = 0; idim < D; ++idim)
+      std::copy_n(this->temp_dr_.data(idim), this->num_sources_, this->displacements_[iat].data(idim));
+  }
+
+  int get_first_neighbor(IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+  {
+    RealType min_dist = std::numeric_limits<RealType>::max();
+    int index         = -1;
+    if (newpos)
     {
-        ScopedTimer local_timer(update_timer_);
-        std::copy_n(this->temp_r_.data(), this->num_sources_,
-            this->distances_[iat].data());
-        for (int idim = 0; idim < D; ++idim)
-            std::copy_n(this->temp_dr_.data(idim), this->num_sources_,
-                this->displacements_[iat].data(idim));
+      for (int jat = 0; jat < this->num_sources_; ++jat)
+        if (this->temp_r_[jat] < min_dist)
+        {
+          min_dist = this->temp_r_[jat];
+          index    = jat;
+        }
+      if (index >= 0)
+      {
+        r  = min_dist;
+        dr = this->temp_dr_[index];
+      }
     }
-
-    int
-    get_first_neighbor(
-        IndexType iat, RealType& r, PosType& dr, bool newpos) const override
+    else
     {
-        RealType min_dist = std::numeric_limits<RealType>::max();
-        int index = -1;
-        if (newpos) {
-            for (int jat = 0; jat < this->num_sources_; ++jat)
-                if (this->temp_r_[jat] < min_dist) {
-                    min_dist = this->temp_r_[jat];
-                    index = jat;
-                }
-            if (index >= 0) {
-                r = min_dist;
-                dr = this->temp_dr_[index];
-            }
-        }
-        else {
-            for (int jat = 0; jat < this->num_sources_; ++jat)
-                if (this->distances_[iat][jat] < min_dist) {
-                    min_dist = this->distances_[iat][jat];
-                    index = jat;
-                }
-            if (index >= 0) {
-                r = min_dist;
-                dr = this->displacements_[iat][index];
-            }
+      for (int jat = 0; jat < this->num_sources_; ++jat)
+        if (this->distances_[iat][jat] < min_dist)
+        {
+          min_dist = this->distances_[iat][jat];
+          index    = jat;
         }
-        assert(index >= 0 && index < this->num_sources_);
-        return index;
+      if (index >= 0)
+      {
+        r  = min_dist;
+        dr = this->displacements_[iat][index];
+      }
     }
+    assert(index >= 0 && index < this->num_sources_);
+    return index;
+  }
 
 private:
-    /// timer for offload portion
-    NewTimer& offload_timer_;
-    /// timer for evaluate()
-    NewTimer& evaluate_timer_;
-    /// timer for move()
-    NewTimer& move_timer_;
-    /// timer for update()
-    NewTimer& update_timer_;
+  /// timer for offload portion
+  NewTimer& offload_timer_;
+  /// timer for evaluate()
+  NewTimer& evaluate_timer_;
+  /// timer for move()
+  NewTimer& move_timer_;
+  /// timer for update()
+  NewTimer& update_timer_;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/VirtualParticleSetT.cpp b/src/Particle/VirtualParticleSetT.cpp
index e208c7f8dbe..9606e7e46c2 100644
--- a/src/Particle/VirtualParticleSetT.cpp
+++ b/src/Particle/VirtualParticleSetT.cpp
@@ -5,11 +5,10 @@
 // Copyright (c) 2021 QMCPACK developers.
 //
 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 /** @file VirtualParticleSet.cpp
@@ -28,241 +27,228 @@ namespace qmcplusplus
 
 struct VPMultiWalkerMem : public Resource
 {
-    /// multi walker reference particle
-    Vector<int, OffloadPinnedAllocator<int>> mw_refPctls;
+  /// multi walker reference particle
+  Vector<int, OffloadPinnedAllocator<int>> mw_refPctls;
 
-    VPMultiWalkerMem() : Resource("VPMultiWalkerMem")
-    {
-    }
+  VPMultiWalkerMem() : Resource("VPMultiWalkerMem") {}
 
-    VPMultiWalkerMem(const VPMultiWalkerMem&) : VPMultiWalkerMem()
-    {
-    }
+  VPMultiWalkerMem(const VPMultiWalkerMem&) : VPMultiWalkerMem() {}
 
-    std::unique_ptr<Resource>
-    makeClone() const override
-    {
-        return std::make_unique<VPMultiWalkerMem>(*this);
-    }
+  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<VPMultiWalkerMem>(*this); }
 };
 
-template <typename T>
-VirtualParticleSetT<T>::VirtualParticleSetT(
-    const ParticleSetT<T>& p, int nptcl, size_t dt_count_limit) :
-    ParticleSetT<T>(p.getSimulationCell())
+template<typename T>
+VirtualParticleSetT<T>::VirtualParticleSetT(const ParticleSetT<T>& p, int nptcl, size_t dt_count_limit)
+    : ParticleSetT<T>(p.getSimulationCell())
 {
-    this->setName("virtual");
-
-    // initialize local data structure
-    this->setSpinor(p.isSpinor());
-    this->TotalNum = nptcl;
-    this->R.resize(nptcl);
-    if (this->isSpinor())
-        this->spins.resize(nptcl);
-    this->coordinates_->resize(nptcl);
-
-    // create distancetables
-    assert(dt_count_limit <= p.getNumDistTables());
-    if (dt_count_limit == 0)
-        dt_count_limit = p.getNumDistTables();
-    for (int i = 0; i < dt_count_limit; ++i)
-        if (p.getDistTable(i).getModes() & DTModes::NEED_VP_FULL_TABLE_ON_HOST)
-            this->addTable(p.getDistTable(i).get_origin());
-        else
-            this->addTable(p.getDistTable(i).get_origin(),
-                DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST);
+  this->setName("virtual");
+
+  // initialize local data structure
+  this->setSpinor(p.isSpinor());
+  this->TotalNum = nptcl;
+  this->R.resize(nptcl);
+  if (this->isSpinor())
+    this->spins.resize(nptcl);
+  this->coordinates_->resize(nptcl);
+
+  // create distancetables
+  assert(dt_count_limit <= p.getNumDistTables());
+  if (dt_count_limit == 0)
+    dt_count_limit = p.getNumDistTables();
+  for (int i = 0; i < dt_count_limit; ++i)
+    if (p.getDistTable(i).getModes() & DTModes::NEED_VP_FULL_TABLE_ON_HOST)
+      this->addTable(p.getDistTable(i).get_origin());
+    else
+      this->addTable(p.getDistTable(i).get_origin(), DTModes::MW_EVALUATE_RESULT_NO_TRANSFER_TO_HOST);
 }
 
-template <typename T>
+template<typename T>
 VirtualParticleSetT<T>::~VirtualParticleSetT() = default;
 
-template <typename T>
-Vector<int, OffloadPinnedAllocator<int>>&
-VirtualParticleSetT<T>::getMultiWalkerRefPctls()
+template<typename T>
+Vector<int, OffloadPinnedAllocator<int>>& VirtualParticleSetT<T>::getMultiWalkerRefPctls()
 {
-    return mw_mem_handle_.getResource().mw_refPctls;
+  return mw_mem_handle_.getResource().mw_refPctls;
 }
 
-template <typename T>
-const Vector<int, OffloadPinnedAllocator<int>>&
-VirtualParticleSetT<T>::getMultiWalkerRefPctls() const
+template<typename T>
+const Vector<int, OffloadPinnedAllocator<int>>& VirtualParticleSetT<T>::getMultiWalkerRefPctls() const
 {
-    return mw_mem_handle_.getResource().mw_refPctls;
+  return mw_mem_handle_.getResource().mw_refPctls;
 }
 
-template <typename T>
-void
-VirtualParticleSetT<T>::createResource(ResourceCollection& collection) const
+template<typename T>
+void VirtualParticleSetT<T>::createResource(ResourceCollection& collection) const
 {
-    collection.addResource(std::make_unique<VPMultiWalkerMem>());
-    ParticleSetT<T>::createResource(collection);
+  collection.addResource(std::make_unique<VPMultiWalkerMem>());
+  ParticleSetT<T>::createResource(collection);
 }
 
-template <typename T>
-void
-VirtualParticleSetT<T>::acquireResource(ResourceCollection& collection,
-    const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
+template<typename T>
+void VirtualParticleSetT<T>::acquireResource(ResourceCollection& collection,
+                                             const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
 {
-    auto& vp_leader = vp_list.getLeader();
-    vp_leader.mw_mem_handle_ = collection.lendResource<VPMultiWalkerMem>();
+  auto& vp_leader          = vp_list.getLeader();
+  vp_leader.mw_mem_handle_ = collection.lendResource<VPMultiWalkerMem>();
 
-    auto p_list = RefVectorWithLeaderParticleSet(vp_list);
-    ParticleSetT<T>::acquireResource(collection, p_list);
+  auto p_list = RefVectorWithLeaderParticleSet(vp_list);
+  ParticleSetT<T>::acquireResource(collection, p_list);
 }
 
-template <typename T>
-void
-VirtualParticleSetT<T>::releaseResource(ResourceCollection& collection,
-    const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
+template<typename T>
+void VirtualParticleSetT<T>::releaseResource(ResourceCollection& collection,
+                                             const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
 {
-    collection.takebackResource(vp_list.getLeader().mw_mem_handle_);
-    auto p_list = RefVectorWithLeaderParticleSet(vp_list);
-    ParticleSetT<T>::releaseResource(collection, p_list);
+  collection.takebackResource(vp_list.getLeader().mw_mem_handle_);
+  auto p_list = RefVectorWithLeaderParticleSet(vp_list);
+  ParticleSetT<T>::releaseResource(collection, p_list);
 }
 
 /// move virtual particles to new postions and update distance tables
-template <typename T>
-void
-VirtualParticleSetT<T>::makeMoves(const ParticleSetT<T>& refp, int jel,
-    const std::vector<PosType>& deltaV, bool sphere, int iat)
+template<typename T>
+void VirtualParticleSetT<T>::makeMoves(const ParticleSetT<T>& refp,
+                                       int jel,
+                                       const std::vector<PosType>& deltaV,
+                                       bool sphere,
+                                       int iat)
 {
-    if (sphere && iat < 0)
-        throw std::runtime_error(
-            "VirtualParticleSet::makeMoves is invoked incorrectly, the flag "
-            "sphere=true requires iat specified!");
-    onSphere = sphere;
-    refPS = refp;
-    refPtcl = jel;
-    refSourcePtcl = iat;
-    assert(this->R.size() == deltaV.size());
+  if (sphere && iat < 0)
+    throw std::runtime_error("VirtualParticleSet::makeMoves is invoked incorrectly, the flag "
+                             "sphere=true requires iat specified!");
+  onSphere      = sphere;
+  refPS         = refp;
+  refPtcl       = jel;
+  refSourcePtcl = iat;
+  assert(this->R.size() == deltaV.size());
+  for (size_t ivp = 0; ivp < this->R.size(); ivp++)
+    this->R[ivp] = refp.R[jel] + deltaV[ivp];
+  if (refp.isSpinor())
     for (size_t ivp = 0; ivp < this->R.size(); ivp++)
-        this->R[ivp] = refp.R[jel] + deltaV[ivp];
-    if (refp.isSpinor())
-        for (size_t ivp = 0; ivp < this->R.size(); ivp++)
-            this->spins[ivp] = refp.spins[jel]; // no spin deltas in this API
-    this->update();
+      this->spins[ivp] = refp.spins[jel]; // no spin deltas in this API
+  this->update();
 }
 
 /// move virtual particles to new postions and update distance tables
-template <typename T>
-void
-VirtualParticleSetT<T>::makeMovesWithSpin(const ParticleSetT<T>& refp, int jel,
-    const std::vector<PosType>& deltaV, const std::vector<RealType>& deltaS,
-    bool sphere, int iat)
+template<typename T>
+void VirtualParticleSetT<T>::makeMovesWithSpin(const ParticleSetT<T>& refp,
+                                               int jel,
+                                               const std::vector<PosType>& deltaV,
+                                               const std::vector<RealType>& deltaS,
+                                               bool sphere,
+                                               int iat)
 {
-    assert(refp.isSpinor());
-    if (sphere && iat < 0)
-        throw std::runtime_error(
-            "VirtualParticleSet::makeMovesWithSpin is invoked incorrectly, the "
-            "flag sphere=true requires iat specified!");
-    onSphere = sphere;
-    refPS = refp;
-    refPtcl = jel;
-    refSourcePtcl = iat;
-    assert(this->R.size() == deltaV.size());
-    assert(this->spins.size() == deltaS.size());
-    for (size_t ivp = 0; ivp < this->R.size(); ivp++) {
-        this->R[ivp] = refp.R[jel] + deltaV[ivp];
-        this->spins[ivp] = refp.spins[jel] + deltaS[ivp];
-    }
-    this->update();
+  assert(refp.isSpinor());
+  if (sphere && iat < 0)
+    throw std::runtime_error("VirtualParticleSet::makeMovesWithSpin is invoked incorrectly, the "
+                             "flag sphere=true requires iat specified!");
+  onSphere      = sphere;
+  refPS         = refp;
+  refPtcl       = jel;
+  refSourcePtcl = iat;
+  assert(this->R.size() == deltaV.size());
+  assert(this->spins.size() == deltaS.size());
+  for (size_t ivp = 0; ivp < this->R.size(); ivp++)
+  {
+    this->R[ivp]     = refp.R[jel] + deltaV[ivp];
+    this->spins[ivp] = refp.spins[jel] + deltaS[ivp];
+  }
+  this->update();
 }
 
-template <typename T>
-void
-VirtualParticleSetT<T>::mw_makeMoves(
-    const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& refp_list,
-    const RefVector<const std::vector<PosType>>& deltaV_list,
-    const RefVector<const NLPPJob<RealType>>& joblist, bool sphere)
+template<typename T>
+void VirtualParticleSetT<T>::mw_makeMoves(const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
+                                          const RefVectorWithLeader<ParticleSetT<T>>& refp_list,
+                                          const RefVector<const std::vector<PosType>>& deltaV_list,
+                                          const RefVector<const NLPPJob<RealType>>& joblist,
+                                          bool sphere)
 {
-    auto& vp_leader = vp_list.getLeader();
-    vp_leader.onSphere = sphere;
-    vp_leader.refPS = refp_list.getLeader();
-
-    const size_t nVPs = countVPs(vp_list);
-    auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls();
-    mw_refPctls.resize(nVPs);
-
-    RefVectorWithLeader<ParticleSetT<T>> p_list(vp_leader);
-    p_list.reserve(vp_list.size());
-
-    size_t ivp = 0;
-    for (int iw = 0; iw < vp_list.size(); iw++) {
-        VirtualParticleSetT& vp(vp_list[iw]);
-        const std::vector<PosType>& deltaV(deltaV_list[iw]);
-        const NLPPJob<RealType>& job(joblist[iw]);
-
-        vp.onSphere = sphere;
-        vp.refPS = refp_list[iw];
-        vp.refPtcl = job.electron_id;
-        vp.refSourcePtcl = job.ion_id;
-        assert(vp.R.size() == deltaV.size());
-        for (size_t k = 0; k < vp.R.size(); k++, ivp++) {
-            vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k];
-            if (vp_leader.isSpinor())
-                vp.spins[k] =
-                    refp_list[iw]
-                        .spins[vp.refPtcl]; // no spin deltas in this API
-            mw_refPctls[ivp] = vp.refPtcl;
-        }
-        p_list.push_back(vp);
+  auto& vp_leader    = vp_list.getLeader();
+  vp_leader.onSphere = sphere;
+  vp_leader.refPS    = refp_list.getLeader();
+
+  const size_t nVPs = countVPs(vp_list);
+  auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls();
+  mw_refPctls.resize(nVPs);
+
+  RefVectorWithLeader<ParticleSetT<T>> p_list(vp_leader);
+  p_list.reserve(vp_list.size());
+
+  size_t ivp = 0;
+  for (int iw = 0; iw < vp_list.size(); iw++)
+  {
+    VirtualParticleSetT& vp(vp_list[iw]);
+    const std::vector<PosType>& deltaV(deltaV_list[iw]);
+    const NLPPJob<RealType>& job(joblist[iw]);
+
+    vp.onSphere      = sphere;
+    vp.refPS         = refp_list[iw];
+    vp.refPtcl       = job.electron_id;
+    vp.refSourcePtcl = job.ion_id;
+    assert(vp.R.size() == deltaV.size());
+    for (size_t k = 0; k < vp.R.size(); k++, ivp++)
+    {
+      vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k];
+      if (vp_leader.isSpinor())
+        vp.spins[k] = refp_list[iw].spins[vp.refPtcl]; // no spin deltas in this API
+      mw_refPctls[ivp] = vp.refPtcl;
     }
-    assert(ivp == nVPs);
+    p_list.push_back(vp);
+  }
+  assert(ivp == nVPs);
 
-    mw_refPctls.updateTo();
-    ParticleSetT<T>::mw_update(p_list);
+  mw_refPctls.updateTo();
+  ParticleSetT<T>::mw_update(p_list);
 }
 
-template <typename T>
-void
-VirtualParticleSetT<T>::mw_makeMovesWithSpin(
-    const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& refp_list,
-    const RefVector<const std::vector<PosType>>& deltaV_list,
-    const RefVector<const std::vector<RealType>>& deltaS_list,
-    const RefVector<const NLPPJob<RealType>>& joblist, bool sphere)
+template<typename T>
+void VirtualParticleSetT<T>::mw_makeMovesWithSpin(const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
+                                                  const RefVectorWithLeader<ParticleSetT<T>>& refp_list,
+                                                  const RefVector<const std::vector<PosType>>& deltaV_list,
+                                                  const RefVector<const std::vector<RealType>>& deltaS_list,
+                                                  const RefVector<const NLPPJob<RealType>>& joblist,
+                                                  bool sphere)
 {
-    auto& vp_leader = vp_list.getLeader();
-    if (!vp_leader.isSpinor())
-        throw std::runtime_error(
-            "VirtualParticleSet::mw_makeMovesWithSpin should not be called if "
-            "particle sets aren't spionor types");
-    vp_leader.onSphere = sphere;
-    vp_leader.refPS = refp_list.getLeader();
-
-    const size_t nVPs = countVPs(vp_list);
-    auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls();
-    mw_refPctls.resize(nVPs);
-
-    RefVectorWithLeader<ParticleSetT<T>> p_list(vp_leader);
-    p_list.reserve(vp_list.size());
-
-    size_t ivp = 0;
-    for (int iw = 0; iw < vp_list.size(); iw++) {
-        VirtualParticleSetT& vp(vp_list[iw]);
-        const std::vector<PosType>& deltaV(deltaV_list[iw]);
-        const std::vector<RealType>& deltaS(deltaS_list[iw]);
-        const NLPPJob<RealType>& job(joblist[iw]);
-
-        vp.onSphere = sphere;
-        vp.refPS = refp_list[iw];
-        vp.refPtcl = job.electron_id;
-        vp.refSourcePtcl = job.ion_id;
-        assert(vp.R.size() == deltaV.size());
-        assert(vp.spins.size() == deltaS.size());
-        assert(vp.R.size() == vp.spins.size());
-        for (size_t k = 0; k < vp.R.size(); k++, ivp++) {
-            vp.R[k] = refp_list[iw].R[vp.refPtcl] + deltaV[k];
-            vp.spins[k] = refp_list[iw].spins[vp.refPtcl] + deltaS[k];
-            mw_refPctls[ivp] = vp.refPtcl;
-        }
-        p_list.push_back(vp);
+  auto& vp_leader = vp_list.getLeader();
+  if (!vp_leader.isSpinor())
+    throw std::runtime_error("VirtualParticleSet::mw_makeMovesWithSpin should not be called if "
+                             "particle sets aren't spionor types");
+  vp_leader.onSphere = sphere;
+  vp_leader.refPS    = refp_list.getLeader();
+
+  const size_t nVPs = countVPs(vp_list);
+  auto& mw_refPctls = vp_leader.getMultiWalkerRefPctls();
+  mw_refPctls.resize(nVPs);
+
+  RefVectorWithLeader<ParticleSetT<T>> p_list(vp_leader);
+  p_list.reserve(vp_list.size());
+
+  size_t ivp = 0;
+  for (int iw = 0; iw < vp_list.size(); iw++)
+  {
+    VirtualParticleSetT& vp(vp_list[iw]);
+    const std::vector<PosType>& deltaV(deltaV_list[iw]);
+    const std::vector<RealType>& deltaS(deltaS_list[iw]);
+    const NLPPJob<RealType>& job(joblist[iw]);
+
+    vp.onSphere      = sphere;
+    vp.refPS         = refp_list[iw];
+    vp.refPtcl       = job.electron_id;
+    vp.refSourcePtcl = job.ion_id;
+    assert(vp.R.size() == deltaV.size());
+    assert(vp.spins.size() == deltaS.size());
+    assert(vp.R.size() == vp.spins.size());
+    for (size_t k = 0; k < vp.R.size(); k++, ivp++)
+    {
+      vp.R[k]          = refp_list[iw].R[vp.refPtcl] + deltaV[k];
+      vp.spins[k]      = refp_list[iw].spins[vp.refPtcl] + deltaS[k];
+      mw_refPctls[ivp] = vp.refPtcl;
     }
-    assert(ivp == nVPs);
+    p_list.push_back(vp);
+  }
+  assert(ivp == nVPs);
 
-    mw_refPctls.updateTo();
-    ParticleSetT<T>::mw_update(p_list);
+  mw_refPctls.updateTo();
+  ParticleSetT<T>::mw_update(p_list);
 }
 
 #ifndef QMC_COMPLEX
diff --git a/src/Particle/VirtualParticleSetT.h b/src/Particle/VirtualParticleSetT.h
index 97f8b62e360..2591ffff8dd 100644
--- a/src/Particle/VirtualParticleSetT.h
+++ b/src/Particle/VirtualParticleSetT.h
@@ -5,11 +5,9 @@
 // Copyright (c) 2021 QMCPACK developers.
 //
 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_VIRTUAL_PARTICLESETT_H
@@ -23,7 +21,7 @@ namespace qmcplusplus
 {
 // forward declaration.
 class NonLocalECPComponent;
-template <typename T>
+template<typename T>
 struct NLPPJob;
 struct VPMultiWalkerMem;
 
@@ -34,75 +32,59 @@ struct VPMultiWalkerMem;
  * physical ParticleSet is always marked const. It is heavily used by non-local
  * PP evaluations.
  */
-template <typename T>
+template<typename T>
 class VirtualParticleSetT : public ParticleSetT<T>
 {
 public:
-    using RealType = typename ParticleSetT<T>::RealType;
-    using PosType = typename ParticleSetT<T>::PosType;
+  using RealType = typename ParticleSetT<T>::RealType;
+  using PosType  = typename ParticleSetT<T>::PosType;
 
 private:
-    /// true, if virtual particles are on a sphere for NLPP
-    bool onSphere;
-    /// multi walker resource
-    ResourceHandle<VPMultiWalkerMem> mw_mem_handle_;
+  /// true, if virtual particles are on a sphere for NLPP
+  bool onSphere;
+  /// multi walker resource
+  ResourceHandle<VPMultiWalkerMem> mw_mem_handle_;
 
-    Vector<int, OffloadPinnedAllocator<int>>&
-    getMultiWalkerRefPctls();
+  Vector<int, OffloadPinnedAllocator<int>>& getMultiWalkerRefPctls();
 
-    /// ParticleSet this object refers to after makeMoves
-    std::optional<std::reference_wrapper<const ParticleSetT<T>>> refPS;
+  /// ParticleSet this object refers to after makeMoves
+  std::optional<std::reference_wrapper<const ParticleSetT<T>>> refPS;
 
 public:
-    /// Reference particle
-    int refPtcl;
-    /// Reference source particle, used when onSphere=true
-    int refSourcePtcl;
-
-    /// ParticleSet this object refers to
-    const ParticleSetT<T>&
-    getRefPS() const
-    {
-        return refPS.value();
-    }
-
-    inline bool
-    isOnSphere() const
-    {
-        return onSphere;
-    }
-
-    const Vector<int, OffloadPinnedAllocator<int>>&
-    getMultiWalkerRefPctls() const;
-
-    /** constructor
+  /// Reference particle
+  int refPtcl;
+  /// Reference source particle, used when onSphere=true
+  int refSourcePtcl;
+
+  /// ParticleSet this object refers to
+  const ParticleSetT<T>& getRefPS() const { return refPS.value(); }
+
+  inline bool isOnSphere() const { return onSphere; }
+
+  const Vector<int, OffloadPinnedAllocator<int>>& getMultiWalkerRefPctls() const;
+
+  /** constructor
      * @param p ParticleSet whose virtual moves are handled by this object
      * @param nptcl number of virtual particles
      * @param dt_count_limit distance tables corresepond to [0, dt_count_limit)
      * of the reference particle set are created
      */
-    VirtualParticleSetT(
-        const ParticleSetT<T>& p, int nptcl, size_t dt_count_limit = 0);
+  VirtualParticleSetT(const ParticleSetT<T>& p, int nptcl, size_t dt_count_limit = 0);
 
-    ~VirtualParticleSetT();
+  ~VirtualParticleSetT();
 
-    /// initialize a shared resource and hand it to a collection
-    void
-    createResource(ResourceCollection& collection) const;
-    /** acquire external resource and assocaite it with the list of ParticleSet
+  /// initialize a shared resource and hand it to a collection
+  void createResource(ResourceCollection& collection) const;
+  /** acquire external resource and associate it with the list of ParticleSet
      * Note: use RAII ResourceCollectionTeamLock whenever possible
      */
-    static void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<VirtualParticleSetT>& vp_list);
-    /** release external resource
+  static void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<VirtualParticleSetT>& vp_list);
+  /** release external resource
      * Note: use RAII ResourceCollectionTeamLock whenever possible
      */
-    static void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<VirtualParticleSetT>& vp_list);
+  static void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<VirtualParticleSetT>& vp_list);
 
-    /** move virtual particles to new postions and update distance tables
+  /** move virtual particles to new positions and update distance tables
      * @param refp reference particle set
      * @param jel reference particle that all the VP moves from
      * @param deltaV Position delta for virtual moves.
@@ -110,11 +92,13 @@ class VirtualParticleSetT : public ParticleSetT<T>
      * particle
      * @param iat reference source particle
      */
-    void
-    makeMoves(const ParticleSetT<T>& refp, int jel,
-        const std::vector<PosType>& deltaV, bool sphere = false, int iat = -1);
+  void makeMoves(const ParticleSetT<T>& refp,
+                 int jel,
+                 const std::vector<PosType>& deltaV,
+                 bool sphere = false,
+                 int iat     = -1);
 
-    /** move virtual particles to new postions and update distance tables
+  /** move virtual particles to new positions and update distance tables
      * @param refp reference particle set
      * @param jel reference particle that all the VP moves from
      * @param deltaV Position delta for virtual moves.
@@ -123,53 +107,51 @@ class VirtualParticleSetT : public ParticleSetT<T>
      * particle
      * @param iat reference source particle
      */
-    void
-    makeMovesWithSpin(const ParticleSetT<T>& refp, int jel,
-        const std::vector<PosType>& deltaV, const std::vector<RealType>& deltaS,
-        bool sphere = false, int iat = -1);
-
-    static void
-    mw_makeMoves(const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
-        const RefVector<const std::vector<PosType>>& deltaV_list,
-        const RefVector<const NLPPJob<RealType>>& joblist, bool sphere);
-
-    static void
-    mw_makeMovesWithSpin(
-        const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& p_list,
-        const RefVector<const std::vector<PosType>>& deltaV_list,
-        const RefVector<const std::vector<RealType>>& deltaS_list,
-        const RefVector<const NLPPJob<RealType>>& joblist, bool sphere);
-
-    static RefVectorWithLeader<ParticleSetT<T>>
-    RefVectorWithLeaderParticleSet(
-        const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
-    {
-        RefVectorWithLeader<ParticleSetT<T>> ref_list(vp_list.getLeader());
-        ref_list.reserve(ref_list.size());
-        for (VirtualParticleSetT& vp : vp_list)
-            ref_list.push_back(vp);
-        return ref_list;
-    }
-
-    static size_t
-    countVPs(const RefVectorWithLeader<const VirtualParticleSetT>& vp_list)
-    {
-        size_t nVPs = 0;
-        for (const VirtualParticleSetT& vp : vp_list)
-            nVPs += vp.getTotalNum();
-        return nVPs;
-    }
-
-    static size_t
-    countVPs(const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
-    {
-        size_t nVPs = 0;
-        for (const VirtualParticleSetT& vp : vp_list)
-            nVPs += vp.getTotalNum();
-        return nVPs;
-    }
+  void makeMovesWithSpin(const ParticleSetT<T>& refp,
+                         int jel,
+                         const std::vector<PosType>& deltaV,
+                         const std::vector<RealType>& deltaS,
+                         bool sphere = false,
+                         int iat     = -1);
+
+  static void mw_makeMoves(const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
+                           const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                           const RefVector<const std::vector<PosType>>& deltaV_list,
+                           const RefVector<const NLPPJob<RealType>>& joblist,
+                           bool sphere);
+
+  static void mw_makeMovesWithSpin(const RefVectorWithLeader<VirtualParticleSetT>& vp_list,
+                                   const RefVectorWithLeader<ParticleSetT<T>>& p_list,
+                                   const RefVector<const std::vector<PosType>>& deltaV_list,
+                                   const RefVector<const std::vector<RealType>>& deltaS_list,
+                                   const RefVector<const NLPPJob<RealType>>& joblist,
+                                   bool sphere);
+
+  static RefVectorWithLeader<ParticleSetT<T>> RefVectorWithLeaderParticleSet(
+      const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
+  {
+    RefVectorWithLeader<ParticleSetT<T>> ref_list(vp_list.getLeader());
+    ref_list.reserve(ref_list.size());
+    for (VirtualParticleSetT& vp : vp_list)
+      ref_list.push_back(vp);
+    return ref_list;
+  }
+
+  static size_t countVPs(const RefVectorWithLeader<const VirtualParticleSetT>& vp_list)
+  {
+    size_t nVPs = 0;
+    for (const VirtualParticleSetT& vp : vp_list)
+      nVPs += vp.getTotalNum();
+    return nVPs;
+  }
+
+  static size_t countVPs(const RefVectorWithLeader<VirtualParticleSetT>& vp_list)
+  {
+    size_t nVPs = 0;
+    for (const VirtualParticleSetT& vp : vp_list)
+      nVPs += vp.getTotalNum();
+    return nVPs;
+  }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/Particle/WalkerConfigurationsT.cpp b/src/Particle/WalkerConfigurationsT.cpp
index bf5642e2c39..f14b414a8af 100644
--- a/src/Particle/WalkerConfigurationsT.cpp
+++ b/src/Particle/WalkerConfigurationsT.cpp
@@ -4,17 +4,14 @@
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Jordan E. Vincent, University of Illinois at
-// Urbana-Champaign
+// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign
 //                    Bryan Clark, bclark@Princeton.edu, Princeton University
-//                    Ken Esler, kpesler@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com,
-//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Cynthia Gu, zg1@ornl.gov, Oak Ridge
-//                    National Laboratory Ye Luo, yeluo@anl.gov, Argonne
-//                    National Laboratory Mark A. Berrill, berrillma@ornl.gov,
-//                    Oak Ridge National Laboratory
+//                    Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
@@ -28,108 +25,110 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 WalkerConfigurationsT<T>::WalkerConfigurationsT() = default;
 
 /// default destructor
-template <typename T>
+template<typename T>
 WalkerConfigurationsT<T>::~WalkerConfigurationsT()
 {
-    destroyWalkers(walker_list_.begin(), walker_list_.end());
+  destroyWalkers(walker_list_.begin(), walker_list_.end());
 }
 
-template <typename T>
-void
-WalkerConfigurationsT<T>::createWalkers(int n, size_t numPtcls)
+template<typename T>
+void WalkerConfigurationsT<T>::createWalkers(int n, size_t numPtcls)
 {
-    if (walker_list_.empty()) {
-        while (n) {
-            walker_list_.push_back(std::make_unique<Walker_t>(numPtcls));
-            --n;
-        }
+  if (walker_list_.empty())
+  {
+    while (n)
+    {
+      walker_list_.push_back(std::make_unique<Walker_t>(numPtcls));
+      --n;
     }
-    else {
-        if (walker_list_.size() >= n) {
-            int iw = walker_list_.size(); // copy from the back
-            for (int i = 0; i < n; ++i) {
-                walker_list_.push_back(
-                    std::make_unique<Walker_t>(*walker_list_[--iw]));
-            }
-        }
-        else {
-            int nc = n / walker_list_.size();
-            int nw0 = walker_list_.size();
-            for (int iw = 0; iw < nw0; ++iw) {
-                for (int ic = 0; ic < nc; ++ic)
-                    walker_list_.push_back(
-                        std::make_unique<Walker_t>(*walker_list_[iw]));
-            }
-            n -= nc * nw0;
-            while (n > 0) {
-                walker_list_.push_back(
-                    std::make_unique<Walker_t>(*walker_list_[--nw0]));
-                --n;
-            }
-        }
+  }
+  else
+  {
+    if (walker_list_.size() >= n)
+    {
+      int iw = walker_list_.size(); // copy from the back
+      for (int i = 0; i < n; ++i)
+      {
+        walker_list_.push_back(std::make_unique<Walker_t>(*walker_list_[--iw]));
+      }
     }
+    else
+    {
+      int nc  = n / walker_list_.size();
+      int nw0 = walker_list_.size();
+      for (int iw = 0; iw < nw0; ++iw)
+      {
+        for (int ic = 0; ic < nc; ++ic)
+          walker_list_.push_back(std::make_unique<Walker_t>(*walker_list_[iw]));
+      }
+      n -= nc * nw0;
+      while (n > 0)
+      {
+        walker_list_.push_back(std::make_unique<Walker_t>(*walker_list_[--nw0]));
+        --n;
+      }
+    }
+  }
 }
 
-template <typename T>
-void
-WalkerConfigurationsT<T>::resize(int numWalkers, size_t numPtcls)
+template<typename T>
+void WalkerConfigurationsT<T>::resize(int numWalkers, size_t numPtcls)
 {
-    int dn = numWalkers - walker_list_.size();
-    if (dn > 0)
-        createWalkers(dn, numPtcls);
-    if (dn < 0) {
-        int nw = -dn;
-        if (nw < walker_list_.size()) {
-            walker_list_.erase(walker_list_.begin(), walker_list_.begin() - dn);
-        }
+  int dn = numWalkers - walker_list_.size();
+  if (dn > 0)
+    createWalkers(dn, numPtcls);
+  if (dn < 0)
+  {
+    int nw = -dn;
+    if (nw < walker_list_.size())
+    {
+      walker_list_.erase(walker_list_.begin(), walker_list_.begin() - dn);
     }
+  }
 }
 
 /// returns the next valid iterator
-template <typename T>
-typename WalkerConfigurationsT<T>::iterator
-WalkerConfigurationsT<T>::destroyWalkers(iterator first, iterator last)
+template<typename T>
+typename WalkerConfigurationsT<T>::iterator WalkerConfigurationsT<T>::destroyWalkers(iterator first, iterator last)
 {
-    return walker_list_.erase(first, last);
+  return walker_list_.erase(first, last);
 }
 
-template <typename T>
-void
-WalkerConfigurationsT<T>::createWalkers(iterator first, iterator last)
+template<typename T>
+void WalkerConfigurationsT<T>::createWalkers(iterator first, iterator last)
 {
-    destroyWalkers(walker_list_.begin(), walker_list_.end());
-    while (first != last) {
-        walker_list_.push_back(std::make_unique<Walker_t>(**first));
-        ++first;
-    }
+  destroyWalkers(walker_list_.begin(), walker_list_.end());
+  while (first != last)
+  {
+    walker_list_.push_back(std::make_unique<Walker_t>(**first));
+    ++first;
+  }
 }
 
-template <typename T>
-void
-WalkerConfigurationsT<T>::destroyWalkers(int nw)
+template<typename T>
+void WalkerConfigurationsT<T>::destroyWalkers(int nw)
 {
-    if (nw > walker_list_.size()) {
-        app_warning() << "  Cannot remove walkers. Current Walkers = "
-                      << walker_list_.size() << std::endl;
-        return;
-    }
-    nw = walker_list_.size() - nw;
-    int iw = nw;
-    walker_list_.erase(walker_list_.begin() + nw, walker_list_.end());
+  if (nw > walker_list_.size())
+  {
+    app_warning() << "  Cannot remove walkers. Current Walkers = " << walker_list_.size() << std::endl;
+    return;
+  }
+  nw     = walker_list_.size() - nw;
+  int iw = nw;
+  walker_list_.erase(walker_list_.begin() + nw, walker_list_.end());
 }
 
-template <typename T>
-void
-WalkerConfigurationsT<T>::copyWalkers(
-    iterator first, iterator last, iterator it)
+template<typename T>
+void WalkerConfigurationsT<T>::copyWalkers(iterator first, iterator last, iterator it)
 {
-    while (first != last) {
-        (*it++)->makeCopy(**first++);
-    }
+  while (first != last)
+  {
+    (*it++)->makeCopy(**first++);
+  }
 }
 
 /** Make Metropolis move to the walkers and save in a temporary array.
@@ -138,28 +137,26 @@ WalkerConfigurationsT<T>::copyWalkers(
  *
  * R + D + X
  */
-template <typename T>
-void
-WalkerConfigurationsT<T>::reset()
+template<typename T>
+void WalkerConfigurationsT<T>::reset()
 {
-    for (auto& walker : walker_list_) {
-        walker->Weight = 1.0;
-        walker->Multiplicity = 1.0;
-    }
+  for (auto& walker : walker_list_)
+  {
+    walker->Weight       = 1.0;
+    walker->Multiplicity = 1.0;
+  }
 }
 
-template <typename T>
-void
-WalkerConfigurationsT<T>::putConfigurations(
-    RealType* target, FullPrecRealType* weights) const
+template<typename T>
+void WalkerConfigurationsT<T>::putConfigurations(RealType* target, FullPrecRealType* weights) const
 {
-    for (const auto& walker : walker_list_) {
-        std::copy(
-            get_first_address(walker->R), get_last_address(walker->R), target);
-        target += get_last_address(walker->R) - get_first_address(walker->R);
-        *weights = walker->Weight;
-        ++weights;
-    }
+  for (const auto& walker : walker_list_)
+  {
+    std::copy(get_first_address(walker->R), get_last_address(walker->R), target);
+    target += get_last_address(walker->R) - get_first_address(walker->R);
+    *weights = walker->Weight;
+    ++weights;
+  }
 }
 
 template class WalkerConfigurationsT<double>;
diff --git a/src/Particle/WalkerConfigurationsT.h b/src/Particle/WalkerConfigurationsT.h
index 207a8811940..2c86f6c91d8 100644
--- a/src/Particle/WalkerConfigurationsT.h
+++ b/src/Particle/WalkerConfigurationsT.h
@@ -4,18 +4,14 @@
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Jordan E. Vincent, University of Illinois at
-// Urbana-Champaign
-//                    Ken Esler, kpesler@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jeremy McMinnis, jmcminis@gmail.com,
-//                    University of Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Cynthia Gu, zg1@ornl.gov, Oak Ridge
-//                    National Laboratory Raymond Clay III,
-//                    j.k.rofling@gmail.com, Lawrence Livermore National
-//                    Laboratory Ye Luo, yeluo@anl.gov, Argonne National
-//                    Laboratory Mark A. Berrill, berrillma@ornl.gov, Oak Ridge
-//                    National Laboratory
+// File developed by: Jordan E. Vincent, University of Illinois at Urbana-Champaign
+//                    Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Cynthia Gu, zg1@ornl.gov, Oak Ridge National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/Particle/createDistanceTableT.cpp b/src/Particle/createDistanceTableT.cpp
index 1905aef3d8e..bedb8bd652f 100644
--- a/src/Particle/createDistanceTableT.cpp
+++ b/src/Particle/createDistanceTableT.cpp
@@ -4,17 +4,13 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
-// Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
-//                    Ridge National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "Particle/createDistanceTableT.h"
@@ -32,209 +28,203 @@ namespace qmcplusplus
  *\param s source/target particle set
  *\return index of the distance table with the name
  */
-template <typename T>
-std::unique_ptr<DistanceTableT<T>>
-createDistanceTableAAT(ParticleSetT<T>& s, std::ostream& description)
+template<typename T>
+std::unique_ptr<DistanceTableT<T>> createDistanceTableAAT(ParticleSetT<T>& s, std::ostream& description)
 {
-    using RealType = typename ParticleSetT<T>::RealType;
-    enum
-    {
-        DIM = OHMMS_DIM
-    };
-    const int sc = s.getLattice().SuperCellEnum;
-    std::unique_ptr<DistanceTableT<T>> dt;
-    std::ostringstream o;
-    o << "  Distance table for similar particles (A-A):" << std::endl;
-    o << "    source/target: " << s.getName() << std::endl;
-    o << "    Using structure-of-arrays (SoA) data layout" << std::endl;
+  using RealType = typename ParticleSetT<T>::RealType;
+  enum
+  {
+    DIM = OHMMS_DIM
+  };
+  const int sc = s.getLattice().SuperCellEnum;
+  std::unique_ptr<DistanceTableT<T>> dt;
+  std::ostringstream o;
+  o << "  Distance table for similar particles (A-A):" << std::endl;
+  o << "    source/target: " << s.getName() << std::endl;
+  o << "    Using structure-of-arrays (SoA) data layout" << std::endl;
 
-    if (sc == SUPERCELL_BULK) {
-        if (s.getLattice().DiagonalOnly) {
-            o << "    Distance computations use orthorhombic periodic cell in "
-                 "3D."
-              << std::endl;
-            dt = std::make_unique<
-                SoaDistanceTableAAT<T, DIM, PPPO + SOA_OFFSET>>(s);
-        }
-        else {
-            if (s.getLattice().WignerSeitzRadius >
-                s.getLattice().SimulationCellRadius) {
-                o << "    Distance computations use general periodic cell in "
-                     "3D with corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableAAT<T, DIM, PPPG + SOA_OFFSET>>(s);
-            }
-            else {
-                o << "    Distance computations use general periodic cell in "
-                     "3D without corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableAAT<T, DIM, PPPS + SOA_OFFSET>>(s);
-            }
-        }
-    }
-    else if (sc == SUPERCELL_SLAB) {
-        if (s.getLattice().DiagonalOnly) {
-            o << "    Distance computations use orthorhombic code for periodic "
-                 "cell in 2D."
-              << std::endl;
-            dt = std::make_unique<
-                SoaDistanceTableAAT<T, DIM, PPNO + SOA_OFFSET>>(s);
-        }
-        else {
-            if (s.getLattice().WignerSeitzRadius >
-                s.getLattice().SimulationCellRadius) {
-                o << "    Distance computations use general periodic cell in "
-                     "2D with corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableAAT<T, DIM, PPNG + SOA_OFFSET>>(s);
-            }
-            else {
-                o << "    Distance computations use general periodic cell in "
-                     "2D without corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableAAT<T, DIM, PPNS + SOA_OFFSET>>(s);
-            }
-        }
+  if (sc == SUPERCELL_BULK)
+  {
+    if (s.getLattice().DiagonalOnly)
+    {
+      o << "    Distance computations use orthorhombic periodic cell in "
+           "3D."
+        << std::endl;
+      dt = std::make_unique<SoaDistanceTableAAT<T, DIM, PPPO + SOA_OFFSET>>(s);
     }
-    else if (sc == SUPERCELL_WIRE) {
-        o << "    Distance computations use periodic cell in one dimension."
+    else
+    {
+      if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius)
+      {
+        o << "    Distance computations use general periodic cell in "
+             "3D with corner image checks."
           << std::endl;
-        dt = std::make_unique<
-            SoaDistanceTableAAT<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s);
+        dt = std::make_unique<SoaDistanceTableAAT<T, DIM, PPPG + SOA_OFFSET>>(s);
+      }
+      else
+      {
+        o << "    Distance computations use general periodic cell in "
+             "3D without corner image checks."
+          << std::endl;
+        dt = std::make_unique<SoaDistanceTableAAT<T, DIM, PPPS + SOA_OFFSET>>(s);
+      }
+    }
+  }
+  else if (sc == SUPERCELL_SLAB)
+  {
+    if (s.getLattice().DiagonalOnly)
+    {
+      o << "    Distance computations use orthorhombic code for periodic "
+           "cell in 2D."
+        << std::endl;
+      dt = std::make_unique<SoaDistanceTableAAT<T, DIM, PPNO + SOA_OFFSET>>(s);
     }
-    else // open boundary condition
+    else
     {
-        o << "    Distance computations use open boundary conditions in 3D."
+      if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius)
+      {
+        o << "    Distance computations use general periodic cell in "
+             "2D with corner image checks."
+          << std::endl;
+        dt = std::make_unique<SoaDistanceTableAAT<T, DIM, PPNG + SOA_OFFSET>>(s);
+      }
+      else
+      {
+        o << "    Distance computations use general periodic cell in "
+             "2D without corner image checks."
           << std::endl;
-        dt = std::make_unique<
-            SoaDistanceTableAAT<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s);
+        dt = std::make_unique<SoaDistanceTableAAT<T, DIM, PPNS + SOA_OFFSET>>(s);
+      }
     }
+  }
+  else if (sc == SUPERCELL_WIRE)
+  {
+    o << "    Distance computations use periodic cell in one dimension." << std::endl;
+    dt = std::make_unique<SoaDistanceTableAAT<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s);
+  }
+  else // open boundary condition
+  {
+    o << "    Distance computations use open boundary conditions in 3D." << std::endl;
+    dt = std::make_unique<SoaDistanceTableAAT<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s);
+  }
 
-    description << o.str() << std::endl;
-    return dt;
+  description << o.str() << std::endl;
+  return dt;
 }
 
-template std::unique_ptr<DistanceTableT<double>>
-createDistanceTableAAT<double>(
-    ParticleSetT<double>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<float>>
-createDistanceTableAAT<float>(
-    ParticleSetT<float>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<std::complex<double>>>
-createDistanceTableAAT<std::complex<double>>(
-    ParticleSetT<std::complex<double>>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<std::complex<float>>>
-createDistanceTableAAT<std::complex<float>>(
-    ParticleSetT<std::complex<float>>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<double>> createDistanceTableAAT<double>(ParticleSetT<double>& t,
+                                                                                std::ostream& description);
+template std::unique_ptr<DistanceTableT<float>> createDistanceTableAAT<float>(ParticleSetT<float>& t,
+                                                                              std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<double>>> createDistanceTableAAT<std::complex<double>>(
+    ParticleSetT<std::complex<double>>& t,
+    std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<float>>> createDistanceTableAAT<std::complex<float>>(
+    ParticleSetT<std::complex<float>>& t,
+    std::ostream& description);
 
 /** Adding AsymmetricDTD to the list, e.g., el-el distance table
  *\param s source/target particle set
  *\return index of the distance table with the name
  */
-template <typename T>
-std::unique_ptr<DistanceTableT<T>>
-createDistanceTableABT(
-    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description)
+template<typename T>
+std::unique_ptr<DistanceTableT<T>> createDistanceTableABT(const ParticleSetT<T>& s,
+                                                          ParticleSetT<T>& t,
+                                                          std::ostream& description)
 {
-    using RealType = typename ParticleSetT<T>::RealType;
-    enum
-    {
-        DIM = OHMMS_DIM
-    };
-    const int sc = t.getLattice().SuperCellEnum;
-    std::unique_ptr<DistanceTableT<T>> dt;
-    std::ostringstream o;
-    o << "  Distance table for dissimilar particles (A-B):" << std::endl;
-    o << "    source: " << s.getName() << "  target: " << t.getName()
-      << std::endl;
-    o << "    Using structure-of-arrays (SoA) data layout" << std::endl;
+  using RealType = typename ParticleSetT<T>::RealType;
+  enum
+  {
+    DIM = OHMMS_DIM
+  };
+  const int sc = t.getLattice().SuperCellEnum;
+  std::unique_ptr<DistanceTableT<T>> dt;
+  std::ostringstream o;
+  o << "  Distance table for dissimilar particles (A-B):" << std::endl;
+  o << "    source: " << s.getName() << "  target: " << t.getName() << std::endl;
+  o << "    Using structure-of-arrays (SoA) data layout" << std::endl;
 
-    if (sc == SUPERCELL_BULK) {
-        if (s.getLattice().DiagonalOnly) {
-            o << "    Distance computations use orthorhombic periodic cell in "
-                 "3D."
-              << std::endl;
-            dt = std::make_unique<
-                SoaDistanceTableABT<T, DIM, PPPO + SOA_OFFSET>>(s, t);
-        }
-        else {
-            if (s.getLattice().WignerSeitzRadius >
-                s.getLattice().SimulationCellRadius) {
-                o << "    Distance computations use general periodic cell in "
-                     "3D with corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableABT<T, DIM, PPPG + SOA_OFFSET>>(s, t);
-            }
-            else {
-                o << "    Distance computations use general periodic cell in "
-                     "3D without corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableABT<T, DIM, PPPS + SOA_OFFSET>>(s, t);
-            }
-        }
-    }
-    else if (sc == SUPERCELL_SLAB) {
-        if (s.getLattice().DiagonalOnly) {
-            o << "    Distance computations use orthorhombic code for periodic "
-                 "cell in 2D."
-              << std::endl;
-            dt = std::make_unique<
-                SoaDistanceTableABT<T, DIM, PPNO + SOA_OFFSET>>(s, t);
-        }
-        else {
-            if (s.getLattice().WignerSeitzRadius >
-                s.getLattice().SimulationCellRadius) {
-                o << "    Distance computations use general periodic cell in "
-                     "2D with corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableABT<T, DIM, PPNG + SOA_OFFSET>>(s, t);
-            }
-            else {
-                o << "    Distance computations use general periodic cell in "
-                     "2D without corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableABT<T, DIM, PPNS + SOA_OFFSET>>(s, t);
-            }
-        }
+  if (sc == SUPERCELL_BULK)
+  {
+    if (s.getLattice().DiagonalOnly)
+    {
+      o << "    Distance computations use orthorhombic periodic cell in "
+           "3D."
+        << std::endl;
+      dt = std::make_unique<SoaDistanceTableABT<T, DIM, PPPO + SOA_OFFSET>>(s, t);
     }
-    else if (sc == SUPERCELL_WIRE) {
-        o << "    Distance computations use periodic cell in one dimension."
+    else
+    {
+      if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius)
+      {
+        o << "    Distance computations use general periodic cell in "
+             "3D with corner image checks."
           << std::endl;
-        dt = std::make_unique<
-            SoaDistanceTableABT<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s, t);
+        dt = std::make_unique<SoaDistanceTableABT<T, DIM, PPPG + SOA_OFFSET>>(s, t);
+      }
+      else
+      {
+        o << "    Distance computations use general periodic cell in "
+             "3D without corner image checks."
+          << std::endl;
+        dt = std::make_unique<SoaDistanceTableABT<T, DIM, PPPS + SOA_OFFSET>>(s, t);
+      }
+    }
+  }
+  else if (sc == SUPERCELL_SLAB)
+  {
+    if (s.getLattice().DiagonalOnly)
+    {
+      o << "    Distance computations use orthorhombic code for periodic "
+           "cell in 2D."
+        << std::endl;
+      dt = std::make_unique<SoaDistanceTableABT<T, DIM, PPNO + SOA_OFFSET>>(s, t);
     }
-    else // open boundary condition
+    else
     {
-        o << "    Distance computations use open boundary conditions in 3D."
+      if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius)
+      {
+        o << "    Distance computations use general periodic cell in "
+             "2D with corner image checks."
+          << std::endl;
+        dt = std::make_unique<SoaDistanceTableABT<T, DIM, PPNG + SOA_OFFSET>>(s, t);
+      }
+      else
+      {
+        o << "    Distance computations use general periodic cell in "
+             "2D without corner image checks."
           << std::endl;
-        dt = std::make_unique<
-            SoaDistanceTableABT<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s, t);
+        dt = std::make_unique<SoaDistanceTableABT<T, DIM, PPNS + SOA_OFFSET>>(s, t);
+      }
     }
+  }
+  else if (sc == SUPERCELL_WIRE)
+  {
+    o << "    Distance computations use periodic cell in one dimension." << std::endl;
+    dt = std::make_unique<SoaDistanceTableABT<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s, t);
+  }
+  else // open boundary condition
+  {
+    o << "    Distance computations use open boundary conditions in 3D." << std::endl;
+    dt = std::make_unique<SoaDistanceTableABT<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s, t);
+  }
 
-    description << o.str() << std::endl;
-    return dt;
+  description << o.str() << std::endl;
+  return dt;
 }
 
-template std::unique_ptr<DistanceTableT<double>>
-createDistanceTableABT<double>(const ParticleSetT<double>& s,
-    ParticleSetT<double>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<float>>
-createDistanceTableABT<float>(const ParticleSetT<float>& s,
-    ParticleSetT<float>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<std::complex<double>>>
-createDistanceTableABT<std::complex<double>>(
+template std::unique_ptr<DistanceTableT<double>> createDistanceTableABT<double>(const ParticleSetT<double>& s,
+                                                                                ParticleSetT<double>& t,
+                                                                                std::ostream& description);
+template std::unique_ptr<DistanceTableT<float>> createDistanceTableABT<float>(const ParticleSetT<float>& s,
+                                                                              ParticleSetT<float>& t,
+                                                                              std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<double>>> createDistanceTableABT<std::complex<double>>(
     const ParticleSetT<std::complex<double>>& s,
-    ParticleSetT<std::complex<double>>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<std::complex<float>>>
-createDistanceTableABT<std::complex<float>>(
+    ParticleSetT<std::complex<double>>& t,
+    std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<float>>> createDistanceTableABT<std::complex<float>>(
     const ParticleSetT<std::complex<float>>& s,
-    ParticleSetT<std::complex<float>>& t, std::ostream& description);
+    ParticleSetT<std::complex<float>>& t,
+    std::ostream& description);
 } // namespace qmcplusplus
diff --git a/src/Particle/createDistanceTableT.h b/src/Particle/createDistanceTableT.h
index 1c022e7c5b8..62e96044323 100644
--- a/src/Particle/createDistanceTableT.h
+++ b/src/Particle/createDistanceTableT.h
@@ -4,13 +4,10 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_DISTANCETABLET_H
@@ -39,50 +36,47 @@ namespace qmcplusplus
  */
 
 /// free function to create a distable table of s-s
-template <typename T>
-std::unique_ptr<DistanceTableT<T>>
-createDistanceTableAAT(ParticleSetT<T>& s, std::ostream& description);
+template<typename T>
+std::unique_ptr<DistanceTableT<T>> createDistanceTableAAT(ParticleSetT<T>& s, std::ostream& description);
 
-template <typename T>
-std::unique_ptr<DistanceTableT<T>>
-createDistanceTableAATOMPTarget(ParticleSetT<T>& s, std::ostream& description);
+template<typename T>
+std::unique_ptr<DistanceTableT<T>> createDistanceTableAATOMPTarget(ParticleSetT<T>& s, std::ostream& description);
 
-template <typename T>
-inline std::unique_ptr<DistanceTableT<T>>
-createDistanceTableT(ParticleSetT<T>& s, std::ostream& description)
+template<typename T>
+inline std::unique_ptr<DistanceTableT<T>> createDistanceTableT(ParticleSetT<T>& s, std::ostream& description)
 {
-    // during P-by-P move, the cost of single particle evaluation of distance
-    // tables is determined by the number of source particles. Thus the
-    // implementation selection is determined by the source particle set.
-    if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD)
-        return createDistanceTableAATOMPTarget(s, description);
-    else
-        return createDistanceTableAAT(s, description);
+  // during P-by-P move, the cost of single particle evaluation of distance
+  // tables is determined by the number of source particles. Thus the
+  // implementation selection is determined by the source particle set.
+  if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD)
+    return createDistanceTableAATOMPTarget(s, description);
+  else
+    return createDistanceTableAAT(s, description);
 }
 
 /// free function create a distable table of s-t
-template <typename T>
-std::unique_ptr<DistanceTableT<T>>
-createDistanceTableABT(
-    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description);
+template<typename T>
+std::unique_ptr<DistanceTableT<T>> createDistanceTableABT(const ParticleSetT<T>& s,
+                                                          ParticleSetT<T>& t,
+                                                          std::ostream& description);
 
-template <typename T>
-std::unique_ptr<DistanceTableT<T>>
-createDistanceTableABTOMPTarget(
-    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description);
+template<typename T>
+std::unique_ptr<DistanceTableT<T>> createDistanceTableABTOMPTarget(const ParticleSetT<T>& s,
+                                                                   ParticleSetT<T>& t,
+                                                                   std::ostream& description);
 
-template <typename T>
-inline std::unique_ptr<DistanceTableT<T>>
-createDistanceTableT(
-    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description)
+template<typename T>
+inline std::unique_ptr<DistanceTableT<T>> createDistanceTableT(const ParticleSetT<T>& s,
+                                                               ParticleSetT<T>& t,
+                                                               std::ostream& description)
 {
-    // during P-by-P move, the cost of single particle evaluation of distance
-    // tables is determined by the number of source particles. Thus the
-    // implementation selection is determined by the source particle set.
-    if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD)
-        return createDistanceTableABTOMPTarget(s, t, description);
-    else
-        return createDistanceTableABT(s, t, description);
+  // during P-by-P move, the cost of single particle evaluation of distance
+  // tables is determined by the number of source particles. Thus the
+  // implementation selection is determined by the source particle set.
+  if (s.getCoordinates().getKind() == DynamicCoordinateKind::DC_POS_OFFLOAD)
+    return createDistanceTableABTOMPTarget(s, t, description);
+  else
+    return createDistanceTableABT(s, t, description);
 }
 
 } // namespace qmcplusplus
diff --git a/src/Particle/createDistanceTableTOMPTarget.cpp b/src/Particle/createDistanceTableTOMPTarget.cpp
index afb4653184b..e190fc30364 100644
--- a/src/Particle/createDistanceTableTOMPTarget.cpp
+++ b/src/Particle/createDistanceTableTOMPTarget.cpp
@@ -4,17 +4,13 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
-// Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
-//                    Ridge National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "Particle/createDistanceTableT.h"
@@ -30,219 +26,203 @@ namespace qmcplusplus
  *\param s source/target particle set
  *\return index of the distance table with the name
  */
-template <typename T>
-std::unique_ptr<DistanceTableT<T>>
-createDistanceTableAATOMPTarget(ParticleSetT<T>& s, std::ostream& description)
+template<typename T>
+std::unique_ptr<DistanceTableT<T>> createDistanceTableAATOMPTarget(ParticleSetT<T>& s, std::ostream& description)
 {
-    using RealType = typename ParticleSetT<T>::RealType;
-    enum
-    {
-        DIM = OHMMS_DIM
-    };
-    const int sc = s.getLattice().SuperCellEnum;
-    std::unique_ptr<DistanceTableT<T>> dt;
-    std::ostringstream o;
-    o << "  Distance table for similar particles (A-A):" << std::endl;
-    o << "    source/target: " << s.getName() << std::endl;
-    o << "    Using structure-of-arrays (SoA) data layout and OpenMP offload"
-      << std::endl;
+  using RealType = typename ParticleSetT<T>::RealType;
+  enum
+  {
+    DIM = OHMMS_DIM
+  };
+  const int sc = s.getLattice().SuperCellEnum;
+  std::unique_ptr<DistanceTableT<T>> dt;
+  std::ostringstream o;
+  o << "  Distance table for similar particles (A-A):" << std::endl;
+  o << "    source/target: " << s.getName() << std::endl;
+  o << "    Using structure-of-arrays (SoA) data layout and OpenMP offload" << std::endl;
 
-    if (sc == SUPERCELL_BULK) {
-        if (s.getLattice().DiagonalOnly) {
-            o << "    Distance computations use orthorhombic periodic cell in "
-                 "3D."
-              << std::endl;
-            dt = std::make_unique<
-                SoaDistanceTableAATOMPTarget<T, DIM, PPPO + SOA_OFFSET>>(s);
-        }
-        else {
-            if (s.getLattice().WignerSeitzRadius >
-                s.getLattice().SimulationCellRadius) {
-                o << "    Distance computations use general periodic cell in "
-                     "3D with corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableAATOMPTarget<T, DIM, PPPG + SOA_OFFSET>>(s);
-            }
-            else {
-                o << "    Distance computations use general periodic cell in "
-                     "3D without corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableAATOMPTarget<T, DIM, PPPS + SOA_OFFSET>>(s);
-            }
-        }
-    }
-    else if (sc == SUPERCELL_SLAB) {
-        if (s.getLattice().DiagonalOnly) {
-            o << "    Distance computations use orthorhombic code for periodic "
-                 "cell in 2D."
-              << std::endl;
-            dt = std::make_unique<
-                SoaDistanceTableAATOMPTarget<T, DIM, PPNO + SOA_OFFSET>>(s);
-        }
-        else {
-            if (s.getLattice().WignerSeitzRadius >
-                s.getLattice().SimulationCellRadius) {
-                o << "    Distance computations use general periodic cell in "
-                     "2D with corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableAATOMPTarget<T, DIM, PPNG + SOA_OFFSET>>(s);
-            }
-            else {
-                o << "    Distance computations use general periodic cell in "
-                     "2D without corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableAATOMPTarget<T, DIM, PPNS + SOA_OFFSET>>(s);
-            }
-        }
+  if (sc == SUPERCELL_BULK)
+  {
+    if (s.getLattice().DiagonalOnly)
+    {
+      o << "    Distance computations use orthorhombic periodic cell in "
+           "3D."
+        << std::endl;
+      dt = std::make_unique<SoaDistanceTableAATOMPTarget<T, DIM, PPPO + SOA_OFFSET>>(s);
     }
-    else if (sc == SUPERCELL_WIRE) {
-        o << "    Distance computations use periodic cell in one dimension."
+    else
+    {
+      if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius)
+      {
+        o << "    Distance computations use general periodic cell in "
+             "3D with corner image checks."
           << std::endl;
-        dt = std::make_unique<
-            SoaDistanceTableAATOMPTarget<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(
-            s);
+        dt = std::make_unique<SoaDistanceTableAATOMPTarget<T, DIM, PPPG + SOA_OFFSET>>(s);
+      }
+      else
+      {
+        o << "    Distance computations use general periodic cell in "
+             "3D without corner image checks."
+          << std::endl;
+        dt = std::make_unique<SoaDistanceTableAATOMPTarget<T, DIM, PPPS + SOA_OFFSET>>(s);
+      }
+    }
+  }
+  else if (sc == SUPERCELL_SLAB)
+  {
+    if (s.getLattice().DiagonalOnly)
+    {
+      o << "    Distance computations use orthorhombic code for periodic "
+           "cell in 2D."
+        << std::endl;
+      dt = std::make_unique<SoaDistanceTableAATOMPTarget<T, DIM, PPNO + SOA_OFFSET>>(s);
     }
-    else // open boundary condition
+    else
     {
-        o << "    Distance computations use open boundary conditions in 3D."
+      if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius)
+      {
+        o << "    Distance computations use general periodic cell in "
+             "2D with corner image checks."
+          << std::endl;
+        dt = std::make_unique<SoaDistanceTableAATOMPTarget<T, DIM, PPNG + SOA_OFFSET>>(s);
+      }
+      else
+      {
+        o << "    Distance computations use general periodic cell in "
+             "2D without corner image checks."
           << std::endl;
-        dt = std::make_unique<
-            SoaDistanceTableAATOMPTarget<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(
-            s);
+        dt = std::make_unique<SoaDistanceTableAATOMPTarget<T, DIM, PPNS + SOA_OFFSET>>(s);
+      }
     }
+  }
+  else if (sc == SUPERCELL_WIRE)
+  {
+    o << "    Distance computations use periodic cell in one dimension." << std::endl;
+    dt = std::make_unique<SoaDistanceTableAATOMPTarget<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s);
+  }
+  else // open boundary condition
+  {
+    o << "    Distance computations use open boundary conditions in 3D." << std::endl;
+    dt = std::make_unique<SoaDistanceTableAATOMPTarget<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s);
+  }
 
-    description << o.str() << std::endl;
-    return dt;
+  description << o.str() << std::endl;
+  return dt;
 }
 
-template std::unique_ptr<DistanceTableT<double>>
-createDistanceTableAATOMPTarget<double>(
-    ParticleSetT<double>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<float>>
-createDistanceTableAATOMPTarget<float>(
-    ParticleSetT<float>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<std::complex<double>>>
-createDistanceTableAATOMPTarget<std::complex<double>>(
-    ParticleSetT<std::complex<double>>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<std::complex<float>>>
-createDistanceTableAATOMPTarget<std::complex<float>>(
-    ParticleSetT<std::complex<float>>& t, std::ostream& description);
+template std::unique_ptr<DistanceTableT<double>> createDistanceTableAATOMPTarget<double>(ParticleSetT<double>& t,
+                                                                                         std::ostream& description);
+template std::unique_ptr<DistanceTableT<float>> createDistanceTableAATOMPTarget<float>(ParticleSetT<float>& t,
+                                                                                       std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<double>>> createDistanceTableAATOMPTarget<std::complex<double>>(
+    ParticleSetT<std::complex<double>>& t,
+    std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<float>>> createDistanceTableAATOMPTarget<std::complex<float>>(
+    ParticleSetT<std::complex<float>>& t,
+    std::ostream& description);
 
 /** Adding AsymmetricDTD to the list, e.g., el-el distance table
  *\param s source/target particle set
  *\return index of the distance table with the name
  */
-template <typename T>
-std::unique_ptr<DistanceTableT<T>>
-createDistanceTableABTOMPTarget(
-    const ParticleSetT<T>& s, ParticleSetT<T>& t, std::ostream& description)
+template<typename T>
+std::unique_ptr<DistanceTableT<T>> createDistanceTableABTOMPTarget(const ParticleSetT<T>& s,
+                                                                   ParticleSetT<T>& t,
+                                                                   std::ostream& description)
 {
-    using RealType = typename ParticleSetT<T>::RealType;
-    enum
-    {
-        DIM = OHMMS_DIM
-    };
-    const int sc = t.getLattice().SuperCellEnum;
-    std::unique_ptr<DistanceTableT<T>> dt;
-    std::ostringstream o;
-    o << "  Distance table for dissimilar particles (A-B):" << std::endl;
-    o << "    source: " << s.getName() << "  target: " << t.getName()
-      << std::endl;
-    o << "    Using structure-of-arrays (SoA) data layout and OpenMP offload"
-      << std::endl;
+  using RealType = typename ParticleSetT<T>::RealType;
+  enum
+  {
+    DIM = OHMMS_DIM
+  };
+  const int sc = t.getLattice().SuperCellEnum;
+  std::unique_ptr<DistanceTableT<T>> dt;
+  std::ostringstream o;
+  o << "  Distance table for dissimilar particles (A-B):" << std::endl;
+  o << "    source: " << s.getName() << "  target: " << t.getName() << std::endl;
+  o << "    Using structure-of-arrays (SoA) data layout and OpenMP offload" << std::endl;
 
-    if (sc == SUPERCELL_BULK) {
-        if (s.getLattice().DiagonalOnly) {
-            o << "    Distance computations use orthorhombic periodic cell in "
-                 "3D."
-              << std::endl;
-            dt = std::make_unique<
-                SoaDistanceTableABTOMPTarget<T, DIM, PPPO + SOA_OFFSET>>(s, t);
-        }
-        else {
-            if (s.getLattice().WignerSeitzRadius >
-                s.getLattice().SimulationCellRadius) {
-                o << "    Distance computations use general periodic cell in "
-                     "3D with corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableABTOMPTarget<T, DIM, PPPG + SOA_OFFSET>>(
-                    s, t);
-            }
-            else {
-                o << "    Distance computations use general periodic cell in "
-                     "3D without corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableABTOMPTarget<T, DIM, PPPS + SOA_OFFSET>>(
-                    s, t);
-            }
-        }
-    }
-    else if (sc == SUPERCELL_SLAB) {
-        if (s.getLattice().DiagonalOnly) {
-            o << "    Distance computations use orthorhombic code for periodic "
-                 "cell in 2D."
-              << std::endl;
-            dt = std::make_unique<
-                SoaDistanceTableABTOMPTarget<T, DIM, PPNO + SOA_OFFSET>>(s, t);
-        }
-        else {
-            if (s.getLattice().WignerSeitzRadius >
-                s.getLattice().SimulationCellRadius) {
-                o << "    Distance computations use general periodic cell in "
-                     "2D with corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableABTOMPTarget<T, DIM, PPNG + SOA_OFFSET>>(
-                    s, t);
-            }
-            else {
-                o << "    Distance computations use general periodic cell in "
-                     "2D without corner image checks."
-                  << std::endl;
-                dt = std::make_unique<
-                    SoaDistanceTableABTOMPTarget<T, DIM, PPNS + SOA_OFFSET>>(
-                    s, t);
-            }
-        }
+  if (sc == SUPERCELL_BULK)
+  {
+    if (s.getLattice().DiagonalOnly)
+    {
+      o << "    Distance computations use orthorhombic periodic cell in "
+           "3D."
+        << std::endl;
+      dt = std::make_unique<SoaDistanceTableABTOMPTarget<T, DIM, PPPO + SOA_OFFSET>>(s, t);
     }
-    else if (sc == SUPERCELL_WIRE) {
-        o << "    Distance computations use periodic cell in one dimension."
+    else
+    {
+      if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius)
+      {
+        o << "    Distance computations use general periodic cell in "
+             "3D with corner image checks."
           << std::endl;
-        dt = std::make_unique<
-            SoaDistanceTableABTOMPTarget<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(
-            s, t);
+        dt = std::make_unique<SoaDistanceTableABTOMPTarget<T, DIM, PPPG + SOA_OFFSET>>(s, t);
+      }
+      else
+      {
+        o << "    Distance computations use general periodic cell in "
+             "3D without corner image checks."
+          << std::endl;
+        dt = std::make_unique<SoaDistanceTableABTOMPTarget<T, DIM, PPPS + SOA_OFFSET>>(s, t);
+      }
+    }
+  }
+  else if (sc == SUPERCELL_SLAB)
+  {
+    if (s.getLattice().DiagonalOnly)
+    {
+      o << "    Distance computations use orthorhombic code for periodic "
+           "cell in 2D."
+        << std::endl;
+      dt = std::make_unique<SoaDistanceTableABTOMPTarget<T, DIM, PPNO + SOA_OFFSET>>(s, t);
     }
-    else // open boundary condition
+    else
     {
-        o << "    Distance computations use open boundary conditions in 3D."
+      if (s.getLattice().WignerSeitzRadius > s.getLattice().SimulationCellRadius)
+      {
+        o << "    Distance computations use general periodic cell in "
+             "2D with corner image checks."
+          << std::endl;
+        dt = std::make_unique<SoaDistanceTableABTOMPTarget<T, DIM, PPNG + SOA_OFFSET>>(s, t);
+      }
+      else
+      {
+        o << "    Distance computations use general periodic cell in "
+             "2D without corner image checks."
           << std::endl;
-        dt = std::make_unique<
-            SoaDistanceTableABTOMPTarget<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(
-            s, t);
+        dt = std::make_unique<SoaDistanceTableABTOMPTarget<T, DIM, PPNS + SOA_OFFSET>>(s, t);
+      }
     }
+  }
+  else if (sc == SUPERCELL_WIRE)
+  {
+    o << "    Distance computations use periodic cell in one dimension." << std::endl;
+    dt = std::make_unique<SoaDistanceTableABTOMPTarget<T, DIM, SUPERCELL_WIRE + SOA_OFFSET>>(s, t);
+  }
+  else // open boundary condition
+  {
+    o << "    Distance computations use open boundary conditions in 3D." << std::endl;
+    dt = std::make_unique<SoaDistanceTableABTOMPTarget<T, DIM, SUPERCELL_OPEN + SOA_OFFSET>>(s, t);
+  }
 
-    description << o.str() << std::endl;
-    return dt;
+  description << o.str() << std::endl;
+  return dt;
 }
 
-template std::unique_ptr<DistanceTableT<double>>
-createDistanceTableABTOMPTarget<double>(const ParticleSetT<double>& s,
-    ParticleSetT<double>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<float>>
-createDistanceTableABTOMPTarget<float>(const ParticleSetT<float>& s,
-    ParticleSetT<float>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<std::complex<double>>>
-createDistanceTableABTOMPTarget<std::complex<double>>(
+template std::unique_ptr<DistanceTableT<double>> createDistanceTableABTOMPTarget<double>(const ParticleSetT<double>& s,
+                                                                                         ParticleSetT<double>& t,
+                                                                                         std::ostream& description);
+template std::unique_ptr<DistanceTableT<float>> createDistanceTableABTOMPTarget<float>(const ParticleSetT<float>& s,
+                                                                                       ParticleSetT<float>& t,
+                                                                                       std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<double>>> createDistanceTableABTOMPTarget<std::complex<double>>(
     const ParticleSetT<std::complex<double>>& s,
-    ParticleSetT<std::complex<double>>& t, std::ostream& description);
-template std::unique_ptr<DistanceTableT<std::complex<float>>>
-createDistanceTableABTOMPTarget<std::complex<float>>(
+    ParticleSetT<std::complex<double>>& t,
+    std::ostream& description);
+template std::unique_ptr<DistanceTableT<std::complex<float>>> createDistanceTableABTOMPTarget<std::complex<float>>(
     const ParticleSetT<std::complex<float>>& s,
-    ParticleSetT<std::complex<float>>& t, std::ostream& description);
+    ParticleSetT<std::complex<float>>& t,
+    std::ostream& description);
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BasisSetBaseT.h b/src/QMCWaveFunctions/BasisSetBaseT.h
index 569abf9173f..c16bf0d1629 100644
--- a/src/QMCWaveFunctions/BasisSetBaseT.h
+++ b/src/QMCWaveFunctions/BasisSetBaseT.h
@@ -4,18 +4,14 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
-// Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-//                    National Laboratory Jeremy McMinnis, jmcminis@gmail.com,
-//                    University of Illinois at Urbana-Champaign Jaron T.
-//                    Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_BASISSETBASET_H
@@ -32,109 +28,92 @@ namespace qmcplusplus
  * Define a common storage for the derived classes and
  * provides  a minimal set of interfaces to get/set BasisSetSize.
  */
-template <typename T>
+template<typename T>
 struct BasisSetBaseT : public OrbitalSetTraits<T>
 {
-    enum
+  enum
+  {
+    MAXINDEX = 2 + OHMMS_DIM
+  };
+  using RealType    = typename OrbitalSetTraits<T>::RealType;
+  using ValueType   = typename OrbitalSetTraits<T>::ValueType;
+  using IndexType   = typename OrbitalSetTraits<T>::IndexType;
+  using HessType    = typename OrbitalSetTraits<T>::HessType;
+  using IndexVector = typename OrbitalSetTraits<T>::IndexVector;
+  using ValueVector = typename OrbitalSetTraits<T>::ValueVector;
+  using ValueMatrix = typename OrbitalSetTraits<T>::ValueMatrix;
+  using GradVector  = typename OrbitalSetTraits<T>::GradVector;
+  using GradMatrix  = typename OrbitalSetTraits<T>::GradMatrix;
+  using HessVector  = typename OrbitalSetTraits<T>::HessVector;
+  using HessMatrix  = typename OrbitalSetTraits<T>::HessMatrix;
+  using GGGType     = TinyVector<HessType, OHMMS_DIM>;
+  using GGGVector   = Vector<GGGType>;
+  using GGGMatrix   = Matrix<GGGType>;
+
+  /// size of the basis set
+  IndexType BasisSetSize;
+  /// index of the particle
+  IndexType ActivePtcl;
+  /// counter to keep track
+  unsigned long Counter;
+  /// phi[i] the value of the i-th basis set
+  ValueVector Phi;
+  /// dphi[i] the gradient of the i-th basis set
+  GradVector dPhi;
+  /// d2phi[i] the laplacian of the i-th basis set
+  ValueVector d2Phi;
+  /// grad_grad_Phi[i] the full hessian of the i-th basis set
+  HessVector grad_grad_Phi;
+  /// grad_grad_grad_Phi[i] the gradient of the hessian (third derivatives) of the i-th basis set
+  GGGVector grad_grad_grad_Phi;
+  /// container to store value, laplacian and gradient
+  ValueMatrix Temp;
+
+  ValueMatrix Y;
+  GradMatrix dY;
+  ValueMatrix d2Y;
+
+  /// default constructor
+  BasisSetBaseT() : BasisSetSize(0), ActivePtcl(-1), Counter(0) {}
+  /// virtual destructor
+  virtual ~BasisSetBaseT() {}
+  /** resize the container */
+  void resize(int ntargets)
+  {
+    if (BasisSetSize)
     {
-        MAXINDEX = 2 + OHMMS_DIM
-    };
-    using RealType = typename OrbitalSetTraits<T>::RealType;
-    using ValueType = typename OrbitalSetTraits<T>::ValueType;
-    using IndexType = typename OrbitalSetTraits<T>::IndexType;
-    using HessType = typename OrbitalSetTraits<T>::HessType;
-    using IndexVector = typename OrbitalSetTraits<T>::IndexVector;
-    using ValueVector = typename OrbitalSetTraits<T>::ValueVector;
-    using ValueMatrix = typename OrbitalSetTraits<T>::ValueMatrix;
-    using GradVector = typename OrbitalSetTraits<T>::GradVector;
-    using GradMatrix = typename OrbitalSetTraits<T>::GradMatrix;
-    using HessVector = typename OrbitalSetTraits<T>::HessVector;
-    using HessMatrix = typename OrbitalSetTraits<T>::HessMatrix;
-    using GGGType = TinyVector<HessType, OHMMS_DIM>;
-    using GGGVector = Vector<GGGType>;
-    using GGGMatrix = Matrix<GGGType>;
-
-    /// size of the basis set
-    IndexType BasisSetSize;
-    /// index of the particle
-    IndexType ActivePtcl;
-    /// counter to keep track
-    unsigned long Counter;
-    /// phi[i] the value of the i-th basis set
-    ValueVector Phi;
-    /// dphi[i] the gradient of the i-th basis set
-    GradVector dPhi;
-    /// d2phi[i] the laplacian of the i-th basis set
-    ValueVector d2Phi;
-    /// grad_grad_Phi[i] the full hessian of the i-th basis set
-    HessVector grad_grad_Phi;
-    /// grad_grad_grad_Phi the full hessian of the i-th basis set
-    GGGVector grad_grad_grad_Phi;
-    /// container to store value, laplacian and gradient
-    ValueMatrix Temp;
-
-    ValueMatrix Y;
-    GradMatrix dY;
-    ValueMatrix d2Y;
-
-    /// default constructor
-    BasisSetBaseT() : BasisSetSize(0), ActivePtcl(-1), Counter(0)
-    {
-    }
-    /// virtual destructor
-    virtual ~BasisSetBaseT()
-    {
-    }
-    /** resize the container */
-    void
-    resize(int ntargets)
-    {
-        if (BasisSetSize) {
-            Phi.resize(BasisSetSize);
-            dPhi.resize(BasisSetSize);
-            d2Phi.resize(BasisSetSize);
-            grad_grad_Phi.resize(BasisSetSize);
-            grad_grad_grad_Phi.resize(BasisSetSize);
-            Temp.resize(BasisSetSize, MAXINDEX);
-            Y.resize(ntargets, BasisSetSize);
-            dY.resize(ntargets, BasisSetSize);
-            d2Y.resize(ntargets, BasisSetSize);
-        }
-        else {
-            app_error() << "  BasisSetBase::BasisSetSize == 0" << std::endl;
-        }
+      Phi.resize(BasisSetSize);
+      dPhi.resize(BasisSetSize);
+      d2Phi.resize(BasisSetSize);
+      grad_grad_Phi.resize(BasisSetSize);
+      grad_grad_grad_Phi.resize(BasisSetSize);
+      Temp.resize(BasisSetSize, MAXINDEX);
+      Y.resize(ntargets, BasisSetSize);
+      dY.resize(ntargets, BasisSetSize);
+      d2Y.resize(ntargets, BasisSetSize);
     }
-
-    /// clone the basis set
-    virtual BasisSetBaseT*
-    makeClone() const = 0;
-    /** return the basis set size */
-    inline IndexType
-    getBasisSetSize() const
+    else
     {
-        return BasisSetSize;
+      app_error() << "  BasisSetBase::BasisSetSize == 0" << std::endl;
     }
-
-    /// resize the basis set
-    virtual void
-    setBasisSetSize(int nbs) = 0;
-
-    virtual void
-    evaluateWithHessian(const ParticleSetT<T>& P, int iat) = 0;
-    virtual void
-    evaluateWithThirdDeriv(const ParticleSetT<T>& P, int iat) = 0;
-    virtual void
-    evaluateThirdDerivOnly(const ParticleSetT<T>& P, int iat) = 0;
-    virtual void
-    evaluateForWalkerMove(const ParticleSetT<T>& P) = 0;
-    virtual void
-    evaluateForWalkerMove(const ParticleSetT<T>& P, int iat) = 0;
-    virtual void
-    evaluateForPtclMove(const ParticleSetT<T>& P, int iat) = 0;
-    virtual void
-    evaluateAllForPtclMove(const ParticleSetT<T>& P, int iat) = 0;
-    virtual void
-    evaluateForPtclMoveWithHessian(const ParticleSetT<T>& P, int iat) = 0;
+  }
+
+  /// clone the basis set
+  virtual BasisSetBaseT* makeClone() const = 0;
+  /** return the basis set size */
+  inline IndexType getBasisSetSize() const { return BasisSetSize; }
+
+  /// resize the basis set
+  virtual void setBasisSetSize(int nbs) = 0;
+
+  virtual void evaluateWithHessian(const ParticleSetT<T>& P, int iat)            = 0;
+  virtual void evaluateWithThirdDeriv(const ParticleSetT<T>& P, int iat)         = 0;
+  virtual void evaluateThirdDerivOnly(const ParticleSetT<T>& P, int iat)         = 0;
+  virtual void evaluateForWalkerMove(const ParticleSetT<T>& P)                   = 0;
+  virtual void evaluateForWalkerMove(const ParticleSetT<T>& P, int iat)          = 0;
+  virtual void evaluateForPtclMove(const ParticleSetT<T>& P, int iat)            = 0;
+  virtual void evaluateAllForPtclMove(const ParticleSetT<T>& P, int iat)         = 0;
+  virtual void evaluateForPtclMoveWithHessian(const ParticleSetT<T>& P, int iat) = 0;
 };
 
 /** Base for real basis set
@@ -142,96 +121,76 @@ struct BasisSetBaseT : public OrbitalSetTraits<T>
  * Equivalent to BasisSetBase with minimum requirements
  * Used by LCAO
  */
-template <typename T>
+template<typename T>
 struct SoaBasisSetBaseT
 {
-    using value_type = T;
-    using vgl_type = VectorSoaContainer<T, OHMMS_DIM + 2>;
-    using vgh_type = VectorSoaContainer<T, 10>;
-    using vghgh_type = VectorSoaContainer<T, 20>;
-    using OffloadMWVGLArray =
-        Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
-    using OffloadMWVArray =
-        Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
-
-    /// size of the basis set
-    int BasisSetSize;
-
-    virtual ~SoaBasisSetBaseT() = default;
-    inline int
-    getBasisSetSize()
-    {
-        return BasisSetSize;
-    }
-
-    virtual SoaBasisSetBaseT<T>*
-    makeClone() const = 0;
-    virtual void
-    setBasisSetSize(int nbs) = 0;
-
-    // Evaluates value, gradient, and laplacian for electron "iat".  Parks them
-    // into a temporary data structure "vgl".
-    virtual void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, vgl_type& vgl) = 0;
-    // Evaluates value, gradient, and laplacian for electron "iat".  places them
-    // in a offload array for batched code.
-    virtual void
-    mw_evaluateVGL(const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        OffloadMWVGLArray& vgl) = 0;
-    // Evaluates value for electron "iat".  places it in a offload array for
-    // batched code.
-    virtual void
-    mw_evaluateValue(const RefVectorWithLeader<ParticleSetT<T>>& P_list,
-        int iat, OffloadMWVArray& v) = 0;
-    // Evaluates value, gradient, and Hessian for electron "iat".  Parks them
-    // into a temporary data structure "vgh".
-    virtual void
-    evaluateVGH(const ParticleSetT<T>& P, int iat, vgh_type& vgh) = 0;
-    // Evaluates value, gradient, and Hessian, and Gradient Hessian for electron
-    // "iat".  Parks them into a temporary data structure "vghgh".
-    virtual void
-    evaluateVGHGH(const ParticleSetT<T>& P, int iat, vghgh_type& vghgh) = 0;
-    // Evaluates the x,y, and z components of ionic gradient associated with
-    // "jion" of value.  Parks the raw data into "vgl" container.
-    virtual void
-    evaluateGradSourceV(const ParticleSetT<T>& P, int iat,
-        const ParticleSetT<T>& ions, int jion, vgl_type& vgl) = 0;
-    // Evaluates the x,y, and z components of ionic gradient associated with
-    // "jion" value, gradient, and laplacian.
-    //     Parks the raw data into "vghgh" container.
-    virtual void
-    evaluateGradSourceVGL(const ParticleSetT<T>& P, int iat,
-        const ParticleSetT<T>& ions, int jion, vghgh_type& vghgh) = 0;
-    virtual void
-    evaluateV(const ParticleSetT<T>& P, int iat, value_type* restrict vals) = 0;
-    virtual bool
-    is_S_orbital(int mo_idx, int ao_idx)
-    {
-        return false;
-    }
-
-    /// Determine which orbitals are S-type.  Used for cusp correction.
-    virtual void
-    queryOrbitalsForSType(const std::vector<bool>& corrCenter,
-        std::vector<bool>& is_s_orbital) const
-    {
-    }
-
-    /** initialize a shared resource and hand it to collection
+  using value_type        = T;
+  using vgl_type          = VectorSoaContainer<T, OHMMS_DIM + 2>;
+  using vgh_type          = VectorSoaContainer<T, 10>;
+  using vghgh_type        = VectorSoaContainer<T, 20>;
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
+
+  /// size of the basis set
+  int BasisSetSize;
+
+  virtual ~SoaBasisSetBaseT() = default;
+  inline int getBasisSetSize() const { return BasisSetSize; } ///< return the basis set size
+
+  virtual SoaBasisSetBaseT<T>* makeClone() const = 0;
+  virtual void setBasisSetSize(int nbs)          = 0;
+
+  // Evaluates value, gradient, and laplacian for electron "iat".  Parks them
+  // into a temporary data structure "vgl".
+  virtual void evaluateVGL(const ParticleSetT<T>& P, int iat, vgl_type& vgl) = 0;
+  // Evaluates value, gradient, and laplacian for electron "iat".  Places them
+  // in an offload array for batched code.
+  virtual void mw_evaluateVGL(const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat, OffloadMWVGLArray& vgl) = 0;
+  // Evaluates value for electron "iat".  Places it in an offload array for
+  // batched code.
+  virtual void mw_evaluateValue(const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat, OffloadMWVArray& v) = 0;
+  // Evaluates value, gradient, and Hessian for electron "iat".  Parks them
+  // into a temporary data structure "vgh".
+  virtual void evaluateVGH(const ParticleSetT<T>& P, int iat, vgh_type& vgh) = 0;
+  // Evaluates value, gradient, Hessian, and gradient of the Hessian for electron
+  // "iat".  Parks them into a temporary data structure "vghgh".
+  virtual void evaluateVGHGH(const ParticleSetT<T>& P, int iat, vghgh_type& vghgh) = 0;
+  // Evaluates the x,y, and z components of ionic gradient associated with
+  // "jion" of value.  Parks the raw data into "vgl" container.
+  virtual void evaluateGradSourceV(const ParticleSetT<T>& P,
+                                   int iat,
+                                   const ParticleSetT<T>& ions,
+                                   int jion,
+                                   vgl_type& vgl) = 0;
+  // Evaluates the x,y, and z components of ionic gradient associated with
+  // "jion" value, gradient, and laplacian.
+  //     Parks the raw data into "vghgh" container.
+  virtual void evaluateGradSourceVGL(const ParticleSetT<T>& P,
+                                     int iat,
+                                     const ParticleSetT<T>& ions,
+                                     int jion,
+                                     vghgh_type& vghgh)                                = 0;
+  virtual void evaluateV(const ParticleSetT<T>& P, int iat, value_type* restrict vals) = 0;
+  virtual bool is_S_orbital(int mo_idx, int ao_idx) { return false; }
+
+  /// Determine which orbitals are S-type.  Used for cusp correction.
+  virtual void queryOrbitalsForSType(const std::vector<bool>& corrCenter, std::vector<bool>& is_s_orbital) const {}
+
+  /** initialize a shared resource and hand it to collection
      */
-    virtual void createResource(ResourceCollection& collection) const {}
+  virtual void createResource(ResourceCollection& collection) const {}
 
-    /** acquire a shared resource from collection
+  /** acquire a shared resource from collection
      */
-    virtual void acquireResource(ResourceCollection& collection,
-                                 const RefVectorWithLeader<SoaBasisSetBaseT>& bset_list) const
-    {}
+  virtual void acquireResource(ResourceCollection& collection,
+                               const RefVectorWithLeader<SoaBasisSetBaseT>& bset_list) const
+  {}
 
-    /** return a shared resource to collection
+  /** return a shared resource to collection
      */
-    virtual void releaseResource(ResourceCollection& collection,
-                                 const RefVectorWithLeader<SoaBasisSetBaseT>& bset_list) const
-    {}
+  virtual void releaseResource(ResourceCollection& collection,
+                               const RefVectorWithLeader<SoaBasisSetBaseT>& bset_list) const
+  {}
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp
index 83cdfbd190e..9429c39eb70 100644
--- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.cpp
@@ -4,21 +4,14 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Paul R. C. Kent, kentpr@ornl.gov, Oak Ridge National
-//                    Laboratory Mark A. Berrill, berrillma@ornl.gov, Oak Ridge
-//                    National Laboratory Ye Luo, yeluo@anl.gov, Argonne
-//                    National Laboratory
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Paul R. C. Kent, kentpr@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
-/** @file BsplineReaderBaseT.cpp
- *
- * Implement super function
- */
 #include "BsplineReaderBaseT.h"
 
 #include "Message/CommOperators.h"
@@ -30,162 +23,155 @@
 
 namespace qmcplusplus
 {
-template <typename T>
-BsplineReaderBaseT<T>::BsplineReaderBaseT(EinsplineSetBuilderT<T>* e) :
-    mybuilder(e),
-    MeshSize(0),
-    checkNorm(true),
-    saveSplineCoefs(false),
-    rotate(true)
+template<typename T>
+BsplineReaderBaseT<T>::BsplineReaderBaseT(EinsplineSetBuilderT<T>* e)
+    : mybuilder(e), MeshSize(0), checkNorm(true), saveSplineCoefs(false), rotate(true)
 {
-    myComm = mybuilder->getCommunicator();
+  myComm = mybuilder->getCommunicator();
 }
 
-template <typename T>
-void
-BsplineReaderBaseT<T>::get_psi_g(
-    int ti, int spin, int ib, Vector<std::complex<double>>& cG)
+template<typename T>
+void BsplineReaderBaseT<T>::get_psi_g(int ti, int spin, int ib, Vector<std::complex<double>>& cG)
 {
-    int ncg = 0;
-    if (myComm->rank() == 0) {
-        std::string path = psi_g_path(ti, spin, ib);
-        mybuilder->H5File.read(cG, path);
-        ncg = cG.size();
-    }
-    myComm->bcast(ncg);
-    if (ncg != mybuilder->MaxNumGvecs) {
-        APP_ABORT("Failed : ncg != MaxNumGvecs");
-    }
-    myComm->bcast(cG);
+  int ncg = 0;
+  if (myComm->rank() == 0)
+  {
+    std::string path = psi_g_path(ti, spin, ib);
+    mybuilder->H5File.read(cG, path);
+    ncg = cG.size();
+  }
+  myComm->bcast(ncg);
+  if (ncg != mybuilder->MaxNumGvecs)
+  {
+    APP_ABORT("Failed : ncg != MaxNumGvecs");
+  }
+  myComm->bcast(cG);
 }
 
-template <typename T>
+template<typename T>
 BsplineReaderBaseT<T>::~BsplineReaderBaseT()
-{
-}
+{}
 
-inline std::string
-make_bandinfo_filename(const std::string& root, int spin, int twist,
-    const Tensor<int, 3>& tilematrix, int gid)
+inline std::string make_bandinfo_filename(const std::string& root,
+                                          int spin,
+                                          int twist,
+                                          const Tensor<int, 3>& tilematrix,
+                                          int gid)
 {
-    std::ostringstream oo;
-    oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1)
-       << tilematrix(0, 2) << tilematrix(1, 0) << tilematrix(1, 1)
-       << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1)
-       << tilematrix(2, 2) << ".spin_" << spin << ".tw_" << twist;
-    if (gid >= 0)
-        oo << ".g" << gid;
-    return oo.str();
+  std::ostringstream oo;
+  oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) << tilematrix(0, 2) << tilematrix(1, 0)
+     << tilematrix(1, 1) << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1) << tilematrix(2, 2) << ".spin_"
+     << spin << ".tw_" << twist;
+  if (gid >= 0)
+    oo << ".g" << gid;
+  return oo.str();
 }
 
-inline std::string
-make_bandgroup_name(const std::string& root, int spin, int twist,
-    const Tensor<int, 3>& tilematrix, int first, int last)
+inline std::string make_bandgroup_name(const std::string& root,
+                                       int spin,
+                                       int twist,
+                                       const Tensor<int, 3>& tilematrix,
+                                       int first,
+                                       int last)
 {
-    std::ostringstream oo;
-    oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1)
-       << tilematrix(0, 2) << tilematrix(1, 0) << tilematrix(1, 1)
-       << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1)
-       << tilematrix(2, 2) << ".spin_" << spin << ".tw_" << twist << ".l"
-       << first << "u" << last;
-    return oo.str();
+  std::ostringstream oo;
+  oo << root << ".tile_" << tilematrix(0, 0) << tilematrix(0, 1) << tilematrix(0, 2) << tilematrix(1, 0)
+     << tilematrix(1, 1) << tilematrix(1, 2) << tilematrix(2, 0) << tilematrix(2, 1) << tilematrix(2, 2) << ".spin_"
+     << spin << ".tw_" << twist << ".l" << first << "u" << last;
+  return oo.str();
 }
 
-template <typename T>
-void
-BsplineReaderBaseT<T>::setCommon(xmlNodePtr cur)
+template<typename T>
+void BsplineReaderBaseT<T>::setCommon(xmlNodePtr cur)
 {
-    // check orbital normalization by default
-    std::string checkOrbNorm("yes");
-    std::string saveCoefs("no");
-    OhmmsAttributeSet a;
-    a.add(checkOrbNorm, "check_orb_norm");
-    a.add(saveCoefs, "save_coefs");
-    a.put(cur);
-
-    // allow user to turn off norm check with a warning
-    if (checkOrbNorm == "no") {
-        app_log() << "WARNING: disable orbital normalization check!"
-                  << std::endl;
-        checkNorm = false;
-    }
-    saveSplineCoefs = saveCoefs == "yes";
+  // check orbital normalization by default
+  std::string checkOrbNorm("yes");
+  std::string saveCoefs("no");
+  OhmmsAttributeSet a;
+  a.add(checkOrbNorm, "check_orb_norm");
+  a.add(saveCoefs, "save_coefs");
+  a.put(cur);
+
+  // allow user to turn off norm check with a warning
+  if (checkOrbNorm == "no")
+  {
+    app_log() << "WARNING: disable orbital normalization check!" << std::endl;
+    checkNorm = false;
+  }
+  saveSplineCoefs = saveCoefs == "yes";
 }
 
-template <typename T>
-std::unique_ptr<SPOSetT<T>>
-BsplineReaderBaseT<T>::create_spline_set(int spin, xmlNodePtr cur)
+template<typename T>
+std::unique_ptr<SPOSetT<T>> BsplineReaderBaseT<T>::create_spline_set(int spin, xmlNodePtr cur)
 {
-    int ns(0);
-    std::string spo_object_name;
-    OhmmsAttributeSet a;
-    a.add(ns, "size");
-    a.add(spo_object_name, "name");
-    a.add(spo_object_name, "id");
-    a.put(cur);
-
-    if (ns == 0)
-        APP_ABORT_TRACE(__FILE__, __LINE__, "parameter/@size missing");
-
-    if (spo2band.empty())
-        spo2band.resize(mybuilder->states.size());
-
-    std::vector<BandInfo>& fullband = (*(mybuilder->FullBands[spin]));
-
-    if (spo2band[spin].empty()) {
-        spo2band[spin].reserve(fullband.size());
-        if (!mybuilder->states[spin])
-            mybuilder->states[spin] = std::make_unique<SPOSetInfo>();
-        mybuilder->clear_states(spin);
-        initialize_spo2band(
-            spin, fullband, *mybuilder->states[spin], spo2band[spin]);
-    }
-
-    BandInfoGroup vals;
-    vals.TwistIndex = fullband[0].TwistIndex;
-    vals.GroupID = 0;
-    vals.myName = make_bandgroup_name(mybuilder->getName(), spin,
-        mybuilder->twist_num_, mybuilder->TileMatrix, 0, ns);
-    vals.selectBands(fullband, 0, ns, false);
-
-    return create_spline_set(spo_object_name, spin, vals);
+  int ns(0);
+  std::string spo_object_name;
+  OhmmsAttributeSet a;
+  a.add(ns, "size");
+  a.add(spo_object_name, "name");
+  a.add(spo_object_name, "id");
+  a.put(cur);
+
+  if (ns == 0)
+    APP_ABORT_TRACE(__FILE__, __LINE__, "parameter/@size missing");
+
+  if (spo2band.empty())
+    spo2band.resize(mybuilder->states.size());
+
+  std::vector<BandInfo>& fullband = (*(mybuilder->FullBands[spin]));
+
+  if (spo2band[spin].empty())
+  {
+    spo2band[spin].reserve(fullband.size());
+    if (!mybuilder->states[spin])
+      mybuilder->states[spin] = std::make_unique<SPOSetInfo>();
+    mybuilder->clear_states(spin);
+    initialize_spo2band(spin, fullband, *mybuilder->states[spin], spo2band[spin]);
+  }
+
+  BandInfoGroup vals;
+  vals.TwistIndex = fullband[0].TwistIndex;
+  vals.GroupID    = 0;
+  vals.myName = make_bandgroup_name(mybuilder->getName(), spin, mybuilder->twist_num_, mybuilder->TileMatrix, 0, ns);
+  vals.selectBands(fullband, 0, ns, false);
+
+  return create_spline_set(spo_object_name, spin, vals);
 }
 
-template <typename T>
-std::unique_ptr<SPOSetT<T>>
-BsplineReaderBaseT<T>::create_spline_set(
-    int spin, xmlNodePtr cur, SPOSetInputInfo& input_info)
+template<typename T>
+std::unique_ptr<SPOSetT<T>> BsplineReaderBaseT<T>::create_spline_set(int spin,
+                                                                     xmlNodePtr cur,
+                                                                     SPOSetInputInfo& input_info)
 {
-    std::string spo_object_name;
-    OhmmsAttributeSet a;
-    a.add(spo_object_name, "name");
-    a.add(spo_object_name, "id");
-    a.put(cur);
-
-    if (spo2band.empty())
-        spo2band.resize(mybuilder->states.size());
-
-    std::vector<BandInfo>& fullband = (*(mybuilder->FullBands[spin]));
-
-    if (spo2band[spin].empty()) {
-        spo2band[spin].reserve(fullband.size());
-        if (!mybuilder->states[spin])
-            mybuilder->states[spin] = std::make_unique<SPOSetInfo>();
-        mybuilder->clear_states(spin);
-        initialize_spo2band(
-            spin, fullband, *mybuilder->states[spin], spo2band[spin]);
-    }
-
-    BandInfoGroup vals;
-    vals.TwistIndex = fullband[0].TwistIndex;
-    vals.GroupID = 0;
-    vals.myName = make_bandgroup_name(mybuilder->getName(), spin,
-        mybuilder->twist_num_, mybuilder->TileMatrix, input_info.min_index(),
-        input_info.max_index());
-    vals.selectBands(fullband, spo2band[spin][input_info.min_index()],
-        input_info.max_index() - input_info.min_index(), false);
-
-    return create_spline_set(spo_object_name, spin, vals);
+  std::string spo_object_name;
+  OhmmsAttributeSet a;
+  a.add(spo_object_name, "name");
+  a.add(spo_object_name, "id");
+  a.put(cur);
+
+  if (spo2band.empty())
+    spo2band.resize(mybuilder->states.size());
+
+  std::vector<BandInfo>& fullband = (*(mybuilder->FullBands[spin]));
+
+  if (spo2band[spin].empty())
+  {
+    spo2band[spin].reserve(fullband.size());
+    if (!mybuilder->states[spin])
+      mybuilder->states[spin] = std::make_unique<SPOSetInfo>();
+    mybuilder->clear_states(spin);
+    initialize_spo2band(spin, fullband, *mybuilder->states[spin], spo2band[spin]);
+  }
+
+  BandInfoGroup vals;
+  vals.TwistIndex = fullband[0].TwistIndex;
+  vals.GroupID    = 0;
+  vals.myName     = make_bandgroup_name(mybuilder->getName(), spin, mybuilder->twist_num_, mybuilder->TileMatrix,
+                                        input_info.min_index(), input_info.max_index());
+  vals.selectBands(fullband, spo2band[spin][input_info.min_index()], input_info.max_index() - input_info.min_index(),
+                   false);
+
+  return create_spline_set(spo_object_name, spin, vals);
 }
 
 /** build index tables to map a state to band with k-point folidng
@@ -195,60 +181,60 @@ BsplineReaderBaseT<T>::create_spline_set(
  *
  * At gamma or arbitrary kpoints with complex wavefunctions, spo2band[i]==i
  */
-template <typename T>
-void
-BsplineReaderBaseT<T>::initialize_spo2band(int spin,
-    const std::vector<BandInfo>& bigspace, SPOSetInfo& sposet,
-    std::vector<int>& spo2band)
+template<typename T>
+void BsplineReaderBaseT<T>::initialize_spo2band(int spin,
+                                                const std::vector<BandInfo>& bigspace,
+                                                SPOSetInfo& sposet,
+                                                std::vector<int>& spo2band)
 {
-    spo2band.reserve(bigspace.size());
-    int ns = 0;
-    for (int i = 0; i < bigspace.size(); ++i) {
-        spo2band.push_back(i);
-        SPOInfo a(ns, bigspace[i].Energy);
-        sposet.add(a);
-        ns++;
-        if (bigspace[i].MakeTwoCopies) {
-            spo2band.push_back(i);
-            SPOInfo b(ns, bigspace[i].Energy);
-            sposet.add(b);
-            ns++;
-        }
-    }
-
-    // write to a file
-    const Communicate* comm = myComm;
-    if (comm->rank())
-        return;
-
-    std::filesystem::path aname = make_bandinfo_filename(mybuilder->getName(),
-        spin, mybuilder->twist_num_, mybuilder->TileMatrix, comm->getGroupID());
-    aname += ".bandinfo.dat";
-
-    std::ofstream o(aname.c_str());
-    std::array<char, 1024> s;
-    ns = 0;
-    using PosType = QMCTraits::PosType;
-    o << "#  Band    State   TwistIndex BandIndex Energy      Kx      Ky      "
-         "Kz      K1      K2      K3    KmK "
-      << std::endl;
-    for (int i = 0; i < bigspace.size(); ++i) {
-        int ti = bigspace[i].TwistIndex;
-        int bi = bigspace[i].BandIndex;
-        double e = bigspace[i].Energy;
-        int nd = (bigspace[i].MakeTwoCopies) ? 2 : 1;
-        PosType k = mybuilder->PrimCell.k_cart(mybuilder->primcell_kpoints[ti]);
-        int s_size = std::snprintf(s.data(), s.size(),
-            "%8d %8d %8d %8d %12.6f %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f %6d\n",
-            i, ns, ti, bi, e, k[0], k[1], k[2],
-            mybuilder->primcell_kpoints[ti][0],
-            mybuilder->primcell_kpoints[ti][1],
-            mybuilder->primcell_kpoints[ti][2], nd);
-        if (s_size < 0)
-            throw std::runtime_error("Error generating bandinfo");
-        o << s.data();
-        ns += nd;
+  spo2band.reserve(bigspace.size());
+  int ns = 0;
+  for (int i = 0; i < bigspace.size(); ++i)
+  {
+    spo2band.push_back(i);
+    SPOInfo a(ns, bigspace[i].Energy);
+    sposet.add(a);
+    ns++;
+    if (bigspace[i].MakeTwoCopies)
+    {
+      spo2band.push_back(i);
+      SPOInfo b(ns, bigspace[i].Energy);
+      sposet.add(b);
+      ns++;
     }
+  }
+
+  // write to a file
+  const Communicate* comm = myComm;
+  if (comm->rank())
+    return;
+
+  std::filesystem::path aname = make_bandinfo_filename(mybuilder->getName(), spin, mybuilder->twist_num_,
+                                                       mybuilder->TileMatrix, comm->getGroupID());
+  aname += ".bandinfo.dat";
+
+  std::ofstream o(aname.c_str());
+  std::array<char, 1024> s;
+  ns            = 0;
+  using PosType = QMCTraits::PosType;
+  o << "#  Band    State   TwistIndex BandIndex Energy      Kx      Ky      "
+       "Kz      K1      K2      K3    KmK "
+    << std::endl;
+  for (int i = 0; i < bigspace.size(); ++i)
+  {
+    int ti     = bigspace[i].TwistIndex;
+    int bi     = bigspace[i].BandIndex;
+    double e   = bigspace[i].Energy;
+    int nd     = (bigspace[i].MakeTwoCopies) ? 2 : 1;
+    PosType k  = mybuilder->PrimCell.k_cart(mybuilder->primcell_kpoints[ti]);
+    int s_size = std::snprintf(s.data(), s.size(), "%8d %8d %8d %8d %12.6f %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f %6d\n",
+                               i, ns, ti, bi, e, k[0], k[1], k[2], mybuilder->primcell_kpoints[ti][0],
+                               mybuilder->primcell_kpoints[ti][1], mybuilder->primcell_kpoints[ti][2], nd);
+    if (s_size < 0)
+      throw std::runtime_error("Error generating bandinfo");
+    o << s.data();
+    ns += nd;
+  }
 }
 
 template class BsplineReaderBaseT<double>;
diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h
index 5eab41dea55..88ad89e1072 100644
--- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBaseT.h
@@ -4,15 +4,12 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence
-//                    Livermore National Laboratory Ye Luo, yeluo@anl.gov,
-//                    Argonne National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_BSPLINE_READER_BASET_H
@@ -28,7 +25,7 @@
 namespace qmcplusplus
 {
 struct SPOSetInputInfo;
-template <typename T>
+template<typename T>
 class EinsplineSetBuilderT;
 
 /**
@@ -39,189 +36,167 @@ class EinsplineSetBuilderT;
  * - set_grid : create the basic grid and boundary conditions for einspline
  * Note that template is abused but it works.
  */
-template <typename T>
+template<typename T>
 class BsplineReaderBaseT
 {
 public:
-    /// pointer to the EinsplineSetBuilder
-    EinsplineSetBuilderT<T>* mybuilder;
-    /// communicator
-    Communicate* myComm;
-    /// mesh size
-    TinyVector<int, 3> MeshSize;
-    /// check the norm of orbitals
-    bool checkNorm;
-    /// save spline coefficients to storage
-    bool saveSplineCoefs;
-    /// apply orbital rotations
-    bool rotate;
-    /// map from spo index to band index
-    std::vector<std::vector<int>> spo2band;
-
-    BsplineReaderBaseT(EinsplineSetBuilderT<T>* e);
-
-    virtual ~BsplineReaderBaseT();
-
-    /** read gvectors and set the mesh, and prepare for einspline
+  /// pointer to the EinsplineSetBuilder
+  EinsplineSetBuilderT<T>* mybuilder;
+  /// communicator
+  Communicate* myComm;
+  /// mesh size
+  TinyVector<int, 3> MeshSize;
+  /// check the norm of orbitals
+  bool checkNorm;
+  /// save spline coefficients to storage
+  bool saveSplineCoefs;
+  /// apply orbital rotations
+  bool rotate;
+  /// map from spo index to band index
+  std::vector<std::vector<int>> spo2band;
+
+  BsplineReaderBaseT(EinsplineSetBuilderT<T>* e);
+
+  virtual ~BsplineReaderBaseT();
+
+  /** read gvectors and set the mesh, and prepare for einspline
      */
-    template <typename GT, typename BCT>
-    inline bool
-    set_grid(const TinyVector<int, 3>& halfg, GT* xyz_grid, BCT* xyz_bc)
-    {
-        // This sets MeshSize from the input file
-        bool havePsig = mybuilder->ReadGvectors_ESHDF();
-
-        // If this MeshSize is not initialized, use the meshsize set by the
-        // input based on FFT grid and meshfactor
-        if (MeshSize[0] == 0)
-            MeshSize = mybuilder->MeshSize;
-
-        app_log() << "  Using meshsize=" << MeshSize
-                  << "\n  vs input meshsize=" << mybuilder->MeshSize
-                  << std::endl;
-
-        for (int j = 0; j < 3; ++j) {
-            xyz_grid[j].start = 0.0;
-            xyz_grid[j].end = 1.0;
-            xyz_grid[j].num = MeshSize[j];
-
-            if (halfg[j]) {
-                xyz_bc[j].lCode = ANTIPERIODIC;
-                xyz_bc[j].rCode = ANTIPERIODIC;
-            }
-            else {
-                xyz_bc[j].lCode = PERIODIC;
-                xyz_bc[j].rCode = PERIODIC;
-            }
-
-            xyz_bc[j].lVal = 0.0;
-            xyz_bc[j].rVal = 0.0;
-        }
-        return havePsig;
-    }
+  template<typename GT, typename BCT>
+  inline bool set_grid(const TinyVector<int, 3>& halfg, GT* xyz_grid, BCT* xyz_bc)
+  {
+    // This sets MeshSize from the input file
+    bool havePsig = mybuilder->ReadGvectors_ESHDF();
 
-    /** initialize twist-related data for N orbitals
-     */
-    template <typename SPE>
-    inline void
-    check_twists(SPE* bspline, const BandInfoGroup& bandgroup)
+    // If this MeshSize is not initialized, use the meshsize set by the
+    // input based on FFT grid and meshfactor
+    if (MeshSize[0] == 0)
+      MeshSize = mybuilder->MeshSize;
+
+    app_log() << "  Using meshsize=" << MeshSize << "\n  vs input meshsize=" << mybuilder->MeshSize << std::endl;
+
+    for (int j = 0; j < 3; ++j)
     {
-        // init(orbitalSet,bspline);
-        bspline->PrimLattice = mybuilder->PrimCell;
-        bspline->GGt =
-            dot(transpose(bspline->PrimLattice.G), bspline->PrimLattice.G);
-
-        int N = bandgroup.getNumDistinctOrbitals();
-        int numOrbs = bandgroup.getNumSPOs();
-
-        bspline->setOrbitalSetSize(numOrbs);
-        bspline->resizeStorage(N, N);
-
-        bspline->first_spo = bandgroup.getFirstSPO();
-        bspline->last_spo = bandgroup.getLastSPO();
-
-        int num = 0;
-        const std::vector<BandInfo>& cur_bands = bandgroup.myBands;
-        for (int iorb = 0; iorb < N; iorb++) {
-            int ti = cur_bands[iorb].TwistIndex;
-            bspline->kPoints[iorb] =
-                mybuilder->PrimCell.k_cart(-mybuilder->primcell_kpoints[ti]);
-            bspline->MakeTwoCopies[iorb] =
-                (num < (numOrbs - 1)) && cur_bands[iorb].MakeTwoCopies;
-            num += bspline->MakeTwoCopies[iorb] ? 2 : 1;
-        }
-
-        app_log() << "NumDistinctOrbitals " << N << " numOrbs = " << numOrbs
-                  << std::endl;
-
-        bspline->HalfG = 0;
-        TinyVector<int, 3> bconds =
-            mybuilder->TargetPtcl.getLattice().BoxBConds;
-        if (!bspline->isComplex()) {
-            // no k-point folding, single special k point (G, L ...)
-            TinyVector<double, 3> twist0 =
-                mybuilder->primcell_kpoints[bandgroup.TwistIndex];
-            for (int i = 0; i < 3; i++)
-                if (bconds[i] &&
-                    ((std::abs(std::abs(twist0[i]) - 0.5) < 1.0e-8)))
-                    bspline->HalfG[i] = 1;
-                else
-                    bspline->HalfG[i] = 0;
-            app_log() << "  TwistIndex = " << cur_bands[0].TwistIndex
-                      << " TwistAngle " << twist0 << std::endl;
-            app_log() << "   HalfG = " << bspline->HalfG << std::endl;
-        }
-        app_log().flush();
+      xyz_grid[j].start = 0.0;
+      xyz_grid[j].end   = 1.0;
+      xyz_grid[j].num   = MeshSize[j];
+
+      if (halfg[j])
+      {
+        xyz_bc[j].lCode = ANTIPERIODIC;
+        xyz_bc[j].rCode = ANTIPERIODIC;
+      }
+      else
+      {
+        xyz_bc[j].lCode = PERIODIC;
+        xyz_bc[j].rCode = PERIODIC;
+      }
+
+      xyz_bc[j].lVal = 0.0;
+      xyz_bc[j].rVal = 0.0;
     }
+    return havePsig;
+  }
 
-    /** return the path name in hdf5
+  /** initialize twist-related data for N orbitals
      */
-    inline std::string
-    psi_g_path(int ti, int spin, int ib)
+  template<typename SPE>
+  inline void check_twists(SPE* bspline, const BandInfoGroup& bandgroup)
+  {
+    // init(orbitalSet,bspline);
+    bspline->PrimLattice = mybuilder->PrimCell;
+    bspline->GGt         = dot(transpose(bspline->PrimLattice.G), bspline->PrimLattice.G);
+
+    int N       = bandgroup.getNumDistinctOrbitals();
+    int numOrbs = bandgroup.getNumSPOs();
+
+    bspline->setOrbitalSetSize(numOrbs);
+    bspline->resizeStorage(N, N);
+
+    bspline->first_spo = bandgroup.getFirstSPO();
+    bspline->last_spo  = bandgroup.getLastSPO();
+
+    int num                                = 0;
+    const std::vector<BandInfo>& cur_bands = bandgroup.myBands;
+    for (int iorb = 0; iorb < N; iorb++)
     {
-        std::ostringstream path;
-        path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_"
-             << ib << "/psi_g";
-        return path.str();
+      int ti                       = cur_bands[iorb].TwistIndex;
+      bspline->kPoints[iorb]       = mybuilder->PrimCell.k_cart(-mybuilder->primcell_kpoints[ti]);
+      bspline->MakeTwoCopies[iorb] = (num < (numOrbs - 1)) && cur_bands[iorb].MakeTwoCopies;
+      num += bspline->MakeTwoCopies[iorb] ? 2 : 1;
     }
 
-    /** return the path name in hdf5
-     */
-    inline std::string
-    psi_r_path(int ti, int spin, int ib)
+    app_log() << "NumDistinctOrbitals " << N << " numOrbs = " << numOrbs << std::endl;
+
+    bspline->HalfG            = 0;
+    TinyVector<int, 3> bconds = mybuilder->TargetPtcl.getLattice().BoxBConds;
+    if (!bspline->isComplex())
     {
-        std::ostringstream path;
-        path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_"
-             << ib << "/psi_r";
-        return path.str();
+      // no k-point folding, single special k point (G, L ...)
+      TinyVector<double, 3> twist0 = mybuilder->primcell_kpoints[bandgroup.TwistIndex];
+      for (int i = 0; i < 3; i++)
+        if (bconds[i] && ((std::abs(std::abs(twist0[i]) - 0.5) < 1.0e-8)))
+          bspline->HalfG[i] = 1;
+        else
+          bspline->HalfG[i] = 0;
+      app_log() << "  TwistIndex = " << cur_bands[0].TwistIndex << " TwistAngle " << twist0 << std::endl;
+      app_log() << "   HalfG = " << bspline->HalfG << std::endl;
     }
+    app_log().flush();
+  }
 
-    /** read/bcast psi_g
+  /** return the path name in hdf5
+     */
+  inline std::string psi_g_path(int ti, int spin, int ib)
+  {
+    std::ostringstream path;
+    path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_g";
+    return path.str();
+  }
+
+  /** return the path name in hdf5
+     */
+  inline std::string psi_r_path(int ti, int spin, int ib)
+  {
+    std::ostringstream path;
+    path << "/electrons/kpoint_" << ti << "/spin_" << spin << "/state_" << ib << "/psi_r";
+    return path.str();
+  }
+
+  /** read/bcast psi_g
      * @param ti twist index
      * @param spin spin index
      * @param ib band index
      * @param cG psi_g as stored in hdf5
      */
-    void
-    get_psi_g(int ti, int spin, int ib, Vector<std::complex<double>>& cG);
+  void get_psi_g(int ti, int spin, int ib, Vector<std::complex<double>>& cG);
 
-    /** create the actual spline sets
+  /** create the actual spline sets
      */
-    virtual std::unique_ptr<SPOSetT<T>>
-    create_spline_set(const std::string& my_name, int spin,
-        const BandInfoGroup& bandgroup) = 0;
+  virtual std::unique_ptr<SPOSetT<T>> create_spline_set(const std::string& my_name,
+                                                        int spin,
+                                                        const BandInfoGroup& bandgroup) = 0;
 
-    /** setting common parameters
+  /** setting common parameters
      */
-    void
-    setCommon(xmlNodePtr cur);
+  void setCommon(xmlNodePtr cur);
 
-    /** create the spline after one of the kind is created */
-    std::unique_ptr<SPOSetT<T>>
-    create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info);
+  /** create the spline after one of the kind is created */
+  std::unique_ptr<SPOSetT<T>> create_spline_set(int spin, xmlNodePtr cur, SPOSetInputInfo& input_info);
 
-    /** create the spline set */
-    std::unique_ptr<SPOSetT<T>>
-    create_spline_set(int spin, xmlNodePtr cur);
+  /** create the spline set */
+  std::unique_ptr<SPOSetT<T>> create_spline_set(int spin, xmlNodePtr cur);
 
-    /** Set the checkNorm variable */
-    inline void
-    setCheckNorm(bool new_checknorm)
-    {
-        checkNorm = new_checknorm;
-    };
+  /** Set the checkNorm variable */
+  inline void setCheckNorm(bool new_checknorm) { checkNorm = new_checknorm; };
 
-    /** Set the orbital rotation flag. Rotations are applied to balance the
+  /** Set the orbital rotation flag. Rotations are applied to balance the
      * real/imaginary components. */
-    inline void
-    setRotate(bool new_rotate)
-    {
-        rotate = new_rotate;
-    };
+  inline void setRotate(bool new_rotate) { rotate = new_rotate; };
 
-    void
-    initialize_spo2band(int spin, const std::vector<BandInfo>& bigspace,
-        SPOSetInfo& sposet, std::vector<int>& band2spo);
+  void initialize_spo2band(int spin,
+                           const std::vector<BandInfo>& bigspace,
+                           SPOSetInfo& sposet,
+                           std::vector<int>& band2spo);
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
index 9286624c92d..76ddd85bcc4 100644
--- a/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineSetT.h
@@ -4,16 +4,13 @@
 //
 // Copyright (c) 2019 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-//                    Laboratory Jeongnim Kim, jeongnim.kim@gmail.com,
-//                    University of Illinois at Urbana-Champaign Mark A.
-//                    Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 /** @file BsplineSetT.h
@@ -35,225 +32,224 @@ namespace qmcplusplus
  * precision. BsplineSet also implements a few fallback routines in case
  * optimized implementation is not necessary in the derived class.
  */
-template <class T>
+template<class T>
 class BsplineSetT : public SPOSetT<T>
 {
 public:
-    using PosType = typename SPOSetT<T>::PosType;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using HessVector = typename SPOSetT<T>::HessVector;
-    using GGGVector = typename SPOSetT<T>::GGGVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using HessMatrix = typename SPOSetT<T>::HessMatrix;
-    using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
+  using PosType     = typename SPOSetT<T>::PosType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using HessVector  = typename SPOSetT<T>::HessVector;
+  using GGGVector   = typename SPOSetT<T>::GGGVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using GGGMatrix   = typename SPOSetT<T>::GGGMatrix;
 
-    using value_type = typename SPOSetT<T>::ValueMatrix::value_type;
-    using grad_type = typename SPOSetT<T>::GradMatrix::value_type;
+  using value_type = typename SPOSetT<T>::ValueMatrix::value_type;
+  using grad_type  = typename SPOSetT<T>::GradMatrix::value_type;
 
-    // used in derived classes
-    using RealType = typename SPOSetT<T>::RealType;
-    using ValueType = typename SPOSetT<T>::ValueType;
+  // used in derived classes
+  using RealType  = typename SPOSetT<T>::RealType;
+  using ValueType = typename SPOSetT<T>::ValueType;
 
-    BsplineSetT(const std::string& my_name) :
-        SPOSetT<T>(my_name),
-        MyIndex(0),
-        first_spo(0),
-        last_spo(0)
-    {
-    }
+  BsplineSetT(const std::string& my_name) : SPOSetT<T>(my_name), MyIndex(0), first_spo(0), last_spo(0) {}
 
-    virtual bool
-    isComplex() const = 0;
-    virtual std::string
-    getKeyword() const = 0;
+  virtual bool isComplex() const         = 0;
+  virtual std::string getKeyword() const = 0;
 
-    auto&
-    getHalfG() const
-    {
-        return HalfG;
-    }
+  auto& getHalfG() const { return HalfG; }
 
-    inline void
-    init_base(int n)
+  inline void init_base(int n)
+  {
+    kPoints.resize(n);
+    MakeTwoCopies.resize(n);
+    BandIndexMap.resize(n);
+    for (int i = 0; i < n; i++)
+      BandIndexMap[i] = i;
+  }
+
+  /// remap kpoints to group general kpoints & special kpoints
+  int remap_kpoints()
+  {
+    std::vector<PosType> k_copy(kPoints);
+    const int nk = kPoints.size();
+    int nCB      = 0;
+    // two passes
+    for (int i = 0; i < nk; ++i)
     {
-        kPoints.resize(n);
-        MakeTwoCopies.resize(n);
-        BandIndexMap.resize(n);
-        for (int i = 0; i < n; i++)
-            BandIndexMap[i] = i;
+      if (MakeTwoCopies[i])
+      {
+        kPoints[nCB]        = k_copy[i];
+        BandIndexMap[nCB++] = i;
+      }
     }
-
-    /// remap kpoints to group general kpoints & special kpoints
-    int
-    remap_kpoints()
+    int nRealBands = nCB;
+    for (int i = 0; i < nk; ++i)
     {
-        std::vector<PosType> k_copy(kPoints);
-        const int nk = kPoints.size();
-        int nCB = 0;
-        // two pass
-        for (int i = 0; i < nk; ++i) {
-            if (MakeTwoCopies[i]) {
-                kPoints[nCB] = k_copy[i];
-                BandIndexMap[nCB++] = i;
-            }
-        }
-        int nRealBands = nCB;
-        for (int i = 0; i < nk; ++i) {
-            if (!MakeTwoCopies[i]) {
-                kPoints[nRealBands] = k_copy[i];
-                BandIndexMap[nRealBands++] = i;
-            }
-        }
-        return nCB; // return the number of complex bands
+      if (!MakeTwoCopies[i])
+      {
+        kPoints[nRealBands]        = k_copy[i];
+        BandIndexMap[nRealBands++] = i;
+      }
     }
+    return nCB; // return the number of complex bands
+  }
 
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const override = 0;
+  std::unique_ptr<SPOSetT<T>> makeClone() const override = 0;
 
-    void
-    setOrbitalSetSize(int norbs) override
-    {
-        this->OrbitalSetSize = norbs;
-    }
+  void setOrbitalSetSize(int norbs) override { this->OrbitalSetSize = norbs; }
 
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override
+  {
+    for (int iat = first, i = 0; iat < last; ++iat, ++i)
     {
-        for (int iat = first, i = 0; iat < last; ++iat, ++i) {
-            ValueVector v(logdet[i], logdet.cols());
-            GradVector g(dlogdet[i], dlogdet.cols());
-            ValueVector l(d2logdet[i], d2logdet.cols());
-            this->evaluateVGL(P, iat, v, g, l);
-        }
+      ValueVector v(logdet[i], logdet.cols());
+      GradVector g(dlogdet[i], dlogdet.cols());
+      ValueVector l(d2logdet[i], d2logdet.cols());
+      this->evaluateVGL(P, iat, v, g, l);
     }
+  }
 
-    void
-    mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
-        const RefVector<ValueMatrix>& logdet_list,
-        const RefVector<GradMatrix>& dlogdet_list,
-        const RefVector<ValueMatrix>& d2logdet_list) const override
-    {
-        assert(this == &spo_list.getLeader());
-        const size_t nw = spo_list.size();
-        std::vector<ValueVector> mw_psi_v;
-        std::vector<GradVector> mw_dpsi_v;
-        std::vector<ValueVector> mw_d2psi_v;
-        RefVector<ValueVector> psi_v_list;
-        RefVector<GradVector> dpsi_v_list;
-        RefVector<ValueVector> d2psi_v_list;
-        mw_psi_v.reserve(nw);
-        mw_dpsi_v.reserve(nw);
-        mw_d2psi_v.reserve(nw);
-        psi_v_list.reserve(nw);
-        dpsi_v_list.reserve(nw);
-        d2psi_v_list.reserve(nw);
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                               const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override
+  {
+    assert(this == &spo_list.getLeader());
+    const size_t nw = spo_list.size();
+    std::vector<ValueVector> mw_psi_v;
+    std::vector<GradVector> mw_dpsi_v;
+    std::vector<ValueVector> mw_d2psi_v;
+    RefVector<ValueVector> psi_v_list;
+    RefVector<GradVector> dpsi_v_list;
+    RefVector<ValueVector> d2psi_v_list;
+    mw_psi_v.reserve(nw);
+    mw_dpsi_v.reserve(nw);
+    mw_d2psi_v.reserve(nw);
+    psi_v_list.reserve(nw);
+    dpsi_v_list.reserve(nw);
+    d2psi_v_list.reserve(nw);
 
-        for (int iat = first, i = 0; iat < last; ++iat, ++i) {
-            mw_psi_v.clear();
-            mw_dpsi_v.clear();
-            mw_d2psi_v.clear();
-            psi_v_list.clear();
-            dpsi_v_list.clear();
-            d2psi_v_list.clear();
+    for (int iat = first, i = 0; iat < last; ++iat, ++i)
+    {
+      mw_psi_v.clear();
+      mw_dpsi_v.clear();
+      mw_d2psi_v.clear();
+      psi_v_list.clear();
+      dpsi_v_list.clear();
+      d2psi_v_list.clear();
 
-            for (int iw = 0; iw < nw; iw++) {
-                mw_psi_v.emplace_back(
-                    logdet_list[iw].get()[i], logdet_list[iw].get().cols());
-                mw_dpsi_v.emplace_back(
-                    dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols());
-                mw_d2psi_v.emplace_back(
-                    d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols());
-                psi_v_list.push_back(mw_psi_v.back());
-                dpsi_v_list.push_back(mw_dpsi_v.back());
-                d2psi_v_list.push_back(mw_d2psi_v.back());
-            }
+      for (int iw = 0; iw < nw; iw++)
+      {
+        mw_psi_v.emplace_back(logdet_list[iw].get()[i], logdet_list[iw].get().cols());
+        mw_dpsi_v.emplace_back(dlogdet_list[iw].get()[i], dlogdet_list[iw].get().cols());
+        mw_d2psi_v.emplace_back(d2logdet_list[iw].get()[i], d2logdet_list[iw].get().cols());
+        psi_v_list.push_back(mw_psi_v.back());
+        dpsi_v_list.push_back(mw_dpsi_v.back());
+        d2psi_v_list.push_back(mw_d2psi_v.back());
+      }
 
-            this->mw_evaluateVGL(
-                spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
-        }
+      this->mw_evaluateVGL(spo_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
     }
+  }
 
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        HessMatrix& grad_grad_logdet) override
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) override
+  {
+    for (int iat = first, i = 0; iat < last; ++iat, ++i)
     {
-        for (int iat = first, i = 0; iat < last; ++iat, ++i) {
-            ValueVector v(logdet[i], logdet.cols());
-            GradVector g(dlogdet[i], dlogdet.cols());
-            HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
-            this->evaluateVGH(P, iat, v, g, h);
-        }
+      ValueVector v(logdet[i], logdet.cols());
+      GradVector g(dlogdet[i], dlogdet.cols());
+      HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
+      this->evaluateVGH(P, iat, v, g, h);
     }
+  }
 
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
-        GGGMatrix& grad_grad_grad_logdet) override
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet,
+                            GGGMatrix& grad_grad_grad_logdet) override
+  {
+    for (int iat = first, i = 0; iat < last; ++iat, ++i)
     {
-        for (int iat = first, i = 0; iat < last; ++iat, ++i) {
-            ValueVector v(logdet[i], logdet.cols());
-            GradVector g(dlogdet[i], dlogdet.cols());
-            HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
-            GGGVector gh(
-                grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols());
-            this->evaluateVGHGH(P, iat, v, g, h, gh);
-        }
+      ValueVector v(logdet[i], logdet.cols());
+      GradVector g(dlogdet[i], dlogdet.cols());
+      HessVector h(grad_grad_logdet[i], grad_grad_logdet.cols());
+      GGGVector gh(grad_grad_grad_logdet[i], grad_grad_grad_logdet.cols());
+      this->evaluateVGHGH(P, iat, v, g, h, gh);
     }
+  }
 
-    void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src,
-        GradMatrix& gradphi) override
-    {
-        // Do nothing, since Einsplines don't explicitly depend on ion
-        // positions.
-    }
+  void evaluateGradSource(const ParticleSetT<T>& P,
+                          int first,
+                          int last,
+                          const ParticleSetT<T>& source,
+                          int iat_src,
+                          GradMatrix& gradphi) override
+  {
+    // Do nothing, since Einsplines don't explicitly depend on ion
+    // positions.
+  }
 
-    void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
-        HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) override
-    {
-        // Do nothing, since Einsplines don't explicitly depend on ion
-        // positions.
-    }
+  void evaluateGradSource(const ParticleSetT<T>& P,
+                          int first,
+                          int last,
+                          const ParticleSetT<T>& source,
+                          int iat_src,
+                          GradMatrix& grad_phi,
+                          HessMatrix& grad_grad_phi,
+                          GradMatrix& grad_lapl_phi) override
+  {
+    // Do nothing, since Einsplines don't explicitly depend on ion
+    // positions.
+  }
 
-    template <class BSPLINESPO>
-    friend class SplineSetReaderT;
-    template <typename>
-    friend class BsplineReaderBaseT;
-    template <typename>
-    friend class HybridRepSetReaderT;
+  template<class BSPLINESPO>
+  friend class SplineSetReaderT;
+  template<typename>
+  friend class BsplineReaderBaseT;
+  template<typename>
+  friend class HybridRepSetReaderT;
 
 protected:
-    static const int D = QMCTraits::DIM;
-    /// Index of this adoptor, when multiple adoptors are used for NUMA or
-    /// distributed cases
-    size_t MyIndex;
-    /// first index of the SPOs this Spline handles
-    size_t first_spo;
-    /// last index of the SPOs this Spline handles
-    size_t last_spo;
-    /// sign bits at the G/2 boundaries
-    TinyVector<int, D> HalfG;
-    /// flags to unpack sin/cos
-    std::vector<bool> MakeTwoCopies;
-    /** kpoints for each unique orbitals.
+  static const int D = QMCTraits::DIM;
+  /// Index of this adoptor, when multiple adoptors are used for NUMA or
+  /// distributed cases
+  size_t MyIndex;
+  /// first index of the SPOs this Spline handles
+  size_t first_spo;
+  /// last index of the SPOs this Spline handles
+  size_t last_spo;
+  /// sign bits at the G/2 boundaries
+  TinyVector<int, D> HalfG;
+  /// flags to unpack sin/cos
+  std::vector<bool> MakeTwoCopies;
+  /** kpoints for each unique orbital.
      * Note: for historic reason, this sign is opposite to what was used in DFT
      * when orbitals were generated. Changing the sign requires updating all the
      * evaluation code.
      */
-    std::vector<PosType> kPoints;
-    /// remap splines to orbitals
-    aligned_vector<int> BandIndexMap;
-    /// band offsets used for communication
-    std::vector<int> offset;
+  std::vector<PosType> kPoints;
+  /// remap splines to orbitals
+  aligned_vector<int> BandIndexMap;
+  /// band offsets used for communication
+  std::vector<int> offset;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h
index 85bf667736a..55db6151dcd 100644
--- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitalsT.h
@@ -7,7 +7,6 @@
 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-//
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_HYBRIDREP_CENTER_ORBITALST_H
@@ -22,628 +21,565 @@
 
 namespace qmcplusplus
 {
-template <class BSPLINESPO>
+template<class BSPLINESPO>
 class HybridRepSetReaderT;
 
-template <typename T>
+template<typename T>
 class AtomicOrbitalsT
 {
 public:
-    static const int D = 3;
-    using AtomicSplineType = typename bspline_traits<T, 1>::SplineType;
-    using AtomicBCType = typename bspline_traits<T, 1>::BCType;
-    using AtomicSingleSplineType = UBspline_1d_d;
-    using PointType = TinyVector<T, D>;
-    using value_type = T;
+  static const int D           = 3;
+  using AtomicSplineType       = typename bspline_traits<T, 1>::SplineType;
+  using AtomicBCType           = typename bspline_traits<T, 1>::BCType;
+  using AtomicSingleSplineType = UBspline_1d_d;
+  using PointType              = TinyVector<T, D>;
+  using value_type             = T;
 
-    using vContainer_type = aligned_vector<T>;
+  using vContainer_type = aligned_vector<T>;
 
 private:
-    // near core cutoff
-    T rmin;
-    // far from core cutoff, rmin_sqrt>=rmin
-    T rmin_sqrt;
-    T cutoff, cutoff_buffer, spline_radius, non_overlapping_radius;
-    int spline_npoints, BaseN;
-    int NumBands, Npad;
-    PointType center_pos;
-    const int lmax, lm_tot;
-    SoaSphericalTensor<T> Ylm;
-    vContainer_type l_vals;
-    vContainer_type r_power_minus_l;
-    /// 1D spline of radial functions of all the orbitals
-    std::shared_ptr<MultiBspline1D<T>> SplineInst;
-
-    vContainer_type localV, localG, localL;
+  // near core cutoff
+  T rmin;
+  // far from core cutoff, rmin_sqrt>=rmin
+  T rmin_sqrt;
+  T cutoff, cutoff_buffer, spline_radius, non_overlapping_radius;
+  int spline_npoints, BaseN;
+  int NumBands, Npad;
+  PointType center_pos;
+  const int lmax, lm_tot;
+  SoaSphericalTensor<T> Ylm;
+  vContainer_type l_vals;
+  vContainer_type r_power_minus_l;
+  /// 1D spline of radial functions of all the orbitals
+  std::shared_ptr<MultiBspline1D<T>> SplineInst;
+
+  vContainer_type localV, localG, localL;
 
 public:
-    AtomicOrbitalsT(int Lmax) :
-        lmax(Lmax),
-        lm_tot((Lmax + 1) * (Lmax + 1)),
-        Ylm(Lmax)
-    {
-        r_power_minus_l.resize(lm_tot);
-        l_vals.resize(lm_tot);
-        for (int l = 0; l <= lmax; l++)
-            for (int m = -l; m <= l; m++)
-                l_vals[l * (l + 1) + m] = l;
-        rmin = std::exp(
-            std::log(std::numeric_limits<T>::min()) / std::max(Lmax, 1));
-        rmin = std::max(rmin, std::numeric_limits<T>::epsilon());
-        rmin_sqrt =
-            std::max(rmin, std::sqrt(std::numeric_limits<T>::epsilon()));
-    }
-
-    // accessing functions, const only
-    T
-    getCutoff() const
-    {
-        return cutoff;
-    }
-    T
-    getCutoffBuffer() const
-    {
-        return cutoff_buffer;
-    }
-    T
-    getSplineRadius() const
-    {
-        return spline_radius;
-    }
-    T
-    getNonOverlappingRadius() const
-    {
-        return non_overlapping_radius;
-    }
-    int
-    getSplineNpoints() const
-    {
-        return spline_npoints;
-    }
-    int
-    getLmax() const
-    {
-        return lmax;
-    }
-    const PointType&
-    getCenterPos() const
-    {
-        return center_pos;
-    }
-
-    inline void
-    resizeStorage(size_t Nb)
-    {
-        NumBands = Nb;
-        Npad = getAlignedSize<T>(Nb);
-        localV.resize(Npad * lm_tot);
-        localG.resize(Npad * lm_tot);
-        localL.resize(Npad * lm_tot);
-        create_spline();
-    }
-
-    void
-    bcast_tables(Communicate* comm)
-    {
-        chunked_bcast(comm, SplineInst->getSplinePtr());
-    }
-
-    void
-    gather_tables(Communicate* comm, std::vector<int>& offset)
-    {
-        gatherv(comm, SplineInst->getSplinePtr(), Npad, offset);
-    }
-
-    template <typename PT, typename VT>
-    inline void
-    set_info(const PT& R, const VT& cutoff_in, const VT& cutoff_buffer_in,
-        const VT& spline_radius_in, const VT& non_overlapping_radius_in,
-        const int spline_npoints_in)
-    {
-        center_pos[0] = R[0];
-        center_pos[1] = R[1];
-        center_pos[2] = R[2];
-        cutoff = cutoff_in;
-        cutoff_buffer = cutoff_buffer_in;
-        spline_radius = spline_radius_in;
-        spline_npoints = spline_npoints_in;
-        non_overlapping_radius = non_overlapping_radius_in;
-        BaseN = spline_npoints + 2;
-    }
-
-    inline void
-    create_spline()
-    {
-        AtomicBCType bc;
-        bc.lCode = FLAT;
-        bc.rCode = NATURAL;
-        Ugrid grid;
-        grid.start = 0.0;
-        grid.end = spline_radius;
-        grid.num = spline_npoints;
-        SplineInst = std::make_shared<MultiBspline1D<T>>();
-        SplineInst->create(grid, bc, lm_tot * Npad);
-    }
-
-    inline size_t
-    getSplineSizeInBytes() const
+  AtomicOrbitalsT(int Lmax) : lmax(Lmax), lm_tot((Lmax + 1) * (Lmax + 1)), Ylm(Lmax)
+  {
+    r_power_minus_l.resize(lm_tot);
+    l_vals.resize(lm_tot);
+    for (int l = 0; l <= lmax; l++)
+      for (int m = -l; m <= l; m++)
+        l_vals[l * (l + 1) + m] = l;
+    rmin      = std::exp(std::log(std::numeric_limits<T>::min()) / std::max(Lmax, 1));
+    rmin      = std::max(rmin, std::numeric_limits<T>::epsilon());
+    rmin_sqrt = std::max(rmin, std::sqrt(std::numeric_limits<T>::epsilon()));
+  }
+
+  // accessing functions, const only
+  T getCutoff() const { return cutoff; }
+  T getCutoffBuffer() const { return cutoff_buffer; }
+  T getSplineRadius() const { return spline_radius; }
+  T getNonOverlappingRadius() const { return non_overlapping_radius; }
+  int getSplineNpoints() const { return spline_npoints; }
+  int getLmax() const { return lmax; }
+  const PointType& getCenterPos() const { return center_pos; }
+
+  inline void resizeStorage(size_t Nb)
+  {
+    NumBands = Nb;
+    Npad     = getAlignedSize<T>(Nb);
+    localV.resize(Npad * lm_tot);
+    localG.resize(Npad * lm_tot);
+    localL.resize(Npad * lm_tot);
+    create_spline();
+  }
+
+  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
+
+  void gather_tables(Communicate* comm, std::vector<int>& offset)
+  {
+    gatherv(comm, SplineInst->getSplinePtr(), Npad, offset);
+  }
+
+  template<typename PT, typename VT>
+  inline void set_info(const PT& R,
+                       const VT& cutoff_in,
+                       const VT& cutoff_buffer_in,
+                       const VT& spline_radius_in,
+                       const VT& non_overlapping_radius_in,
+                       const int spline_npoints_in)
+  {
+    center_pos[0]          = R[0];
+    center_pos[1]          = R[1];
+    center_pos[2]          = R[2];
+    cutoff                 = cutoff_in;
+    cutoff_buffer          = cutoff_buffer_in;
+    spline_radius          = spline_radius_in;
+    spline_npoints         = spline_npoints_in;
+    non_overlapping_radius = non_overlapping_radius_in;
+    BaseN                  = spline_npoints + 2;
+  }
+
+  inline void create_spline()
+  {
+    AtomicBCType bc;
+    bc.lCode = FLAT;
+    bc.rCode = NATURAL;
+    Ugrid grid;
+    grid.start = 0.0;
+    grid.end   = spline_radius;
+    grid.num   = spline_npoints;
+    SplineInst = std::make_shared<MultiBspline1D<T>>();
+    SplineInst->create(grid, bc, lm_tot * Npad);
+  }
+
+  inline size_t getSplineSizeInBytes() const { return SplineInst->sizeInByte(); }
+
+  inline void flush_zero() { SplineInst->flush_zero(); }
+
+  inline void set_spline(AtomicSingleSplineType* spline, int lm, int ispline)
+  {
+    SplineInst->copy_spline(spline, lm * Npad + ispline, 0, BaseN);
+  }
+
+  bool read_splines(hdf_archive& h5f)
+  {
+    einspline_engine<AtomicSplineType> bigtable(SplineInst->getSplinePtr());
+    int lmax_in = 0, spline_npoints_in = 0;
+    T spline_radius_in;
+    if (!h5f.readEntry(lmax_in, "l_max") || lmax_in != lmax)
+      return false;
+    if (!h5f.readEntry(spline_radius_in, "spline_radius") || spline_radius_in != spline_radius)
+      return false;
+    if (!h5f.readEntry(spline_npoints_in, "spline_npoints") || spline_npoints_in != spline_npoints)
+      return false;
+    return h5f.readEntry(bigtable, "radial_spline");
+  }
+
+  bool write_splines(hdf_archive& h5f)
+  {
+    bool success = true;
+    success      = success && h5f.writeEntry(spline_radius, "spline_radius");
+    success      = success && h5f.writeEntry(spline_npoints, "spline_npoints");
+    success      = success && h5f.writeEntry(lmax, "l_max");
+    success      = success && h5f.writeEntry(center_pos, "position");
+    einspline_engine<AtomicSplineType> bigtable(SplineInst->getSplinePtr());
+    success = success && h5f.writeEntry(bigtable, "radial_spline");
+    return success;
+  }
+
+  // evaluate only V
+  template<typename VV>
+  inline void evaluate_v(const T& r, const PointType& dr, VV& myV)
+  {
+    if (r > std::numeric_limits<T>::epsilon())
+      Ylm.evaluateV(dr[0] / r, dr[1] / r, dr[2] / r);
+    else
+      Ylm.evaluateV(0, 0, 1);
+    const T* restrict Ylm_v = Ylm[0];
+
+    constexpr T czero(0);
+    T* restrict val       = myV.data();
+    T* restrict local_val = localV.data();
+    std::fill(myV.begin(), myV.end(), czero);
+
+    SplineInst->evaluate(r, localV);
+
+    for (size_t lm = 0; lm < lm_tot; lm++)
     {
-        return SplineInst->sizeInByte();
-    }
-
-    inline void
-    flush_zero()
-    {
-        SplineInst->flush_zero();
-    }
-
-    inline void
-    set_spline(AtomicSingleSplineType* spline, int lm, int ispline)
-    {
-        SplineInst->copy_spline(spline, lm * Npad + ispline, 0, BaseN);
-    }
-
-    bool
-    read_splines(hdf_archive& h5f)
-    {
-        einspline_engine<AtomicSplineType> bigtable(SplineInst->getSplinePtr());
-        int lmax_in = 0, spline_npoints_in = 0;
-        T spline_radius_in;
-        if (!h5f.readEntry(lmax_in, "l_max") || lmax_in != lmax)
-            return false;
-        if (!h5f.readEntry(spline_radius_in, "spline_radius") ||
-            spline_radius_in != spline_radius)
-            return false;
-        if (!h5f.readEntry(spline_npoints_in, "spline_npoints") ||
-            spline_npoints_in != spline_npoints)
-            return false;
-        return h5f.readEntry(bigtable, "radial_spline");
-    }
-
-    bool
-    write_splines(hdf_archive& h5f)
-    {
-        bool success = true;
-        success = success && h5f.writeEntry(spline_radius, "spline_radius");
-        success = success && h5f.writeEntry(spline_npoints, "spline_npoints");
-        success = success && h5f.writeEntry(lmax, "l_max");
-        success = success && h5f.writeEntry(center_pos, "position");
-        einspline_engine<AtomicSplineType> bigtable(SplineInst->getSplinePtr());
-        success = success && h5f.writeEntry(bigtable, "radial_spline");
-        return success;
-    }
-
-    // evaluate only V
-    template <typename VV>
-    inline void
-    evaluate_v(const T& r, const PointType& dr, VV& myV)
-    {
-        if (r > std::numeric_limits<T>::epsilon())
-            Ylm.evaluateV(dr[0] / r, dr[1] / r, dr[2] / r);
-        else
-            Ylm.evaluateV(0, 0, 1);
-        const T* restrict Ylm_v = Ylm[0];
-
-        constexpr T czero(0);
-        T* restrict val = myV.data();
-        T* restrict local_val = localV.data();
-        std::fill(myV.begin(), myV.end(), czero);
-
-        SplineInst->evaluate(r, localV);
-
-        for (size_t lm = 0; lm < lm_tot; lm++) {
 #pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT)
-            for (size_t ib = 0; ib < myV.size(); ib++)
-                val[ib] += Ylm_v[lm] * local_val[ib];
-            local_val += Npad;
-        }
-    }
-
-    template <typename DISPL, typename VM>
-    inline void
-    evaluateValues(const DISPL& Displacements, const int center_idx, const T& r,
-        VM& multi_myV)
-    {
-        if (r <= std::numeric_limits<T>::epsilon())
-            Ylm.evaluateV(0, 0, 1);
-        const T* restrict Ylm_v = Ylm[0];
-
-        const size_t m = multi_myV.cols();
-        constexpr T czero(0);
-        std::fill(multi_myV.begin(), multi_myV.end(), czero);
-        SplineInst->evaluate(r, localV);
-
-        for (int ivp = 0; ivp < Displacements.size(); ivp++) {
-            PointType dr = Displacements[ivp][center_idx];
-            if (r > std::numeric_limits<T>::epsilon())
-                Ylm.evaluateV(-dr[0] / r, -dr[1] / r, -dr[2] / r);
-
-            T* restrict val = multi_myV[ivp];
-            T* restrict local_val = localV.data();
-            for (size_t lm = 0; lm < lm_tot; lm++) {
+      for (size_t ib = 0; ib < myV.size(); ib++)
+        val[ib] += Ylm_v[lm] * local_val[ib];
+      local_val += Npad;
+    }
+  }
+
+  template<typename DISPL, typename VM>
+  inline void evaluateValues(const DISPL& Displacements, const int center_idx, const T& r, VM& multi_myV)
+  {
+    if (r <= std::numeric_limits<T>::epsilon())
+      Ylm.evaluateV(0, 0, 1);
+    const T* restrict Ylm_v = Ylm[0];
+
+    const size_t m = multi_myV.cols();
+    constexpr T czero(0);
+    std::fill(multi_myV.begin(), multi_myV.end(), czero);
+    SplineInst->evaluate(r, localV);
+
+    for (int ivp = 0; ivp < Displacements.size(); ivp++)
+    {
+      PointType dr = Displacements[ivp][center_idx];
+      if (r > std::numeric_limits<T>::epsilon())
+        Ylm.evaluateV(-dr[0] / r, -dr[1] / r, -dr[2] / r);
+
+      T* restrict val       = multi_myV[ivp];
+      T* restrict local_val = localV.data();
+      for (size_t lm = 0; lm < lm_tot; lm++)
+      {
 #pragma omp simd aligned(val, local_val : QMC_SIMD_ALIGNMENT)
-                for (size_t ib = 0; ib < m; ib++)
-                    val[ib] += Ylm_v[lm] * local_val[ib];
-                local_val += Npad;
-            }
+        for (size_t ib = 0; ib < m; ib++)
+          val[ib] += Ylm_v[lm] * local_val[ib];
+        local_val += Npad;
+      }
+    }
+  }
+
+  // evaluate VGL
+  template<typename VV, typename GV>
+  inline void evaluate_vgl(const T& r, const PointType& dr, VV& myV, GV& myG, VV& myL)
+  {
+    T drx, dry, drz, rhatx, rhaty, rhatz, rinv;
+    if (r > rmin)
+    {
+      rinv = 1.0 / r;
+    }
+    else
+    {
+      rinv = 0;
+    }
+    drx   = dr[0];
+    dry   = dr[1];
+    drz   = dr[2];
+    rhatx = drx * rinv;
+    rhaty = dry * rinv;
+    rhatz = drz * rinv;
+
+    Ylm.evaluateVGL(drx, dry, drz);
+    const T* restrict Ylm_v  = Ylm[0];
+    const T* restrict Ylm_gx = Ylm[1];
+    const T* restrict Ylm_gy = Ylm[2];
+    const T* restrict Ylm_gz = Ylm[3];
+
+    T* restrict g0 = myG.data(0);
+    T* restrict g1 = myG.data(1);
+    T* restrict g2 = myG.data(2);
+    constexpr T czero(0), cone(1), chalf(0.5);
+    std::fill(myV.begin(), myV.end(), czero);
+    std::fill(g0, g0 + Npad, czero);
+    std::fill(g1, g1 + Npad, czero);
+    std::fill(g2, g2 + Npad, czero);
+    std::fill(myL.begin(), myL.end(), czero);
+    T* restrict val        = myV.data();
+    T* restrict lapl       = myL.data();
+    T* restrict local_val  = localV.data();
+    T* restrict local_grad = localG.data();
+    T* restrict local_lapl = localL.data();
+
+    SplineInst->evaluate_vgl(r, localV, localG, localL);
+
+    if (r > rmin_sqrt)
+    {
+      // far from core
+      r_power_minus_l[0] = cone;
+      T r_power_temp     = cone;
+      for (int l = 1; l <= lmax; l++)
+      {
+        r_power_temp *= rinv;
+        for (int m = -l, lm = l * l; m <= l; m++, lm++)
+          r_power_minus_l[lm] = r_power_temp;
+      }
+
+      for (size_t lm = 0; lm < lm_tot; lm++)
+      {
+        const T& l_val      = l_vals[lm];
+        const T& r_power    = r_power_minus_l[lm];
+        const T Ylm_rescale = Ylm_v[lm] * r_power;
+        const T rhat_dot_G  = (rhatx * Ylm_gx[lm] + rhaty * Ylm_gy[lm] + rhatz * Ylm_gz[lm]) * r_power;
+#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT)
+        for (size_t ib = 0; ib < myV.size(); ib++)
+        {
+          const T local_v = local_val[ib];
+          const T local_g = local_grad[ib];
+          const T local_l = local_lapl[ib];
+          // value
+          const T Vpart = l_val * rinv * local_v;
+          val[ib] += Ylm_rescale * local_v;
+
+          // grad
+          const T factor1 = local_g * Ylm_rescale;
+          const T factor2 = local_v * r_power;
+          const T factor3 = -Vpart * Ylm_rescale;
+          g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx;
+          g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + factor3 * rhaty;
+          g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz;
+
+          // laplacian
+          lapl[ib] += (local_l + (local_g * (2 - l_val) - Vpart) * rinv) * Ylm_rescale + (local_g - Vpart) * rhat_dot_G;
         }
+        local_val += Npad;
+        local_grad += Npad;
+        local_lapl += Npad;
+      }
+    }
+    else if (r > rmin)
+    {
+      // the possibility of reaching here is very very low
+      std::cout << "Warning: an electron is very close to an ion, distance=" << r << " be careful!" << std::endl;
+      // near core, kill divergence in the laplacian
+      r_power_minus_l[0] = cone;
+      T r_power_temp     = cone;
+      for (int l = 1; l <= lmax; l++)
+      {
+        r_power_temp *= rinv;
+        for (int m = -l, lm = l * l; m <= l; m++, lm++)
+          r_power_minus_l[lm] = r_power_temp;
+      }
+
+      for (size_t lm = 0; lm < lm_tot; lm++)
+      {
+        const T& l_val      = l_vals[lm];
+        const T& r_power    = r_power_minus_l[lm];
+        const T Ylm_rescale = Ylm_v[lm] * r_power;
+        const T rhat_dot_G  = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty + Ylm_gz[lm] * rhatz) * r_power * r;
+#pragma omp simd aligned(val, g0, g1, g2, lapl, local_val, local_grad, local_lapl : QMC_SIMD_ALIGNMENT)
+        for (size_t ib = 0; ib < myV.size(); ib++)
+        {
+          const T local_v = local_val[ib];
+          const T local_g = local_grad[ib];
+          const T local_l = local_lapl[ib];
+          // value
+          const T Vpart = Ylm_rescale * local_v;
+          val[ib] += Vpart;
+
+          // grad
+          const T factor1 = local_g * Ylm_rescale;
+          const T factor2 = local_v * r_power;
+          const T factor3 = -l_val * Vpart * rinv;
+          g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] + factor3 * rhatx;
+          g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] + factor3 * rhaty;
+          g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] + factor3 * rhatz;
+
+          // laplacian
+          lapl[ib] += local_l * (cone - chalf * l_val) * (3 * Ylm_rescale + rhat_dot_G);
+        }
+        local_val += Npad;
+        local_grad += Npad;
+        local_lapl += Npad;
+      }
     }
-
-    // evaluate VGL
-    template <typename VV, typename GV>
-    inline void
-    evaluate_vgl(const T& r, const PointType& dr, VV& myV, GV& myG, VV& myL)
+    else
     {
-        T drx, dry, drz, rhatx, rhaty, rhatz, rinv;
-        if (r > rmin) {
-            rinv = 1.0 / r;
-        }
-        else {
-            rinv = 0;
-        }
-        drx = dr[0];
-        dry = dr[1];
-        drz = dr[2];
-        rhatx = drx * rinv;
-        rhaty = dry * rinv;
-        rhatz = drz * rinv;
-
-        Ylm.evaluateVGL(drx, dry, drz);
-        const T* restrict Ylm_v = Ylm[0];
-        const T* restrict Ylm_gx = Ylm[1];
-        const T* restrict Ylm_gy = Ylm[2];
-        const T* restrict Ylm_gz = Ylm[3];
-
-        T* restrict g0 = myG.data(0);
-        T* restrict g1 = myG.data(1);
-        T* restrict g2 = myG.data(2);
-        constexpr T czero(0), cone(1), chalf(0.5);
-        std::fill(myV.begin(), myV.end(), czero);
-        std::fill(g0, g0 + Npad, czero);
-        std::fill(g1, g1 + Npad, czero);
-        std::fill(g2, g2 + Npad, czero);
-        std::fill(myL.begin(), myL.end(), czero);
-        T* restrict val = myV.data();
-        T* restrict lapl = myL.data();
-        T* restrict local_val = localV.data();
-        T* restrict local_grad = localG.data();
-        T* restrict local_lapl = localL.data();
-
-        SplineInst->evaluate_vgl(r, localV, localG, localL);
-
-        if (r > rmin_sqrt) {
-            // far from core
-            r_power_minus_l[0] = cone;
-            T r_power_temp = cone;
-            for (int l = 1; l <= lmax; l++) {
-                r_power_temp *= rinv;
-                for (int m = -l, lm = l * l; m <= l; m++, lm++)
-                    r_power_minus_l[lm] = r_power_temp;
-            }
-
-            for (size_t lm = 0; lm < lm_tot; lm++) {
-                const T& l_val = l_vals[lm];
-                const T& r_power = r_power_minus_l[lm];
-                const T Ylm_rescale = Ylm_v[lm] * r_power;
-                const T rhat_dot_G = (rhatx * Ylm_gx[lm] + rhaty * Ylm_gy[lm] +
-                                         rhatz * Ylm_gz[lm]) *
-                    r_power;
-#pragma omp simd aligned( \
-    val, g0, g1, g2, lapl, local_val, local_grad, local_lapl \
-    : QMC_SIMD_ALIGNMENT)
-                for (size_t ib = 0; ib < myV.size(); ib++) {
-                    const T local_v = local_val[ib];
-                    const T local_g = local_grad[ib];
-                    const T local_l = local_lapl[ib];
-                    // value
-                    const T Vpart = l_val * rinv * local_v;
-                    val[ib] += Ylm_rescale * local_v;
-
-                    // grad
-                    const T factor1 = local_g * Ylm_rescale;
-                    const T factor2 = local_v * r_power;
-                    const T factor3 = -Vpart * Ylm_rescale;
-                    g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] +
-                        factor3 * rhatx;
-                    g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] +
-                        factor3 * rhaty;
-                    g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] +
-                        factor3 * rhatz;
-
-                    // laplacian
-                    lapl[ib] +=
-                        (local_l + (local_g * (2 - l_val) - Vpart) * rinv) *
-                            Ylm_rescale +
-                        (local_g - Vpart) * rhat_dot_G;
-                }
-                local_val += Npad;
-                local_grad += Npad;
-                local_lapl += Npad;
-            }
-        }
-        else if (r > rmin) {
-            // the possibility of reaching here is very very low
-            std::cout
-                << "Warning: an electron is very close to an ion, distance="
-                << r << " be careful!" << std::endl;
-            // near core, kill divergence in the laplacian
-            r_power_minus_l[0] = cone;
-            T r_power_temp = cone;
-            for (int l = 1; l <= lmax; l++) {
-                r_power_temp *= rinv;
-                for (int m = -l, lm = l * l; m <= l; m++, lm++)
-                    r_power_minus_l[lm] = r_power_temp;
-            }
-
-            for (size_t lm = 0; lm < lm_tot; lm++) {
-                const T& l_val = l_vals[lm];
-                const T& r_power = r_power_minus_l[lm];
-                const T Ylm_rescale = Ylm_v[lm] * r_power;
-                const T rhat_dot_G = (Ylm_gx[lm] * rhatx + Ylm_gy[lm] * rhaty +
-                                         Ylm_gz[lm] * rhatz) *
-                    r_power * r;
-#pragma omp simd aligned( \
-    val, g0, g1, g2, lapl, local_val, local_grad, local_lapl \
-    : QMC_SIMD_ALIGNMENT)
-                for (size_t ib = 0; ib < myV.size(); ib++) {
-                    const T local_v = local_val[ib];
-                    const T local_g = local_grad[ib];
-                    const T local_l = local_lapl[ib];
-                    // value
-                    const T Vpart = Ylm_rescale * local_v;
-                    val[ib] += Vpart;
-
-                    // grad
-                    const T factor1 = local_g * Ylm_rescale;
-                    const T factor2 = local_v * r_power;
-                    const T factor3 = -l_val * Vpart * rinv;
-                    g0[ib] += factor1 * rhatx + factor2 * Ylm_gx[lm] +
-                        factor3 * rhatx;
-                    g1[ib] += factor1 * rhaty + factor2 * Ylm_gy[lm] +
-                        factor3 * rhaty;
-                    g2[ib] += factor1 * rhatz + factor2 * Ylm_gz[lm] +
-                        factor3 * rhatz;
-
-                    // laplacian
-                    lapl[ib] += local_l * (cone - chalf * l_val) *
-                        (3 * Ylm_rescale + rhat_dot_G);
-                }
-                local_val += Npad;
-                local_grad += Npad;
-                local_lapl += Npad;
-            }
-        }
-        else {
-            std::cout << "Warning: an electron is on top of an ion!"
-                      << std::endl;
-            // strictly zero
+      std::cout << "Warning: an electron is on top of an ion!" << std::endl;
+      // strictly zero
 
 #pragma omp simd aligned(val, lapl, local_val, local_lapl : QMC_SIMD_ALIGNMENT)
-            for (size_t ib = 0; ib < myV.size(); ib++) {
-                // value
-                val[ib] = Ylm_v[0] * local_val[ib];
-
-                // laplacian
-                lapl[ib] = local_lapl[ib] * static_cast<T>(3) * Ylm_v[0];
-            }
-            local_val += Npad;
-            local_grad += Npad;
-            local_lapl += Npad;
-            if (lm_tot > 0) {
-                // std::cout << std::endl;
-                for (size_t lm = 1; lm < 4; lm++) {
+      for (size_t ib = 0; ib < myV.size(); ib++)
+      {
+        // value
+        val[ib] = Ylm_v[0] * local_val[ib];
+
+        // laplacian
+        lapl[ib] = local_lapl[ib] * static_cast<T>(3) * Ylm_v[0];
+      }
+      local_val += Npad;
+      local_grad += Npad;
+      local_lapl += Npad;
+      if (lm_tot > 0)
+      {
+        // std::cout << std::endl;
+        for (size_t lm = 1; lm < 4; lm++)
+        {
 #pragma omp simd aligned(g0, g1, g2, local_grad : QMC_SIMD_ALIGNMENT)
-                    for (size_t ib = 0; ib < myV.size(); ib++) {
-                        const T local_g = local_grad[ib];
-                        // grad
-                        g0[ib] += local_g * Ylm_gx[lm];
-                        g1[ib] += local_g * Ylm_gy[lm];
-                        g2[ib] += local_g * Ylm_gz[lm];
-                    }
-                    local_grad += Npad;
-                }
-            }
+          for (size_t ib = 0; ib < myV.size(); ib++)
+          {
+            const T local_g = local_grad[ib];
+            // grad
+            g0[ib] += local_g * Ylm_gx[lm];
+            g1[ib] += local_g * Ylm_gy[lm];
+            g2[ib] += local_g * Ylm_gz[lm];
+          }
+          local_grad += Npad;
         }
+      }
     }
+  }
 
-    template <typename VV, typename GV, typename HT>
-    void
-    evaluate_vgh(const T& r, const PointType& dr, VV& myV, GV& myG, HT& myH)
-    {
-        // Needed to do tensor product here
-        APP_ABORT("AtomicOrbitals::evaluate_vgh");
-    }
+  template<typename VV, typename GV, typename HT>
+  void evaluate_vgh(const T& r, const PointType& dr, VV& myV, GV& myG, HT& myH)
+  {
+    // Needed to do tensor product here
+    APP_ABORT("AtomicOrbitals::evaluate_vgh");
+  }
 };
 
-template <typename ST, typename VT>
+template<typename ST, typename VT>
 class HybridRepCenterOrbitalsT
 {
 public:
-    static const int D = 3;
-    using PointType = typename AtomicOrbitalsT<ST>::PointType;
-    using RealType = typename DistanceTableT<VT>::RealType;
-    using PosType = typename DistanceTableT<VT>::PosType;
+  static const int D = 3;
+  using PointType    = typename AtomicOrbitalsT<ST>::PointType;
+  using RealType     = typename DistanceTableT<VT>::RealType;
+  using PosType      = typename DistanceTableT<VT>::PosType;
 
 private:
-    /// atomic centers
-    std::vector<AtomicOrbitalsT<ST>> AtomicCenters;
-    /// table index
-    int myTableID;
-    /// mapping supercell to primitive cell
-    std::vector<int> Super2Prim;
-    /// r from distance table
-    RealType dist_r;
-    /// dr from distance table
-    PosType dist_dr;
-    /// for APBC
-    PointType r_image;
-    /// smooth function value
-    RealType f;
-    /// smooth function first derivative
-    RealType df_dr;
-    /// smooth function second derivative
-    RealType d2f_dr2;
-    /// smoothing schemes
-    enum class smoothing_schemes
-    {
-        CONSISTENT = 0,
-        SMOOTHALL,
-        SMOOTHPARTIAL
-    } smooth_scheme;
-    /// smoothing function
-    smoothing_functions smooth_func_id;
+  /// atomic centers
+  std::vector<AtomicOrbitalsT<ST>> AtomicCenters;
+  /// table index
+  int myTableID;
+  /// mapping supercell to primitive cell
+  std::vector<int> Super2Prim;
+  /// r from distance table
+  RealType dist_r;
+  /// dr from distance table
+  PosType dist_dr;
+  /// for APBC
+  PointType r_image;
+  /// smooth function value
+  RealType f;
+  /// smooth function first derivative
+  RealType df_dr;
+  /// smooth function second derivative
+  RealType d2f_dr2;
+  /// smoothing schemes
+  enum class smoothing_schemes
+  {
+    CONSISTENT = 0,
+    SMOOTHALL,
+    SMOOTHPARTIAL
+  } smooth_scheme;
+  /// smoothing function
+  smoothing_functions smooth_func_id;
 
 public:
-    HybridRepCenterOrbitalsT()
-    {
-    }
-
-    void
-    set_info(const ParticleSetT<VT>& ions, ParticleSetT<VT>& els,
-        const std::vector<int>& mapping)
-    {
-        myTableID = els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST);
-        Super2Prim = mapping;
-    }
-
-    inline void
-    resizeStorage(size_t Nb)
-    {
-        size_t SplineCoefsBytes = 0;
-
-        for (int ic = 0; ic < AtomicCenters.size(); ic++) {
-            AtomicCenters[ic].resizeStorage(Nb);
-            SplineCoefsBytes += AtomicCenters[ic].getSplineSizeInBytes();
-        }
-
-        app_log()
-            << "MEMORY " << SplineCoefsBytes / (1 << 20) << " MB allocated "
-            << "for the atomic radial splines in hybrid orbital representation"
-            << std::endl;
-    }
-
-    void
-    bcast_tables(Communicate* comm)
-    {
-        for (int ic = 0; ic < AtomicCenters.size(); ic++)
-            AtomicCenters[ic].bcast_tables(comm);
-    }
-
-    void
-    gather_atomic_tables(Communicate* comm, std::vector<int>& offset)
-    {
-        if (comm->size() == 1)
-            return;
-        for (int ic = 0; ic < AtomicCenters.size(); ic++)
-            AtomicCenters[ic].gather_tables(comm, offset);
-    }
-
-    inline void
-    flush_zero()
-    {
-        for (int ic = 0; ic < AtomicCenters.size(); ic++)
-            AtomicCenters[ic].flush_zero();
-    }
-
-    bool
-    read_splines(hdf_archive& h5f)
-    {
-        bool success = true;
-        size_t ncenter;
-
-        try {
-            h5f.push("atomic_centers", false);
-        }
-        catch (...) {
-            success = false;
-        }
-        success = success && h5f.readEntry(ncenter, "number_of_centers");
-        if (!success)
-            return success;
-        if (ncenter != AtomicCenters.size())
-            success = false;
-        // read splines of each center
-        for (int ic = 0; ic < AtomicCenters.size(); ic++) {
-            std::ostringstream gname;
-            gname << "center_" << ic;
-            try {
-                h5f.push(gname.str().c_str(), false);
-            }
-            catch (...) {
-                success = false;
-            }
-            success = success && AtomicCenters[ic].read_splines(h5f);
-            h5f.pop();
-        }
-        h5f.pop();
-        return success;
-    }
-
-    bool
-    write_splines(hdf_archive& h5f)
-    {
-        bool success = true;
-        int ncenter = AtomicCenters.size();
-        try {
-            h5f.push("atomic_centers", true);
-        }
-        catch (...) {
-            success = false;
-        }
-        success = success && h5f.writeEntry(ncenter, "number_of_centers");
-        // write splines of each center
-        for (int ic = 0; ic < AtomicCenters.size(); ic++) {
-            std::ostringstream gname;
-            gname << "center_" << ic;
-            try {
-                h5f.push(gname.str().c_str(), true);
-            }
-            catch (...) {
-                success = false;
-            }
-            success = success && AtomicCenters[ic].write_splines(h5f);
-            h5f.pop();
-        }
-        h5f.pop();
-        return success;
-    }
-
-    template <typename Cell>
-    inline int
-    get_bc_sign(
-        const PointType& r, const Cell& PrimLattice, TinyVector<int, D>& HalfG)
-    {
-        int bc_sign = 0;
-        PointType shift_unit = PrimLattice.toUnit(r - r_image);
-        for (int i = 0; i < D; i++) {
-            ST img = round(shift_unit[i]);
-            bc_sign += HalfG[i] * (int)img;
-        }
-        return bc_sign;
-    }
-
-    // evaluate only V
-    template <typename VV>
-    inline RealType
-    evaluate_v(const ParticleSetT<VT>& P, const int iat, VV& myV)
-    {
-        const auto& ei_dist = P.getDistTableAB(myTableID);
-        const int center_idx = ei_dist.get_first_neighbor(
-            iat, dist_r, dist_dr, P.getActivePtcl() == iat);
-        if (center_idx < 0)
-            abort();
-        auto& myCenter = AtomicCenters[Super2Prim[center_idx]];
-        if (dist_r < myCenter.getCutoff()) {
-            PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]);
-            r_image = myCenter.getCenterPos() + dr;
-            myCenter.evaluate_v(dist_r, dr, myV);
-            return smooth_function(
-                myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
-        }
-        return RealType(-1);
-    }
-
-    /* check if the batched algorithm is safe to operate
+  HybridRepCenterOrbitalsT() {}
+
+  void set_info(const ParticleSetT<VT>& ions, ParticleSetT<VT>& els, const std::vector<int>& mapping)
+  {
+    myTableID  = els.addTable(ions, DTModes::NEED_VP_FULL_TABLE_ON_HOST);
+    Super2Prim = mapping;
+  }
+
+  inline void resizeStorage(size_t Nb)
+  {
+    size_t SplineCoefsBytes = 0;
+
+    for (int ic = 0; ic < AtomicCenters.size(); ic++)
+    {
+      AtomicCenters[ic].resizeStorage(Nb);
+      SplineCoefsBytes += AtomicCenters[ic].getSplineSizeInBytes();
+    }
+
+    app_log() << "MEMORY " << SplineCoefsBytes / (1 << 20) << " MB allocated "
+              << "for the atomic radial splines in hybrid orbital representation" << std::endl;
+  }
+
+  void bcast_tables(Communicate* comm)
+  {
+    for (int ic = 0; ic < AtomicCenters.size(); ic++)
+      AtomicCenters[ic].bcast_tables(comm);
+  }
+
+  void gather_atomic_tables(Communicate* comm, std::vector<int>& offset)
+  {
+    if (comm->size() == 1)
+      return;
+    for (int ic = 0; ic < AtomicCenters.size(); ic++)
+      AtomicCenters[ic].gather_tables(comm, offset);
+  }
+
+  inline void flush_zero()
+  {
+    for (int ic = 0; ic < AtomicCenters.size(); ic++)
+      AtomicCenters[ic].flush_zero();
+  }
+
+  bool read_splines(hdf_archive& h5f)
+  {
+    bool success = true;
+    size_t ncenter;
+
+    try
+    {
+      h5f.push("atomic_centers", false);
+    }
+    catch (...)
+    {
+      success = false;
+    }
+    success = success && h5f.readEntry(ncenter, "number_of_centers");
+    if (!success)
+      return success;
+    if (ncenter != AtomicCenters.size())
+      success = false;
+    // read splines of each center
+    for (int ic = 0; ic < AtomicCenters.size(); ic++)
+    {
+      std::ostringstream gname;
+      gname << "center_" << ic;
+      try
+      {
+        h5f.push(gname.str().c_str(), false);
+      }
+      catch (...)
+      {
+        success = false;
+      }
+      success = success && AtomicCenters[ic].read_splines(h5f);
+      h5f.pop();
+    }
+    h5f.pop();
+    return success;
+  }
+
+  bool write_splines(hdf_archive& h5f)
+  {
+    bool success = true;
+    int ncenter  = AtomicCenters.size();
+    try
+    {
+      h5f.push("atomic_centers", true);
+    }
+    catch (...)
+    {
+      success = false;
+    }
+    success = success && h5f.writeEntry(ncenter, "number_of_centers");
+    // write splines of each center
+    for (int ic = 0; ic < AtomicCenters.size(); ic++)
+    {
+      std::ostringstream gname;
+      gname << "center_" << ic;
+      try
+      {
+        h5f.push(gname.str().c_str(), true);
+      }
+      catch (...)
+      {
+        success = false;
+      }
+      success = success && AtomicCenters[ic].write_splines(h5f);
+      h5f.pop();
+    }
+    h5f.pop();
+    return success;
+  }
+
+  template<typename Cell>
+  inline int get_bc_sign(const PointType& r, const Cell& PrimLattice, TinyVector<int, D>& HalfG)
+  {
+    int bc_sign          = 0;
+    PointType shift_unit = PrimLattice.toUnit(r - r_image);
+    for (int i = 0; i < D; i++)
+    {
+      ST img = round(shift_unit[i]);
+      bc_sign += HalfG[i] * (int)img;
+    }
+    return bc_sign;
+  }
+
+  // evaluate only V
+  template<typename VV>
+  inline RealType evaluate_v(const ParticleSetT<VT>& P, const int iat, VV& myV)
+  {
+    const auto& ei_dist  = P.getDistTableAB(myTableID);
+    const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.getActivePtcl() == iat);
+    if (center_idx < 0)
+      abort();
+    auto& myCenter = AtomicCenters[Super2Prim[center_idx]];
+    if (dist_r < myCenter.getCutoff())
+    {
+      PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]);
+      r_image = myCenter.getCenterPos() + dr;
+      myCenter.evaluate_v(dist_r, dr, myV);
+      return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
+    }
+    return RealType(-1);
+  }
+
+  /* check if the batched algorithm is safe to operate
      * @param VP virtual particle set
      * @return true if it is safe
      *
@@ -654,165 +590,154 @@ class HybridRepCenterOrbitalsT
      * reference center and introduce some error. In this case, the non-batched
      * algorithm should be used.
      */
-    bool
-    is_batched_safe(const VirtualParticleSetT<VT>& VP)
-    {
-        const int center_idx = VP.refSourcePtcl;
-        auto& myCenter = AtomicCenters[Super2Prim[center_idx]];
-        return VP.getRefPS().getDistTableAB(myTableID).getDistRow(
-                   VP.refPtcl)[center_idx] < myCenter.getNonOverlappingRadius();
-    }
-
-    // C2C, C2R cases
-    template <typename VM>
-    inline RealType
-    evaluateValuesC2X(const VirtualParticleSetT<VT>& VP, VM& multi_myV)
-    {
-        const int center_idx = VP.refSourcePtcl;
-        dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(
-            VP.refPtcl)[center_idx];
-        auto& myCenter = AtomicCenters[Super2Prim[center_idx]];
-        if (dist_r < myCenter.getCutoff()) {
-            myCenter.evaluateValues(
-                VP.getDistTableAB(myTableID).getDisplacements(), center_idx,
-                dist_r, multi_myV);
-            return smooth_function(
-                myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
-        }
-        return RealType(-1);
-    }
-
-    // R2R case
-    template <typename VM, typename Cell, typename SV>
-    inline RealType
-    evaluateValuesR2R(const VirtualParticleSetT<VT>& VP,
-        const Cell& PrimLattice, TinyVector<int, D>& HalfG, VM& multi_myV,
-        SV& bc_signs)
-    {
-        const int center_idx = VP.refSourcePtcl;
-        dist_r = VP.getRefPS().getDistTableAB(myTableID).getDistRow(
-            VP.refPtcl)[center_idx];
-        auto& myCenter = AtomicCenters[Super2Prim[center_idx]];
-        if (dist_r < myCenter.getCutoff()) {
-            const auto& displ = VP.getDistTableAB(myTableID).getDisplacements();
-            for (int ivp = 0; ivp < VP.getTotalNum(); ivp++) {
-                r_image = myCenter.getCenterPos() - displ[ivp][center_idx];
-                bc_signs[ivp] = get_bc_sign(VP.R[ivp], PrimLattice, HalfG);
-                ;
-            }
-            myCenter.evaluateValues(displ, center_idx, dist_r, multi_myV);
-            return smooth_function(
-                myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
-        }
-        return RealType(-1);
-    }
-
-    // evaluate only VGL
-    template <typename VV, typename GV>
-    inline RealType
-    evaluate_vgl(
-        const ParticleSetT<VT>& P, const int iat, VV& myV, GV& myG, VV& myL)
-    {
-        const auto& ei_dist = P.getDistTableAB(myTableID);
-        const int center_idx = ei_dist.get_first_neighbor(
-            iat, dist_r, dist_dr, P.getActivePtcl() == iat);
-        if (center_idx < 0)
-            abort();
-        auto& myCenter = AtomicCenters[Super2Prim[center_idx]];
-        if (dist_r < myCenter.getCutoff()) {
-            PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]);
-            r_image = myCenter.getCenterPos() + dr;
-            myCenter.evaluate_vgl(dist_r, dr, myV, myG, myL);
-            return smooth_function(
-                myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
-        }
-        return RealType(-1);
-    }
-
-    // evaluate only VGH
-    template <typename VV, typename GV, typename HT>
-    inline RealType
-    evaluate_vgh(
-        const ParticleSetT<VT>& P, const int iat, VV& myV, GV& myG, HT& myH)
-    {
-        const auto& ei_dist = P.getDistTableAB(myTableID);
-        const int center_idx = ei_dist.get_first_neighbor(
-            iat, dist_r, dist_dr, P.getActivePtcl() == iat);
-        if (center_idx < 0)
-            abort();
-        auto& myCenter = AtomicCenters[Super2Prim[center_idx]];
-        if (dist_r < myCenter.getCutoff()) {
-            PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]);
-            r_image = myCenter.getCenterPos() + dr;
-            myCenter.evaluate_vgh(dist_r, dr, myV, myG, myH);
-            return smooth_function(
-                myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
-        }
-        return RealType(-1);
-    }
-
-    // interpolate buffer region, value only
-    template <typename VV>
-    inline void
-    interpolate_buffer_v(VV& psi, const VV& psi_AO) const
-    {
-        const RealType cone(1);
-        for (size_t i = 0; i < psi.size(); i++)
-            psi[i] = psi_AO[i] * f + psi[i] * (cone - f);
-    }
-
-    // interpolate buffer region, value, gradients and laplacian
-    template <typename VV, typename GV>
-    inline void
-    interpolate_buffer_vgl(VV& psi, GV& dpsi, VV& d2psi, const VV& psi_AO,
-        const GV& dpsi_AO, const VV& d2psi_AO) const
-    {
-        const RealType cone(1), ctwo(2);
-        const RealType rinv(1.0 / dist_r);
-        if (smooth_scheme == smoothing_schemes::CONSISTENT)
-            for (size_t i = 0; i < psi.size();
-                 i++) { // psi, dpsi, d2psi are all consistent
-                d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) +
-                    df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr) +
-                    (psi_AO[i] - psi[i]) * (d2f_dr2 + ctwo * rinv * df_dr);
-                dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f) +
-                    df_dr * rinv * dist_dr * (psi[i] - psi_AO[i]);
-                psi[i] = psi_AO[i] * f + psi[i] * (cone - f);
-            }
-        else if (smooth_scheme == smoothing_schemes::SMOOTHALL)
-            for (size_t i = 0; i < psi.size(); i++) {
-                d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f);
-                dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f);
-                psi[i] = psi_AO[i] * f + psi[i] * (cone - f);
-            }
-        else if (smooth_scheme == smoothing_schemes::SMOOTHPARTIAL)
-            for (size_t i = 0; i < psi.size();
-                 i++) { // dpsi, d2psi are consistent but psi is not.
-                d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) +
-                    df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr);
-                dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f);
-                psi[i] = psi_AO[i] * f + psi[i] * (cone - f);
-            }
-        else
-            throw std::runtime_error("Unknown smooth scheme!");
-    }
-
-    inline RealType
-    smooth_function(const ST& cutoff_buffer, const ST& cutoff, const RealType r)
-    {
-        const RealType cone(1);
-        if (r < cutoff_buffer)
-            return cone;
-        const RealType scale = cone / (cutoff - cutoff_buffer);
-        const RealType x = (r - cutoff_buffer) * scale;
-        f = smoothing(smooth_func_id, x, df_dr, d2f_dr2);
-        df_dr *= scale;
-        d2f_dr2 *= scale * scale;
-        return f;
-    }
-
-    template <class BSPLINESPO>
-    friend class HybridRepSetReaderT;
+  bool is_batched_safe(const VirtualParticleSetT<VT>& VP)
+  {
+    const int center_idx = VP.refSourcePtcl;
+    auto& myCenter       = AtomicCenters[Super2Prim[center_idx]];
+    return VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx] <
+        myCenter.getNonOverlappingRadius();
+  }
+
+  // C2C, C2R cases
+  template<typename VM>
+  inline RealType evaluateValuesC2X(const VirtualParticleSetT<VT>& VP, VM& multi_myV)
+  {
+    const int center_idx = VP.refSourcePtcl;
+    dist_r               = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx];
+    auto& myCenter       = AtomicCenters[Super2Prim[center_idx]];
+    if (dist_r < myCenter.getCutoff())
+    {
+      myCenter.evaluateValues(VP.getDistTableAB(myTableID).getDisplacements(), center_idx, dist_r, multi_myV);
+      return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
+    }
+    return RealType(-1);
+  }
+
+  // R2R case
+  template<typename VM, typename Cell, typename SV>
+  inline RealType evaluateValuesR2R(const VirtualParticleSetT<VT>& VP,
+                                    const Cell& PrimLattice,
+                                    TinyVector<int, D>& HalfG,
+                                    VM& multi_myV,
+                                    SV& bc_signs)
+  {
+    const int center_idx = VP.refSourcePtcl;
+    dist_r               = VP.getRefPS().getDistTableAB(myTableID).getDistRow(VP.refPtcl)[center_idx];
+    auto& myCenter       = AtomicCenters[Super2Prim[center_idx]];
+    if (dist_r < myCenter.getCutoff())
+    {
+      const auto& displ = VP.getDistTableAB(myTableID).getDisplacements();
+      for (int ivp = 0; ivp < VP.getTotalNum(); ivp++)
+      {
+        r_image       = myCenter.getCenterPos() - displ[ivp][center_idx];
+        bc_signs[ivp] = get_bc_sign(VP.R[ivp], PrimLattice, HalfG);
+        ;
+      }
+      myCenter.evaluateValues(displ, center_idx, dist_r, multi_myV);
+      return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
+    }
+    return RealType(-1);
+  }
+
+  // evaluate only VGL
+  template<typename VV, typename GV>
+  inline RealType evaluate_vgl(const ParticleSetT<VT>& P, const int iat, VV& myV, GV& myG, VV& myL)
+  {
+    const auto& ei_dist  = P.getDistTableAB(myTableID);
+    const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.getActivePtcl() == iat);
+    if (center_idx < 0)
+      abort();
+    auto& myCenter = AtomicCenters[Super2Prim[center_idx]];
+    if (dist_r < myCenter.getCutoff())
+    {
+      PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]);
+      r_image = myCenter.getCenterPos() + dr;
+      myCenter.evaluate_vgl(dist_r, dr, myV, myG, myL);
+      return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
+    }
+    return RealType(-1);
+  }
+
+  // evaluate only VGH
+  template<typename VV, typename GV, typename HT>
+  inline RealType evaluate_vgh(const ParticleSetT<VT>& P, const int iat, VV& myV, GV& myG, HT& myH)
+  {
+    const auto& ei_dist  = P.getDistTableAB(myTableID);
+    const int center_idx = ei_dist.get_first_neighbor(iat, dist_r, dist_dr, P.getActivePtcl() == iat);
+    if (center_idx < 0)
+      abort();
+    auto& myCenter = AtomicCenters[Super2Prim[center_idx]];
+    if (dist_r < myCenter.getCutoff())
+    {
+      PointType dr(-dist_dr[0], -dist_dr[1], -dist_dr[2]);
+      r_image = myCenter.getCenterPos() + dr;
+      myCenter.evaluate_vgh(dist_r, dr, myV, myG, myH);
+      return smooth_function(myCenter.getCutoffBuffer(), myCenter.getCutoff(), dist_r);
+    }
+    return RealType(-1);
+  }
+
+  // interpolate buffer region, value only
+  template<typename VV>
+  inline void interpolate_buffer_v(VV& psi, const VV& psi_AO) const
+  {
+    const RealType cone(1);
+    for (size_t i = 0; i < psi.size(); i++)
+      psi[i] = psi_AO[i] * f + psi[i] * (cone - f);
+  }
+
+  // interpolate buffer region, value, gradients and laplacian
+  template<typename VV, typename GV>
+  inline void interpolate_buffer_vgl(VV& psi,
+                                     GV& dpsi,
+                                     VV& d2psi,
+                                     const VV& psi_AO,
+                                     const GV& dpsi_AO,
+                                     const VV& d2psi_AO) const
+  {
+    const RealType cone(1), ctwo(2);
+    const RealType rinv(1.0 / dist_r);
+    if (smooth_scheme == smoothing_schemes::CONSISTENT)
+      for (size_t i = 0; i < psi.size(); i++)
+      { // psi, dpsi, d2psi are all consistent
+        d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr) +
+            (psi_AO[i] - psi[i]) * (d2f_dr2 + ctwo * rinv * df_dr);
+        dpsi[i] = dpsi_AO[i] * f + dpsi[i] * (cone - f) + df_dr * rinv * dist_dr * (psi[i] - psi_AO[i]);
+        psi[i]  = psi_AO[i] * f + psi[i] * (cone - f);
+      }
+    else if (smooth_scheme == smoothing_schemes::SMOOTHALL)
+      for (size_t i = 0; i < psi.size(); i++)
+      {
+        d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f);
+        dpsi[i]  = dpsi_AO[i] * f + dpsi[i] * (cone - f);
+        psi[i]   = psi_AO[i] * f + psi[i] * (cone - f);
+      }
+    else if (smooth_scheme == smoothing_schemes::SMOOTHPARTIAL)
+      for (size_t i = 0; i < psi.size(); i++)
+      { // dpsi, d2psi are consistent but psi is not.
+        d2psi[i] = d2psi_AO[i] * f + d2psi[i] * (cone - f) + df_dr * rinv * ctwo * dot(dpsi[i] - dpsi_AO[i], dist_dr);
+        dpsi[i]  = dpsi_AO[i] * f + dpsi[i] * (cone - f);
+        psi[i]   = psi_AO[i] * f + psi[i] * (cone - f);
+      }
+    else
+      throw std::runtime_error("Unknown smooth scheme!");
+  }
+
+  inline RealType smooth_function(const ST& cutoff_buffer, const ST& cutoff, const RealType r)
+  {
+    const RealType cone(1);
+    if (r < cutoff_buffer)
+      return cone;
+    const RealType scale = cone / (cutoff - cutoff_buffer);
+    const RealType x     = (r - cutoff_buffer) * scale;
+    f                    = smoothing(smooth_func_id, x, df_dr, d2f_dr2);
+    df_dr *= scale;
+    d2f_dr2 *= scale * scale;
+    return f;
+  }
+
+  template<class BSPLINESPO>
+  friend class HybridRepSetReaderT;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h
index 3e126b71f4b..0eb22d258ba 100644
--- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplxT.h
@@ -7,7 +7,6 @@
 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-//
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_HYBRIDREP_CPLXT_H
diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h
index 9f36622371b..904ea20627e 100644
--- a/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepRealT.h
@@ -7,7 +7,6 @@
 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-//
 //////////////////////////////////////////////////////////////////////////////////////
 
 /** @file HybridRepReal.h
diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h
index 6783bc4b6bd..84bda28daa1 100644
--- a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReaderT.h
@@ -129,467 +129,450 @@ struct Gvectors
 
 /** General HybridRepSetReader to handle any unitcell
  */
-template <typename SA>
+template<typename SA>
 class HybridRepSetReaderT : public SplineSetReaderT<SA>
 {
 public:
-    using BaseReader = SplineSetReaderT<SA>;
+  using BaseReader = SplineSetReaderT<SA>;
 
-    using BaseReader::bspline;
-    using BaseReader::mybuilder;
-    using BaseReader::rotate_phase_i;
-    using BaseReader::rotate_phase_r;
-    using typename BaseReader::DataType;
-    using typename BaseReader::ValueType;
+  using BaseReader::bspline;
+  using BaseReader::mybuilder;
+  using BaseReader::rotate_phase_i;
+  using BaseReader::rotate_phase_r;
+  using typename BaseReader::DataType;
+  using typename BaseReader::ValueType;
 
-    HybridRepSetReaderT(EinsplineSetBuilderT<ValueType>* e) : BaseReader(e)
-    {
-    }
+  HybridRepSetReaderT(EinsplineSetBuilderT<ValueType>* e) : BaseReader(e) {}
 
-    /** initialize basic parameters of atomic orbitals */
-    void
-    initialize_hybridrep_atomic_centers() override
-    {
-        OhmmsAttributeSet a;
-        std::string scheme_name("Consistent");
-        std::string s_function_name("LEKS2018");
-        a.add(scheme_name, "smoothing_scheme");
-        a.add(s_function_name, "smoothing_function");
-        a.put(mybuilder->XMLRoot);
-        // assign smooth_scheme
-        if (scheme_name == "Consistent")
-            this->bspline->smooth_scheme = SA::smoothing_schemes::CONSISTENT;
-        else if (scheme_name == "SmoothAll")
-            bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHALL;
-        else if (scheme_name == "SmoothPartial")
-            bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHPARTIAL;
-        else
-            APP_ABORT(
-                "initialize_hybridrep_atomic_centers wrong smoothing_scheme "
+  /** initialize basic parameters of atomic orbitals */
+  void initialize_hybridrep_atomic_centers() override
+  {
+    OhmmsAttributeSet a;
+    std::string scheme_name("Consistent");
+    std::string s_function_name("LEKS2018");
+    a.add(scheme_name, "smoothing_scheme");
+    a.add(s_function_name, "smoothing_function");
+    a.put(mybuilder->XMLRoot);
+    // assign smooth_scheme
+    if (scheme_name == "Consistent")
+      this->bspline->smooth_scheme = SA::smoothing_schemes::CONSISTENT;
+    else if (scheme_name == "SmoothAll")
+      bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHALL;
+    else if (scheme_name == "SmoothPartial")
+      bspline->smooth_scheme = SA::smoothing_schemes::SMOOTHPARTIAL;
+    else
+      APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_scheme "
                 "name! Only allows Consistent, SmoothAll or "
                 "SmoothPartial.");
 
-        // assign smooth_function
-        if (s_function_name == "LEKS2018")
-            bspline->smooth_func_id = smoothing_functions::LEKS2018;
-        else if (s_function_name == "coscos")
-            bspline->smooth_func_id = smoothing_functions::COSCOS;
-        else if (s_function_name == "linear")
-            bspline->smooth_func_id = smoothing_functions::LINEAR;
-        else
-            APP_ABORT(
-                "initialize_hybridrep_atomic_centers wrong smoothing_function "
+    // assign smooth_function
+    if (s_function_name == "LEKS2018")
+      bspline->smooth_func_id = smoothing_functions::LEKS2018;
+    else if (s_function_name == "coscos")
+      bspline->smooth_func_id = smoothing_functions::COSCOS;
+    else if (s_function_name == "linear")
+      bspline->smooth_func_id = smoothing_functions::LINEAR;
+    else
+      APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_function "
                 "name! Only allows LEKS2018, coscos or linear.");
-        app_log() << "Hybrid orbital representation uses " << scheme_name
-                  << " smoothing scheme and " << s_function_name
-                  << " smoothing function." << std::endl;
-
-        bspline->set_info(*(mybuilder->SourcePtcl), mybuilder->TargetPtcl,
-            mybuilder->Super2Prim);
-        auto& centers = bspline->AtomicCenters;
-        auto& ACInfo = mybuilder->AtomicCentersInfo;
-        // load atomic center info only when it is not initialized
-        if (centers.size() == 0) {
-            bool success = true;
-            app_log() << "Reading atomic center info for hybrid representation"
-                      << std::endl;
-            for (int center_idx = 0; center_idx < ACInfo.Ncenters;
-                 center_idx++) {
-                const int my_GroupID = ACInfo.GroupID[center_idx];
-                if (ACInfo.cutoff[center_idx] < 0) {
-                    app_error() << "Hybrid orbital representation needs "
-                                   "parameter 'cutoff_radius' for atom "
-                                << center_idx << std::endl;
-                    success = false;
-                }
+    app_log() << "Hybrid orbital representation uses " << scheme_name << " smoothing scheme and " << s_function_name
+              << " smoothing function." << std::endl;
+
+    bspline->set_info(*(mybuilder->SourcePtcl), mybuilder->TargetPtcl, mybuilder->Super2Prim);
+    auto& centers = bspline->AtomicCenters;
+    auto& ACInfo  = mybuilder->AtomicCentersInfo;
+    // load atomic center info only when it is not initialized
+    if (centers.size() == 0)
+    {
+      bool success = true;
+      app_log() << "Reading atomic center info for hybrid representation" << std::endl;
+      for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++)
+      {
+        const int my_GroupID = ACInfo.GroupID[center_idx];
+        if (ACInfo.cutoff[center_idx] < 0)
+        {
+          app_error() << "Hybrid orbital representation needs "
+                         "parameter 'cutoff_radius' for atom "
+                      << center_idx << std::endl;
+          success = false;
+        }
 
-                if (ACInfo.inner_cutoff[center_idx] < 0) {
-                    const double inner_cutoff =
-                        std::max(ACInfo.cutoff[center_idx] - 0.3, 0.0);
-                    app_log() << "Hybrid orbital representation setting "
-                                 "'inner_cutoff' to "
-                              << inner_cutoff << " for group " << my_GroupID
-                              << " as atom " << center_idx << std::endl;
-                    // overwrite the inner_cutoff of all the atoms of the same
-                    // species
-                    for (int id = 0; id < ACInfo.Ncenters; id++)
-                        if (my_GroupID == ACInfo.GroupID[id])
-                            ACInfo.inner_cutoff[id] = inner_cutoff;
-                }
-                else if (ACInfo.inner_cutoff[center_idx] >
-                    ACInfo.cutoff[center_idx]) {
-                    app_error()
-                        << "Hybrid orbital representation 'inner_cutoff' must "
-                           "be smaller than 'spline_radius' for atom "
-                        << center_idx << std::endl;
-                    success = false;
-                }
+        if (ACInfo.inner_cutoff[center_idx] < 0)
+        {
+          const double inner_cutoff = std::max(ACInfo.cutoff[center_idx] - 0.3, 0.0);
+          app_log() << "Hybrid orbital representation setting "
+                       "'inner_cutoff' to "
+                    << inner_cutoff << " for group " << my_GroupID << " as atom " << center_idx << std::endl;
+          // overwrite the inner_cutoff of all the atoms of the same
+          // species
+          for (int id = 0; id < ACInfo.Ncenters; id++)
+            if (my_GroupID == ACInfo.GroupID[id])
+              ACInfo.inner_cutoff[id] = inner_cutoff;
+        }
+        else if (ACInfo.inner_cutoff[center_idx] > ACInfo.cutoff[center_idx])
+        {
+          app_error() << "Hybrid orbital representation 'inner_cutoff' must "
+                         "be smaller than 'spline_radius' for atom "
+                      << center_idx << std::endl;
+          success = false;
+        }
 
-                if (ACInfo.cutoff[center_idx] > 0) {
-                    if (ACInfo.lmax[center_idx] < 0) {
-                        app_error() << "Hybrid orbital representation needs "
-                                       "parameter 'lmax' for atom "
-                                    << center_idx << std::endl;
-                        success = false;
-                    }
-
-                    if (ACInfo.spline_radius[center_idx] < 0 &&
-                        ACInfo.spline_npoints[center_idx] < 0) {
-                        app_log() << "Parameters 'spline_radius' and "
-                                     "'spline_npoints' for group "
-                                  << my_GroupID << " as atom " << center_idx
-                                  << " are not specified." << std::endl;
-                        const double delta =
-                            std::min(0.02, ACInfo.cutoff[center_idx] / 4.0);
-                        const int n_grid_point =
-                            std::ceil(
-                                (ACInfo.cutoff[center_idx] + 1e-4) / delta) +
-                            3;
-                        for (int id = 0; id < ACInfo.Ncenters; id++)
-                            if (my_GroupID == ACInfo.GroupID[id]) {
-                                ACInfo.spline_npoints[id] = n_grid_point;
-                                ACInfo.spline_radius[id] =
-                                    (n_grid_point - 1) * delta;
-                            }
-                        app_log() << "  Based on default grid point distance "
-                                  << delta << std::endl;
-                        app_log()
-                            << "  Setting 'spline_npoints' to "
-                            << ACInfo.spline_npoints[center_idx] << std::endl;
-                        app_log()
-                            << "  Setting 'spline_radius' to "
-                            << ACInfo.spline_radius[center_idx] << std::endl;
-                    }
-                    else {
-                        if (ACInfo.spline_radius[center_idx] < 0) {
-                            app_error()
-                                << "Hybrid orbital representation needs "
-                                   "parameter 'spline_radius' for atom "
-                                << center_idx << std::endl;
-                            success = false;
-                        }
-
-                        if (ACInfo.spline_npoints[center_idx] < 0) {
-                            app_error()
-                                << "Hybrid orbital representation needs "
-                                   "parameter 'spline_npoints' for atom "
-                                << center_idx << std::endl;
-                            success = false;
-                        }
-                    }
-
-                    // check maximally allowed cutoff_radius
-                    double max_allowed_cutoff =
-                        ACInfo.spline_radius[center_idx] -
-                        2.0 * ACInfo.spline_radius[center_idx] /
-                            (ACInfo.spline_npoints[center_idx] - 1);
-                    if (success &&
-                        ACInfo.cutoff[center_idx] > max_allowed_cutoff) {
-                        app_error() << "Hybrid orbital representation requires "
-                                       "cutoff_radius<="
-                                    << max_allowed_cutoff
-                                    << " calculated by "
-                                       "spline_radius-2*spline_radius/"
-                                       "(spline_npoints-1) for atom "
-                                    << center_idx << std::endl;
-                        success = false;
-                    }
-                }
-                else {
-                    // no atomic regions for this atom type
-                    ACInfo.spline_radius[center_idx] = 0.0;
-                    ACInfo.spline_npoints[center_idx] = 0;
-                    ACInfo.lmax[center_idx] = 0;
-                }
+        if (ACInfo.cutoff[center_idx] > 0)
+        {
+          if (ACInfo.lmax[center_idx] < 0)
+          {
+            app_error() << "Hybrid orbital representation needs "
+                           "parameter 'lmax' for atom "
+                        << center_idx << std::endl;
+            success = false;
+          }
+
+          if (ACInfo.spline_radius[center_idx] < 0 && ACInfo.spline_npoints[center_idx] < 0)
+          {
+            app_log() << "Parameters 'spline_radius' and "
+                         "'spline_npoints' for group "
+                      << my_GroupID << " as atom " << center_idx << " are not specified." << std::endl;
+            const double delta     = std::min(0.02, ACInfo.cutoff[center_idx] / 4.0);
+            const int n_grid_point = std::ceil((ACInfo.cutoff[center_idx] + 1e-4) / delta) + 3;
+            for (int id = 0; id < ACInfo.Ncenters; id++)
+              if (my_GroupID == ACInfo.GroupID[id])
+              {
+                ACInfo.spline_npoints[id] = n_grid_point;
+                ACInfo.spline_radius[id]  = (n_grid_point - 1) * delta;
+              }
+            app_log() << "  Based on default grid point distance " << delta << std::endl;
+            app_log() << "  Setting 'spline_npoints' to " << ACInfo.spline_npoints[center_idx] << std::endl;
+            app_log() << "  Setting 'spline_radius' to " << ACInfo.spline_radius[center_idx] << std::endl;
+          }
+          else
+          {
+            if (ACInfo.spline_radius[center_idx] < 0)
+            {
+              app_error() << "Hybrid orbital representation needs "
+                             "parameter 'spline_radius' for atom "
+                          << center_idx << std::endl;
+              success = false;
             }
-            if (!success)
-                BaseReader::myComm->barrier_and_abort(
-                    "initialize_hybridrep_atomic_centers Failed to initialize "
-                    "atomic centers "
-                    "in hybrid orbital representation!");
-
-            for (int center_idx = 0; center_idx < ACInfo.Ncenters;
-                 center_idx++) {
-                AtomicOrbitalsT<DataType> oneCenter(ACInfo.lmax[center_idx]);
-                oneCenter.set_info(ACInfo.ion_pos[center_idx],
-                    ACInfo.cutoff[center_idx], ACInfo.inner_cutoff[center_idx],
-                    ACInfo.spline_radius[center_idx],
-                    ACInfo.non_overlapping_radius[center_idx],
-                    ACInfo.spline_npoints[center_idx]);
-                centers.push_back(oneCenter);
+
+            if (ACInfo.spline_npoints[center_idx] < 0)
+            {
+              app_error() << "Hybrid orbital representation needs "
+                             "parameter 'spline_npoints' for atom "
+                          << center_idx << std::endl;
+              success = false;
             }
+          }
+
+          // check maximally allowed cutoff_radius
+          double max_allowed_cutoff = ACInfo.spline_radius[center_idx] -
+              2.0 * ACInfo.spline_radius[center_idx] / (ACInfo.spline_npoints[center_idx] - 1);
+          if (success && ACInfo.cutoff[center_idx] > max_allowed_cutoff)
+          {
+            app_error() << "Hybrid orbital representation requires "
+                           "cutoff_radius<="
+                        << max_allowed_cutoff
+                        << " calculated by "
+                           "spline_radius-2*spline_radius/"
+                           "(spline_npoints-1) for atom "
+                        << center_idx << std::endl;
+            success = false;
+          }
+        }
+        else
+        {
+          // no atomic regions for this atom type
+          ACInfo.spline_radius[center_idx]  = 0.0;
+          ACInfo.spline_npoints[center_idx] = 0;
+          ACInfo.lmax[center_idx]           = 0;
         }
+      }
+      if (!success)
+        BaseReader::myComm->barrier_and_abort("initialize_hybridrep_atomic_centers Failed to initialize "
+                                              "atomic centers "
+                                              "in hybrid orbital representation!");
+
+      for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++)
+      {
+        AtomicOrbitalsT<DataType> oneCenter(ACInfo.lmax[center_idx]);
+        oneCenter.set_info(ACInfo.ion_pos[center_idx], ACInfo.cutoff[center_idx], ACInfo.inner_cutoff[center_idx],
+                           ACInfo.spline_radius[center_idx], ACInfo.non_overlapping_radius[center_idx],
+                           ACInfo.spline_npoints[center_idx]);
+        centers.push_back(oneCenter);
+      }
     }
+  }
 
-    /** initialize construct atomic orbital radial functions from plane waves */
-    inline void
-    create_atomic_centers_Gspace(Vector<std::complex<double>>& cG,
-        Communicate& band_group_comm, int iorb) override
+  /** initialize construct atomic orbital radial functions from plane waves */
+  inline void create_atomic_centers_Gspace(Vector<std::complex<double>>& cG,
+                                           Communicate& band_group_comm,
+                                           int iorb) override
+  {
+    band_group_comm.bcast(rotate_phase_r);
+    band_group_comm.bcast(rotate_phase_i);
+    band_group_comm.bcast(cG);
+    // distribute G-vectors over processor groups
+    const int Ngvecs      = mybuilder->Gvecs[0].size();
+    const int Nprocs      = band_group_comm.size();
+    const int Ngvecgroups = std::min(Ngvecs, Nprocs);
+    Communicate gvec_group_comm(band_group_comm, Ngvecgroups);
+    std::vector<int> gvec_groups(Ngvecgroups + 1, 0);
+    FairDivideLow(Ngvecs, Ngvecgroups, gvec_groups);
+    const int gvec_first = gvec_groups[gvec_group_comm.getGroupID()];
+    const int gvec_last  = gvec_groups[gvec_group_comm.getGroupID() + 1];
+
+    // prepare Gvecs Ylm(G)
+    using UnitCellType = typename EinsplineSetBuilderT<ValueType>::UnitCellType;
+    Gvectors<double, UnitCellType> Gvecs(mybuilder->Gvecs[0], mybuilder->PrimCell, bspline->HalfG, gvec_first,
+                                         gvec_last);
+    // if(band_group_comm.isGroupLeader()) std::cout << "print band=" <<
+    // iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl;
+
+    std::vector<AtomicOrbitalsT<DataType>>& centers = bspline->AtomicCenters;
+    app_log() << "Transforming band " << iorb << " on Rank 0" << std::endl;
+    // collect atomic centers by group
+    std::vector<int> uniq_species;
+    for (int center_idx = 0; center_idx < centers.size(); center_idx++)
     {
-        band_group_comm.bcast(rotate_phase_r);
-        band_group_comm.bcast(rotate_phase_i);
-        band_group_comm.bcast(cG);
-        // distribute G-vectors over processor groups
-        const int Ngvecs = mybuilder->Gvecs[0].size();
-        const int Nprocs = band_group_comm.size();
-        const int Ngvecgroups = std::min(Ngvecs, Nprocs);
-        Communicate gvec_group_comm(band_group_comm, Ngvecgroups);
-        std::vector<int> gvec_groups(Ngvecgroups + 1, 0);
-        FairDivideLow(Ngvecs, Ngvecgroups, gvec_groups);
-        const int gvec_first = gvec_groups[gvec_group_comm.getGroupID()];
-        const int gvec_last = gvec_groups[gvec_group_comm.getGroupID() + 1];
-
-        // prepare Gvecs Ylm(G)
-        using UnitCellType =
-            typename EinsplineSetBuilderT<ValueType>::UnitCellType;
-        Gvectors<double, UnitCellType> Gvecs(mybuilder->Gvecs[0],
-            mybuilder->PrimCell, bspline->HalfG, gvec_first, gvec_last);
-        // if(band_group_comm.isGroupLeader()) std::cout << "print band=" <<
-        // iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl;
-
-        std::vector<AtomicOrbitalsT<DataType>>& centers = bspline->AtomicCenters;
-        app_log() << "Transforming band " << iorb << " on Rank 0" << std::endl;
-        // collect atomic centers by group
-        std::vector<int> uniq_species;
-        for (int center_idx = 0; center_idx < centers.size(); center_idx++) {
-            auto& ACInfo = mybuilder->AtomicCentersInfo;
-            const int my_GroupID = ACInfo.GroupID[center_idx];
-            int found_idx = -1;
-            for (size_t idx = 0; idx < uniq_species.size(); idx++)
-                if (my_GroupID == uniq_species[idx]) {
-                    found_idx = idx;
-                    break;
-                }
-            if (found_idx < 0)
-                uniq_species.push_back(my_GroupID);
+      auto& ACInfo         = mybuilder->AtomicCentersInfo;
+      const int my_GroupID = ACInfo.GroupID[center_idx];
+      int found_idx        = -1;
+      for (size_t idx = 0; idx < uniq_species.size(); idx++)
+        if (my_GroupID == uniq_species[idx])
+        {
+          found_idx = idx;
+          break;
         }
-        // construct group list
-        std::vector<std::vector<int>> group_list(uniq_species.size());
-        for (int center_idx = 0; center_idx < centers.size(); center_idx++) {
-            auto& ACInfo = mybuilder->AtomicCentersInfo;
-            const int my_GroupID = ACInfo.GroupID[center_idx];
-            for (size_t idx = 0; idx < uniq_species.size(); idx++)
-                if (my_GroupID == uniq_species[idx]) {
-                    group_list[idx].push_back(center_idx);
-                    break;
-                }
+      if (found_idx < 0)
+        uniq_species.push_back(my_GroupID);
+    }
+    // construct group list
+    std::vector<std::vector<int>> group_list(uniq_species.size());
+    for (int center_idx = 0; center_idx < centers.size(); center_idx++)
+    {
+      auto& ACInfo         = mybuilder->AtomicCentersInfo;
+      const int my_GroupID = ACInfo.GroupID[center_idx];
+      for (size_t idx = 0; idx < uniq_species.size(); idx++)
+        if (my_GroupID == uniq_species[idx])
+        {
+          group_list[idx].push_back(center_idx);
+          break;
         }
+    }
 
-        for (int group_idx = 0; group_idx < group_list.size(); group_idx++) {
-            const auto& mygroup = group_list[group_idx];
-            const double spline_radius = centers[mygroup[0]].getSplineRadius();
-            const int spline_npoints = centers[mygroup[0]].getSplineNpoints();
-            const int lmax = centers[mygroup[0]].getLmax();
-            const double delta =
-                spline_radius / static_cast<double>(spline_npoints - 1);
-            const int lm_tot = (lmax + 1) * (lmax + 1);
-            const size_t natoms = mygroup.size();
-            const int policy = lm_tot > natoms ? 0 : 1;
-
-            std::vector<std::complex<double>> i_power(lm_tot);
-            // rotate phase is introduced here.
-            std::complex<double> i_temp(rotate_phase_r, rotate_phase_i);
-            for (size_t l = 0; l <= lmax; l++) {
-                for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++)
-                    i_power[lm] = i_temp;
-                i_temp *= std::complex<double>(0.0, 1.0);
-            }
-
-            std::vector<Matrix<double>> all_vals(natoms);
-            std::vector<std::vector<aligned_vector<double>>> vals_local(
-                spline_npoints * omp_get_max_threads());
-            VectorSoaContainer<double, 3> myRSoA(natoms);
-            for (size_t idx = 0; idx < natoms; idx++) {
-                all_vals[idx].resize(spline_npoints, lm_tot * 2);
-                all_vals[idx] = 0.0;
-                myRSoA(idx) = centers[mygroup[idx]].getCenterPos();
-            }
+    for (int group_idx = 0; group_idx < group_list.size(); group_idx++)
+    {
+      const auto& mygroup        = group_list[group_idx];
+      const double spline_radius = centers[mygroup[0]].getSplineRadius();
+      const int spline_npoints   = centers[mygroup[0]].getSplineNpoints();
+      const int lmax             = centers[mygroup[0]].getLmax();
+      const double delta         = spline_radius / static_cast<double>(spline_npoints - 1);
+      const int lm_tot           = (lmax + 1) * (lmax + 1);
+      const size_t natoms        = mygroup.size();
+      const int policy           = lm_tot > natoms ? 0 : 1;
+
+      std::vector<std::complex<double>> i_power(lm_tot);
+      // rotate phase is introduced here.
+      std::complex<double> i_temp(rotate_phase_r, rotate_phase_i);
+      for (size_t l = 0; l <= lmax; l++)
+      {
+        for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++)
+          i_power[lm] = i_temp;
+        i_temp *= std::complex<double>(0.0, 1.0);
+      }
+
+      std::vector<Matrix<double>> all_vals(natoms);
+      std::vector<std::vector<aligned_vector<double>>> vals_local(spline_npoints * omp_get_max_threads());
+      VectorSoaContainer<double, 3> myRSoA(natoms);
+      for (size_t idx = 0; idx < natoms; idx++)
+      {
+        all_vals[idx].resize(spline_npoints, lm_tot * 2);
+        all_vals[idx] = 0.0;
+        myRSoA(idx)   = centers[mygroup[idx]].getCenterPos();
+      }
 
 #pragma omp parallel
+      {
+        const size_t tid = omp_get_thread_num();
+        const size_t nt  = omp_get_num_threads();
+
+        for (int ip = 0; ip < spline_npoints; ip++)
+        {
+          const size_t ip_idx = tid * spline_npoints + ip;
+          if (policy == 1)
+          {
+            vals_local[ip_idx].resize(lm_tot * 2);
+            for (size_t lm = 0; lm < lm_tot * 2; lm++)
             {
-                const size_t tid = omp_get_thread_num();
-                const size_t nt = omp_get_num_threads();
-
-                for (int ip = 0; ip < spline_npoints; ip++) {
-                    const size_t ip_idx = tid * spline_npoints + ip;
-                    if (policy == 1) {
-                        vals_local[ip_idx].resize(lm_tot * 2);
-                        for (size_t lm = 0; lm < lm_tot * 2; lm++) {
-                            auto& vals = vals_local[ip_idx][lm];
-                            vals.resize(natoms);
-                            std::fill(vals.begin(), vals.end(), 0.0);
-                        }
-                    }
-                    else {
-                        vals_local[ip_idx].resize(natoms * 2);
-                        for (size_t iat = 0; iat < natoms * 2; iat++) {
-                            auto& vals = vals_local[ip_idx][iat];
-                            vals.resize(lm_tot);
-                            std::fill(vals.begin(), vals.end(), 0.0);
-                        }
-                    }
-                }
+              auto& vals = vals_local[ip_idx][lm];
+              vals.resize(natoms);
+              std::fill(vals.begin(), vals.end(), 0.0);
+            }
+          }
+          else
+          {
+            vals_local[ip_idx].resize(natoms * 2);
+            for (size_t iat = 0; iat < natoms * 2; iat++)
+            {
+              auto& vals = vals_local[ip_idx][iat];
+              vals.resize(lm_tot);
+              std::fill(vals.begin(), vals.end(), 0.0);
+            }
+          }
+        }
 
-                const size_t size_pw_tile = 32;
-                const size_t num_pw_tiles =
-                    (Gvecs.NumGvecs + size_pw_tile - 1) / size_pw_tile;
-                aligned_vector<double> j_lm_G(lm_tot, 0.0);
-                std::vector<aligned_vector<double>> phase_shift_r(size_pw_tile);
-                std::vector<aligned_vector<double>> phase_shift_i(size_pw_tile);
-                std::vector<aligned_vector<double>> YlmG(size_pw_tile);
-                for (size_t ig = 0; ig < size_pw_tile; ig++) {
-                    phase_shift_r[ig].resize(natoms);
-                    phase_shift_i[ig].resize(natoms);
-                    YlmG[ig].resize(lm_tot);
-                }
-                SoaSphericalTensor<double> Ylm(lmax);
+        const size_t size_pw_tile = 32;
+        const size_t num_pw_tiles = (Gvecs.NumGvecs + size_pw_tile - 1) / size_pw_tile;
+        aligned_vector<double> j_lm_G(lm_tot, 0.0);
+        std::vector<aligned_vector<double>> phase_shift_r(size_pw_tile);
+        std::vector<aligned_vector<double>> phase_shift_i(size_pw_tile);
+        std::vector<aligned_vector<double>> YlmG(size_pw_tile);
+        for (size_t ig = 0; ig < size_pw_tile; ig++)
+        {
+          phase_shift_r[ig].resize(natoms);
+          phase_shift_i[ig].resize(natoms);
+          YlmG[ig].resize(lm_tot);
+        }
+        SoaSphericalTensor<double> Ylm(lmax);
 
 #pragma omp for
-                for (size_t tile_id = 0; tile_id < num_pw_tiles; tile_id++) {
-                    const size_t ig_first = tile_id * size_pw_tile;
-                    const size_t ig_last =
-                        std::min((tile_id + 1) * size_pw_tile, Gvecs.NumGvecs);
-                    for (size_t ig = ig_first; ig < ig_last; ig++) {
-                        const size_t ig_local = ig - ig_first;
-                        // calculate phase shift for all the centers of this
-                        // group
-                        Gvecs.calc_phase_shift(myRSoA, ig,
-                            phase_shift_r[ig_local], phase_shift_i[ig_local]);
-                        Gvecs.calc_Ylm_G(ig, Ylm, YlmG[ig_local]);
-                    }
-
-                    for (int ip = 0; ip < spline_npoints; ip++) {
-                        double r = delta * static_cast<double>(ip);
-                        const size_t ip_idx = tid * spline_npoints + ip;
-
-                        for (size_t ig = ig_first; ig < ig_last; ig++) {
-                            const size_t ig_local = ig - ig_first;
-                            // calculate spherical bessel function
-                            Gvecs.calc_jlm_G(lmax, r, ig, j_lm_G);
-                            for (size_t lm = 0; lm < lm_tot; lm++)
-                                j_lm_G[lm] *= YlmG[ig_local][lm];
-
-                            const double cG_r = cG[ig + gvec_first].real();
-                            const double cG_i = cG[ig + gvec_first].imag();
-                            if (policy == 1) {
-                                for (size_t lm = 0; lm < lm_tot; lm++) {
-                                    double* restrict vals_r =
-                                        vals_local[ip_idx][lm * 2].data();
-                                    double* restrict vals_i =
-                                        vals_local[ip_idx][lm * 2 + 1].data();
-                                    const double* restrict ps_r_ptr =
-                                        phase_shift_r[ig_local].data();
-                                    const double* restrict ps_i_ptr =
-                                        phase_shift_i[ig_local].data();
-                                    double cG_j_r = cG_r * j_lm_G[lm];
-                                    double cG_j_i = cG_i * j_lm_G[lm];
-#pragma omp simd aligned(vals_r, vals_i, ps_r_ptr, ps_i_ptr \
-                         : QMC_SIMD_ALIGNMENT)
-                                    for (size_t idx = 0; idx < natoms; idx++) {
-                                        const double ps_r = ps_r_ptr[idx];
-                                        const double ps_i = ps_i_ptr[idx];
-                                        vals_r[idx] +=
-                                            cG_j_r * ps_r - cG_j_i * ps_i;
-                                        vals_i[idx] +=
-                                            cG_j_i * ps_r + cG_j_r * ps_i;
-                                    }
-                                }
-                            }
-                            else {
-                                for (size_t idx = 0; idx < natoms; idx++) {
-                                    double* restrict vals_r =
-                                        vals_local[ip_idx][idx * 2].data();
-                                    double* restrict vals_i =
-                                        vals_local[ip_idx][idx * 2 + 1].data();
-                                    const double* restrict j_lm_G_ptr =
-                                        j_lm_G.data();
-                                    double cG_ps_r =
-                                        cG_r * phase_shift_r[ig_local][idx] -
-                                        cG_i * phase_shift_i[ig_local][idx];
-                                    double cG_ps_i =
-                                        cG_i * phase_shift_r[ig_local][idx] +
-                                        cG_r * phase_shift_i[ig_local][idx];
+        for (size_t tile_id = 0; tile_id < num_pw_tiles; tile_id++)
+        {
+          const size_t ig_first = tile_id * size_pw_tile;
+          const size_t ig_last  = std::min((tile_id + 1) * size_pw_tile, Gvecs.NumGvecs);
+          for (size_t ig = ig_first; ig < ig_last; ig++)
+          {
+            const size_t ig_local = ig - ig_first;
+            // calculate phase shift for all the centers of this
+            // group
+            Gvecs.calc_phase_shift(myRSoA, ig, phase_shift_r[ig_local], phase_shift_i[ig_local]);
+            Gvecs.calc_Ylm_G(ig, Ylm, YlmG[ig_local]);
+          }
+
+          for (int ip = 0; ip < spline_npoints; ip++)
+          {
+            double r            = delta * static_cast<double>(ip);
+            const size_t ip_idx = tid * spline_npoints + ip;
+
+            for (size_t ig = ig_first; ig < ig_last; ig++)
+            {
+              const size_t ig_local = ig - ig_first;
+              // calculate spherical bessel function
+              Gvecs.calc_jlm_G(lmax, r, ig, j_lm_G);
+              for (size_t lm = 0; lm < lm_tot; lm++)
+                j_lm_G[lm] *= YlmG[ig_local][lm];
+
+              const double cG_r = cG[ig + gvec_first].real();
+              const double cG_i = cG[ig + gvec_first].imag();
+              if (policy == 1)
+              {
+                for (size_t lm = 0; lm < lm_tot; lm++)
+                {
+                  double* restrict vals_r         = vals_local[ip_idx][lm * 2].data();
+                  double* restrict vals_i         = vals_local[ip_idx][lm * 2 + 1].data();
+                  const double* restrict ps_r_ptr = phase_shift_r[ig_local].data();
+                  const double* restrict ps_i_ptr = phase_shift_i[ig_local].data();
+                  double cG_j_r                   = cG_r * j_lm_G[lm];
+                  double cG_j_i                   = cG_i * j_lm_G[lm];
+#pragma omp simd aligned(vals_r, vals_i, ps_r_ptr, ps_i_ptr : QMC_SIMD_ALIGNMENT)
+                  for (size_t idx = 0; idx < natoms; idx++)
+                  {
+                    const double ps_r = ps_r_ptr[idx];
+                    const double ps_i = ps_i_ptr[idx];
+                    vals_r[idx] += cG_j_r * ps_r - cG_j_i * ps_i;
+                    vals_i[idx] += cG_j_i * ps_r + cG_j_r * ps_i;
+                  }
+                }
+              }
+              else
+              {
+                for (size_t idx = 0; idx < natoms; idx++)
+                {
+                  double* restrict vals_r           = vals_local[ip_idx][idx * 2].data();
+                  double* restrict vals_i           = vals_local[ip_idx][idx * 2 + 1].data();
+                  const double* restrict j_lm_G_ptr = j_lm_G.data();
+                  double cG_ps_r = cG_r * phase_shift_r[ig_local][idx] - cG_i * phase_shift_i[ig_local][idx];
+                  double cG_ps_i = cG_i * phase_shift_r[ig_local][idx] + cG_r * phase_shift_i[ig_local][idx];
 #pragma omp simd aligned(vals_r, vals_i, j_lm_G_ptr : QMC_SIMD_ALIGNMENT)
-                                    for (size_t lm = 0; lm < lm_tot; lm++) {
-                                        const double jlm = j_lm_G_ptr[lm];
-                                        vals_r[lm] += cG_ps_r * jlm;
-                                        vals_i[lm] += cG_ps_i * jlm;
-                                    }
-                                }
-                            }
-                        }
-                    }
+                  for (size_t lm = 0; lm < lm_tot; lm++)
+                  {
+                    const double jlm = j_lm_G_ptr[lm];
+                    vals_r[lm] += cG_ps_r * jlm;
+                    vals_i[lm] += cG_ps_i * jlm;
+                  }
                 }
+              }
+            }
+          }
+        }
 
 #pragma omp for collapse(2)
-                for (int ip = 0; ip < spline_npoints; ip++)
-                    for (size_t idx = 0; idx < natoms; idx++) {
-                        double* vals = all_vals[idx][ip];
-                        for (size_t tid = 0; tid < nt; tid++)
-                            for (size_t lm = 0; lm < lm_tot; lm++) {
-                                double vals_th_r, vals_th_i;
-                                const size_t ip_idx = tid * spline_npoints + ip;
-                                if (policy == 1) {
-                                    vals_th_r = vals_local[ip_idx][lm * 2][idx];
-                                    vals_th_i =
-                                        vals_local[ip_idx][lm * 2 + 1][idx];
-                                }
-                                else {
-                                    vals_th_r = vals_local[ip_idx][idx * 2][lm];
-                                    vals_th_i =
-                                        vals_local[ip_idx][idx * 2 + 1][lm];
-                                }
-                                const double real_tmp =
-                                    4.0 * M_PI * i_power[lm].real();
-                                const double imag_tmp =
-                                    4.0 * M_PI * i_power[lm].imag();
-                                vals[lm] +=
-                                    vals_th_r * real_tmp - vals_th_i * imag_tmp;
-                                vals[lm + lm_tot] +=
-                                    vals_th_i * real_tmp + vals_th_r * imag_tmp;
-                            }
-                    }
-            }
-            // app_log() << "Building band " << iorb << " at center " <<
-            // center_idx << std::endl;
-
-            for (size_t idx = 0; idx < natoms; idx++) {
-                // reduce all_vals
-                band_group_comm.reduce_in_place(
-                    all_vals[idx].data(), all_vals[idx].size());
-                if (!band_group_comm.isGroupLeader())
-                    continue;
-#pragma omp parallel for
-                for (int lm = 0; lm < lm_tot; lm++) {
-                    auto& mycenter = centers[mygroup[idx]];
-                    aligned_vector<double> splineData_r(spline_npoints);
-                    UBspline_1d_d* atomic_spline_r = nullptr;
-                    for (size_t ip = 0; ip < spline_npoints; ip++)
-                        splineData_r[ip] = all_vals[idx][ip][lm];
-                    atomic_spline_r = einspline::create(atomic_spline_r, 0.0,
-                        spline_radius, spline_npoints, splineData_r.data(),
-                        ((lm == 0) || (lm > 3)));
-                    if (!bspline->isComplex()) {
-                        mycenter.set_spline(atomic_spline_r, lm, iorb);
-                        einspline::destroy(atomic_spline_r);
-                    }
-                    else {
-                        aligned_vector<double> splineData_i(spline_npoints);
-                        UBspline_1d_d* atomic_spline_i = nullptr;
-                        for (size_t ip = 0; ip < spline_npoints; ip++)
-                            splineData_i[ip] = all_vals[idx][ip][lm + lm_tot];
-                        atomic_spline_i = einspline::create(atomic_spline_i,
-                            0.0, spline_radius, spline_npoints,
-                            splineData_i.data(), ((lm == 0) || (lm > 3)));
-                        mycenter.set_spline(atomic_spline_r, lm, iorb * 2);
-                        mycenter.set_spline(atomic_spline_i, lm, iorb * 2 + 1);
-                        einspline::destroy(atomic_spline_r);
-                        einspline::destroy(atomic_spline_i);
-                    }
+        for (int ip = 0; ip < spline_npoints; ip++)
+          for (size_t idx = 0; idx < natoms; idx++)
+          {
+            double* vals = all_vals[idx][ip];
+            for (size_t tid = 0; tid < nt; tid++)
+              for (size_t lm = 0; lm < lm_tot; lm++)
+              {
+                double vals_th_r, vals_th_i;
+                const size_t ip_idx = tid * spline_npoints + ip;
+                if (policy == 1)
+                {
+                  vals_th_r = vals_local[ip_idx][lm * 2][idx];
+                  vals_th_i = vals_local[ip_idx][lm * 2 + 1][idx];
                 }
-            }
+                else
+                {
+                  vals_th_r = vals_local[ip_idx][idx * 2][lm];
+                  vals_th_i = vals_local[ip_idx][idx * 2 + 1][lm];
+                }
+                const double real_tmp = 4.0 * M_PI * i_power[lm].real();
+                const double imag_tmp = 4.0 * M_PI * i_power[lm].imag();
+                vals[lm] += vals_th_r * real_tmp - vals_th_i * imag_tmp;
+                vals[lm + lm_tot] += vals_th_i * real_tmp + vals_th_r * imag_tmp;
+              }
+          }
+      }
+      // app_log() << "Building band " << iorb << " at center " <<
+      // center_idx << std::endl;
+
+      for (size_t idx = 0; idx < natoms; idx++)
+      {
+        // reduce all_vals
+        band_group_comm.reduce_in_place(all_vals[idx].data(), all_vals[idx].size());
+        if (!band_group_comm.isGroupLeader())
+          continue;
+#pragma omp parallel for
+        for (int lm = 0; lm < lm_tot; lm++)
+        {
+          auto& mycenter = centers[mygroup[idx]];
+          aligned_vector<double> splineData_r(spline_npoints);
+          UBspline_1d_d* atomic_spline_r = nullptr;
+          for (size_t ip = 0; ip < spline_npoints; ip++)
+            splineData_r[ip] = all_vals[idx][ip][lm];
+          atomic_spline_r = einspline::create(atomic_spline_r, 0.0, spline_radius, spline_npoints, splineData_r.data(),
+                                              ((lm == 0) || (lm > 3)));
+          if (!bspline->isComplex())
+          {
+            mycenter.set_spline(atomic_spline_r, lm, iorb);
+            einspline::destroy(atomic_spline_r);
+          }
+          else
+          {
+            aligned_vector<double> splineData_i(spline_npoints);
+            UBspline_1d_d* atomic_spline_i = nullptr;
+            for (size_t ip = 0; ip < spline_npoints; ip++)
+              splineData_i[ip] = all_vals[idx][ip][lm + lm_tot];
+            atomic_spline_i = einspline::create(atomic_spline_i, 0.0, spline_radius, spline_npoints,
+                                                splineData_i.data(), ((lm == 0) || (lm > 3)));
+            mycenter.set_spline(atomic_spline_r, lm, iorb * 2);
+            mycenter.set_spline(atomic_spline_i, lm, iorb * 2 + 1);
+            einspline::destroy(atomic_spline_r);
+            einspline::destroy(atomic_spline_i);
+          }
         }
+      }
     }
+  }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp
index 9c02ad06d2d..a23b5ec2898 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.cpp
@@ -20,485 +20,452 @@
 
 namespace qmcplusplus
 {
-template <typename ST, typename VT>
-SplineC2COMPTargetT<ST, VT>::SplineC2COMPTargetT(
-    const SplineC2COMPTargetT& in) = default;
-
-template <typename ST, typename VT>
-inline void
-SplineC2COMPTargetT<ST, VT>::set_spline(SingleSplineType* spline_r,
-    SingleSplineType* spline_i, int twist, int ispline, int level)
+template<typename ST, typename VT>
+SplineC2COMPTargetT<ST, VT>::SplineC2COMPTargetT(const SplineC2COMPTargetT& in) = default;
+
+template<typename ST, typename VT>
+inline void SplineC2COMPTargetT<ST, VT>::set_spline(SingleSplineType* spline_r,
+                                                    SingleSplineType* spline_i,
+                                                    int twist,
+                                                    int ispline,
+                                                    int level)
 {
-    SplineInst->copy_spline(spline_r, 2 * ispline);
-    SplineInst->copy_spline(spline_i, 2 * ispline + 1);
+  SplineInst->copy_spline(spline_r, 2 * ispline);
+  SplineInst->copy_spline(spline_i, 2 * ispline + 1);
 }
 
-template <typename ST, typename VT>
-bool
-SplineC2COMPTargetT<ST, VT>::read_splines(hdf_archive& h5f)
+template<typename ST, typename VT>
+bool SplineC2COMPTargetT<ST, VT>::read_splines(hdf_archive& h5f)
 {
-    std::ostringstream o;
-    o << "spline_" << this->MyIndex;
-    einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
-    return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
+  std::ostringstream o;
+  o << "spline_" << this->MyIndex;
+  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+  return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
 }
 
-template <typename ST, typename VT>
-bool
-SplineC2COMPTargetT<ST, VT>::write_splines(hdf_archive& h5f)
+template<typename ST, typename VT>
+bool SplineC2COMPTargetT<ST, VT>::write_splines(hdf_archive& h5f)
 {
-    std::ostringstream o;
-    o << "spline_" << this->MyIndex;
-    einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
-    return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
+  std::ostringstream o;
+  o << "spline_" << this->MyIndex;
+  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+  return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
 }
 
-template <typename ST, typename VT>
-inline void
-SplineC2COMPTargetT<ST, VT>::assign_v(const PointType& r,
-    const vContainer_type& myV, ValueVector& psi, int first, int last) const
+template<typename ST, typename VT>
+inline void SplineC2COMPTargetT<ST, VT>::assign_v(const PointType& r,
+                                                  const vContainer_type& myV,
+                                                  ValueVector& psi,
+                                                  int first,
+                                                  int last) const
 {
-    // protect last
-    last = last > this->kPoints.size() ? this->kPoints.size() : last;
+  // clamp last so it never exceeds kPoints.size()
+  last = last > this->kPoints.size() ? this->kPoints.size() : last;
 
-    const ST x = r[0], y = r[1], z = r[2];
-    const ST* restrict kx = myKcart->data(0);
-    const ST* restrict ky = myKcart->data(1);
-    const ST* restrict kz = myKcart->data(2);
+  const ST x = r[0], y = r[1], z = r[2];
+  const ST* restrict kx = myKcart->data(0);
+  const ST* restrict ky = myKcart->data(1);
+  const ST* restrict kz = myKcart->data(2);
 #pragma omp simd
-    for (size_t j = first; j < last; ++j) {
-        ST s, c;
-        const ST val_r = myV[2 * j];
-        const ST val_i = myV[2 * j + 1];
-        omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
-        psi[j + this->first_spo] =
-            ComplexT(val_r * c - val_i * s, val_i * c + val_r * s);
-    }
+  for (size_t j = first; j < last; ++j)
+  {
+    ST s, c;
+    const ST val_r = myV[2 * j];
+    const ST val_i = myV[2 * j + 1];
+    omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
+    psi[j + this->first_spo] = ComplexT(val_r * c - val_i * s, val_i * c + val_r * s);
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::evaluateValue(
-    const ParticleSetT<VT>& P, const int iat, ValueVector& psi)
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::evaluateValue(const ParticleSetT<VT>& P, const int iat, ValueVector& psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
 
 #pragma omp parallel
-    {
-        int first, last;
-        // Factor of 2 because psi is complex and the spline storage and
-        // evaluation uses a real type
-        FairDivideAligned(2 * psi.size(), getAlignment<ST>(),
-            omp_get_num_threads(), omp_get_thread_num(), first, last);
-
-        spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
-        assign_v(r, myV, psi, first / 2, last / 2);
-    }
+  {
+    int first, last;
+    // Factor of 2 because psi is complex while the spline storage and
+    // evaluation use a real type
+    FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+
+    spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
+    assign_v(r, myV, psi, first / 2, last / 2);
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::evaluateDetRatios(
-    const VirtualParticleSetT<VT>& VP, ValueVector& psi,
-    const ValueVector& psiinv, std::vector<ValueType>& ratios)
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::evaluateDetRatios(const VirtualParticleSetT<VT>& VP,
+                                                    ValueVector& psi,
+                                                    const ValueVector& psiinv,
+                                                    std::vector<ValueType>& ratios)
 {
-    const int nVP = VP.getTotalNum();
-    psiinv_pos_copy.resize(psiinv.size() + nVP * 3);
-
-    // stage psiinv to psiinv_pos_copy
-    std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data());
-
-    // pack particle positions
-    auto* restrict pos_scratch =
-        reinterpret_cast<RealType*>(psiinv_pos_copy.data() + psiinv.size());
-    for (int iat = 0; iat < nVP; ++iat) {
-        const PointType& r = VP.activeR(iat);
-        PointType ru(PrimLattice.toUnit_floor(r));
-        pos_scratch[iat * 6] = r[0];
-        pos_scratch[iat * 6 + 1] = r[1];
-        pos_scratch[iat * 6 + 2] = r[2];
-        pos_scratch[iat * 6 + 3] = ru[0];
-        pos_scratch[iat * 6 + 4] = ru[1];
-        pos_scratch[iat * 6 + 5] = ru[2];
-    }
+  const int nVP = VP.getTotalNum();
+  psiinv_pos_copy.resize(psiinv.size() + nVP * 3);
 
-    const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-    ratios_private.resize(nVP, NumTeams);
-    const auto padded_size = myV.size();
-    offload_scratch.resize(padded_size * nVP);
-    const auto orb_size = psiinv.size();
-    results_scratch.resize(padded_size * nVP);
-
-    // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
-    auto* offload_scratch_ptr = offload_scratch.data();
-    auto* results_scratch_ptr = results_scratch.data();
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* myKcart_ptr = myKcart->data();
-    auto* psiinv_ptr = psiinv_pos_copy.data();
-    auto* ratios_private_ptr = ratios_private.data();
-    const size_t first_spo_local = this->first_spo;
+  // stage psiinv to psiinv_pos_copy
+  std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data());
 
-    {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD(
-            "omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \
+  // pack particle positions
+  auto* restrict pos_scratch = reinterpret_cast<RealType*>(psiinv_pos_copy.data() + psiinv.size());
+  for (int iat = 0; iat < nVP; ++iat)
+  {
+    const PointType& r = VP.activeR(iat);
+    PointType ru(PrimLattice.toUnit_floor(r));
+    pos_scratch[iat * 6]     = r[0];
+    pos_scratch[iat * 6 + 1] = r[1];
+    pos_scratch[iat * 6 + 2] = r[2];
+    pos_scratch[iat * 6 + 3] = ru[0];
+    pos_scratch[iat * 6 + 4] = ru[1];
+    pos_scratch[iat * 6 + 5] = ru[2];
+  }
+
+  const size_t ChunkSizePerTeam = 512;
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+  ratios_private.resize(nVP, NumTeams);
+  const auto padded_size = myV.size();
+  offload_scratch.resize(padded_size * nVP);
+  const auto orb_size = psiinv.size();
+  results_scratch.resize(padded_size * nVP);
+
+  // Ye: need to extract sizes and pointers before entering target region
+  const auto* spline_ptr         = SplineInst->getSplinePtr();
+  auto* offload_scratch_ptr      = offload_scratch.data();
+  auto* results_scratch_ptr      = results_scratch.data();
+  const auto myKcart_padded_size = myKcart->capacity();
+  auto* myKcart_ptr              = myKcart->data();
+  auto* psiinv_ptr               = psiinv_pos_copy.data();
+  auto* ratios_private_ptr       = ratios_private.data();
+  const size_t first_spo_local   = this->first_spo;
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \
                 map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \
                 map(always, from: ratios_private_ptr[0:NumTeams*nVP])")
-        for (int iat = 0; iat < nVP; iat++)
-            for (int team_id = 0; team_id < NumTeams; team_id++) {
-                const size_t first = ChunkSizePerTeam * team_id;
-                const size_t last =
-                    omptarget::min(first + ChunkSizePerTeam, padded_size);
-
-                auto* restrict offload_scratch_iat_ptr =
-                    offload_scratch_ptr + padded_size * iat;
-                auto* restrict psi_iat_ptr =
-                    results_scratch_ptr + padded_size * iat;
-                auto* restrict pos_scratch =
-                    reinterpret_cast<RealType*>(psiinv_ptr + orb_size);
-
-                int ix, iy, iz;
-                ST a[4], b[4], c[4];
-                spline2::computeLocationAndFractional(spline_ptr,
-                    ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]),
-                    ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = 0; index < last - first; index++)
-                    spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz,
-                        first + index, a, b, c,
-                        offload_scratch_iat_ptr + first + index);
-                const size_t first_cplx = first / 2;
-                const size_t last_cplx = omptarget::min(last / 2, orb_size);
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = first_cplx; index < last_cplx; index++)
-                    C2C::assign_v(ST(pos_scratch[iat * 6]),
-                        ST(pos_scratch[iat * 6 + 1]),
-                        ST(pos_scratch[iat * 6 + 2]), psi_iat_ptr,
-                        offload_scratch_iat_ptr, myKcart_ptr,
-                        myKcart_padded_size, first_spo_local, index);
-
-                ComplexT sum(0);
-                PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
-                for (int i = first_cplx; i < last_cplx; i++)
-                    sum += psi_iat_ptr[i] * psiinv_ptr[i];
-                ratios_private_ptr[iat * NumTeams + team_id] = sum;
-            }
-    }
-
-    // do the reduction manually
-    for (int iat = 0; iat < nVP; ++iat) {
-        ratios[iat] = ComplexT(0);
-        for (int tid = 0; tid < NumTeams; tid++)
-            ratios[iat] += ratios_private[iat][tid];
-    }
+    for (int iat = 0; iat < nVP; iat++)
+      for (int team_id = 0; team_id < NumTeams; team_id++)
+      {
+        const size_t first = ChunkSizePerTeam * team_id;
+        const size_t last  = omptarget::min(first + ChunkSizePerTeam, padded_size);
+
+        auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + padded_size * iat;
+        auto* restrict psi_iat_ptr             = results_scratch_ptr + padded_size * iat;
+        auto* restrict pos_scratch             = reinterpret_cast<RealType*>(psiinv_ptr + orb_size);
+
+        int ix, iy, iz;
+        ST a[4], b[4], c[4];
+        spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]),
+                                              ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = 0; index < last - first; index++)
+          spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
+                                             offload_scratch_iat_ptr + first + index);
+        const size_t first_cplx = first / 2;
+        const size_t last_cplx  = omptarget::min(last / 2, orb_size);
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = first_cplx; index < last_cplx; index++)
+          C2C::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]),
+                        psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index);
+
+        ComplexT sum(0);
+        PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
+        for (int i = first_cplx; i < last_cplx; i++)
+          sum += psi_iat_ptr[i] * psiinv_ptr[i];
+        ratios_private_ptr[iat * NumTeams + team_id] = sum;
+      }
+  }
+
+  // do the reduction manually: sum the per-team partial sums into ratios[iat]
+  for (int iat = 0; iat < nVP; ++iat)
+  {
+    ratios[iat] = ComplexT(0);
+    for (int tid = 0; tid < NumTeams; tid++)
+      ratios[iat] += ratios_private[iat][tid];
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::mw_evaluateDetRatios(
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::mw_evaluateDetRatios(
     const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
     const RefVectorWithLeader<const VirtualParticleSetT<VT>>& vp_list,
     const RefVector<ValueVector>& psi_list,
     const std::vector<const ValueType*>& invRow_ptr_list,
     std::vector<std::vector<ValueType>>& ratios_list) const
 {
-    assert(this == &spo_list.getLeader());
-    auto& phi_leader = spo_list.template getCastedLeader<SplineC2COMPTargetT>();
-    auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
-    auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D;
-    auto& mw_ratios_private = mw_mem.mw_ratios_private;
-    auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
-    auto& mw_results_scratch = mw_mem.mw_results_scratch;
-    const size_t nw = spo_list.size();
-    const size_t orb_size = phi_leader.size();
-
-    size_t mw_nVP = 0;
-    for (const VirtualParticleSetT<VT>& VP : vp_list)
-        mw_nVP += VP.getTotalNum();
-
-    const size_t packed_size =
-        nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(ST) + sizeof(int));
-    det_ratios_buffer_H2D.resize(packed_size);
-
-    // pack invRow_ptr_list to det_ratios_buffer_H2D
-    Vector<const ValueType*> ptr_buffer(
-        reinterpret_cast<const ValueType**>(det_ratios_buffer_H2D.data()), nw);
-    for (size_t iw = 0; iw < nw; iw++)
-        ptr_buffer[iw] = invRow_ptr_list[iw];
-
-    // pack particle positions
-    auto* pos_ptr = reinterpret_cast<ST*>(
-        det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*));
-    auto* ref_id_ptr = reinterpret_cast<int*>(det_ratios_buffer_H2D.data() +
-        nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST));
-    size_t iVP = 0;
-    for (size_t iw = 0; iw < nw; iw++) {
-        const VirtualParticleSetT<VT>& VP = vp_list[iw];
-        assert(ratios_list[iw].size() == VP.getTotalNum());
-        for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) {
-            ref_id_ptr[iVP] = iw;
-            const PointType& r = VP.activeR(iat);
-            PointType ru(PrimLattice.toUnit_floor(r));
-            pos_ptr[0] = r[0];
-            pos_ptr[1] = r[1];
-            pos_ptr[2] = r[2];
-            pos_ptr[3] = ru[0];
-            pos_ptr[4] = ru[1];
-            pos_ptr[5] = ru[2];
-            pos_ptr += 6;
-        }
-    }
-
-    const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-    mw_ratios_private.resize(mw_nVP, NumTeams);
-    const auto padded_size = myV.size();
-    mw_offload_scratch.resize(padded_size * mw_nVP);
-    mw_results_scratch.resize(padded_size * mw_nVP);
-
-    // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
-    auto* offload_scratch_ptr = mw_offload_scratch.data();
-    auto* results_scratch_ptr = mw_results_scratch.data();
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* myKcart_ptr = myKcart->data();
-    auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data();
-    auto* ratios_private_ptr = mw_ratios_private.data();
-    const size_t first_spo_local = this->first_spo;
-
+  assert(this == &spo_list.getLeader());
+  auto& phi_leader            = spo_list.template getCastedLeader<SplineC2COMPTargetT>();
+  auto& mw_mem                = phi_leader.mw_mem_handle_.getResource();
+  auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D;
+  auto& mw_ratios_private     = mw_mem.mw_ratios_private;
+  auto& mw_offload_scratch    = mw_mem.mw_offload_scratch;
+  auto& mw_results_scratch    = mw_mem.mw_results_scratch;
+  const size_t nw             = spo_list.size();
+  const size_t orb_size       = phi_leader.size();
+
+  size_t mw_nVP = 0;
+  for (const VirtualParticleSetT<VT>& VP : vp_list)
+    mw_nVP += VP.getTotalNum();
+
+  const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(ST) + sizeof(int));
+  det_ratios_buffer_H2D.resize(packed_size);
+
+  // pack invRow_ptr_list to det_ratios_buffer_H2D
+  Vector<const ValueType*> ptr_buffer(reinterpret_cast<const ValueType**>(det_ratios_buffer_H2D.data()), nw);
+  for (size_t iw = 0; iw < nw; iw++)
+    ptr_buffer[iw] = invRow_ptr_list[iw];
+
+  // pack particle positions
+  auto* pos_ptr = reinterpret_cast<ST*>(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*));
+  auto* ref_id_ptr =
+      reinterpret_cast<int*>(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST));
+  size_t iVP = 0;
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    const VirtualParticleSetT<VT>& VP = vp_list[iw];
+    assert(ratios_list[iw].size() == VP.getTotalNum());
+    for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP)
     {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD(
-            "omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \
+      ref_id_ptr[iVP]    = iw;
+      const PointType& r = VP.activeR(iat);
+      PointType ru(PrimLattice.toUnit_floor(r));
+      pos_ptr[0] = r[0];
+      pos_ptr[1] = r[1];
+      pos_ptr[2] = r[2];
+      pos_ptr[3] = ru[0];
+      pos_ptr[4] = ru[1];
+      pos_ptr[5] = ru[2];
+      pos_ptr += 6;
+    }
+  }
+
+  const size_t ChunkSizePerTeam = 512;
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+  mw_ratios_private.resize(mw_nVP, NumTeams);
+  const auto padded_size = myV.size();
+  mw_offload_scratch.resize(padded_size * mw_nVP);
+  mw_results_scratch.resize(padded_size * mw_nVP);
+
+  // Ye: need to extract sizes and pointers before entering target region
+  const auto* spline_ptr         = SplineInst->getSplinePtr();
+  auto* offload_scratch_ptr      = mw_offload_scratch.data();
+  auto* results_scratch_ptr      = mw_results_scratch.data();
+  const auto myKcart_padded_size = myKcart->capacity();
+  auto* myKcart_ptr              = myKcart->data();
+  auto* buffer_H2D_ptr           = det_ratios_buffer_H2D.data();
+  auto* ratios_private_ptr       = mw_ratios_private.data();
+  const size_t first_spo_local   = this->first_spo;
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \
                 map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \
                 map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])")
-        for (int iat = 0; iat < mw_nVP; iat++)
-            for (int team_id = 0; team_id < NumTeams; team_id++) {
-                const size_t first = ChunkSizePerTeam * team_id;
-                const size_t last =
-                    omptarget::min(first + ChunkSizePerTeam, padded_size);
-
-                auto* restrict offload_scratch_iat_ptr =
-                    offload_scratch_ptr + padded_size * iat;
-                auto* restrict psi_iat_ptr =
-                    results_scratch_ptr + padded_size * iat;
-                auto* ref_id_ptr = reinterpret_cast<int*>(buffer_H2D_ptr +
-                    nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST));
-                auto* restrict psiinv_ptr = reinterpret_cast<const ValueType**>(
-                    buffer_H2D_ptr)[ref_id_ptr[iat]];
-                auto* restrict pos_scratch = reinterpret_cast<ST*>(
-                    buffer_H2D_ptr + nw * sizeof(ValueType*));
-
-                int ix, iy, iz;
-                ST a[4], b[4], c[4];
-                spline2::computeLocationAndFractional(spline_ptr,
-                    pos_scratch[iat * 6 + 3], pos_scratch[iat * 6 + 4],
-                    pos_scratch[iat * 6 + 5], ix, iy, iz, a, b, c);
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = 0; index < last - first; index++)
-                    spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz,
-                        first + index, a, b, c,
-                        offload_scratch_iat_ptr + first + index);
-                const size_t first_cplx = first / 2;
-                const size_t last_cplx = omptarget::min(last / 2, orb_size);
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = first_cplx; index < last_cplx; index++)
-                    C2C::assign_v(pos_scratch[iat * 6],
-                        pos_scratch[iat * 6 + 1], pos_scratch[iat * 6 + 2],
-                        psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr,
-                        myKcart_padded_size, first_spo_local, index);
-
-                ComplexT sum(0);
-                PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
-                for (int i = first_cplx; i < last_cplx; i++)
-                    sum += psi_iat_ptr[i] * psiinv_ptr[i];
-                ratios_private_ptr[iat * NumTeams + team_id] = sum;
-            }
-    }
-
-    // do the reduction manually
-    iVP = 0;
-    for (size_t iw = 0; iw < nw; iw++) {
-        auto& ratios = ratios_list[iw];
-        for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) {
-            ratios[iat] = ComplexT(0);
-            for (int tid = 0; tid < NumTeams; ++tid)
-                ratios[iat] += mw_ratios_private[iVP][tid];
-        }
+    for (int iat = 0; iat < mw_nVP; iat++)
+      for (int team_id = 0; team_id < NumTeams; team_id++)
+      {
+        const size_t first = ChunkSizePerTeam * team_id;
+        const size_t last  = omptarget::min(first + ChunkSizePerTeam, padded_size);
+
+        auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + padded_size * iat;
+        auto* restrict psi_iat_ptr             = results_scratch_ptr + padded_size * iat;
+        auto* ref_id_ptr = reinterpret_cast<int*>(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(ST));
+        auto* restrict psiinv_ptr  = reinterpret_cast<const ValueType**>(buffer_H2D_ptr)[ref_id_ptr[iat]];
+        auto* restrict pos_scratch = reinterpret_cast<ST*>(buffer_H2D_ptr + nw * sizeof(ValueType*));
+
+        int ix, iy, iz;
+        ST a[4], b[4], c[4];
+        spline2::computeLocationAndFractional(spline_ptr, pos_scratch[iat * 6 + 3], pos_scratch[iat * 6 + 4],
+                                              pos_scratch[iat * 6 + 5], ix, iy, iz, a, b, c);
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = 0; index < last - first; index++)
+          spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
+                                             offload_scratch_iat_ptr + first + index);
+        const size_t first_cplx = first / 2;
+        const size_t last_cplx  = omptarget::min(last / 2, orb_size);
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = first_cplx; index < last_cplx; index++)
+          C2C::assign_v(pos_scratch[iat * 6], pos_scratch[iat * 6 + 1], pos_scratch[iat * 6 + 2], psi_iat_ptr,
+                        offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local, index);
+
+        ComplexT sum(0);
+        PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
+        for (int i = first_cplx; i < last_cplx; i++)
+          sum += psi_iat_ptr[i] * psiinv_ptr[i];
+        ratios_private_ptr[iat * NumTeams + team_id] = sum;
+      }
+  }
+
+  // do the reduction manually: sum the per-team partial sums into each ratios_list entry
+  iVP = 0;
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    auto& ratios = ratios_list[iw];
+    for (size_t iat = 0; iat < ratios.size(); iat++, iVP++)
+    {
+      ratios[iat] = ComplexT(0);
+      for (int tid = 0; tid < NumTeams; ++tid)
+        ratios[iat] += mw_ratios_private[iVP][tid];
     }
+  }
 }
 
 /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
  * cartesian
  */
-template <typename ST, typename VT>
-inline void
-SplineC2COMPTargetT<ST, VT>::assign_vgl_from_l(
-    const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<typename ST, typename VT>
+inline void SplineC2COMPTargetT<ST, VT>::assign_vgl_from_l(const PointType& r,
+                                                           ValueVector& psi,
+                                                           GradVector& dpsi,
+                                                           ValueVector& d2psi)
 {
-    constexpr ST two(2);
-    const ST x = r[0], y = r[1], z = r[2];
+  constexpr ST two(2);
+  const ST x = r[0], y = r[1], z = r[2];
 
-    const ST* restrict k0 = myKcart->data(0);
-    const ST* restrict k1 = myKcart->data(1);
-    const ST* restrict k2 = myKcart->data(2);
+  const ST* restrict k0 = myKcart->data(0);
+  const ST* restrict k1 = myKcart->data(1);
+  const ST* restrict k2 = myKcart->data(2);
 
-    const ST* restrict g0 = myG.data(0);
-    const ST* restrict g1 = myG.data(1);
-    const ST* restrict g2 = myG.data(2);
+  const ST* restrict g0 = myG.data(0);
+  const ST* restrict g1 = myG.data(1);
+  const ST* restrict g2 = myG.data(2);
 
-    const size_t N = this->last_spo - this->first_spo;
+  const size_t N = this->last_spo - this->first_spo;
 #pragma omp simd
-    for (size_t j = 0; j < N; ++j) {
-        const size_t jr = j << 1;
-        const size_t ji = jr + 1;
-
-        const ST kX = k0[j];
-        const ST kY = k1[j];
-        const ST kZ = k2[j];
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-
-        // phase
-        ST s, c;
-        omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
-
-        // dot(PrimLattice.G,myG[j])
-        const ST dX_r = g0[jr];
-        const ST dY_r = g1[jr];
-        const ST dZ_r = g2[jr];
-
-        const ST dX_i = g0[ji];
-        const ST dY_i = g1[ji];
-        const ST dZ_i = g2[ji];
-
-        // \f$\nabla \psi_r + {\bf k}\psi_i\f$
-        const ST gX_r = dX_r + val_i * kX;
-        const ST gY_r = dY_r + val_i * kY;
-        const ST gZ_r = dZ_r + val_i * kZ;
-        const ST gX_i = dX_i - val_r * kX;
-        const ST gY_i = dY_i - val_r * kY;
-        const ST gZ_i = dZ_i - val_r * kZ;
-
-        const ST lap_r = myL[jr] + (*mKK)[j] * val_r +
-            two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
-        const ST lap_i = myL[ji] + (*mKK)[j] * val_i -
-            two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
-
-        const size_t psiIndex = j + this->first_spo;
-        psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
-        dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
-        dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
-        dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
-        d2psi[psiIndex] =
-            ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r);
-    }
+  for (size_t j = 0; j < N; ++j)
+  {
+    const size_t jr = j << 1;
+    const size_t ji = jr + 1;
+
+    const ST kX    = k0[j];
+    const ST kY    = k1[j];
+    const ST kZ    = k2[j];
+    const ST val_r = myV[jr];
+    const ST val_i = myV[ji];
+
+    // phase
+    ST s, c;
+    omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    // dot(PrimLattice.G,myG[j])
+    const ST dX_r = g0[jr];
+    const ST dY_r = g1[jr];
+    const ST dZ_r = g2[jr];
+
+    const ST dX_i = g0[ji];
+    const ST dY_i = g1[ji];
+    const ST dZ_i = g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const ST gX_r = dX_r + val_i * kX;
+    const ST gY_r = dY_r + val_i * kY;
+    const ST gZ_r = dZ_r + val_i * kZ;
+    const ST gX_i = dX_i - val_r * kX;
+    const ST gY_i = dY_i - val_r * kY;
+    const ST gZ_i = dZ_i - val_r * kZ;
+
+    const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
+    const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
+
+    const size_t psiIndex = j + this->first_spo;
+    psi[psiIndex]         = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]     = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]     = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]     = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+    d2psi[psiIndex]       = ComplexT(c * lap_r - s * lap_i, c * lap_i + s * lap_r);
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::evaluateVGL(const ParticleSetT<VT>& P,
-    const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::evaluateVGL(const ParticleSetT<VT>& P,
+                                              const int iat,
+                                              ValueVector& psi,
+                                              GradVector& dpsi,
+                                              ValueVector& d2psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
-
-    const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-
-    const auto padded_size = myV.size();
-    offload_scratch.resize(padded_size * SoAFields3D::NUM_FIELDS);
-    const auto orb_size = psi.size();
-    // for V(1)G(3)L(1) final result
-    results_scratch.resize(padded_size * 5);
-
-    // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
-    auto* offload_scratch_ptr = offload_scratch.data();
-    auto* results_scratch_ptr = results_scratch.data();
-    const auto x = r[0], y = r[1], z = r[2];
-    const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* mKK_ptr = mKK->data();
-    auto* GGt_ptr = GGt_offload->data();
-    auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
-    auto* myKcart_ptr = myKcart->data();
-    const size_t first_spo_local = this->first_spo;
-
-    {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
+
+  const size_t ChunkSizePerTeam = 512;
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+
+  const auto padded_size = myV.size();
+  offload_scratch.resize(padded_size * SoAFields3D::NUM_FIELDS);
+  const auto orb_size = psi.size();
+  // results buffer: 5 slabs of padded_size each -- value (1), gradient (3), laplacian (1)
+  results_scratch.resize(padded_size * 5);
+
+  // Ye: copy sizes and raw pointers into locals up front; the target region below works on these locals
+  const auto* spline_ptr    = SplineInst->getSplinePtr();
+  auto* offload_scratch_ptr = offload_scratch.data();
+  auto* results_scratch_ptr = results_scratch.data();
+  const auto x = r[0], y = r[1], z = r[2];
+  const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
+  const auto myKcart_padded_size = myKcart->capacity();
+  auto* mKK_ptr                  = mKK->data();
+  auto* GGt_ptr                  = GGt_offload->data();
+  auto* PrimLattice_G_ptr        = PrimLattice_G_offload->data();
+  auto* myKcart_ptr              = myKcart->data();
+  const size_t first_spo_local   = this->first_spo;
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
                 map(always, from: results_scratch_ptr[0:padded_size*5])")
-        for (int team_id = 0; team_id < NumTeams; team_id++) {
-            const size_t first = ChunkSizePerTeam * team_id;
-            const size_t last =
-                omptarget::min(first + ChunkSizePerTeam, padded_size);
-
-            int ix, iy, iz;
-            ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
-            spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix,
-                iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
-
-            const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1],
-                PrimLattice_G_ptr[2], PrimLattice_G_ptr[3],
-                PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
-                PrimLattice_G_ptr[6], PrimLattice_G_ptr[7],
-                PrimLattice_G_ptr[8]};
-            const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3],
-                GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7],
-                GGt_ptr[8]};
-
-            PRAGMA_OFFLOAD("omp parallel for")
-            for (int index = 0; index < last - first; index++) {
-                spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz,
-                    first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
-                    offload_scratch_ptr + first + index, padded_size);
-                const int output_index = first + index;
-                offload_scratch_ptr[padded_size * SoAFields3D::LAPL +
-                    output_index] =
-                    SymTrace(
-                        offload_scratch_ptr[padded_size * SoAFields3D::HESS00 +
-                            output_index],
-                        offload_scratch_ptr[padded_size * SoAFields3D::HESS01 +
-                            output_index],
-                        offload_scratch_ptr[padded_size * SoAFields3D::HESS02 +
-                            output_index],
-                        offload_scratch_ptr[padded_size * SoAFields3D::HESS11 +
-                            output_index],
-                        offload_scratch_ptr[padded_size * SoAFields3D::HESS12 +
-                            output_index],
-                        offload_scratch_ptr[padded_size * SoAFields3D::HESS22 +
-                            output_index],
-                        symGGt);
-            }
-
-            const size_t first_cplx = first / 2;
-            const size_t last_cplx = omptarget::min(last / 2, orb_size);
-            PRAGMA_OFFLOAD("omp parallel for")
-            for (int index = first_cplx; index < last_cplx; index++)
-                C2C::assign_vgl(x, y, z, results_scratch_ptr, padded_size,
-                    mKK_ptr, offload_scratch_ptr, padded_size, G, myKcart_ptr,
-                    myKcart_padded_size, first_spo_local, index);
-        }
-    }
-
-    for (size_t i = 0; i < orb_size; i++) {
-        psi[i] = results_scratch[i];
-        dpsi[i][0] = results_scratch[i + padded_size];
-        dpsi[i][1] = results_scratch[i + padded_size * 2];
-        dpsi[i][2] = results_scratch[i + padded_size * 3];
-        d2psi[i] = results_scratch[i + padded_size * 4];
+    for (int team_id = 0; team_id < NumTeams; team_id++)
+    {
+      const size_t first = ChunkSizePerTeam * team_id;
+      const size_t last  = omptarget::min(first + ChunkSizePerTeam, padded_size);
+
+      int ix, iy, iz;
+      ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
+      spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
+
+      const ST G[9]      = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
+                            PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
+                            PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
+      const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
+                            GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
+
+      PRAGMA_OFFLOAD("omp parallel for")
+      for (int index = 0; index < last - first; index++)
+      {
+        spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
+                                             offload_scratch_ptr + first + index, padded_size);
+        const int output_index = first + index;
+        offload_scratch_ptr[padded_size * SoAFields3D::LAPL + output_index] =
+            SymTrace(offload_scratch_ptr[padded_size * SoAFields3D::HESS00 + output_index],
+                     offload_scratch_ptr[padded_size * SoAFields3D::HESS01 + output_index],
+                     offload_scratch_ptr[padded_size * SoAFields3D::HESS02 + output_index],
+                     offload_scratch_ptr[padded_size * SoAFields3D::HESS11 + output_index],
+                     offload_scratch_ptr[padded_size * SoAFields3D::HESS12 + output_index],
+                     offload_scratch_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt);
+      }
+
+      const size_t first_cplx = first / 2;
+      const size_t last_cplx  = omptarget::min(last / 2, orb_size);
+      PRAGMA_OFFLOAD("omp parallel for")
+      for (int index = first_cplx; index < last_cplx; index++)
+        C2C::assign_vgl(x, y, z, results_scratch_ptr, padded_size, mKK_ptr, offload_scratch_ptr, padded_size, G,
+                        myKcart_ptr, myKcart_padded_size, first_spo_local, index);
     }
+  }
+
+  for (size_t i = 0; i < orb_size; i++)
+  {
+    psi[i]     = results_scratch[i];
+    dpsi[i][0] = results_scratch[i + padded_size];
+    dpsi[i][1] = results_scratch[i + padded_size * 2];
+    dpsi[i][2] = results_scratch[i + padded_size * 3];
+    d2psi[i]   = results_scratch[i + padded_size * 4];
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::evaluateVGLMultiPos(
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::evaluateVGLMultiPos(
     const Vector<ST, OffloadPinnedAllocator<ST>>& multi_pos,
     Vector<ST, OffloadPinnedAllocator<ST>>& offload_scratch,
     Vector<ComplexT, OffloadPinnedAllocator<ComplexT>>& results_scratch,
@@ -506,907 +473,758 @@ SplineC2COMPTargetT<ST, VT>::evaluateVGLMultiPos(
     const RefVector<GradVector>& dpsi_v_list,
     const RefVector<ValueVector>& d2psi_v_list) const
 {
-    const size_t num_pos = psi_v_list.size();
-    const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-    const auto padded_size = myV.size();
-    offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS);
-    const auto orb_size = psi_v_list[0].get().size();
-    // for V(1)G(3)L(1) final result
-    results_scratch.resize(padded_size * num_pos * 5);
-
-    // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
-    auto* pos_copy_ptr = multi_pos.data();
-    auto* offload_scratch_ptr = offload_scratch.data();
-    auto* results_scratch_ptr = results_scratch.data();
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* mKK_ptr = mKK->data();
-    auto* GGt_ptr = GGt_offload->data();
-    auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
-    auto* myKcart_ptr = myKcart->data();
-    const size_t first_spo_local = this->first_spo;
-
-    {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD(
-            "omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
+  const size_t num_pos          = psi_v_list.size();
+  const size_t ChunkSizePerTeam = 512;
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+  const auto padded_size        = myV.size();
+  offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS);
+  const auto orb_size = psi_v_list[0].get().size();
+  // results buffer: per position, 5 slabs of padded_size each -- value (1), gradient (3), laplacian (1)
+  results_scratch.resize(padded_size * num_pos * 5);
+
+  // Ye: copy sizes and raw pointers into locals up front; the target region below works on these locals
+  const auto* spline_ptr         = SplineInst->getSplinePtr();
+  auto* pos_copy_ptr             = multi_pos.data();
+  auto* offload_scratch_ptr      = offload_scratch.data();
+  auto* results_scratch_ptr      = results_scratch.data();
+  const auto myKcart_padded_size = myKcart->capacity();
+  auto* mKK_ptr                  = mKK->data();
+  auto* GGt_ptr                  = GGt_offload->data();
+  auto* PrimLattice_G_ptr        = PrimLattice_G_offload->data();
+  auto* myKcart_ptr              = myKcart->data();
+  const size_t first_spo_local   = this->first_spo;
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
                     map(always, to: pos_copy_ptr[0:num_pos*6]) \
                     map(always, from: results_scratch_ptr[0:padded_size*num_pos*5])")
-        for (int iw = 0; iw < num_pos; iw++)
-            for (int team_id = 0; team_id < NumTeams; team_id++) {
-                const size_t first = ChunkSizePerTeam * team_id;
-                const size_t last =
-                    omptarget::min(first + ChunkSizePerTeam, padded_size);
-
-                auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr +
-                    padded_size * iw * SoAFields3D::NUM_FIELDS;
-                auto* restrict psi_iw_ptr =
-                    results_scratch_ptr + padded_size * iw * 5;
-
-                int ix, iy, iz;
-                ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4],
-                    d2c[4];
-                spline2::computeLocationAndFractional(spline_ptr,
-                    pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4],
-                    pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc,
-                    d2a, d2b, d2c);
-
-                const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1],
-                    PrimLattice_G_ptr[2], PrimLattice_G_ptr[3],
-                    PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
-                    PrimLattice_G_ptr[6], PrimLattice_G_ptr[7],
-                    PrimLattice_G_ptr[8]};
-                const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3],
-                    GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4],
-                    GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = 0; index < last - first; index++) {
-                    spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz,
-                        first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
-                        offload_scratch_iw_ptr + first + index, padded_size);
-                    const int output_index = first + index;
-                    offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL +
-                        output_index] =
-                        SymTrace(offload_scratch_iw_ptr[padded_size *
-                                         SoAFields3D::HESS00 +
-                                     output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS01 +
-                                output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS02 +
-                                output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS11 +
-                                output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS12 +
-                                output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS22 +
-                                output_index],
-                            symGGt);
-                }
-
-                const size_t first_cplx = first / 2;
-                const size_t last_cplx = omptarget::min(last / 2, orb_size);
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = first_cplx; index < last_cplx; index++)
-                    C2C::assign_vgl(pos_copy_ptr[iw * 6],
-                        pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2],
-                        psi_iw_ptr, padded_size, mKK_ptr,
-                        offload_scratch_iw_ptr, padded_size, G, myKcart_ptr,
-                        myKcart_padded_size, first_spo_local, index);
-            }
-    }
-
-    for (int iw = 0; iw < num_pos; ++iw) {
-        auto* restrict results_iw_ptr =
-            results_scratch_ptr + padded_size * iw * 5;
-        ValueVector& psi_v(psi_v_list[iw]);
-        GradVector& dpsi_v(dpsi_v_list[iw]);
-        ValueVector& d2psi_v(d2psi_v_list[iw]);
-        for (size_t i = 0; i < orb_size; i++) {
-            psi_v[i] = results_iw_ptr[i];
-            dpsi_v[i][0] = results_iw_ptr[i + padded_size];
-            dpsi_v[i][1] = results_iw_ptr[i + padded_size * 2];
-            dpsi_v[i][2] = results_iw_ptr[i + padded_size * 3];
-            d2psi_v[i] = results_iw_ptr[i + padded_size * 4];
+    for (int iw = 0; iw < num_pos; iw++)
+      for (int team_id = 0; team_id < NumTeams; team_id++)
+      {
+        const size_t first = ChunkSizePerTeam * team_id;
+        const size_t last  = omptarget::min(first + ChunkSizePerTeam, padded_size);
+
+        auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + padded_size * iw * SoAFields3D::NUM_FIELDS;
+        auto* restrict psi_iw_ptr             = results_scratch_ptr + padded_size * iw * 5;
+
+        int ix, iy, iz;
+        ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
+        spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4],
+                                              pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
+
+        const ST G[9]      = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
+                              PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
+                              PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
+        const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
+                              GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = 0; index < last - first; index++)
+        {
+          spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b,
+                                               d2c, offload_scratch_iw_ptr + first + index, padded_size);
+          const int output_index = first + index;
+          offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + output_index] =
+              SymTrace(offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS00 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS01 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS02 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS11 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS12 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt);
         }
+
+        const size_t first_cplx = first / 2;
+        const size_t last_cplx  = omptarget::min(last / 2, orb_size);
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = first_cplx; index < last_cplx; index++)
+          C2C::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr,
+                          padded_size, mKK_ptr, offload_scratch_iw_ptr, padded_size, G, myKcart_ptr,
+                          myKcart_padded_size, first_spo_local, index);
+      }
+  }
+
+  for (int iw = 0; iw < num_pos; ++iw)
+  {
+    auto* restrict results_iw_ptr = results_scratch_ptr + padded_size * iw * 5;
+    ValueVector& psi_v(psi_v_list[iw]);
+    GradVector& dpsi_v(dpsi_v_list[iw]);
+    ValueVector& d2psi_v(d2psi_v_list[iw]);
+    for (size_t i = 0; i < orb_size; i++)
+    {
+      psi_v[i]     = results_iw_ptr[i];
+      dpsi_v[i][0] = results_iw_ptr[i + padded_size];
+      dpsi_v[i][1] = results_iw_ptr[i + padded_size * 2];
+      dpsi_v[i][2] = results_iw_ptr[i + padded_size * 3];
+      d2psi_v[i]   = results_iw_ptr[i + padded_size * 4];
     }
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::mw_evaluateVGL(
-    const RefVectorWithLeader<SPOSetT<VT>>& sa_list,
-    const RefVectorWithLeader<ParticleSetT<VT>>& P_list, int iat,
-    const RefVector<ValueVector>& psi_v_list,
-    const RefVector<GradVector>& dpsi_v_list,
-    const RefVector<ValueVector>& d2psi_v_list) const
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<VT>>& sa_list,
+                                                 const RefVectorWithLeader<ParticleSetT<VT>>& P_list,
+                                                 int iat,
+                                                 const RefVector<ValueVector>& psi_v_list,
+                                                 const RefVector<GradVector>& dpsi_v_list,
+                                                 const RefVector<ValueVector>& d2psi_v_list) const
 {
-    assert(this == &sa_list.getLeader());
-    auto& phi_leader = sa_list.template getCastedLeader<SplineC2COMPTargetT>();
-    auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
-    auto& mw_pos_copy = mw_mem.mw_pos_copy;
-    auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
-    auto& mw_results_scratch = mw_mem.mw_results_scratch;
-    const int nwalkers = sa_list.size();
-    mw_pos_copy.resize(nwalkers * 6);
-
-    // pack particle positions
-    for (int iw = 0; iw < nwalkers; ++iw) {
-        const PointType& r = P_list[iw].activeR(iat);
-        PointType ru(PrimLattice.toUnit_floor(r));
-        mw_pos_copy[iw * 6] = r[0];
-        mw_pos_copy[iw * 6 + 1] = r[1];
-        mw_pos_copy[iw * 6 + 2] = r[2];
-        mw_pos_copy[iw * 6 + 3] = ru[0];
-        mw_pos_copy[iw * 6 + 4] = ru[1];
-        mw_pos_copy[iw * 6 + 5] = ru[2];
-    }
-
-    phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch,
-        mw_results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list);
+  assert(this == &sa_list.getLeader());
+  auto& phi_leader         = sa_list.template getCastedLeader<SplineC2COMPTargetT>();
+  auto& mw_mem             = phi_leader.mw_mem_handle_.getResource();
+  auto& mw_pos_copy        = mw_mem.mw_pos_copy;
+  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
+  auto& mw_results_scratch = mw_mem.mw_results_scratch;
+  const int nwalkers       = sa_list.size();
+  mw_pos_copy.resize(nwalkers * 6);
+
+  // pack 6 values per walker: r[0..2] from activeR(iat), then ru[0..2] from PrimLattice.toUnit_floor(r)
+  for (int iw = 0; iw < nwalkers; ++iw)
+  {
+    const PointType& r = P_list[iw].activeR(iat);
+    PointType ru(PrimLattice.toUnit_floor(r));
+    mw_pos_copy[iw * 6]     = r[0];
+    mw_pos_copy[iw * 6 + 1] = r[1];
+    mw_pos_copy[iw * 6 + 2] = r[2];
+    mw_pos_copy[iw * 6 + 3] = ru[0];
+    mw_pos_copy[iw * 6 + 4] = ru[1];
+    mw_pos_copy[iw * 6 + 5] = ru[2];
+  }
+
+  phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list,
+                                 d2psi_v_list);
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::mw_evaluateVGLandDetRatioGrads(
-    const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<VT>>& P_list, int iat,
-    const std::vector<const ValueType*>& invRow_ptr_list,
-    OffloadMWVGLArray& phi_vgl_v, std::vector<ValueType>& ratios,
-    std::vector<GradType>& grads) const
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
+                                                                 const RefVectorWithLeader<ParticleSetT<VT>>& P_list,
+                                                                 int iat,
+                                                                 const std::vector<const ValueType*>& invRow_ptr_list,
+                                                                 OffloadMWVGLArray& phi_vgl_v,
+                                                                 std::vector<ValueType>& ratios,
+                                                                 std::vector<GradType>& grads) const
 {
-    assert(this == &spo_list.getLeader());
-    auto& phi_leader = spo_list.template getCastedLeader<SplineC2COMPTargetT>();
-    auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
-    auto& buffer_H2D = mw_mem.buffer_H2D;
-    auto& rg_private = mw_mem.rg_private;
-    auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
-    auto& mw_results_scratch = mw_mem.mw_results_scratch;
-    const int nwalkers = spo_list.size();
-    buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*));
-
-    // pack particle positions and invRow pointers.
-    for (int iw = 0; iw < nwalkers; ++iw) {
-        const PointType& r = P_list[iw].activeR(iat);
-        PointType ru(PrimLattice.toUnit_floor(r));
-        Vector<ST> pos_copy(reinterpret_cast<ST*>(buffer_H2D[iw]), 6);
-
-        pos_copy[0] = r[0];
-        pos_copy[1] = r[1];
-        pos_copy[2] = r[2];
-        pos_copy[3] = ru[0];
-        pos_copy[4] = ru[1];
-        pos_copy[5] = ru[2];
-
-        auto& invRow_ptr = *reinterpret_cast<const ValueType**>(
-            buffer_H2D[iw] + sizeof(ST) * 6);
-        invRow_ptr = invRow_ptr_list[iw];
-    }
-
-    const size_t num_pos = nwalkers;
-    const auto orb_size = phi_vgl_v.size(2);
-    const auto padded_size = myV.size();
-    const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-    mw_offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS);
-    // for V(1)G(3)L(1) final result
-    mw_results_scratch.resize(padded_size * num_pos * 5);
-    // per team ratio and grads
-    rg_private.resize(num_pos, NumTeams * 4);
-
-    // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
-    auto* buffer_H2D_ptr = buffer_H2D.data();
-    auto* offload_scratch_ptr = mw_offload_scratch.data();
-    auto* results_scratch_ptr = mw_results_scratch.data();
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* mKK_ptr = mKK->data();
-    auto* GGt_ptr = GGt_offload->data();
-    auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
-    auto* myKcart_ptr = myKcart->data();
-    auto* phi_vgl_ptr = phi_vgl_v.data();
-    auto* rg_private_ptr = rg_private.data();
-    const size_t buffer_H2D_stride = buffer_H2D.cols();
-    const size_t first_spo_local = this->first_spo;
-    const size_t phi_vgl_stride = num_pos * orb_size;
-
-    {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD(
-            "omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
+  assert(this == &spo_list.getLeader());
+  auto& phi_leader         = spo_list.template getCastedLeader<SplineC2COMPTargetT>();
+  auto& mw_mem             = phi_leader.mw_mem_handle_.getResource();
+  auto& buffer_H2D         = mw_mem.buffer_H2D;
+  auto& rg_private         = mw_mem.rg_private;
+  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
+  auto& mw_results_scratch = mw_mem.mw_results_scratch;
+  const int nwalkers       = spo_list.size();
+  buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*));
+
+  // pack particle positions and invRow pointers.
+  for (int iw = 0; iw < nwalkers; ++iw)
+  {
+    const PointType& r = P_list[iw].activeR(iat);
+    PointType ru(PrimLattice.toUnit_floor(r));
+    Vector<ST> pos_copy(reinterpret_cast<ST*>(buffer_H2D[iw]), 6);
+
+    pos_copy[0] = r[0];
+    pos_copy[1] = r[1];
+    pos_copy[2] = r[2];
+    pos_copy[3] = ru[0];
+    pos_copy[4] = ru[1];
+    pos_copy[5] = ru[2];
+
+    auto& invRow_ptr = *reinterpret_cast<const ValueType**>(buffer_H2D[iw] + sizeof(ST) * 6);
+    invRow_ptr       = invRow_ptr_list[iw];
+  }
+
+  const size_t num_pos          = nwalkers;
+  const auto orb_size           = phi_vgl_v.size(2);
+  const auto padded_size        = myV.size();
+  const size_t ChunkSizePerTeam = 512;
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+  mw_offload_scratch.resize(padded_size * num_pos * SoAFields3D::NUM_FIELDS);
+  // for V(1)G(3)L(1) final result
+  mw_results_scratch.resize(padded_size * num_pos * 5);
+  // per team ratio and grads
+  rg_private.resize(num_pos, NumTeams * 4);
+
+  // Ye: need to extract sizes and pointers before entering target region
+  const auto* spline_ptr         = SplineInst->getSplinePtr();
+  auto* buffer_H2D_ptr           = buffer_H2D.data();
+  auto* offload_scratch_ptr      = mw_offload_scratch.data();
+  auto* results_scratch_ptr      = mw_results_scratch.data();
+  const auto myKcart_padded_size = myKcart->capacity();
+  auto* mKK_ptr                  = mKK->data();
+  auto* GGt_ptr                  = GGt_offload->data();
+  auto* PrimLattice_G_ptr        = PrimLattice_G_offload->data();
+  auto* myKcart_ptr              = myKcart->data();
+  auto* phi_vgl_ptr              = phi_vgl_v.data();
+  auto* rg_private_ptr           = rg_private.data();
+  const size_t buffer_H2D_stride = buffer_H2D.cols();
+  const size_t first_spo_local   = this->first_spo;
+  const size_t phi_vgl_stride    = num_pos * orb_size;
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
                     map(always, to: buffer_H2D_ptr[:buffer_H2D.size()]) \
                     map(always, from: rg_private_ptr[0:rg_private.size()])")
-        for (int iw = 0; iw < num_pos; iw++)
-            for (int team_id = 0; team_id < NumTeams; team_id++) {
-                const size_t first = ChunkSizePerTeam * team_id;
-                const size_t last =
-                    omptarget::min(first + ChunkSizePerTeam, padded_size);
-
-                auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr +
-                    padded_size * iw * SoAFields3D::NUM_FIELDS;
-                auto* restrict psi_iw_ptr =
-                    results_scratch_ptr + padded_size * iw * 5;
-                const auto* restrict pos_iw_ptr = reinterpret_cast<ST*>(
-                    buffer_H2D_ptr + buffer_H2D_stride * iw);
-                const auto* restrict invRow_iw_ptr =
-                    *reinterpret_cast<ValueType**>(buffer_H2D_ptr +
-                        buffer_H2D_stride * iw + sizeof(ST) * 6);
-
-                int ix, iy, iz;
-                ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4],
-                    d2c[4];
-                spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3],
-                    pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, c, da, db,
-                    dc, d2a, d2b, d2c);
-
-                const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1],
-                    PrimLattice_G_ptr[2], PrimLattice_G_ptr[3],
-                    PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
-                    PrimLattice_G_ptr[6], PrimLattice_G_ptr[7],
-                    PrimLattice_G_ptr[8]};
-                const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3],
-                    GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4],
-                    GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = 0; index < last - first; index++) {
-                    spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz,
-                        first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
-                        offload_scratch_iw_ptr + first + index, padded_size);
-                    const int output_index = first + index;
-                    offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL +
-                        output_index] =
-                        SymTrace(offload_scratch_iw_ptr[padded_size *
-                                         SoAFields3D::HESS00 +
-                                     output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS01 +
-                                output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS02 +
-                                output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS11 +
-                                output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS12 +
-                                output_index],
-                            offload_scratch_iw_ptr[padded_size *
-                                    SoAFields3D::HESS22 +
-                                output_index],
-                            symGGt);
-                }
-
-                const size_t first_cplx = first / 2;
-                const size_t last_cplx = omptarget::min(last / 2, orb_size);
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = first_cplx; index < last_cplx; index++)
-                    C2C::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2],
-                        psi_iw_ptr, padded_size, mKK_ptr,
-                        offload_scratch_iw_ptr, padded_size, G, myKcart_ptr,
-                        myKcart_padded_size, first_spo_local, index);
-
-                ValueType* restrict psi = psi_iw_ptr;
-                ValueType* restrict dpsi_x = psi_iw_ptr + padded_size;
-                ValueType* restrict dpsi_y = psi_iw_ptr + padded_size * 2;
-                ValueType* restrict dpsi_z = psi_iw_ptr + padded_size * 3;
-                ValueType* restrict d2psi = psi_iw_ptr + padded_size * 4;
-
-                ValueType* restrict out_phi = phi_vgl_ptr + iw * orb_size;
-                ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride;
-                ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride;
-                ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride;
-                ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride;
-
-                ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0);
-                PRAGMA_OFFLOAD("omp parallel for \
-                        reduction(+: ratio, grad_x, grad_y, grad_z)")
-                for (size_t j = first_cplx; j < last_cplx; j++) {
-                    const size_t psiIndex = first_spo_local + j;
-
-                    out_phi[psiIndex] = psi[psiIndex];
-                    out_dphi_x[psiIndex] = dpsi_x[psiIndex];
-                    out_dphi_y[psiIndex] = dpsi_y[psiIndex];
-                    out_dphi_z[psiIndex] = dpsi_z[psiIndex];
-                    out_d2phi[psiIndex] = d2psi[psiIndex];
-
-                    ratio += psi[psiIndex] * invRow_iw_ptr[psiIndex];
-                    grad_x += dpsi_x[psiIndex] * invRow_iw_ptr[psiIndex];
-                    grad_y += dpsi_y[psiIndex] * invRow_iw_ptr[psiIndex];
-                    grad_z += dpsi_z[psiIndex] * invRow_iw_ptr[psiIndex];
-                }
-
-                rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio;
-                rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x;
-                rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y;
-                rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z;
-            }
-    }
+    for (int iw = 0; iw < num_pos; iw++)
+      for (int team_id = 0; team_id < NumTeams; team_id++)
+      {
+        const size_t first = ChunkSizePerTeam * team_id;
+        const size_t last  = omptarget::min(first + ChunkSizePerTeam, padded_size);
+
+        auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + padded_size * iw * SoAFields3D::NUM_FIELDS;
+        auto* restrict psi_iw_ptr             = results_scratch_ptr + padded_size * iw * 5;
+        const auto* restrict pos_iw_ptr       = reinterpret_cast<ST*>(buffer_H2D_ptr + buffer_H2D_stride * iw);
+        const auto* restrict invRow_iw_ptr =
+            *reinterpret_cast<ValueType**>(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6);
+
+        int ix, iy, iz;
+        ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
+        spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b,
+                                              c, da, db, dc, d2a, d2b, d2c);
+
+        const ST G[9]      = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
+                              PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
+                              PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
+        const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
+                              GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = 0; index < last - first; index++)
+        {
+          spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b,
+                                               d2c, offload_scratch_iw_ptr + first + index, padded_size);
+          const int output_index = first + index;
+          offload_scratch_iw_ptr[padded_size * SoAFields3D::LAPL + output_index] =
+              SymTrace(offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS00 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS01 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS02 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS11 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS12 + output_index],
+                       offload_scratch_iw_ptr[padded_size * SoAFields3D::HESS22 + output_index], symGGt);
+        }
 
-    for (int iw = 0; iw < num_pos; iw++) {
-        ValueType ratio(0);
-        for (int team_id = 0; team_id < NumTeams; team_id++)
-            ratio += rg_private[iw][team_id * 4];
-        ratios[iw] = ratio;
-
-        ValueType grad_x(0), grad_y(0), grad_z(0);
-        for (int team_id = 0; team_id < NumTeams; team_id++) {
-            grad_x += rg_private[iw][team_id * 4 + 1];
-            grad_y += rg_private[iw][team_id * 4 + 2];
-            grad_z += rg_private[iw][team_id * 4 + 3];
+        const size_t first_cplx = first / 2;
+        const size_t last_cplx  = omptarget::min(last / 2, orb_size);
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = first_cplx; index < last_cplx; index++)
+          C2C::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, padded_size, mKK_ptr,
+                          offload_scratch_iw_ptr, padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local,
+                          index);
+
+        ValueType* restrict psi    = psi_iw_ptr;
+        ValueType* restrict dpsi_x = psi_iw_ptr + padded_size;
+        ValueType* restrict dpsi_y = psi_iw_ptr + padded_size * 2;
+        ValueType* restrict dpsi_z = psi_iw_ptr + padded_size * 3;
+        ValueType* restrict d2psi  = psi_iw_ptr + padded_size * 4;
+
+        ValueType* restrict out_phi    = phi_vgl_ptr + iw * orb_size;
+        ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride;
+        ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride;
+        ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride;
+        ValueType* restrict out_d2phi  = out_dphi_z + phi_vgl_stride;
+
+        ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0);
+        PRAGMA_OFFLOAD("omp parallel for \
+                        reduction(+: ratio, grad_x, grad_y, grad_z)")
+        for (size_t j = first_cplx; j < last_cplx; j++)
+        {
+          const size_t psiIndex = first_spo_local + j;
+
+          out_phi[psiIndex]    = psi[psiIndex];
+          out_dphi_x[psiIndex] = dpsi_x[psiIndex];
+          out_dphi_y[psiIndex] = dpsi_y[psiIndex];
+          out_dphi_z[psiIndex] = dpsi_z[psiIndex];
+          out_d2phi[psiIndex]  = d2psi[psiIndex];
+
+          ratio += psi[psiIndex] * invRow_iw_ptr[psiIndex];
+          grad_x += dpsi_x[psiIndex] * invRow_iw_ptr[psiIndex];
+          grad_y += dpsi_y[psiIndex] * invRow_iw_ptr[psiIndex];
+          grad_z += dpsi_z[psiIndex] * invRow_iw_ptr[psiIndex];
         }
-        grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio};
+
+        rg_private_ptr[(iw * NumTeams + team_id) * 4]     = ratio;
+        rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x;
+        rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y;
+        rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z;
+      }
+  }
+
+  for (int iw = 0; iw < num_pos; iw++)
+  {
+    ValueType ratio(0);
+    for (int team_id = 0; team_id < NumTeams; team_id++)
+      ratio += rg_private[iw][team_id * 4];
+    ratios[iw] = ratio;
+
+    ValueType grad_x(0), grad_y(0), grad_z(0);
+    for (int team_id = 0; team_id < NumTeams; team_id++)
+    {
+      grad_x += rg_private[iw][team_id * 4 + 1];
+      grad_y += rg_private[iw][team_id * 4 + 2];
+      grad_z += rg_private[iw][team_id * 4 + 3];
     }
+    grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio};
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::assign_vgh(const PointType& r, ValueVector& psi,
-    GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) const
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::assign_vgh(const PointType& r,
+                                             ValueVector& psi,
+                                             GradVector& dpsi,
+                                             HessVector& grad_grad_psi,
+                                             int first,
+                                             int last) const
 {
-    // protect last
-    last = last > this->kPoints.size() ? this->kPoints.size() : last;
-
-    const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1),
-             g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
-             g11 = PrimLattice.G(4), g12 = PrimLattice.G(5),
-             g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
-             g22 = PrimLattice.G(8);
-    const ST x = r[0], y = r[1], z = r[2];
-
-    const ST* restrict k0 = myKcart->data(0);
-    const ST* restrict k1 = myKcart->data(1);
-    const ST* restrict k2 = myKcart->data(2);
-
-    const ST* restrict g0 = myG.data(0);
-    const ST* restrict g1 = myG.data(1);
-    const ST* restrict g2 = myG.data(2);
-    const ST* restrict h00 = myH.data(0);
-    const ST* restrict h01 = myH.data(1);
-    const ST* restrict h02 = myH.data(2);
-    const ST* restrict h11 = myH.data(3);
-    const ST* restrict h12 = myH.data(4);
-    const ST* restrict h22 = myH.data(5);
+  // protect last
+  last = last > this->kPoints.size() ? this->kPoints.size() : last;
+
+  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+           g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+           g22 = PrimLattice.G(8);
+  const ST x = r[0], y = r[1], z = r[2];
+
+  const ST* restrict k0 = myKcart->data(0);
+  const ST* restrict k1 = myKcart->data(1);
+  const ST* restrict k2 = myKcart->data(2);
+
+  const ST* restrict g0  = myG.data(0);
+  const ST* restrict g1  = myG.data(1);
+  const ST* restrict g2  = myG.data(2);
+  const ST* restrict h00 = myH.data(0);
+  const ST* restrict h01 = myH.data(1);
+  const ST* restrict h02 = myH.data(2);
+  const ST* restrict h11 = myH.data(3);
+  const ST* restrict h12 = myH.data(4);
+  const ST* restrict h22 = myH.data(5);
 
 #pragma omp simd
-    for (size_t j = first; j < last; ++j) {
-        int jr = j << 1;
-        int ji = jr + 1;
-
-        const ST kX = k0[j];
-        const ST kY = k1[j];
-        const ST kZ = k2[j];
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-
-        // phase
-        ST s, c;
-        omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
-
-        // dot(PrimLattice.G,myG[j])
-        const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
-        const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
-        const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
-
-        const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
-        const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
-        const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
-
-        // \f$\nabla \psi_r + {\bf k}\psi_i\f$
-        const ST gX_r = dX_r + val_i * kX;
-        const ST gY_r = dY_r + val_i * kY;
-        const ST gZ_r = dZ_r + val_i * kZ;
-        const ST gX_i = dX_i - val_r * kX;
-        const ST gY_i = dY_i - val_r * kY;
-        const ST gZ_i = dZ_i - val_r * kZ;
-
-        const size_t psiIndex = j + this->first_spo;
-        psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
-        dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
-        dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
-        dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
-
-        const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g00, g01, g02, g00, g01, g02) +
-            kX * (gX_i + dX_i);
-        const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g00, g01, g02, g10, g11, g12) +
-            kX * (gY_i + dY_i);
-        const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g00, g01, g02, g20, g21, g22) +
-            kX * (gZ_i + dZ_i);
-        const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g10, g11, g12, g00, g01, g02) +
-            kY * (gX_i + dX_i);
-        const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g10, g11, g12, g10, g11, g12) +
-            kY * (gY_i + dY_i);
-        const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g10, g11, g12, g20, g21, g22) +
-            kY * (gZ_i + dZ_i);
-        const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g20, g21, g22, g00, g01, g02) +
-            kZ * (gX_i + dX_i);
-        const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g20, g21, g22, g10, g11, g12) +
-            kZ * (gY_i + dY_i);
-        const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g20, g21, g22, g20, g21, g22) +
-            kZ * (gZ_i + dZ_i);
-
-        const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g00, g01, g02, g00, g01, g02) -
-            kX * (gX_r + dX_r);
-        const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g00, g01, g02, g10, g11, g12) -
-            kX * (gY_r + dY_r);
-        const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g00, g01, g02, g20, g21, g22) -
-            kX * (gZ_r + dZ_r);
-        const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g10, g11, g12, g00, g01, g02) -
-            kY * (gX_r + dX_r);
-        const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g10, g11, g12, g10, g11, g12) -
-            kY * (gY_r + dY_r);
-        const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g10, g11, g12, g20, g21, g22) -
-            kY * (gZ_r + dZ_r);
-        const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g20, g21, g22, g00, g01, g02) -
-            kZ * (gX_r + dX_r);
-        const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g20, g21, g22, g10, g11, g12) -
-            kZ * (gY_r + dY_r);
-        const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g20, g21, g22, g20, g21, g22) -
-            kZ * (gZ_r + dZ_r);
-
-        grad_grad_psi[psiIndex][0] =
-            ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
-        grad_grad_psi[psiIndex][1] =
-            ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
-        grad_grad_psi[psiIndex][2] =
-            ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
-        grad_grad_psi[psiIndex][3] =
-            ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r);
-        grad_grad_psi[psiIndex][4] =
-            ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
-        grad_grad_psi[psiIndex][5] =
-            ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
-        grad_grad_psi[psiIndex][6] =
-            ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r);
-        grad_grad_psi[psiIndex][7] =
-            ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r);
-        grad_grad_psi[psiIndex][8] =
-            ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
-    }
+  for (size_t j = first; j < last; ++j)
+  {
+    int jr = j << 1;
+    int ji = jr + 1;
+
+    const ST kX    = k0[j];
+    const ST kY    = k1[j];
+    const ST kZ    = k2[j];
+    const ST val_r = myV[jr];
+    const ST val_i = myV[ji];
+
+    // phase
+    ST s, c;
+    omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    // dot(PrimLattice.G,myG[j])
+    const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const ST gX_r = dX_r + val_i * kX;
+    const ST gY_r = dY_r + val_i * kY;
+    const ST gZ_r = dZ_r + val_i * kZ;
+    const ST gX_i = dX_i - val_r * kX;
+    const ST gY_i = dY_i - val_r * kY;
+    const ST gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = j + this->first_spo;
+    psi[psiIndex]         = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]     = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]     = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]     = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+
+    const ST h_xx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
+    const ST h_xy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
+    const ST h_xz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
+    const ST h_yx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
+    const ST h_yy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
+    const ST h_yz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
+    const ST h_zx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
+    const ST h_zy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
+    const ST h_zz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
+
+    const ST h_xx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
+    const ST h_xy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
+    const ST h_xz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
+    const ST h_yx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
+    const ST h_yy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
+    const ST h_yz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
+    const ST h_zx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
+    const ST h_zy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
+    const ST h_zz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
+
+    grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
+    grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
+    grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
+    grad_grad_psi[psiIndex][3] = ComplexT(c * h_yx_r - s * h_yx_i, c * h_yx_i + s * h_yx_r);
+    grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
+    grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
+    grad_grad_psi[psiIndex][6] = ComplexT(c * h_zx_r - s * h_zx_i, c * h_zx_i + s * h_zx_r);
+    grad_grad_psi[psiIndex][7] = ComplexT(c * h_zy_r - s * h_zy_i, c * h_zy_i + s * h_zy_r);
+    grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::evaluateVGH(const ParticleSetT<VT>& P,
-    const int iat, ValueVector& psi, GradVector& dpsi,
-    HessVector& grad_grad_psi)
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::evaluateVGH(const ParticleSetT<VT>& P,
+                                              const int iat,
+                                              ValueVector& psi,
+                                              GradVector& dpsi,
+                                              HessVector& grad_grad_psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
 
 #pragma omp parallel
-    {
-        int first, last;
-        // Factor of 2 because psi is complex and the spline storage and
-        // evaluation uses a real type
-        FairDivideAligned(2 * psi.size(), getAlignment<ST>(),
-            omp_get_num_threads(), omp_get_thread_num(), first, last);
-
-        spline2::evaluate3d_vgh(
-            SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
-        assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2);
-    }
+  {
+    int first, last;
+    // Factor of 2 because psi is complex and the spline storage and
+    // evaluation uses a real type
+    FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+
+    spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
+    assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2);
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::assign_vghgh(const PointType& r, ValueVector& psi,
-    GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi,
-    int first, int last) const
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::assign_vghgh(const PointType& r,
+                                               ValueVector& psi,
+                                               GradVector& dpsi,
+                                               HessVector& grad_grad_psi,
+                                               GGGVector& grad_grad_grad_psi,
+                                               int first,
+                                               int last) const
 {
-    // protect last
-    last = last < 0 ?
-        this->kPoints.size() :
-        (last > this->kPoints.size() ? this->kPoints.size() : last);
-
-    const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1),
-             g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
-             g11 = PrimLattice.G(4), g12 = PrimLattice.G(5),
-             g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
-             g22 = PrimLattice.G(8);
-    const ST x = r[0], y = r[1], z = r[2];
-
-    const ST* restrict k0 = myKcart->data(0);
-    const ST* restrict k1 = myKcart->data(1);
-    const ST* restrict k2 = myKcart->data(2);
-
-    const ST* restrict g0 = myG.data(0);
-    const ST* restrict g1 = myG.data(1);
-    const ST* restrict g2 = myG.data(2);
-    const ST* restrict h00 = myH.data(0);
-    const ST* restrict h01 = myH.data(1);
-    const ST* restrict h02 = myH.data(2);
-    const ST* restrict h11 = myH.data(3);
-    const ST* restrict h12 = myH.data(4);
-    const ST* restrict h22 = myH.data(5);
-
-    const ST* restrict gh000 = mygH.data(0);
-    const ST* restrict gh001 = mygH.data(1);
-    const ST* restrict gh002 = mygH.data(2);
-    const ST* restrict gh011 = mygH.data(3);
-    const ST* restrict gh012 = mygH.data(4);
-    const ST* restrict gh022 = mygH.data(5);
-    const ST* restrict gh111 = mygH.data(6);
-    const ST* restrict gh112 = mygH.data(7);
-    const ST* restrict gh122 = mygH.data(8);
-    const ST* restrict gh222 = mygH.data(9);
+  // protect last
+  last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last);
+
+  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+           g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+           g22 = PrimLattice.G(8);
+  const ST x = r[0], y = r[1], z = r[2];
+
+  const ST* restrict k0 = myKcart->data(0);
+  const ST* restrict k1 = myKcart->data(1);
+  const ST* restrict k2 = myKcart->data(2);
+
+  const ST* restrict g0  = myG.data(0);
+  const ST* restrict g1  = myG.data(1);
+  const ST* restrict g2  = myG.data(2);
+  const ST* restrict h00 = myH.data(0);
+  const ST* restrict h01 = myH.data(1);
+  const ST* restrict h02 = myH.data(2);
+  const ST* restrict h11 = myH.data(3);
+  const ST* restrict h12 = myH.data(4);
+  const ST* restrict h22 = myH.data(5);
+
+  const ST* restrict gh000 = mygH.data(0);
+  const ST* restrict gh001 = mygH.data(1);
+  const ST* restrict gh002 = mygH.data(2);
+  const ST* restrict gh011 = mygH.data(3);
+  const ST* restrict gh012 = mygH.data(4);
+  const ST* restrict gh022 = mygH.data(5);
+  const ST* restrict gh111 = mygH.data(6);
+  const ST* restrict gh112 = mygH.data(7);
+  const ST* restrict gh122 = mygH.data(8);
+  const ST* restrict gh222 = mygH.data(9);
 
 // SIMD doesn't work quite right yet.  Comment out until further debugging.
 #pragma omp simd
-    for (size_t j = first; j < last; ++j) {
-        int jr = j << 1;
-        int ji = jr + 1;
-
-        const ST kX = k0[j];
-        const ST kY = k1[j];
-        const ST kZ = k2[j];
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-
-        // phase
-        ST s, c;
-        omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
-
-        // dot(PrimLattice.G,myG[j])
-        const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
-        const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
-        const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
-
-        const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
-        const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
-        const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
-
-        // \f$\nabla \psi_r + {\bf k}\psi_i\f$
-        const ST gX_r = dX_r + val_i * kX;
-        const ST gY_r = dY_r + val_i * kY;
-        const ST gZ_r = dZ_r + val_i * kZ;
-        const ST gX_i = dX_i - val_r * kX;
-        const ST gY_i = dY_i - val_r * kY;
-        const ST gZ_i = dZ_i - val_r * kZ;
-
-        const size_t psiIndex = j + this->first_spo;
-        psi[psiIndex] = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
-        dpsi[psiIndex][0] = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
-        dpsi[psiIndex][1] = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
-        dpsi[psiIndex][2] = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
-
-        // intermediates for computation of hessian. \partial_i \partial_j phi
-        // in cartesian coordinates.
-        const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g00, g01, g02, g00, g01, g02);
-        const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g00, g01, g02, g10, g11, g12);
-        const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g00, g01, g02, g20, g21, g22);
-        const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g10, g11, g12, g10, g11, g12);
-        const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g10, g11, g12, g20, g21, g22);
-        const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g20, g21, g22, g20, g21, g22);
-
-        const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g00, g01, g02, g00, g01, g02);
-        const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g00, g01, g02, g10, g11, g12);
-        const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g00, g01, g02, g20, g21, g22);
-        const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g10, g11, g12, g10, g11, g12);
-        const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g10, g11, g12, g20, g21, g22);
-        const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g20, g21, g22, g20, g21, g22);
-
-        const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
-        const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
-        const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
-        const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
-        const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
-        const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
-
-        const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
-        const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
-        const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
-        const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
-        const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
-        const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
-
-        grad_grad_psi[psiIndex][0] =
-            ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
-        grad_grad_psi[psiIndex][1] =
-            ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
-        grad_grad_psi[psiIndex][2] =
-            ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
-        grad_grad_psi[psiIndex][3] =
-            ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
-        grad_grad_psi[psiIndex][4] =
-            ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
-        grad_grad_psi[psiIndex][5] =
-            ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
-        grad_grad_psi[psiIndex][6] =
-            ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
-        grad_grad_psi[psiIndex][7] =
-            ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
-        grad_grad_psi[psiIndex][8] =
-            ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
-
-        // These are the real and imaginary components of the third SPO
-        // derivative.  _xxx denotes
-        //  third derivative w.r.t. x, _xyz, a derivative with resepect to x,y,
-        //  and z, and so on.
-
-        const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
-        const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
-        const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
-        const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
-        const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
-        const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
-        const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
-        const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
-        const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
-        const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
-
-        const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
-        const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
-        const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
-        const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
-        const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
-        const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
-        const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
-        const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
-        const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
-        const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
-
-        // Here is where we build up the components of the physical hessian
-        // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
-        const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r -
-            kX * kX * kX * val_i;
-        const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i +
-            kX * kX * kX * val_r;
-        const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) -
-            (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
-        const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) -
-            (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
-        const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) -
-            (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
-        const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) -
-            (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
-        const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) -
-            (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
-        const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) -
-            (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
-        const ST gh_xyz_r = f3_xyz_r +
-            (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
-            (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) -
-            kX * kY * kZ * val_i;
-        const ST gh_xyz_i = f3_xyz_i -
-            (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
-            (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) +
-            kX * kY * kZ * val_r;
-        const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) -
-            (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
-        const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) -
-            (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
-        const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r -
-            kY * kY * kY * val_i;
-        const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i +
-            kY * kY * kY * val_r;
-        const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) -
-            (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
-        const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) -
-            (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
-        const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) -
-            (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
-        const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) -
-            (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
-        const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r -
-            kZ * kZ * kZ * val_i;
-        const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i +
-            kZ * kZ * kZ * val_r;
-
-        grad_grad_grad_psi[psiIndex][0][0] =
-            ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r);
-        grad_grad_grad_psi[psiIndex][0][1] =
-            ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
-        grad_grad_grad_psi[psiIndex][0][2] =
-            ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
-        grad_grad_grad_psi[psiIndex][0][3] =
-            ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
-        grad_grad_grad_psi[psiIndex][0][4] =
-            ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
-        grad_grad_grad_psi[psiIndex][0][5] =
-            ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
-        grad_grad_grad_psi[psiIndex][0][6] =
-            ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
-        grad_grad_grad_psi[psiIndex][0][7] =
-            ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
-        grad_grad_grad_psi[psiIndex][0][8] =
-            ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
-
-        grad_grad_grad_psi[psiIndex][1][0] =
-            ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
-        grad_grad_grad_psi[psiIndex][1][1] =
-            ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
-        grad_grad_grad_psi[psiIndex][1][2] =
-            ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
-        grad_grad_grad_psi[psiIndex][1][3] =
-            ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
-        grad_grad_grad_psi[psiIndex][1][4] =
-            ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r);
-        grad_grad_grad_psi[psiIndex][1][5] =
-            ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
-        grad_grad_grad_psi[psiIndex][1][6] =
-            ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
-        grad_grad_grad_psi[psiIndex][1][7] =
-            ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
-        grad_grad_grad_psi[psiIndex][1][8] =
-            ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
-
-        grad_grad_grad_psi[psiIndex][2][0] =
-            ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
-        grad_grad_grad_psi[psiIndex][2][1] =
-            ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
-        grad_grad_grad_psi[psiIndex][2][2] =
-            ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
-        grad_grad_grad_psi[psiIndex][2][3] =
-            ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
-        grad_grad_grad_psi[psiIndex][2][4] =
-            ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
-        grad_grad_grad_psi[psiIndex][2][5] =
-            ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
-        grad_grad_grad_psi[psiIndex][2][6] =
-            ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
-        grad_grad_grad_psi[psiIndex][2][7] =
-            ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
-        grad_grad_grad_psi[psiIndex][2][8] =
-            ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r);
-    }
+  for (size_t j = first; j < last; ++j)
+  {
+    int jr = j << 1;
+    int ji = jr + 1;
+
+    const ST kX    = k0[j];
+    const ST kY    = k1[j];
+    const ST kZ    = k2[j];
+    const ST val_r = myV[jr];
+    const ST val_i = myV[ji];
+
+    // phase
+    ST s, c;
+    omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    // dot(PrimLattice.G,myG[j])
+    const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const ST gX_r = dX_r + val_i * kX;
+    const ST gY_r = dY_r + val_i * kY;
+    const ST gZ_r = dZ_r + val_i * kZ;
+    const ST gX_i = dX_i - val_r * kX;
+    const ST gY_i = dY_i - val_r * kY;
+    const ST gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = j + this->first_spo;
+    psi[psiIndex]         = ComplexT(c * val_r - s * val_i, c * val_i + s * val_r);
+    dpsi[psiIndex][0]     = ComplexT(c * gX_r - s * gX_i, c * gX_i + s * gX_r);
+    dpsi[psiIndex][1]     = ComplexT(c * gY_r - s * gY_i, c * gY_i + s * gY_r);
+    dpsi[psiIndex][2]     = ComplexT(c * gZ_r - s * gZ_i, c * gZ_i + s * gZ_r);
+
+    // intermediates for computation of hessian. \partial_i \partial_j phi
+    // in cartesian coordinates.
+    const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
+    const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
+    const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
+    const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
+    const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
+    const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
+
+    const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
+    const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
+    const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
+    const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
+    const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
+    const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
+
+    const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
+    const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
+    const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
+    const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
+    const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
+    const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
+
+    const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
+    const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
+    const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
+    const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
+    const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
+    const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
+
+    grad_grad_psi[psiIndex][0] = ComplexT(c * h_xx_r - s * h_xx_i, c * h_xx_i + s * h_xx_r);
+    grad_grad_psi[psiIndex][1] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
+    grad_grad_psi[psiIndex][2] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
+    grad_grad_psi[psiIndex][3] = ComplexT(c * h_xy_r - s * h_xy_i, c * h_xy_i + s * h_xy_r);
+    grad_grad_psi[psiIndex][4] = ComplexT(c * h_yy_r - s * h_yy_i, c * h_yy_i + s * h_yy_r);
+    grad_grad_psi[psiIndex][5] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
+    grad_grad_psi[psiIndex][6] = ComplexT(c * h_xz_r - s * h_xz_i, c * h_xz_i + s * h_xz_r);
+    grad_grad_psi[psiIndex][7] = ComplexT(c * h_yz_r - s * h_yz_i, c * h_yz_i + s * h_yz_r);
+    grad_grad_psi[psiIndex][8] = ComplexT(c * h_zz_r - s * h_zz_i, c * h_zz_i + s * h_zz_r);
+
+    // These are the real and imaginary components of the third SPO
+    // derivative.  _xxx denotes
+    //  third derivative w.r.t. x, _xyz, a derivative with respect to x,y,
+    //  and z, and so on.
+
+    const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    // Here is where we build up the components of the physical hessian
+    // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r))
+    const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
+    const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
+    const ST gh_xxy_r =
+        f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
+    const ST gh_xxy_i =
+        f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
+    const ST gh_xxz_r =
+        f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
+    const ST gh_xxz_i =
+        f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
+    const ST gh_xyy_r =
+        f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
+    const ST gh_xyy_i =
+        f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
+    const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
+        (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
+    const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
+        (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
+    const ST gh_xzz_r =
+        f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
+    const ST gh_xzz_i =
+        f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
+    const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
+    const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
+    const ST gh_yyz_r =
+        f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
+    const ST gh_yyz_i =
+        f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
+    const ST gh_yzz_r =
+        f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
+    const ST gh_yzz_i =
+        f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
+    const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
+    const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
+
+    grad_grad_grad_psi[psiIndex][0][0] = ComplexT(c * gh_xxx_r - s * gh_xxx_i, c * gh_xxx_i + s * gh_xxx_r);
+    grad_grad_grad_psi[psiIndex][0][1] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
+    grad_grad_grad_psi[psiIndex][0][2] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
+    grad_grad_grad_psi[psiIndex][0][3] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
+    grad_grad_grad_psi[psiIndex][0][4] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
+    grad_grad_grad_psi[psiIndex][0][5] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][0][6] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
+    grad_grad_grad_psi[psiIndex][0][7] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][0][8] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
+
+    grad_grad_grad_psi[psiIndex][1][0] = ComplexT(c * gh_xxy_r - s * gh_xxy_i, c * gh_xxy_i + s * gh_xxy_r);
+    grad_grad_grad_psi[psiIndex][1][1] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
+    grad_grad_grad_psi[psiIndex][1][2] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][1][3] = ComplexT(c * gh_xyy_r - s * gh_xyy_i, c * gh_xyy_i + s * gh_xyy_r);
+    grad_grad_grad_psi[psiIndex][1][4] = ComplexT(c * gh_yyy_r - s * gh_yyy_i, c * gh_yyy_i + s * gh_yyy_r);
+    grad_grad_grad_psi[psiIndex][1][5] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
+    grad_grad_grad_psi[psiIndex][1][6] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][1][7] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
+    grad_grad_grad_psi[psiIndex][1][8] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
+
+    grad_grad_grad_psi[psiIndex][2][0] = ComplexT(c * gh_xxz_r - s * gh_xxz_i, c * gh_xxz_i + s * gh_xxz_r);
+    grad_grad_grad_psi[psiIndex][2][1] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][2][2] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
+    grad_grad_grad_psi[psiIndex][2][3] = ComplexT(c * gh_xyz_r - s * gh_xyz_i, c * gh_xyz_i + s * gh_xyz_r);
+    grad_grad_grad_psi[psiIndex][2][4] = ComplexT(c * gh_yyz_r - s * gh_yyz_i, c * gh_yyz_i + s * gh_yyz_r);
+    grad_grad_grad_psi[psiIndex][2][5] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
+    grad_grad_grad_psi[psiIndex][2][6] = ComplexT(c * gh_xzz_r - s * gh_xzz_i, c * gh_xzz_i + s * gh_xzz_r);
+    grad_grad_grad_psi[psiIndex][2][7] = ComplexT(c * gh_yzz_r - s * gh_yzz_i, c * gh_yzz_i + s * gh_yzz_r);
+    grad_grad_grad_psi[psiIndex][2][8] = ComplexT(c * gh_zzz_r - s * gh_zzz_i, c * gh_zzz_i + s * gh_zzz_r);
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::evaluateVGHGH(const ParticleSetT<VT>& P,
-    const int iat, ValueVector& psi, GradVector& dpsi,
-    HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi)
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::evaluateVGHGH(const ParticleSetT<VT>& P,
+                                                const int iat,
+                                                ValueVector& psi,
+                                                GradVector& dpsi,
+                                                HessVector& grad_grad_psi,
+                                                GGGVector& grad_grad_grad_psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
 #pragma omp parallel
-    {
-        int first, last;
-        FairDivideAligned(2 * psi.size(), getAlignment<ST>(),
-            omp_get_num_threads(), omp_get_thread_num(), first, last);
-
-        spline2::evaluate3d_vghgh(
-            SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
-        assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2,
-            last / 2);
-    }
+  {
+    int first, last;
+    FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+
+    spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
+    assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2);
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2COMPTargetT<ST, VT>::evaluate_notranspose(const ParticleSetT<VT>& P,
-    int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet,
-    ValueMatrix& d2logdet)
+template<typename ST, typename VT>
+void SplineC2COMPTargetT<ST, VT>::evaluate_notranspose(const ParticleSetT<VT>& P,
+                                                       int first,
+                                                       int last,
+                                                       ValueMatrix& logdet,
+                                                       GradMatrix& dlogdet,
+                                                       ValueMatrix& d2logdet)
 {
-    // chunk the [first, last) loop into blocks to save temporary memory usage
-    const int block_size = 16;
-
-    // reference vectors refer to the rows of matrices
-    std::vector<ValueVector> multi_psi_v;
-    std::vector<GradVector> multi_dpsi_v;
-    std::vector<ValueVector> multi_d2psi_v;
-    RefVector<ValueVector> psi_v_list;
-    RefVector<GradVector> dpsi_v_list;
-    RefVector<ValueVector> d2psi_v_list;
-
-    multi_psi_v.reserve(block_size);
-    multi_dpsi_v.reserve(block_size);
-    multi_d2psi_v.reserve(block_size);
-    psi_v_list.reserve(block_size);
-    dpsi_v_list.reserve(block_size);
-    d2psi_v_list.reserve(block_size);
-
-    for (int iat = first, i = 0; iat < last;
-         iat += block_size, i += block_size) {
-        const int actual_block_size = std::min(last - iat, block_size);
-        multi_pos_copy.resize(actual_block_size * 6);
-        multi_psi_v.clear();
-        multi_dpsi_v.clear();
-        multi_d2psi_v.clear();
-        psi_v_list.clear();
-        dpsi_v_list.clear();
-        d2psi_v_list.clear();
-
-        for (int ipos = 0; ipos < actual_block_size; ++ipos) {
-            // pack particle positions
-            const PointType& r = P.activeR(iat + ipos);
-            PointType ru(PrimLattice.toUnit_floor(r));
-            multi_pos_copy[ipos * 6] = r[0];
-            multi_pos_copy[ipos * 6 + 1] = r[1];
-            multi_pos_copy[ipos * 6 + 2] = r[2];
-            multi_pos_copy[ipos * 6 + 3] = ru[0];
-            multi_pos_copy[ipos * 6 + 4] = ru[1];
-            multi_pos_copy[ipos * 6 + 5] = ru[2];
-
-            multi_psi_v.emplace_back(logdet[i + ipos], logdet.cols());
-            multi_dpsi_v.emplace_back(dlogdet[i + ipos], dlogdet.cols());
-            multi_d2psi_v.emplace_back(d2logdet[i + ipos], d2logdet.cols());
-
-            psi_v_list.push_back(multi_psi_v[ipos]);
-            dpsi_v_list.push_back(multi_dpsi_v[ipos]);
-            d2psi_v_list.push_back(multi_d2psi_v[ipos]);
-        }
-
-        evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch,
-            psi_v_list, dpsi_v_list, d2psi_v_list);
+  // chunk the [first, last) loop into blocks to save temporary memory usage
+  const int block_size = 16;
+
+  // reference vectors refer to the rows of matrices
+  std::vector<ValueVector> multi_psi_v;
+  std::vector<GradVector> multi_dpsi_v;
+  std::vector<ValueVector> multi_d2psi_v;
+  RefVector<ValueVector> psi_v_list;
+  RefVector<GradVector> dpsi_v_list;
+  RefVector<ValueVector> d2psi_v_list;
+
+  multi_psi_v.reserve(block_size);
+  multi_dpsi_v.reserve(block_size);
+  multi_d2psi_v.reserve(block_size);
+  psi_v_list.reserve(block_size);
+  dpsi_v_list.reserve(block_size);
+  d2psi_v_list.reserve(block_size);
+
+  for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size)
+  {
+    const int actual_block_size = std::min(last - iat, block_size);
+    multi_pos_copy.resize(actual_block_size * 6);
+    multi_psi_v.clear();
+    multi_dpsi_v.clear();
+    multi_d2psi_v.clear();
+    psi_v_list.clear();
+    dpsi_v_list.clear();
+    d2psi_v_list.clear();
+
+    for (int ipos = 0; ipos < actual_block_size; ++ipos)
+    {
+      // pack particle positions
+      const PointType& r = P.activeR(iat + ipos);
+      PointType ru(PrimLattice.toUnit_floor(r));
+      multi_pos_copy[ipos * 6]     = r[0];
+      multi_pos_copy[ipos * 6 + 1] = r[1];
+      multi_pos_copy[ipos * 6 + 2] = r[2];
+      multi_pos_copy[ipos * 6 + 3] = ru[0];
+      multi_pos_copy[ipos * 6 + 4] = ru[1];
+      multi_pos_copy[ipos * 6 + 5] = ru[2];
+
+      multi_psi_v.emplace_back(logdet[i + ipos], logdet.cols());
+      multi_dpsi_v.emplace_back(dlogdet[i + ipos], dlogdet.cols());
+      multi_d2psi_v.emplace_back(d2logdet[i + ipos], d2logdet.cols());
+
+      psi_v_list.push_back(multi_psi_v[ipos]);
+      dpsi_v_list.push_back(multi_dpsi_v[ipos]);
+      d2psi_v_list.push_back(multi_d2psi_v[ipos]);
     }
+
+    evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list);
+  }
 }
 
 template class SplineC2COMPTargetT<float, std::complex<float>>;
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h
index 86c20dfd5da..11dddeef37f 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2COMPTargetT.h
@@ -40,337 +40,289 @@ namespace qmcplusplus
  * The internal storage of complex spline coefficients uses double sized real
  * arrays of ST type, aligned and padded. All the output orbitals are complex.
  */
-template <typename ST, typename VT>
+template<typename ST, typename VT>
 class SplineC2COMPTargetT : public BsplineSetT<VT>
 {
 public:
-    using SplineType = typename bspline_traits<ST, 3>::SplineType;
-    using BCType = typename bspline_traits<ST, 3>::BCType;
-    using DataType = ST;
-    using PointType = TinyVector<ST, 3>;
-    using SingleSplineType = UBspline_3d_d;
-    // types for evaluation results
-    using ComplexT = typename BsplineSetT<VT>::ValueType;
-    using typename BsplineSetT<VT>::ValueType;
-    using typename BsplineSetT<VT>::RealType;
-    using typename BsplineSetT<VT>::GradType;
-    using typename BsplineSetT<VT>::GGGVector;
-    using typename BsplineSetT<VT>::GradVector;
-    using typename BsplineSetT<VT>::GradMatrix;
-    using typename BsplineSetT<VT>::HessVector;
-    using typename BsplineSetT<VT>::ValueVector;
-    using typename BsplineSetT<VT>::ValueMatrix;
-    using typename BsplineSetT<VT>::OffloadMWVGLArray;
-
-    using vContainer_type = Vector<ST, aligned_allocator<ST>>;
-    using gContainer_type = VectorSoaContainer<ST, 3>;
-    using hContainer_type = VectorSoaContainer<ST, 6>;
-    using ghContainer_type = VectorSoaContainer<ST, 10>;
-
-    template <typename DT>
-    using OffloadVector = Vector<DT, OffloadAllocator<DT>>;
-    template <typename DT>
-    using OffloadPosVector = VectorSoaContainer<DT, 3, OffloadAllocator<DT>>;
+  using SplineType       = typename bspline_traits<ST, 3>::SplineType;
+  using BCType           = typename bspline_traits<ST, 3>::BCType;
+  using DataType         = ST;
+  using PointType        = TinyVector<ST, 3>;
+  using SingleSplineType = UBspline_3d_d;
+  // types for evaluation results
+  using ComplexT = typename BsplineSetT<VT>::ValueType;
+  using typename BsplineSetT<VT>::ValueType;
+  using typename BsplineSetT<VT>::RealType;
+  using typename BsplineSetT<VT>::GradType;
+  using typename BsplineSetT<VT>::GGGVector;
+  using typename BsplineSetT<VT>::GradVector;
+  using typename BsplineSetT<VT>::GradMatrix;
+  using typename BsplineSetT<VT>::HessVector;
+  using typename BsplineSetT<VT>::ValueVector;
+  using typename BsplineSetT<VT>::ValueMatrix;
+  using typename BsplineSetT<VT>::OffloadMWVGLArray;
+
+  using vContainer_type  = Vector<ST, aligned_allocator<ST>>;
+  using gContainer_type  = VectorSoaContainer<ST, 3>;
+  using hContainer_type  = VectorSoaContainer<ST, 6>;
+  using ghContainer_type = VectorSoaContainer<ST, 10>;
+
+  template<typename DT>
+  using OffloadVector = Vector<DT, OffloadAllocator<DT>>;
+  template<typename DT>
+  using OffloadPosVector = VectorSoaContainer<DT, 3, OffloadAllocator<DT>>;
 
 private:
-    /// timer for offload portion
-    NewTimer& offload_timer_;
-    /// primitive cell
-    CrystalLattice<ST, 3> PrimLattice;
-    ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
-    /// CartesianUnit, e.g. Hessian
-    Tensor<ST, 3> GGt;
-    /// multi bspline set
-    std::shared_ptr<
-        MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>>
-        SplineInst;
-
-    std::shared_ptr<OffloadVector<ST>> mKK;
-    std::shared_ptr<OffloadPosVector<ST>> myKcart;
-    std::shared_ptr<OffloadVector<ST>> GGt_offload;
-    std::shared_ptr<OffloadVector<ST>> PrimLattice_G_offload;
-
-    ResourceHandle<SplineOMPTargetMultiWalkerMem<ST, ComplexT>> mw_mem_handle_;
-
-    /// team private ratios for reduction, numVP x numTeams
-    Matrix<ComplexT, OffloadPinnedAllocator<ComplexT>> ratios_private;
-    /// offload scratch space, dynamically resized to the maximal need
-    Vector<ST, OffloadPinnedAllocator<ST>> offload_scratch;
-    /// result scratch space, dynamically resized to the maximal need
-    Vector<ComplexT, OffloadPinnedAllocator<ComplexT>> results_scratch;
-    /// psiinv and position scratch space, used to avoid allocation on the fly
-    /// and faster transfer
-    Vector<ComplexT, OffloadPinnedAllocator<ComplexT>> psiinv_pos_copy;
-    /// position scratch space, used to avoid allocation on the fly and faster
-    /// transfer
-    Vector<ST, OffloadPinnedAllocator<ST>> multi_pos_copy;
-
-    void
-    evaluateVGLMultiPos(
-        const Vector<ST, OffloadPinnedAllocator<ST>>& multi_pos_copy,
-        Vector<ST, OffloadPinnedAllocator<ST>>& offload_scratch,
-        Vector<ComplexT, OffloadPinnedAllocator<ComplexT>>& results_scratch,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list) const;
+  /// timer for offload portion
+  NewTimer& offload_timer_;
+  /// primitive cell
+  CrystalLattice<ST, 3> PrimLattice;
+  ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
+  /// CartesianUnit, e.g. Hessian
+  Tensor<ST, 3> GGt;
+  /// multi bspline set
+  std::shared_ptr<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>> SplineInst;
+
+  std::shared_ptr<OffloadVector<ST>> mKK;
+  std::shared_ptr<OffloadPosVector<ST>> myKcart;
+  std::shared_ptr<OffloadVector<ST>> GGt_offload;
+  std::shared_ptr<OffloadVector<ST>> PrimLattice_G_offload;
+
+  ResourceHandle<SplineOMPTargetMultiWalkerMem<ST, ComplexT>> mw_mem_handle_;
+
+  /// team private ratios for reduction, numVP x numTeams
+  Matrix<ComplexT, OffloadPinnedAllocator<ComplexT>> ratios_private;
+  /// offload scratch space, dynamically resized to the maximal need
+  Vector<ST, OffloadPinnedAllocator<ST>> offload_scratch;
+  /// result scratch space, dynamically resized to the maximal need
+  Vector<ComplexT, OffloadPinnedAllocator<ComplexT>> results_scratch;
+  /// psiinv and position scratch space, used to avoid allocation on the fly
+  /// and faster transfer
+  Vector<ComplexT, OffloadPinnedAllocator<ComplexT>> psiinv_pos_copy;
+  /// position scratch space, used to avoid allocation on the fly and faster
+  /// transfer
+  Vector<ST, OffloadPinnedAllocator<ST>> multi_pos_copy;
+
+  void evaluateVGLMultiPos(const Vector<ST, OffloadPinnedAllocator<ST>>& multi_pos_copy,
+                           Vector<ST, OffloadPinnedAllocator<ST>>& offload_scratch,
+                           Vector<ComplexT, OffloadPinnedAllocator<ComplexT>>& results_scratch,
+                           const RefVector<ValueVector>& psi_v_list,
+                           const RefVector<GradVector>& dpsi_v_list,
+                           const RefVector<ValueVector>& d2psi_v_list) const;
 
 protected:
-    /// intermediate result vectors
-    vContainer_type myV;
-    vContainer_type myL;
-    gContainer_type myG;
-    hContainer_type myH;
-    ghContainer_type mygH;
+  /// intermediate result vectors
+  vContainer_type myV;
+  vContainer_type myL;
+  gContainer_type myG;
+  hContainer_type myH;
+  ghContainer_type mygH;
 
 public:
-    SplineC2COMPTargetT(const std::string& my_name) :
-        BsplineSetT<VT>(my_name),
-        offload_timer_(
-            createGlobalTimer("SplineC2COMPTarget::offload", timer_level_fine)),
+  SplineC2COMPTargetT(const std::string& my_name)
+      : BsplineSetT<VT>(my_name),
+        offload_timer_(createGlobalTimer("SplineC2COMPTarget::offload", timer_level_fine)),
         GGt_offload(std::make_shared<OffloadVector<ST>>(9)),
         PrimLattice_G_offload(std::make_shared<OffloadVector<ST>>(9))
-    {
-    }
-
-    SplineC2COMPTargetT(const SplineC2COMPTargetT& in);
-
-    virtual std::string
-    getClassName() const override
-    {
-        return "SplineC2COMPTarget";
-    }
-    virtual std::string
-    getKeyword() const override
-    {
-        return "SplineC2C";
-    }
-    bool
-    isComplex() const override
-    {
-        return true;
-    };
-    virtual bool
-    isOMPoffload() const override
-    {
-        return true;
-    }
-
-    void
-    createResource(ResourceCollection& collection) const override
-    {
-        auto resource_index = collection.addResource(
-            std::make_unique<SplineOMPTargetMultiWalkerMem<ST, ComplexT>>());
-    }
-
-    void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<VT>>& spo_list) const override
-    {
-        assert(this == &spo_list.getLeader());
-        auto& phi_leader =
-            spo_list.template getCastedLeader<SplineC2COMPTargetT>();
-        phi_leader.mw_mem_handle_ =
-            collection
-                .lendResource<SplineOMPTargetMultiWalkerMem<ST, ComplexT>>();
-    }
-
-    void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<VT>>& spo_list) const override
-    {
-        assert(this == &spo_list.getLeader());
-        auto& phi_leader =
-            spo_list.template getCastedLeader<SplineC2COMPTargetT>();
-        collection.takebackResource(phi_leader.mw_mem_handle_);
-    }
-
-    std::unique_ptr<SPOSetT<VT>>
-    makeClone() const override
-    {
-        return std::make_unique<SplineC2COMPTargetT>(*this);
-    }
-
-    inline void
-    resizeStorage(size_t n, size_t nvals)
-    {
-        this->init_base(n);
-        size_t npad = getAlignedSize<ST>(2 * n);
-        myV.resize(npad);
-        myG.resize(npad);
-        myL.resize(npad);
-        myH.resize(npad);
-        mygH.resize(npad);
-    }
-
-    void
-    bcast_tables(Communicate* comm)
-    {
-        chunked_bcast(comm, SplineInst->getSplinePtr());
-    }
-
-    void
-    gather_tables(Communicate* comm)
-    {
-        if (comm->size() == 1)
-            return;
-        const int Nbands = this->kPoints.size();
-        const int Nbandgroups = comm->size();
-        this->offset.resize(Nbandgroups + 1, 0);
-        FairDivideLow(Nbands, Nbandgroups, this->offset);
-
-        for (size_t ib = 0; ib < this->offset.size(); ib++)
-            this->offset[ib] *= 2;
-        gatherv(comm, SplineInst->getSplinePtr(),
-            SplineInst->getSplinePtr()->z_stride, this->offset);
-    }
-
-    template <typename GT, typename BCT>
-    void
-    create_spline(GT& xyz_g, BCT& xyz_bc)
-    {
-        resize_kpoints();
-        SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>,
-            OffloadAllocator<SplineType>>>();
-        SplineInst->create(xyz_g, xyz_bc, myV.size());
-
-        app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20)
-                  << " MB allocated "
-                  << "for the coefficients in 3D spline orbital representation"
-                  << std::endl;
-    }
-
-    /// this routine can not be called from threaded region
-    void
-    finalizeConstruction() override
-    {
-        // map the SplineInst->getSplinePtr() structure to GPU
-        auto* MultiSpline = SplineInst->getSplinePtr();
-        auto* restrict coefs = MultiSpline->coefs;
-        // attach pointers on the device to achieve deep copy
-        PRAGMA_OFFLOAD("omp target \
+  {}
+
+  SplineC2COMPTargetT(const SplineC2COMPTargetT& in);
+
+  virtual std::string getClassName() const override { return "SplineC2COMPTarget"; }
+  virtual std::string getKeyword() const override { return "SplineC2C"; }
+  bool isComplex() const override { return true; };
+  virtual bool isOMPoffload() const override { return true; }
+
+  void createResource(ResourceCollection& collection) const override
+  {
+    auto resource_index = collection.addResource(std::make_unique<SplineOMPTargetMultiWalkerMem<ST, ComplexT>>());
+  }
+
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<VT>>& spo_list) const override
+  {
+    assert(this == &spo_list.getLeader());
+    auto& phi_leader          = spo_list.template getCastedLeader<SplineC2COMPTargetT>();
+    phi_leader.mw_mem_handle_ = collection.lendResource<SplineOMPTargetMultiWalkerMem<ST, ComplexT>>();
+  }
+
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<VT>>& spo_list) const override
+  {
+    assert(this == &spo_list.getLeader());
+    auto& phi_leader = spo_list.template getCastedLeader<SplineC2COMPTargetT>();
+    collection.takebackResource(phi_leader.mw_mem_handle_);
+  }
+
+  std::unique_ptr<SPOSetT<VT>> makeClone() const override { return std::make_unique<SplineC2COMPTargetT>(*this); }
+
+  inline void resizeStorage(size_t n, size_t nvals)
+  {
+    this->init_base(n);
+    size_t npad = getAlignedSize<ST>(2 * n);
+    myV.resize(npad);
+    myG.resize(npad);
+    myL.resize(npad);
+    myH.resize(npad);
+    mygH.resize(npad);
+  }
+
+  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
+
+  void gather_tables(Communicate* comm)
+  {
+    if (comm->size() == 1)
+      return;
+    const int Nbands      = this->kPoints.size();
+    const int Nbandgroups = comm->size();
+    this->offset.resize(Nbandgroups + 1, 0);
+    FairDivideLow(Nbands, Nbandgroups, this->offset);
+
+    for (size_t ib = 0; ib < this->offset.size(); ib++)
+      this->offset[ib] *= 2;
+    gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset);
+  }
+
+  template<typename GT, typename BCT>
+  void create_spline(GT& xyz_g, BCT& xyz_bc)
+  {
+    resize_kpoints();
+    SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>>();
+    SplineInst->create(xyz_g, xyz_bc, myV.size());
+
+    app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
+              << "for the coefficients in 3D spline orbital representation" << std::endl;
+  }
+
+  /// this routine can not be called from threaded region
+  void finalizeConstruction() override
+  {
+    // map the SplineInst->getSplinePtr() structure to GPU
+    auto* MultiSpline    = SplineInst->getSplinePtr();
+    auto* restrict coefs = MultiSpline->coefs;
+    // attach pointers on the device to achieve deep copy
+    PRAGMA_OFFLOAD("omp target \
                 map(always, to: MultiSpline[0:1], \
                     coefs[0:MultiSpline->coefs_size])")
-        {
-            MultiSpline->coefs = coefs;
-        }
-
-        // transfer static data to GPU
-        auto* mKK_ptr = mKK->data();
-        PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])")
-        auto* myKcart_ptr = myKcart->data();
-        PRAGMA_OFFLOAD(
-            "omp target update to(myKcart_ptr[0:myKcart->capacity()*3])")
-        for (size_t i = 0; i < 9; i++) {
-            (*GGt_offload)[i] = GGt[i];
-            (*PrimLattice_G_offload)[i] = PrimLattice.G[i];
-        }
-        auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
-        PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])")
-        auto* GGt_ptr = GGt_offload->data();
-        PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])")
+    {
+      MultiSpline->coefs = coefs;
     }
 
-    inline void
-    flush_zero()
+    // transfer static data to GPU
+    auto* mKK_ptr = mKK->data();
+    PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])")
+    auto* myKcart_ptr = myKcart->data();
+    PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])")
+    for (size_t i = 0; i < 9; i++)
     {
-        SplineInst->flush_zero();
+      (*GGt_offload)[i]           = GGt[i];
+      (*PrimLattice_G_offload)[i] = PrimLattice.G[i];
     }
-
-    /** remap kPoints to pack the double copy */
-    inline void
-    resize_kpoints()
+    auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
+    PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])")
+    auto* GGt_ptr = GGt_offload->data();
+    PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])")
+  }
+
+  inline void flush_zero() { SplineInst->flush_zero(); }
+
+  /** remap kPoints to pack the double copy */
+  inline void resize_kpoints()
+  {
+    const size_t nk = this->kPoints.size();
+    mKK             = std::make_shared<OffloadVector<ST>>(nk);
+    myKcart         = std::make_shared<OffloadPosVector<ST>>(nk);
+    for (size_t i = 0; i < nk; ++i)
     {
-        const size_t nk = this->kPoints.size();
-        mKK = std::make_shared<OffloadVector<ST>>(nk);
-        myKcart = std::make_shared<OffloadPosVector<ST>>(nk);
-        for (size_t i = 0; i < nk; ++i) {
-            (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]);
-            (*myKcart)(i) = this->kPoints[i];
-        }
+      (*mKK)[i]     = -dot(this->kPoints[i], this->kPoints[i]);
+      (*myKcart)(i) = this->kPoints[i];
     }
+  }
 
-    void
-    set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i,
-        int twist, int ispline, int level);
+  void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level);
 
-    bool
-    read_splines(hdf_archive& h5f);
+  bool read_splines(hdf_archive& h5f);
 
-    bool
-    write_splines(hdf_archive& h5f);
+  bool write_splines(hdf_archive& h5f);
 
-    void
-    assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi,
-        int first, int last) const;
+  void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
 
-    virtual void
-    evaluateValue(
-        const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
+  virtual void evaluateValue(const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
 
-    virtual void
-    evaluateDetRatios(const VirtualParticleSetT<VT>& VP, ValueVector& psi,
-        const ValueVector& psiinv, std::vector<ValueType>& ratios) override;
+  virtual void evaluateDetRatios(const VirtualParticleSetT<VT>& VP,
+                                 ValueVector& psi,
+                                 const ValueVector& psiinv,
+                                 std::vector<ValueType>& ratios) override;
 
-    virtual void
-    mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
-        const RefVectorWithLeader<const VirtualParticleSetT<VT>>& vp_list,
-        const RefVector<ValueVector>& psi_list,
-        const std::vector<const ValueType*>& invRow_ptr_list,
-        std::vector<std::vector<ValueType>>& ratios_list) const override;
+  virtual void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
+                                    const RefVectorWithLeader<const VirtualParticleSetT<VT>>& vp_list,
+                                    const RefVector<ValueVector>& psi_list,
+                                    const std::vector<const ValueType*>& invRow_ptr_list,
+                                    std::vector<std::vector<ValueType>>& ratios_list) const override;
 
-    /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
+  /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
      * cartesian
      */
-    void
-    assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        ValueVector& d2psi);
-
-    virtual void
-    evaluateVGL(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
-
-    virtual void
-    mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<VT>>& sa_list,
-        const RefVectorWithLeader<ParticleSetT<VT>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list) const override;
-
-    virtual void
-    mw_evaluateVGLandDetRatioGrads(
-        const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<VT>>& P_list, int iat,
-        const std::vector<const ValueType*>& invRow_ptr_list,
-        OffloadMWVGLArray& phi_vgl_v, std::vector<ValueType>& ratios,
-        std::vector<GradType>& grads) const override;
-
-    void
-    assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, int first, int last) const;
-
-    virtual void
-    evaluateVGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi) override;
-
-    void
-    assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0,
-        int last = -1) const;
-
-    virtual void
-    evaluateVGHGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi,
-        GGGVector& grad_grad_grad_psi) override;
-
-    virtual void
-    evaluate_notranspose(const ParticleSetT<VT>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override;
-
-    template <class BSPLINESPO>
-    friend class SplineSetReaderT;
-    template <typename>
-    friend class BsplineReaderBaseT;
+  void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  virtual void evaluateVGL(const ParticleSetT<VT>& P,
+                           const int iat,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           ValueVector& d2psi) override;
+
+  virtual void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<VT>>& sa_list,
+                              const RefVectorWithLeader<ParticleSetT<VT>>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list) const override;
+
+  virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSetT<VT>>& P_list,
+                                              int iat,
+                                              const std::vector<const ValueType*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<ValueType>& ratios,
+                                              std::vector<GradType>& grads) const override;
+
+  void assign_vgh(const PointType& r,
+                  ValueVector& psi,
+                  GradVector& dpsi,
+                  HessVector& grad_grad_psi,
+                  int first,
+                  int last) const;
+
+  virtual void evaluateVGH(const ParticleSetT<VT>& P,
+                           const int iat,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           HessVector& grad_grad_psi) override;
+
+  void assign_vghgh(const PointType& r,
+                    ValueVector& psi,
+                    GradVector& dpsi,
+                    HessVector& grad_grad_psi,
+                    GGGVector& grad_grad_grad_psi,
+                    int first = 0,
+                    int last  = -1) const;
+
+  virtual void evaluateVGHGH(const ParticleSetT<VT>& P,
+                             const int iat,
+                             ValueVector& psi,
+                             GradVector& dpsi,
+                             HessVector& grad_grad_psi,
+                             GGGVector& grad_grad_grad_psi) override;
+
+  virtual void evaluate_notranspose(const ParticleSetT<VT>& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    ValueMatrix& d2logdet) override;
+
+  template<class BSPLINESPO>
+  friend class SplineSetReaderT;
+  template<typename>
+  friend class BsplineReaderBaseT;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
index ce4855d11bc..dc68edbb82d 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.cpp
@@ -33,26 +33,26 @@ inline void SplineC2CT<ST, VT>::set_spline(SingleSplineType* spline_r,
                                            int ispline,
                                            int level)
 {
-    SplineInst->copy_spline(spline_r, 2 * ispline);
-    SplineInst->copy_spline(spline_i, 2 * ispline + 1);
+  SplineInst->copy_spline(spline_r, 2 * ispline);
+  SplineInst->copy_spline(spline_i, 2 * ispline + 1);
 }
 
 template<typename ST, typename VT>
 bool SplineC2CT<ST, VT>::read_splines(hdf_archive& h5f)
 {
-    std::ostringstream o;
-    o << "spline_" << this->MyIndex;
-    einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
-    return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
+  std::ostringstream o;
+  o << "spline_" << this->MyIndex;
+  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+  return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
 }
 
 template<typename ST, typename VT>
 bool SplineC2CT<ST, VT>::write_splines(hdf_archive& h5f)
 {
-    std::ostringstream o;
-    o << "spline_" << this->MyIndex;
-    einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
-    return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
+  std::ostringstream o;
+  o << "spline_" << this->MyIndex;
+  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+  return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
 }
 
 template<typename ST, typename VT>
@@ -62,7 +62,7 @@ void SplineC2CT<ST, VT>::storeParamsBeforeRotation()
   const auto coefs_tot_size = spline_ptr->coefs_size;
   coef_copy_                = std::make_shared<std::vector<ST>>(coefs_tot_size);
 
-    std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin());
+  std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin());
 }
 
 /*
@@ -190,8 +190,8 @@ inline void SplineC2CT<ST, VT>::assign_v(const PointType& r,
 template<typename ST, typename VT>
 void SplineC2CT<ST, VT>::evaluateValue(const ParticleSetT<VT>& P, const int iat, ValueVector& psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
 
 #pragma omp parallel
   {
@@ -211,46 +211,44 @@ void SplineC2CT<ST, VT>::evaluateDetRatios(const VirtualParticleSetT<VT>& VP,
                                            const ValueVector& psiinv,
                                            std::vector<ValueType>& ratios)
 {
-    const bool need_resize = ratios_private.rows() < VP.getTotalNum();
+  const bool need_resize = ratios_private.rows() < VP.getTotalNum();
 
 #pragma omp parallel
+  {
+    int tid = omp_get_thread_num();
+    // initialize thread private ratios
+    if (need_resize)
     {
-      int tid = omp_get_thread_num();
-      // initialize thread private ratios
-      if (need_resize)
-      {
-        if (tid == 0) // just like #pragma omp master, but one fewer call to
-                      // the runtime
-          ratios_private.resize(VP.getTotalNum(), omp_get_num_threads());
+      if (tid == 0) // just like #pragma omp master, but one fewer call to
+                    // the runtime
+        ratios_private.resize(VP.getTotalNum(), omp_get_num_threads());
 #pragma omp barrier
-      }
-        int first, last;
-        // Factor of 2 because psi is complex and the spline storage and
-        // evaluation uses a real type
-        FairDivideAligned(2 * psi.size(), getAlignment<ST>(),
-            omp_get_num_threads(), tid, first, last);
-        const int first_cplx = first / 2;
-        const int last_cplx =
-            this->kPoints.size() < last / 2 ? this->kPoints.size() : last / 2;
-
-        for (int iat = 0; iat < VP.getTotalNum(); ++iat) {
-            const PointType& r = VP.activeR(iat);
-            PointType ru(PrimLattice.toUnit_floor(r));
-
-            spline2::evaluate3d(
-                SplineInst->getSplinePtr(), ru, myV, first, last);
-            assign_v(r, myV, psi, first_cplx, last_cplx);
-            ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx,
-                psiinv.data() + first_cplx, last_cplx - first_cplx);
-        }
     }
+    int first, last;
+    // Factor of 2 because psi is complex and the spline storage and
+    // evaluation uses a real type
+    FairDivideAligned(2 * psi.size(), getAlignment<ST>(), omp_get_num_threads(), tid, first, last);
+    const int first_cplx = first / 2;
+    const int last_cplx  = this->kPoints.size() < last / 2 ? this->kPoints.size() : last / 2;
 
-    // do the reduction manually
-    for (int iat = 0; iat < VP.getTotalNum(); ++iat) {
-        ratios[iat] = ComplexT(0);
-        for (int tid = 0; tid < ratios_private.cols(); tid++)
-            ratios[iat] += ratios_private[iat][tid];
+    for (int iat = 0; iat < VP.getTotalNum(); ++iat)
+    {
+      const PointType& r = VP.activeR(iat);
+      PointType ru(PrimLattice.toUnit_floor(r));
+
+      spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
+      assign_v(r, myV, psi, first_cplx, last_cplx);
+      ratios_private[iat][tid] = simd::dot(psi.data() + first_cplx, psiinv.data() + first_cplx, last_cplx - first_cplx);
     }
+  }
+
+  // do the reduction manually
+  for (int iat = 0; iat < VP.getTotalNum(); ++iat)
+  {
+    ratios[iat] = ComplexT(0);
+    for (int tid = 0; tid < ratios_private.cols(); tid++)
+      ratios[iat] += ratios_private[iat][tid];
+  }
 }
 
 /** assign_vgl
@@ -354,7 +352,7 @@ inline void SplineC2CT<ST, VT>::assign_vgl_from_l(const PointType& r,
   const ST* restrict g1 = myG.data(1);
   const ST* restrict g2 = myG.data(2);
 
-    const size_t N = this->last_spo - this->first_spo;
+  const size_t N = this->last_spo - this->first_spo;
 #pragma omp simd
   for (size_t j = 0; j < N; ++j)
   {
@@ -407,8 +405,8 @@ void SplineC2CT<ST, VT>::evaluateVGL(const ParticleSetT<VT>& P,
                                      GradVector& dpsi,
                                      ValueVector& d2psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
 
 #pragma omp parallel
   {
@@ -548,8 +546,8 @@ void SplineC2CT<ST, VT>::evaluateVGH(const ParticleSetT<VT>& P,
                                      GradVector& dpsi,
                                      HessVector& grad_grad_psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
 
 #pragma omp parallel
   {
@@ -809,8 +807,8 @@ void SplineC2CT<ST, VT>::evaluateVGHGH(const ParticleSetT<VT>& P,
                                        HessVector& grad_grad_psi,
                                        GGGVector& grad_grad_grad_psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
 #pragma omp parallel
   {
     int first, last;
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
index e48a285ef1f..a4065d74c26 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2CT.h
@@ -35,98 +35,75 @@ namespace qmcplusplus
  * The internal storage of complex spline coefficients uses double sized real
  * arrays of ST type, aligned and padded. All the output orbitals are complex.
  */
-template <typename ST, typename VT>
+template<typename ST, typename VT>
 class SplineC2CT : public BsplineSetT<VT>
 {
 public:
-    using SplineType = typename bspline_traits<ST, 3>::SplineType;
-    using BCType = typename bspline_traits<ST, 3>::BCType;
-    using DataType = ST;
-    using PointType = TinyVector<ST, 3>;
-    using SingleSplineType = UBspline_3d_d;
-
-    // types for evaluation results
-    using ComplexT = typename BsplineSetT<VT>::ValueType;
-    using typename BsplineSetT<VT>::IndexType;
-    using typename BsplineSetT<VT>::ValueType;
-    using typename BsplineSetT<VT>::RealType;
-    using typename BsplineSetT<VT>::GGGVector;
-    using typename BsplineSetT<VT>::GradVector;
-    using typename BsplineSetT<VT>::HessVector;
-    using typename BsplineSetT<VT>::ValueVector;
-    using typename BsplineSetT<VT>::ValueMatrix;
-
-    using vContainer_type = Vector<ST, aligned_allocator<ST>>;
-    using gContainer_type = VectorSoaContainer<ST, 3>;
-    using hContainer_type = VectorSoaContainer<ST, 6>;
-    using ghContainer_type = VectorSoaContainer<ST, 10>;
+  using SplineType       = typename bspline_traits<ST, 3>::SplineType;
+  using BCType           = typename bspline_traits<ST, 3>::BCType;
+  using DataType         = ST;
+  using PointType        = TinyVector<ST, 3>;
+  using SingleSplineType = UBspline_3d_d;
+
+  // types for evaluation results
+  using ComplexT = typename BsplineSetT<VT>::ValueType;
+  using typename BsplineSetT<VT>::IndexType;
+  using typename BsplineSetT<VT>::ValueType;
+  using typename BsplineSetT<VT>::RealType;
+  using typename BsplineSetT<VT>::GGGVector;
+  using typename BsplineSetT<VT>::GradVector;
+  using typename BsplineSetT<VT>::HessVector;
+  using typename BsplineSetT<VT>::ValueVector;
+  using typename BsplineSetT<VT>::ValueMatrix;
+
+  using vContainer_type  = Vector<ST, aligned_allocator<ST>>;
+  using gContainer_type  = VectorSoaContainer<ST, 3>;
+  using hContainer_type  = VectorSoaContainer<ST, 6>;
+  using ghContainer_type = VectorSoaContainer<ST, 10>;
 
 private:
-    /// primitive cell
-    CrystalLattice<ST, 3> PrimLattice;
-    ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
-    ///CartesianUnit, e.g. Hessian
-    Tensor<ST, 3> GGt;
-    /// multi bspline set
-    std::shared_ptr<MultiBspline<ST>> SplineInst;
+  /// primitive cell
+  CrystalLattice<ST, 3> PrimLattice;
+  ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
+  ///CartesianUnit, e.g. Hessian
+  Tensor<ST, 3> GGt;
+  /// multi bspline set
+  std::shared_ptr<MultiBspline<ST>> SplineInst;
 
-    /// Copy of original splines for orbital rotation
-    std::shared_ptr<std::vector<ST>> coef_copy_;
+  /// Copy of original splines for orbital rotation
+  std::shared_ptr<std::vector<ST>> coef_copy_;
 
-    vContainer_type mKK;
-    VectorSoaContainer<ST, 3> myKcart;
+  vContainer_type mKK;
+  VectorSoaContainer<ST, 3> myKcart;
 
-    /// thread private ratios for reduction when using nested threading, numVP x
-    /// numThread
-    Matrix<ComplexT> ratios_private;
+  /// thread private ratios for reduction when using nested threading, numVP x
+  /// numThread
+  Matrix<ComplexT> ratios_private;
 
 protected:
-    /// intermediate result vectors
-    vContainer_type myV;
-    vContainer_type myL;
-    gContainer_type myG;
-    hContainer_type myH;
-    ghContainer_type mygH;
+  /// intermediate result vectors
+  vContainer_type myV;
+  vContainer_type myL;
+  gContainer_type myG;
+  hContainer_type myH;
+  ghContainer_type mygH;
 
 public:
-    SplineC2CT(const std::string& my_name) : BsplineSetT<VT>(my_name)
-    {
-    }
+  SplineC2CT(const std::string& my_name) : BsplineSetT<VT>(my_name) {}
 
-    SplineC2CT(const SplineC2CT& in);
-    virtual std::string
-    getClassName() const override
-    {
-        return "SplineC2C";
-    }
-    virtual std::string
-    getKeyword() const override
-    {
-        return "SplineC2C";
-    }
-    bool
-    isComplex() const override
-    {
-        return true;
-    };
+  SplineC2CT(const SplineC2CT& in);
+  virtual std::string getClassName() const override { return "SplineC2C"; }
+  virtual std::string getKeyword() const override { return "SplineC2C"; }
+  bool isComplex() const override { return true; };
 
-    std::unique_ptr<SPOSetT<VT>>
-    makeClone() const override
-    {
-        return std::make_unique<SplineC2CT>(*this);
-    }
+  std::unique_ptr<SPOSetT<VT>> makeClone() const override { return std::make_unique<SplineC2CT>(*this); }
 
-    bool
-    isRotationSupported() const override
-    {
-        return true;
-    }
+  bool isRotationSupported() const override { return true; }
 
-    /// Store an original copy of the spline coefficients for orbital rotation
-    void
-    storeParamsBeforeRotation() override;
+  /// Store an original copy of the spline coefficients for orbital rotation
+  void storeParamsBeforeRotation() override;
 
-    /*
+  /*
       Implements orbital rotations via [1,2].
       Should be called by RotatedSPOs::apply_rotation()
       This implementation requires that NSPOs > Nelec. In other words,
@@ -136,135 +113,122 @@ class SplineC2CT : public BsplineSetT<VT>
       [2] Toulouse & Umrigar, JCP 126, (2007)
       [3] Townsend et al., PRB 102, (2020)
     */
-    void
-    applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override;
-
-    inline void
-    resizeStorage(size_t n, size_t nvals)
-    {
-        this->init_base(n);
-        size_t npad = getAlignedSize<ST>(2 * n);
-        myV.resize(npad);
-        myG.resize(npad);
-        myL.resize(npad);
-        myH.resize(npad);
-        mygH.resize(npad);
-    }
-
-    void
-    bcast_tables(Communicate* comm)
-    {
-        chunked_bcast(comm, SplineInst->getSplinePtr());
-    }
-
-    void
-    gather_tables(Communicate* comm)
-    {
-        if (comm->size() == 1)
-            return;
-        const int Nbands = this->kPoints.size();
-        const int Nbandgroups = comm->size();
-        this->offset.resize(Nbandgroups + 1, 0);
-        FairDivideLow(Nbands, Nbandgroups, this->offset);
-        for (size_t ib = 0; ib < this->offset.size(); ib++)
-            this->offset[ib] *= 2;
-        gatherv(comm, SplineInst->getSplinePtr(),
-            SplineInst->getSplinePtr()->z_stride, this->offset);
-    }
-
-    template <typename GT, typename BCT>
-    void
-    create_spline(GT& xyz_g, BCT& xyz_bc)
-    {
-        resize_kpoints();
-        SplineInst = std::make_shared<MultiBspline<ST>>();
-        SplineInst->create(xyz_g, xyz_bc, myV.size());
-        app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20)
-                  << " MB allocated "
-                  << "for the coefficients in 3D spline orbital representation"
-                  << std::endl;
-    }
-
-    inline void
-    flush_zero()
-    {
-        SplineInst->flush_zero();
-    }
-
-    /** remap kPoints to pack the double copy */
-    inline void
-    resize_kpoints()
+  void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override;
+
+  inline void resizeStorage(size_t n, size_t nvals)
+  {
+    this->init_base(n);
+    size_t npad = getAlignedSize<ST>(2 * n);
+    myV.resize(npad);
+    myG.resize(npad);
+    myL.resize(npad);
+    myH.resize(npad);
+    mygH.resize(npad);
+  }
+
+  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
+
+  void gather_tables(Communicate* comm)
+  {
+    if (comm->size() == 1)
+      return;
+    const int Nbands      = this->kPoints.size();
+    const int Nbandgroups = comm->size();
+    this->offset.resize(Nbandgroups + 1, 0);
+    FairDivideLow(Nbands, Nbandgroups, this->offset);
+    for (size_t ib = 0; ib < this->offset.size(); ib++)
+      this->offset[ib] *= 2;
+    gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset);
+  }
+
+  template<typename GT, typename BCT>
+  void create_spline(GT& xyz_g, BCT& xyz_bc)
+  {
+    resize_kpoints();
+    SplineInst = std::make_shared<MultiBspline<ST>>();
+    SplineInst->create(xyz_g, xyz_bc, myV.size());
+    app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
+              << "for the coefficients in 3D spline orbital representation" << std::endl;
+  }
+
+  inline void flush_zero() { SplineInst->flush_zero(); }
+
+  /** resize mKK and myKcart to match kPoints; fill mKK with -|k|^2 and myKcart with the k-points */
+  inline void resize_kpoints()
+  {
+    const size_t nk = this->kPoints.size();
+    mKK.resize(nk);
+    myKcart.resize(nk);
+    for (size_t i = 0; i < nk; ++i)
     {
-        const size_t nk = this->kPoints.size();
-        mKK.resize(nk);
-        myKcart.resize(nk);
-        for (size_t i = 0; i < nk; ++i) {
-            mKK[i] = -dot(this->kPoints[i], this->kPoints[i]);
-            myKcart(i) = this->kPoints[i];
-        }
+      mKK[i]     = -dot(this->kPoints[i], this->kPoints[i]);
+      myKcart(i) = this->kPoints[i];
     }
+  }
 
-    void
-    set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i,
-        int twist, int ispline, int level);
+  void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level);
 
-    bool
-    read_splines(hdf_archive& h5f);
+  bool read_splines(hdf_archive& h5f);
 
-    bool
-    write_splines(hdf_archive& h5f);
+  bool write_splines(hdf_archive& h5f);
 
-    void
-    assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi,
-        int first, int last) const;
+  void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
 
-    void
-    evaluateValue(
-        const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
+  void evaluateValue(const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
 
-    void
-    evaluateDetRatios(const VirtualParticleSetT<VT>& VP, ValueVector& psi,
-        const ValueVector& psiinv, std::vector<ValueType>& ratios) override;
+  void evaluateDetRatios(const VirtualParticleSetT<VT>& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<ValueType>& ratios) override;
 
-    /** assign_vgl
+  /** assign_vgl
      */
-    void
-    assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        ValueVector& d2psi, int first, int last) const;
+  void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last)
+      const;
 
-    /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
+  /** assign_vgl_from_l can be used when myL is precomputed and myV, myG, myL are in
      * cartesian
      */
-    void
-    assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        ValueVector& d2psi);
-
-    void
-    evaluateVGL(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
-
-    void
-    assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, int first, int last) const;
-
-    void
-    evaluateVGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi) override;
-
-    void
-    assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0,
-        int last = -1) const;
-
-    void
-    evaluateVGHGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi,
-        GGGVector& grad_grad_grad_psi) override;
-
-    template <class BSPLINESPO>
-    friend class SplineSetReaderT;
-    template <typename>
-    friend class BsplineReaderBaseT;
+  void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  void evaluateVGL(const ParticleSetT<VT>& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   ValueVector& d2psi) override;
+
+  void assign_vgh(const PointType& r,
+                  ValueVector& psi,
+                  GradVector& dpsi,
+                  HessVector& grad_grad_psi,
+                  int first,
+                  int last) const;
+
+  void evaluateVGH(const ParticleSetT<VT>& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) override;
+
+  void assign_vghgh(const PointType& r,
+                    ValueVector& psi,
+                    GradVector& dpsi,
+                    HessVector& grad_grad_psi,
+                    GGGVector& grad_grad_grad_psi,
+                    int first = 0,
+                    int last  = -1) const;
+
+  void evaluateVGHGH(const ParticleSetT<VT>& P,
+                     const int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) override;
+
+  template<class BSPLINESPO>
+  friend class SplineSetReaderT;
+  template<typename>
+  friend class BsplineReaderBaseT;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp
index 1e3e02cd6af..8e6a4dd7bf8 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.cpp
@@ -19,1911 +19,1694 @@
 
 namespace qmcplusplus
 {
-template <typename ST, typename VT>
-SplineC2ROMPTargetT<ST, VT>::SplineC2ROMPTargetT(
-    const SplineC2ROMPTargetT& in) = default;
-
-template <typename ST, typename VT>
-inline void
-SplineC2ROMPTargetT<ST, VT>::set_spline(SingleSplineType* spline_r,
-    SingleSplineType* spline_i, int twist, int ispline, int level)
+template<typename ST, typename VT>
+SplineC2ROMPTargetT<ST, VT>::SplineC2ROMPTargetT(const SplineC2ROMPTargetT& in) = default;
+
+template<typename ST, typename VT>
+inline void SplineC2ROMPTargetT<ST, VT>::set_spline(SingleSplineType* spline_r,
+                                                    SingleSplineType* spline_i,
+                                                    int twist,
+                                                    int ispline,
+                                                    int level)
 {
-    SplineInst->copy_spline(spline_r, 2 * ispline);
-    SplineInst->copy_spline(spline_i, 2 * ispline + 1);
+  SplineInst->copy_spline(spline_r, 2 * ispline);
+  SplineInst->copy_spline(spline_i, 2 * ispline + 1);
 }
 
-template <typename ST, typename VT>
-bool
-SplineC2ROMPTargetT<ST, VT>::read_splines(hdf_archive& h5f)
+template<typename ST, typename VT>
+bool SplineC2ROMPTargetT<ST, VT>::read_splines(hdf_archive& h5f)
 {
-    std::ostringstream o;
-    o << "spline_" << this->MyIndex;
-    einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
-    return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
+  std::ostringstream o;
+  o << "spline_" << this->MyIndex;
+  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+  return h5f.readEntry(bigtable, o.str().c_str()); //"spline_0");
 }
 
-template <typename ST, typename VT>
-bool
-SplineC2ROMPTargetT<ST, VT>::write_splines(hdf_archive& h5f)
+template<typename ST, typename VT>
+bool SplineC2ROMPTargetT<ST, VT>::write_splines(hdf_archive& h5f)
 {
-    std::ostringstream o;
-    o << "spline_" << this->MyIndex;
-    einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
-    return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
+  std::ostringstream o;
+  o << "spline_" << this->MyIndex;
+  einspline_engine<SplineType> bigtable(SplineInst->getSplinePtr());
+  return h5f.writeEntry(bigtable, o.str().c_str()); //"spline_0");
 }
 
-template <typename ST, typename VT>
-inline void
-SplineC2ROMPTargetT<ST, VT>::assign_v(const PointType& r,
-    const vContainer_type& myV, ValueVector& psi, int first, int last) const
+template<typename ST, typename VT>
+inline void SplineC2ROMPTargetT<ST, VT>::assign_v(const PointType& r,
+                                                  const vContainer_type& myV,
+                                                  ValueVector& psi,
+                                                  int first,
+                                                  int last) const
 {
-    // protect last
-    last = last > this->kPoints.size() ? this->kPoints.size() : last;
+  // clamp last so it never exceeds the number of k-points
+  last = last > this->kPoints.size() ? this->kPoints.size() : last;
 
-    const ST x = r[0], y = r[1], z = r[2];
-    const ST* restrict kx = myKcart->data(0);
-    const ST* restrict ky = myKcart->data(1);
-    const ST* restrict kz = myKcart->data(2);
+  const ST x = r[0], y = r[1], z = r[2];
+  const ST* restrict kx = myKcart->data(0);
+  const ST* restrict ky = myKcart->data(1);
+  const ST* restrict kz = myKcart->data(2);
 
-    TT* restrict psi_s = psi.data() + this->first_spo;
+  TT* restrict psi_s = psi.data() + this->first_spo;
 #pragma omp simd
-    for (size_t j = first; j < std::min(nComplexBands, last); j++) {
-        ST s, c;
-        const size_t jr = j << 1;
-        const size_t ji = jr + 1;
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-        omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
-        psi_s[jr] = val_r * c - val_i * s;
-        psi_s[ji] = val_i * c + val_r * s;
-    }
-
-    psi_s += nComplexBands;
+  for (size_t j = first; j < std::min(nComplexBands, last); j++)
+  {
+    ST s, c;
+    const size_t jr = j << 1;
+    const size_t ji = jr + 1;
+    const ST val_r  = myV[jr];
+    const ST val_i  = myV[ji];
+    omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
+    psi_s[jr] = val_r * c - val_i * s;
+    psi_s[ji] = val_i * c + val_r * s;
+  }
+
+  psi_s += nComplexBands;
 #pragma omp simd
-    for (size_t j = std::max(nComplexBands, first); j < last; j++) {
-        ST s, c;
-        const ST val_r = myV[2 * j];
-        const ST val_i = myV[2 * j + 1];
-        omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
-        psi_s[j] = val_r * c - val_i * s;
-    }
+  for (size_t j = std::max(nComplexBands, first); j < last; j++)
+  {
+    ST s, c;
+    const ST val_r = myV[2 * j];
+    const ST val_i = myV[2 * j + 1];
+    omptarget::sincos(-(x * kx[j] + y * ky[j] + z * kz[j]), &s, &c);
+    psi_s[j] = val_r * c - val_i * s;
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::evaluateValue(
-    const ParticleSetT<VT>& P, const int iat, ValueVector& psi)
+template<typename ST, typename VT>
+void SplineC2ROMPTargetT<ST, VT>::evaluateValue(const ParticleSetT<VT>& P, const int iat, ValueVector& psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
 
-    if (true) {
+  if (true)
+  {
 #pragma omp parallel
-        {
-            int first, last;
-            FairDivideAligned(myV.size(), getAlignment<ST>(),
-                omp_get_num_threads(), omp_get_thread_num(), first, last);
-
-            spline2::evaluate3d(
-                SplineInst->getSplinePtr(), ru, myV, first, last);
-            assign_v(r, myV, psi, first / 2, last / 2);
-        }
-    }
-    else {
-        const size_t ChunkSizePerTeam = 512;
-        const int NumTeams =
-            (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-
-        const auto spline_padded_size = myV.size();
-        const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
-        offload_scratch.resize(spline_padded_size);
-        results_scratch.resize(sposet_padded_size);
-
-        // Ye: need to extract sizes and pointers before entering target region
-        const auto* spline_ptr = SplineInst->getSplinePtr();
-        auto* offload_scratch_ptr = offload_scratch.data();
-        auto* results_scratch_ptr = results_scratch.data();
-        auto* psi_ptr = psi.data();
-        const auto x = r[0], y = r[1], z = r[2];
-        const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
-        const auto myKcart_padded_size = myKcart->capacity();
-        auto* myKcart_ptr = myKcart->data();
-        const size_t first_spo_local = this->first_spo;
-        const size_t nComplexBands_local = nComplexBands;
-        const auto requested_orb_size = psi.size();
-
-        {
-            ScopedTimer offload(offload_timer_);
-            PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
-                      map(always, from: results_scratch_ptr[0:sposet_padded_size])")
-            for (int team_id = 0; team_id < NumTeams; team_id++) {
-                const size_t first = ChunkSizePerTeam * team_id;
-                const size_t last = omptarget::min(
-                    first + ChunkSizePerTeam, spline_padded_size);
-
-                int ix, iy, iz;
-                ST a[4], b[4], c[4];
-                spline2::computeLocationAndFractional(
-                    spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c);
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = 0; index < last - first; index++)
-                    spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz,
-                        first + index, a, b, c,
-                        offload_scratch_ptr + first + index);
-                const size_t first_cplx = first / 2;
-                const size_t last_cplx = last / 2;
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = first_cplx; index < last_cplx; index++)
-                    C2R::assign_v(x, y, z, results_scratch_ptr,
-                        offload_scratch_ptr, myKcart_ptr, myKcart_padded_size,
-                        first_spo_local, nComplexBands_local, index);
-            }
+    {
+      int first, last;
+      FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
 
-            for (size_t i = 0; i < requested_orb_size; i++)
-                psi[i] = results_scratch[i];
-        }
+      spline2::evaluate3d(SplineInst->getSplinePtr(), ru, myV, first, last);
+      assign_v(r, myV, psi, first / 2, last / 2);
     }
-}
-
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::evaluateDetRatios(
-    const VirtualParticleSetT<VT>& VP, ValueVector& psi,
-    const ValueVector& psiinv, std::vector<ValueType>& ratios)
-{
-    const int nVP = VP.getTotalNum();
-    psiinv_pos_copy.resize(psiinv.size() + nVP * 6);
-
-    // stage psiinv to psiinv_pos_copy
-    std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data());
-
-    // pack particle positions
-    auto* restrict pos_scratch = psiinv_pos_copy.data() + psiinv.size();
-    for (int iat = 0; iat < nVP; ++iat) {
-        const PointType& r = VP.activeR(iat);
-        PointType ru(PrimLattice.toUnit_floor(r));
-        pos_scratch[iat * 6] = r[0];
-        pos_scratch[iat * 6 + 1] = r[1];
-        pos_scratch[iat * 6 + 2] = r[2];
-        pos_scratch[iat * 6 + 3] = ru[0];
-        pos_scratch[iat * 6 + 4] = ru[1];
-        pos_scratch[iat * 6 + 5] = ru[2];
-    }
-
+  }
+  else
+  {
     const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-    ratios_private.resize(nVP, NumTeams);
+    const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+
     const auto spline_padded_size = myV.size();
     const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
-    offload_scratch.resize(spline_padded_size * nVP);
-    results_scratch.resize(sposet_padded_size * nVP);
+    offload_scratch.resize(spline_padded_size);
+    results_scratch.resize(sposet_padded_size);
 
     // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
+    const auto* spline_ptr    = SplineInst->getSplinePtr();
     auto* offload_scratch_ptr = offload_scratch.data();
     auto* results_scratch_ptr = results_scratch.data();
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* myKcart_ptr = myKcart->data();
-    auto* psiinv_ptr = psiinv_pos_copy.data();
-    auto* ratios_private_ptr = ratios_private.data();
-    const size_t first_spo_local = this->first_spo;
+    auto* psi_ptr             = psi.data();
+    const auto x = r[0], y = r[1], z = r[2];
+    const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
+    const auto myKcart_padded_size   = myKcart->capacity();
+    auto* myKcart_ptr                = myKcart->data();
+    const size_t first_spo_local     = this->first_spo;
     const size_t nComplexBands_local = nComplexBands;
-    const auto requested_orb_size = psiinv.size();
+    const auto requested_orb_size    = psi.size();
 
     {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD(
-            "omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \
+      ScopedTimer offload(offload_timer_);
+      PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
+                      map(always, from: results_scratch_ptr[0:sposet_padded_size])")
+      for (int team_id = 0; team_id < NumTeams; team_id++)
+      {
+        const size_t first = ChunkSizePerTeam * team_id;
+        const size_t last  = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
+
+        int ix, iy, iz;
+        ST a[4], b[4], c[4];
+        spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c);
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = 0; index < last - first; index++)
+          spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
+                                             offload_scratch_ptr + first + index);
+        const size_t first_cplx = first / 2;
+        const size_t last_cplx  = last / 2;
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = first_cplx; index < last_cplx; index++)
+          C2R::assign_v(x, y, z, results_scratch_ptr, offload_scratch_ptr, myKcart_ptr, myKcart_padded_size,
+                        first_spo_local, nComplexBands_local, index);
+      }
+
+      for (size_t i = 0; i < requested_orb_size; i++)
+        psi[i] = results_scratch[i];
+    }
+  }
+}
+
+template<typename ST, typename VT>
+void SplineC2ROMPTargetT<ST, VT>::evaluateDetRatios(const VirtualParticleSetT<VT>& VP,
+                                                    ValueVector& psi,
+                                                    const ValueVector& psiinv,
+                                                    std::vector<ValueType>& ratios)
+{
+  const int nVP = VP.getTotalNum();
+  psiinv_pos_copy.resize(psiinv.size() + nVP * 6);
+
+  // stage psiinv to psiinv_pos_copy
+  std::copy_n(psiinv.data(), psiinv.size(), psiinv_pos_copy.data());
+
+  // pack particle positions: r[0..2] then ru[0..2], six values per virtual particle
+  auto* restrict pos_scratch = psiinv_pos_copy.data() + psiinv.size();
+  for (int iat = 0; iat < nVP; ++iat)
+  {
+    const PointType& r = VP.activeR(iat);
+    PointType ru(PrimLattice.toUnit_floor(r));
+    pos_scratch[iat * 6]     = r[0];
+    pos_scratch[iat * 6 + 1] = r[1];
+    pos_scratch[iat * 6 + 2] = r[2];
+    pos_scratch[iat * 6 + 3] = ru[0];
+    pos_scratch[iat * 6 + 4] = ru[1];
+    pos_scratch[iat * 6 + 5] = ru[2];
+  }
+
+  const size_t ChunkSizePerTeam = 512;
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+  ratios_private.resize(nVP, NumTeams);
+  const auto spline_padded_size = myV.size();
+  const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
+  offload_scratch.resize(spline_padded_size * nVP);
+  results_scratch.resize(sposet_padded_size * nVP);
+
+  // Ye: need to extract sizes and pointers before entering target region
+  const auto* spline_ptr           = SplineInst->getSplinePtr();
+  auto* offload_scratch_ptr        = offload_scratch.data();
+  auto* results_scratch_ptr        = results_scratch.data();
+  const auto myKcart_padded_size   = myKcart->capacity();
+  auto* myKcart_ptr                = myKcart->data();
+  auto* psiinv_ptr                 = psiinv_pos_copy.data();
+  auto* ratios_private_ptr         = ratios_private.data();
+  const size_t first_spo_local     = this->first_spo;
+  const size_t nComplexBands_local = nComplexBands;
+  const auto requested_orb_size    = psiinv.size();
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*nVP) \
                 map(always, to: psiinv_ptr[0:psiinv_pos_copy.size()]) \
                 map(always, from: ratios_private_ptr[0:NumTeams*nVP])")
-        for (int iat = 0; iat < nVP; iat++)
-            for (int team_id = 0; team_id < NumTeams; team_id++) {
-                const size_t first = ChunkSizePerTeam * team_id;
-                const size_t last = omptarget::min(
-                    first + ChunkSizePerTeam, spline_padded_size);
-
-                auto* restrict offload_scratch_iat_ptr =
-                    offload_scratch_ptr + spline_padded_size * iat;
-                auto* restrict psi_iat_ptr =
-                    results_scratch_ptr + sposet_padded_size * iat;
-                auto* restrict pos_scratch = psiinv_ptr + requested_orb_size;
-
-                int ix, iy, iz;
-                ST a[4], b[4], c[4];
-                spline2::computeLocationAndFractional(spline_ptr,
-                    ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]),
-                    ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = 0; index < last - first; index++)
-                    spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz,
-                        first + index, a, b, c,
-                        offload_scratch_iat_ptr + first + index);
-                const size_t first_cplx = first / 2;
-                const size_t last_cplx = last / 2;
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = first_cplx; index < last_cplx; index++)
-                    C2R::assign_v(ST(pos_scratch[iat * 6]),
-                        ST(pos_scratch[iat * 6 + 1]),
-                        ST(pos_scratch[iat * 6 + 2]), psi_iat_ptr,
-                        offload_scratch_iat_ptr, myKcart_ptr,
-                        myKcart_padded_size, first_spo_local,
+    for (int iat = 0; iat < nVP; iat++)
+      for (int team_id = 0; team_id < NumTeams; team_id++)
+      {
+        const size_t first = ChunkSizePerTeam * team_id;
+        const size_t last  = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
+
+        auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat;
+        auto* restrict psi_iat_ptr             = results_scratch_ptr + sposet_padded_size * iat;
+        auto* restrict pos_scratch             = psiinv_ptr + requested_orb_size;
+
+        int ix, iy, iz;
+        ST a[4], b[4], c[4];
+        spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]),
+                                              ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = 0; index < last - first; index++)
+          spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
+                                             offload_scratch_iat_ptr + first + index);
+        const size_t first_cplx = first / 2;
+        const size_t last_cplx  = last / 2;
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = first_cplx; index < last_cplx; index++)
+          C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]),
+                        psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local,
                         nComplexBands_local, index);
 
-                const size_t first_real = first_cplx +
-                    omptarget::min(nComplexBands_local, first_cplx);
-                const size_t last_real = omptarget::min(
-                    last_cplx + omptarget::min(nComplexBands_local, last_cplx),
-                    requested_orb_size);
-                TT sum(0);
-                PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
-                for (int i = first_real; i < last_real; i++)
-                    sum += psi_iat_ptr[i] * psiinv_ptr[i];
-                ratios_private_ptr[iat * NumTeams + team_id] = sum;
-            }
-    }
-
-    // do the reduction manually
-    for (int iat = 0; iat < nVP; ++iat) {
-        ratios[iat] = TT(0);
-        for (int tid = 0; tid < NumTeams; tid++)
-            ratios[iat] += ratios_private[iat][tid];
-    }
+        const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx);
+        const size_t last_real =
+            omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size);
+        TT sum(0);
+        PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
+        for (int i = first_real; i < last_real; i++)
+          sum += psi_iat_ptr[i] * psiinv_ptr[i];
+        ratios_private_ptr[iat * NumTeams + team_id] = sum;
+      }
+  }
+
+  // do the reduction manually
+  for (int iat = 0; iat < nVP; ++iat)
+  {
+    ratios[iat] = TT(0);
+    for (int tid = 0; tid < NumTeams; tid++)
+      ratios[iat] += ratios_private[iat][tid];
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::mw_evaluateDetRatios(
+template<typename ST, typename VT>
+void SplineC2ROMPTargetT<ST, VT>::mw_evaluateDetRatios(
     const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
     const RefVectorWithLeader<const VirtualParticleSetT<VT>>& vp_list,
     const RefVector<ValueVector>& psi_list,
     const std::vector<const ValueType*>& invRow_ptr_list,
     std::vector<std::vector<ValueType>>& ratios_list) const
 {
-    assert(this == &spo_list.getLeader());
-    auto& phi_leader = spo_list.template getCastedLeader<SplineC2ROMPTargetT>();
-    auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
-    auto& det_ratios_buffer_H2D = mw_mem.det_ratios_buffer_H2D;
-    auto& mw_ratios_private = mw_mem.mw_ratios_private;
-    auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
-    auto& mw_results_scratch = mw_mem.mw_results_scratch;
-    const size_t nw = spo_list.size();
-    const size_t requested_orb_size = phi_leader.size();
-
-    size_t mw_nVP = 0;
-    for (const VirtualParticleSetT<VT>& VP : vp_list)
-        mw_nVP += VP.getTotalNum();
-
-    const size_t packed_size =
-        nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(TT) + sizeof(int));
-    det_ratios_buffer_H2D.resize(packed_size);
-
-    // pack invRow_ptr_list to det_ratios_buffer_H2D
-    Vector<const ValueType*> ptr_buffer(
-        reinterpret_cast<const ValueType**>(det_ratios_buffer_H2D.data()), nw);
-    for (size_t iw = 0; iw < nw; iw++)
-        ptr_buffer[iw] = invRow_ptr_list[iw];
-
-    // pack particle positions
-    auto* pos_ptr = reinterpret_cast<TT*>(
-        det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*));
-    auto* ref_id_ptr = reinterpret_cast<int*>(det_ratios_buffer_H2D.data() +
-        nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT));
-    size_t iVP = 0;
-    for (size_t iw = 0; iw < nw; iw++) {
-        const VirtualParticleSetT<VT>& VP = vp_list[iw];
-        assert(ratios_list[iw].size() == VP.getTotalNum());
-        for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP) {
-            ref_id_ptr[iVP] = iw;
-            const PointType& r = VP.activeR(iat);
-            PointType ru(PrimLattice.toUnit_floor(r));
-            pos_ptr[0] = r[0];
-            pos_ptr[1] = r[1];
-            pos_ptr[2] = r[2];
-            pos_ptr[3] = ru[0];
-            pos_ptr[4] = ru[1];
-            pos_ptr[5] = ru[2];
-            pos_ptr += 6;
-        }
-    }
-
-    const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-    mw_ratios_private.resize(mw_nVP, NumTeams);
-    const auto spline_padded_size = myV.size();
-    const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
-    mw_offload_scratch.resize(spline_padded_size * mw_nVP);
-    mw_results_scratch.resize(sposet_padded_size * mw_nVP);
-
-    // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
-    auto* offload_scratch_ptr = mw_offload_scratch.data();
-    auto* results_scratch_ptr = mw_results_scratch.data();
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* myKcart_ptr = myKcart->data();
-    auto* buffer_H2D_ptr = det_ratios_buffer_H2D.data();
-    auto* ratios_private_ptr = mw_ratios_private.data();
-    const size_t first_spo_local = this->first_spo;
-    const size_t nComplexBands_local = nComplexBands;
-
+  assert(this == &spo_list.getLeader());
+  auto& phi_leader                = spo_list.template getCastedLeader<SplineC2ROMPTargetT>();
+  auto& mw_mem                    = phi_leader.mw_mem_handle_.getResource();
+  auto& det_ratios_buffer_H2D     = mw_mem.det_ratios_buffer_H2D;
+  auto& mw_ratios_private         = mw_mem.mw_ratios_private;
+  auto& mw_offload_scratch        = mw_mem.mw_offload_scratch;
+  auto& mw_results_scratch        = mw_mem.mw_results_scratch;
+  const size_t nw                 = spo_list.size();
+  const size_t requested_orb_size = phi_leader.size();
+
+  size_t mw_nVP = 0;
+  for (const VirtualParticleSetT<VT>& VP : vp_list)
+    mw_nVP += VP.getTotalNum();
+
+  const size_t packed_size = nw * sizeof(ValueType*) + mw_nVP * (6 * sizeof(TT) + sizeof(int));
+  det_ratios_buffer_H2D.resize(packed_size);
+
+  // pack invRow_ptr_list to det_ratios_buffer_H2D
+  Vector<const ValueType*> ptr_buffer(reinterpret_cast<const ValueType**>(det_ratios_buffer_H2D.data()), nw);
+  for (size_t iw = 0; iw < nw; iw++)
+    ptr_buffer[iw] = invRow_ptr_list[iw];
+
+  // pack particle positions
+  auto* pos_ptr = reinterpret_cast<TT*>(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*));
+  auto* ref_id_ptr =
+      reinterpret_cast<int*>(det_ratios_buffer_H2D.data() + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT));
+  size_t iVP = 0;
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    const VirtualParticleSetT<VT>& VP = vp_list[iw];
+    assert(ratios_list[iw].size() == VP.getTotalNum());
+    for (size_t iat = 0; iat < VP.getTotalNum(); ++iat, ++iVP)
     {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD(
-            "omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \
+      ref_id_ptr[iVP]    = iw;
+      const PointType& r = VP.activeR(iat);
+      PointType ru(PrimLattice.toUnit_floor(r));
+      pos_ptr[0] = r[0];
+      pos_ptr[1] = r[1];
+      pos_ptr[2] = r[2];
+      pos_ptr[3] = ru[0];
+      pos_ptr[4] = ru[1];
+      pos_ptr[5] = ru[2];
+      pos_ptr += 6;
+    }
+  }
+
+  const size_t ChunkSizePerTeam = 512;
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+  mw_ratios_private.resize(mw_nVP, NumTeams);
+  const auto spline_padded_size = myV.size();
+  const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
+  mw_offload_scratch.resize(spline_padded_size * mw_nVP);
+  mw_results_scratch.resize(sposet_padded_size * mw_nVP);
+
+  // Ye: need to extract sizes and pointers before entering target region
+  const auto* spline_ptr           = SplineInst->getSplinePtr();
+  auto* offload_scratch_ptr        = mw_offload_scratch.data();
+  auto* results_scratch_ptr        = mw_results_scratch.data();
+  const auto myKcart_padded_size   = myKcart->capacity();
+  auto* myKcart_ptr                = myKcart->data();
+  auto* buffer_H2D_ptr             = det_ratios_buffer_H2D.data();
+  auto* ratios_private_ptr         = mw_ratios_private.data();
+  const size_t first_spo_local     = this->first_spo;
+  const size_t nComplexBands_local = nComplexBands;
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*mw_nVP) \
                 map(always, to: buffer_H2D_ptr[0:det_ratios_buffer_H2D.size()]) \
                 map(always, from: ratios_private_ptr[0:NumTeams*mw_nVP])")
-        for (int iat = 0; iat < mw_nVP; iat++)
-            for (int team_id = 0; team_id < NumTeams; team_id++) {
-                const size_t first = ChunkSizePerTeam * team_id;
-                const size_t last = omptarget::min(
-                    first + ChunkSizePerTeam, spline_padded_size);
-
-                auto* restrict offload_scratch_iat_ptr =
-                    offload_scratch_ptr + spline_padded_size * iat;
-                auto* restrict psi_iat_ptr =
-                    results_scratch_ptr + sposet_padded_size * iat;
-                auto* ref_id_ptr = reinterpret_cast<int*>(buffer_H2D_ptr +
-                    nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT));
-                auto* restrict psiinv_ptr = reinterpret_cast<const ValueType**>(
-                    buffer_H2D_ptr)[ref_id_ptr[iat]];
-                auto* restrict pos_scratch = reinterpret_cast<TT*>(
-                    buffer_H2D_ptr + nw * sizeof(ValueType*));
-
-                int ix, iy, iz;
-                ST a[4], b[4], c[4];
-                spline2::computeLocationAndFractional(spline_ptr,
-                    ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]),
-                    ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = 0; index < last - first; index++)
-                    spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz,
-                        first + index, a, b, c,
-                        offload_scratch_iat_ptr + first + index);
-                const size_t first_cplx = first / 2;
-                const size_t last_cplx = last / 2;
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = first_cplx; index < last_cplx; index++)
-                    C2R::assign_v(ST(pos_scratch[iat * 6]),
-                        ST(pos_scratch[iat * 6 + 1]),
-                        ST(pos_scratch[iat * 6 + 2]), psi_iat_ptr,
-                        offload_scratch_iat_ptr, myKcart_ptr,
-                        myKcart_padded_size, first_spo_local,
+    for (int iat = 0; iat < mw_nVP; iat++)
+      for (int team_id = 0; team_id < NumTeams; team_id++)
+      {
+        const size_t first = ChunkSizePerTeam * team_id;
+        const size_t last  = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
+
+        auto* restrict offload_scratch_iat_ptr = offload_scratch_ptr + spline_padded_size * iat;
+        auto* restrict psi_iat_ptr             = results_scratch_ptr + sposet_padded_size * iat;
+        auto* ref_id_ptr = reinterpret_cast<int*>(buffer_H2D_ptr + nw * sizeof(ValueType*) + mw_nVP * 6 * sizeof(TT));
+        auto* restrict psiinv_ptr  = reinterpret_cast<const ValueType**>(buffer_H2D_ptr)[ref_id_ptr[iat]];
+        auto* restrict pos_scratch = reinterpret_cast<TT*>(buffer_H2D_ptr + nw * sizeof(ValueType*));
+
+        int ix, iy, iz;
+        ST a[4], b[4], c[4];
+        spline2::computeLocationAndFractional(spline_ptr, ST(pos_scratch[iat * 6 + 3]), ST(pos_scratch[iat * 6 + 4]),
+                                              ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = 0; index < last - first; index++)
+          spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c,
+                                             offload_scratch_iat_ptr + first + index);
+        const size_t first_cplx = first / 2;
+        const size_t last_cplx  = last / 2;
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = first_cplx; index < last_cplx; index++)
+          C2R::assign_v(ST(pos_scratch[iat * 6]), ST(pos_scratch[iat * 6 + 1]), ST(pos_scratch[iat * 6 + 2]),
+                        psi_iat_ptr, offload_scratch_iat_ptr, myKcart_ptr, myKcart_padded_size, first_spo_local,
                         nComplexBands_local, index);
 
-                const size_t first_real = first_cplx +
-                    omptarget::min(nComplexBands_local, first_cplx);
-                const size_t last_real = omptarget::min(
-                    last_cplx + omptarget::min(nComplexBands_local, last_cplx),
-                    requested_orb_size);
-                TT sum(0);
-                PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
-                for (int i = first_real; i < last_real; i++)
-                    sum += psi_iat_ptr[i] * psiinv_ptr[i];
-                ratios_private_ptr[iat * NumTeams + team_id] = sum;
-            }
-    }
-
-    // do the reduction manually
-    iVP = 0;
-    for (size_t iw = 0; iw < nw; iw++) {
-        auto& ratios = ratios_list[iw];
-        for (size_t iat = 0; iat < ratios.size(); iat++, iVP++) {
-            ratios[iat] = TT(0);
-            for (int tid = 0; tid < NumTeams; ++tid)
-                ratios[iat] += mw_ratios_private[iVP][tid];
-        }
+        const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx);
+        const size_t last_real =
+            omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size);
+        TT sum(0);
+        PRAGMA_OFFLOAD("omp parallel for simd reduction(+:sum)")
+        for (int i = first_real; i < last_real; i++)
+          sum += psi_iat_ptr[i] * psiinv_ptr[i];
+        ratios_private_ptr[iat * NumTeams + team_id] = sum;
+      }
+  }
+
+  // do the reduction manually
+  iVP = 0;
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    auto& ratios = ratios_list[iw];
+    for (size_t iat = 0; iat < ratios.size(); iat++, iVP++)
+    {
+      ratios[iat] = TT(0);
+      for (int tid = 0; tid < NumTeams; ++tid)
+        ratios[iat] += mw_ratios_private[iVP][tid];
     }
+  }
 }
 
 /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
  * cartesian
  */
-template <typename ST, typename VT>
-inline void
-SplineC2ROMPTargetT<ST, VT>::assign_vgl_from_l(
-    const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<typename ST, typename VT>
+inline void SplineC2ROMPTargetT<ST, VT>::assign_vgl_from_l(const PointType& r,
+                                                           ValueVector& psi,
+                                                           GradVector& dpsi,
+                                                           ValueVector& d2psi)
 {
-    constexpr ST two(2);
-    const ST x = r[0], y = r[1], z = r[2];
+  constexpr ST two(2);
+  const ST x = r[0], y = r[1], z = r[2];
 
-    const ST* restrict k0 = myKcart->data(0);
-    ASSUME_ALIGNED(k0);
-    const ST* restrict k1 = myKcart->data(1);
-    ASSUME_ALIGNED(k1);
-    const ST* restrict k2 = myKcart->data(2);
-    ASSUME_ALIGNED(k2);
+  const ST* restrict k0 = myKcart->data(0);
+  ASSUME_ALIGNED(k0);
+  const ST* restrict k1 = myKcart->data(1);
+  ASSUME_ALIGNED(k1);
+  const ST* restrict k2 = myKcart->data(2);
+  ASSUME_ALIGNED(k2);
 
-    const ST* restrict g0 = myG.data(0);
-    ASSUME_ALIGNED(g0);
-    const ST* restrict g1 = myG.data(1);
-    ASSUME_ALIGNED(g1);
-    const ST* restrict g2 = myG.data(2);
-    ASSUME_ALIGNED(g2);
+  const ST* restrict g0 = myG.data(0);
+  ASSUME_ALIGNED(g0);
+  const ST* restrict g1 = myG.data(1);
+  ASSUME_ALIGNED(g1);
+  const ST* restrict g2 = myG.data(2);
+  ASSUME_ALIGNED(g2);
 
-    const size_t N = this->kPoints.size();
+  const size_t N = this->kPoints.size();
 
 #pragma omp simd
-    for (size_t j = 0; j < nComplexBands; j++) {
-        const size_t jr = j << 1;
-        const size_t ji = jr + 1;
-
-        const ST kX = k0[j];
-        const ST kY = k1[j];
-        const ST kZ = k2[j];
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-
-        // phase
-        ST s, c;
-        omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
-
-        // dot(PrimLattice.G,myG[j])
-        const ST dX_r = g0[jr];
-        const ST dY_r = g1[jr];
-        const ST dZ_r = g2[jr];
-
-        const ST dX_i = g0[ji];
-        const ST dY_i = g1[ji];
-        const ST dZ_i = g2[ji];
-
-        // \f$\nabla \psi_r + {\bf k}\psi_i\f$
-        const ST gX_r = dX_r + val_i * kX;
-        const ST gY_r = dY_r + val_i * kY;
-        const ST gZ_r = dZ_r + val_i * kZ;
-        const ST gX_i = dX_i - val_r * kX;
-        const ST gY_i = dY_i - val_r * kY;
-        const ST gZ_i = dZ_i - val_r * kZ;
-
-        const ST lap_r = myL[jr] + (*mKK)[j] * val_r +
-            two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
-        const ST lap_i = myL[ji] + (*mKK)[j] * val_i -
-            two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
-
-        const size_t psiIndex = this->first_spo + jr;
-        psi[psiIndex] = c * val_r - s * val_i;
-        psi[psiIndex + 1] = c * val_i + s * val_r;
-        d2psi[psiIndex] = c * lap_r - s * lap_i;
-        d2psi[psiIndex + 1] = c * lap_i + s * lap_r;
-        dpsi[psiIndex][0] = c * gX_r - s * gX_i;
-        dpsi[psiIndex][1] = c * gY_r - s * gY_i;
-        dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
-        dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
-        dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
-        dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
-    }
+  for (size_t j = 0; j < nComplexBands; j++)
+  {
+    const size_t jr = j << 1;
+    const size_t ji = jr + 1;
+
+    const ST kX    = k0[j];
+    const ST kY    = k1[j];
+    const ST kZ    = k2[j];
+    const ST val_r = myV[jr];
+    const ST val_i = myV[ji];
+
+    // phase
+    ST s, c;
+    omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    // dot(PrimLattice.G,myG[j])
+    const ST dX_r = g0[jr];
+    const ST dY_r = g1[jr];
+    const ST dZ_r = g2[jr];
+
+    const ST dX_i = g0[ji];
+    const ST dY_i = g1[ji];
+    const ST dZ_i = g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const ST gX_r = dX_r + val_i * kX;
+    const ST gY_r = dY_r + val_i * kY;
+    const ST gZ_r = dZ_r + val_i * kZ;
+    const ST gX_i = dX_i - val_r * kX;
+    const ST gY_i = dY_i - val_r * kY;
+    const ST gZ_i = dZ_i - val_r * kZ;
+
+    const ST lap_r = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
+    const ST lap_i = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
+
+    const size_t psiIndex = this->first_spo + jr;
+    psi[psiIndex]         = c * val_r - s * val_i;
+    psi[psiIndex + 1]     = c * val_i + s * val_r;
+    d2psi[psiIndex]       = c * lap_r - s * lap_i;
+    d2psi[psiIndex + 1]   = c * lap_i + s * lap_r;
+    dpsi[psiIndex][0]     = c * gX_r - s * gX_i;
+    dpsi[psiIndex][1]     = c * gY_r - s * gY_i;
+    dpsi[psiIndex][2]     = c * gZ_r - s * gZ_i;
+    dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
+    dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
+    dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
+  }
 
 #pragma omp simd
-    for (size_t j = nComplexBands; j < N; j++) {
-        const size_t jr = j << 1;
-        const size_t ji = jr + 1;
-
-        const ST kX = k0[j];
-        const ST kY = k1[j];
-        const ST kZ = k2[j];
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-
-        // phase
-        ST s, c;
-        omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
-
-        // dot(PrimLattice.G,myG[j])
-        const ST dX_r = g0[jr];
-        const ST dY_r = g1[jr];
-        const ST dZ_r = g2[jr];
-
-        const ST dX_i = g0[ji];
-        const ST dY_i = g1[ji];
-        const ST dZ_i = g2[ji];
-
-        // \f$\nabla \psi_r + {\bf k}\psi_i\f$
-        const ST gX_r = dX_r + val_i * kX;
-        const ST gY_r = dY_r + val_i * kY;
-        const ST gZ_r = dZ_r + val_i * kZ;
-        const ST gX_i = dX_i - val_r * kX;
-        const ST gY_i = dY_i - val_r * kY;
-        const ST gZ_i = dZ_i - val_r * kZ;
-        const size_t psiIndex = this->first_spo + nComplexBands + j;
-        psi[psiIndex] = c * val_r - s * val_i;
-        dpsi[psiIndex][0] = c * gX_r - s * gX_i;
-        dpsi[psiIndex][1] = c * gY_r - s * gY_i;
-        dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
-
-        const ST lap_r = myL[jr] + (*mKK)[j] * val_r +
-            two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
-        const ST lap_i = myL[ji] + (*mKK)[j] * val_i -
-            two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
-        d2psi[psiIndex] = c * lap_r - s * lap_i;
-    }
+  for (size_t j = nComplexBands; j < N; j++)
+  {
+    const size_t jr = j << 1;
+    const size_t ji = jr + 1;
+
+    const ST kX    = k0[j];
+    const ST kY    = k1[j];
+    const ST kZ    = k2[j];
+    const ST val_r = myV[jr];
+    const ST val_i = myV[ji];
+
+    // phase
+    ST s, c;
+    omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    // dot(PrimLattice.G,myG[j])
+    const ST dX_r = g0[jr];
+    const ST dY_r = g1[jr];
+    const ST dZ_r = g2[jr];
+
+    const ST dX_i = g0[ji];
+    const ST dY_i = g1[ji];
+    const ST dZ_i = g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const ST gX_r         = dX_r + val_i * kX;
+    const ST gY_r         = dY_r + val_i * kY;
+    const ST gZ_r         = dZ_r + val_i * kZ;
+    const ST gX_i         = dX_i - val_r * kX;
+    const ST gY_i         = dY_i - val_r * kY;
+    const ST gZ_i         = dZ_i - val_r * kZ;
+    const size_t psiIndex = this->first_spo + nComplexBands + j;
+    psi[psiIndex]         = c * val_r - s * val_i;
+    dpsi[psiIndex][0]     = c * gX_r - s * gX_i;
+    dpsi[psiIndex][1]     = c * gY_r - s * gY_i;
+    dpsi[psiIndex][2]     = c * gZ_r - s * gZ_i;
+
+    const ST lap_r  = myL[jr] + (*mKK)[j] * val_r + two * (kX * dX_i + kY * dY_i + kZ * dZ_i);
+    const ST lap_i  = myL[ji] + (*mKK)[j] * val_i - two * (kX * dX_r + kY * dY_r + kZ * dZ_r);
+    d2psi[psiIndex] = c * lap_r - s * lap_i;
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::evaluateVGL(const ParticleSetT<VT>& P,
-    const int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<typename ST, typename VT>
+void SplineC2ROMPTargetT<ST, VT>::evaluateVGL(const ParticleSetT<VT>& P,
+                                              const int iat,
+                                              ValueVector& psi,
+                                              GradVector& dpsi,
+                                              ValueVector& d2psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
-
-    const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-
-    const auto spline_padded_size = myV.size();
-    const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
-    // for V(1)G(3)H(6) intermediate result
-    offload_scratch.resize(spline_padded_size * SoAFields3D::NUM_FIELDS);
-    // for V(1)G(3)L(1) final result
-    results_scratch.resize(sposet_padded_size * 5);
-
-    // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
-    auto* offload_scratch_ptr = offload_scratch.data();
-    auto* results_scratch_ptr = results_scratch.data();
-    const auto x = r[0], y = r[1], z = r[2];
-    const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* mKK_ptr = mKK->data();
-    auto* GGt_ptr = GGt_offload->data();
-    auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
-    auto* myKcart_ptr = myKcart->data();
-    const size_t first_spo_local = this->first_spo;
-    const size_t nComplexBands_local = nComplexBands;
-    const auto requested_orb_size = psi.size();
-
-    {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r));
+
+  const size_t ChunkSizePerTeam = 512;
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+
+  const auto spline_padded_size = myV.size();
+  const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
+  // for V(1)G(3)H(6) intermediate result
+  offload_scratch.resize(spline_padded_size * SoAFields3D::NUM_FIELDS);
+  // for V(1)G(3)L(1) final result
+  results_scratch.resize(sposet_padded_size * 5);
+
+  // Ye: need to extract sizes and pointers before entering target region
+  const auto* spline_ptr    = SplineInst->getSplinePtr();
+  auto* offload_scratch_ptr = offload_scratch.data();
+  auto* results_scratch_ptr = results_scratch.data();
+  const auto x = r[0], y = r[1], z = r[2];
+  const auto rux = ru[0], ruy = ru[1], ruz = ru[2];
+  const auto myKcart_padded_size   = myKcart->capacity();
+  auto* mKK_ptr                    = mKK->data();
+  auto* GGt_ptr                    = GGt_offload->data();
+  auto* PrimLattice_G_ptr          = PrimLattice_G_offload->data();
+  auto* myKcart_ptr                = myKcart->data();
+  const size_t first_spo_local     = this->first_spo;
+  const size_t nComplexBands_local = nComplexBands;
+  const auto requested_orb_size    = psi.size();
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute num_teams(NumTeams) \
                 map(always, from: results_scratch_ptr[0:sposet_padded_size*5])")
-        for (int team_id = 0; team_id < NumTeams; team_id++) {
-            const size_t first = ChunkSizePerTeam * team_id;
-            const size_t last =
-                omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
-
-            int ix, iy, iz;
-            ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
-            spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix,
-                iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
-
-            const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1],
-                PrimLattice_G_ptr[2], PrimLattice_G_ptr[3],
-                PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
-                PrimLattice_G_ptr[6], PrimLattice_G_ptr[7],
-                PrimLattice_G_ptr[8]};
-            const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3],
-                GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7],
-                GGt_ptr[8]};
-
-            PRAGMA_OFFLOAD("omp parallel for")
-            for (int index = 0; index < last - first; index++) {
-                spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz,
-                    first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
-                    offload_scratch_ptr + first + index, spline_padded_size);
-                const int output_index = first + index;
-                offload_scratch_ptr[spline_padded_size * SoAFields3D::LAPL +
-                    output_index] =
-                    SymTrace(offload_scratch_ptr[spline_padded_size *
-                                     SoAFields3D::HESS00 +
-                                 output_index],
-                        offload_scratch_ptr[spline_padded_size *
-                                SoAFields3D::HESS01 +
-                            output_index],
-                        offload_scratch_ptr[spline_padded_size *
-                                SoAFields3D::HESS02 +
-                            output_index],
-                        offload_scratch_ptr[spline_padded_size *
-                                SoAFields3D::HESS11 +
-                            output_index],
-                        offload_scratch_ptr[spline_padded_size *
-                                SoAFields3D::HESS12 +
-                            output_index],
-                        offload_scratch_ptr[spline_padded_size *
-                                SoAFields3D::HESS22 +
-                            output_index],
-                        symGGt);
-            }
-            const size_t first_cplx = first / 2;
-            const size_t last_cplx = last / 2;
-            PRAGMA_OFFLOAD("omp parallel for")
-            for (int index = first_cplx; index < last_cplx; index++)
-                C2R::assign_vgl(x, y, z, results_scratch_ptr,
-                    sposet_padded_size, mKK_ptr, offload_scratch_ptr,
-                    spline_padded_size, G, myKcart_ptr, myKcart_padded_size,
-                    first_spo_local, nComplexBands_local, index);
-        }
-    }
-
-    for (size_t i = 0; i < requested_orb_size; i++) {
-        psi[i] = results_scratch[i];
-        dpsi[i][0] = results_scratch[i + sposet_padded_size * 1];
-        dpsi[i][1] = results_scratch[i + sposet_padded_size * 2];
-        dpsi[i][2] = results_scratch[i + sposet_padded_size * 3];
-        d2psi[i] = results_scratch[i + sposet_padded_size * 4];
+    for (int team_id = 0; team_id < NumTeams; team_id++)
+    {
+      const size_t first = ChunkSizePerTeam * team_id;
+      const size_t last  = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
+
+      int ix, iy, iz;
+      ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
+      spline2::computeLocationAndFractional(spline_ptr, rux, ruy, ruz, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
+
+      const ST G[9]      = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
+                            PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
+                            PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
+      const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
+                            GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
+
+      PRAGMA_OFFLOAD("omp parallel for")
+      for (int index = 0; index < last - first; index++)
+      {
+        spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
+                                             offload_scratch_ptr + first + index, spline_padded_size);
+        const int output_index = first + index;
+        offload_scratch_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] =
+            SymTrace(offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index],
+                     offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index],
+                     offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index],
+                     offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index],
+                     offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index],
+                     offload_scratch_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt);
+      }
+      const size_t first_cplx = first / 2;
+      const size_t last_cplx  = last / 2;
+      PRAGMA_OFFLOAD("omp parallel for")
+      for (int index = first_cplx; index < last_cplx; index++)
+        C2R::assign_vgl(x, y, z, results_scratch_ptr, sposet_padded_size, mKK_ptr, offload_scratch_ptr,
+                        spline_padded_size, G, myKcart_ptr, myKcart_padded_size, first_spo_local, nComplexBands_local,
+                        index);
     }
+  }
+
+  for (size_t i = 0; i < requested_orb_size; i++)
+  {
+    psi[i]     = results_scratch[i];
+    dpsi[i][0] = results_scratch[i + sposet_padded_size * 1];
+    dpsi[i][1] = results_scratch[i + sposet_padded_size * 2];
+    dpsi[i][2] = results_scratch[i + sposet_padded_size * 3];
+    d2psi[i]   = results_scratch[i + sposet_padded_size * 4];
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::evaluateVGLMultiPos(
-    const Vector<ST, OffloadPinnedAllocator<ST>>& multi_pos,
-    Vector<ST, OffloadPinnedAllocator<ST>>& offload_scratch,
-    Vector<TT, OffloadPinnedAllocator<TT>>& results_scratch,
-    const RefVector<ValueVector>& psi_v_list,
-    const RefVector<GradVector>& dpsi_v_list,
-    const RefVector<ValueVector>& d2psi_v_list) const
+template<typename ST, typename VT> // fills psi/dpsi/d2psi (V, G, L) for each position packed in multi_pos
+void SplineC2ROMPTargetT<ST, VT>::evaluateVGLMultiPos(const Vector<ST, OffloadPinnedAllocator<ST>>& multi_pos,
+                                                      Vector<ST, OffloadPinnedAllocator<ST>>& offload_scratch,
+                                                      Vector<TT, OffloadPinnedAllocator<TT>>& results_scratch,
+                                                      const RefVector<ValueVector>& psi_v_list,
+                                                      const RefVector<GradVector>& dpsi_v_list,
+                                                      const RefVector<ValueVector>& d2psi_v_list) const
 {
-    const size_t num_pos = psi_v_list.size();
-    const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-    const auto spline_padded_size = myV.size();
-    const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
-    // for V(1)G(3)H(6) intermediate result
-    offload_scratch.resize(
-        spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS);
-    // for V(1)G(3)L(1) final result
-    results_scratch.resize(sposet_padded_size * num_pos * 5);
-
-    // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
-    auto* pos_copy_ptr = multi_pos.data();
-    auto* offload_scratch_ptr = offload_scratch.data();
-    auto* results_scratch_ptr = results_scratch.data();
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* mKK_ptr = mKK->data();
-    auto* GGt_ptr = GGt_offload->data();
-    auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
-    auto* myKcart_ptr = myKcart->data();
-    const size_t first_spo_local = this->first_spo;
-    const size_t nComplexBands_local = nComplexBands;
-    const auto requested_orb_size = psi_v_list[0].get().size();
-
-    {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD(
-            "omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
+  const size_t num_pos          = psi_v_list.size(); // one entry of each output list per position
+  const size_t ChunkSizePerTeam = 512; // number of spline entries covered by one team_id iteration
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam; // ceiling division
+  const auto spline_padded_size = myV.size();
+  const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
+  // intermediate V(1)G(3)H(6) spline results: NUM_FIELDS blocks of spline_padded_size entries per position
+  offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS);
+  // final V(1)G(3)L(1) results: 5 blocks of sposet_padded_size entries per position
+  results_scratch.resize(sposet_padded_size * num_pos * 5);
+
+  // Ye: sizes and raw pointers need to be extracted into locals before entering the target region
+  const auto* spline_ptr           = SplineInst->getSplinePtr();
+  auto* pos_copy_ptr               = multi_pos.data();
+  auto* offload_scratch_ptr        = offload_scratch.data();
+  auto* results_scratch_ptr        = results_scratch.data();
+  const auto myKcart_padded_size   = myKcart->capacity();
+  auto* mKK_ptr                    = mKK->data();
+  auto* GGt_ptr                    = GGt_offload->data();
+  auto* PrimLattice_G_ptr          = PrimLattice_G_offload->data();
+  auto* myKcart_ptr                = myKcart->data();
+  const size_t first_spo_local     = this->first_spo;
+  const size_t nComplexBands_local = nComplexBands;
+  const auto requested_orb_size    = psi_v_list[0].get().size(); // NOTE(review): assumes psi_v_list is non-empty - confirm
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
                     map(always, to: pos_copy_ptr[0:num_pos*6]) \
                     map(always, from: results_scratch_ptr[0:sposet_padded_size*num_pos*5])")
-        for (int iw = 0; iw < num_pos; iw++)
-            for (int team_id = 0; team_id < NumTeams; team_id++) {
-                const size_t first = ChunkSizePerTeam * team_id;
-                const size_t last = omptarget::min(
-                    first + ChunkSizePerTeam, spline_padded_size);
-
-                auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr +
-                    spline_padded_size * iw * SoAFields3D::NUM_FIELDS;
-                auto* restrict psi_iw_ptr =
-                    results_scratch_ptr + sposet_padded_size * iw * 5;
-
-                int ix, iy, iz;
-                ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4],
-                    d2c[4];
-                spline2::computeLocationAndFractional(spline_ptr,
-                    pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4],
-                    pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc,
-                    d2a, d2b, d2c);
-
-                const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1],
-                    PrimLattice_G_ptr[2], PrimLattice_G_ptr[3],
-                    PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
-                    PrimLattice_G_ptr[6], PrimLattice_G_ptr[7],
-                    PrimLattice_G_ptr[8]};
-                const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3],
-                    GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4],
-                    GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = 0; index < last - first; index++) {
-                    spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz,
-                        first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
-                        offload_scratch_iw_ptr + first + index,
-                        spline_padded_size);
-                    const int output_index = first + index;
-                    offload_scratch_iw_ptr[spline_padded_size *
-                            SoAFields3D::LAPL +
-                        output_index] =
-                        SymTrace(offload_scratch_iw_ptr[spline_padded_size *
-                                         SoAFields3D::HESS00 +
-                                     output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS01 +
-                                output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS02 +
-                                output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS11 +
-                                output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS12 +
-                                output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS22 +
-                                output_index],
-                            symGGt);
-                }
-                const size_t first_cplx = first / 2;
-                const size_t last_cplx = last / 2;
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = first_cplx; index < last_cplx; index++)
-                    C2R::assign_vgl(pos_copy_ptr[iw * 6],
-                        pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2],
-                        psi_iw_ptr, sposet_padded_size, mKK_ptr,
-                        offload_scratch_iw_ptr, spline_padded_size, G,
-                        myKcart_ptr, myKcart_padded_size, first_spo_local,
-                        nComplexBands_local, index);
-            }
-    }
-
-    for (int iw = 0; iw < num_pos; ++iw) {
-        auto* restrict results_iw_ptr =
-            results_scratch_ptr + sposet_padded_size * iw * 5;
-        ValueVector& psi_v(psi_v_list[iw]);
-        GradVector& dpsi_v(dpsi_v_list[iw]);
-        ValueVector& d2psi_v(d2psi_v_list[iw]);
-        for (size_t i = 0; i < requested_orb_size; i++) {
-            psi_v[i] = results_iw_ptr[i];
-            dpsi_v[i][0] = results_iw_ptr[i + sposet_padded_size];
-            dpsi_v[i][1] = results_iw_ptr[i + sposet_padded_size * 2];
-            dpsi_v[i][2] = results_iw_ptr[i + sposet_padded_size * 3];
-            d2psi_v[i] = results_iw_ptr[i + sposet_padded_size * 4];
+    for (int iw = 0; iw < num_pos; iw++) // collapse(2): the (iw, team_id) pairs form one distributed iteration space
+      for (int team_id = 0; team_id < NumTeams; team_id++)
+      {
+        const size_t first = ChunkSizePerTeam * team_id; // [first, last) is the spline-entry range of this chunk
+        const size_t last  = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
+
+        auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS;
+        auto* restrict psi_iw_ptr             = results_scratch_ptr + sposet_padded_size * iw * 5; // results of position iw
+
+        int ix, iy, iz; // passed to computeLocationAndFractional with entries 3..5 of this position's 6-value record
+        ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
+        spline2::computeLocationAndFractional(spline_ptr, pos_copy_ptr[iw * 6 + 3], pos_copy_ptr[iw * 6 + 4],
+                                              pos_copy_ptr[iw * 6 + 5], ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c);
+
+        const ST G[9]      = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
+                              PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
+                              PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
+        const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
+                              GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = 0; index < last - first; index++)
+        {
+          spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b,
+                                               d2c, offload_scratch_iw_ptr + first + index, spline_padded_size);
+          const int output_index = first + index; // LAPL field below is written from the six HESS fields via SymTrace
+          offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] =
+              SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt);
         }
+        const size_t first_cplx = first / 2; // presumably two spline entries per complex orbital - TODO confirm
+        const size_t last_cplx  = last / 2;
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = first_cplx; index < last_cplx; index++)
+          C2R::assign_vgl(pos_copy_ptr[iw * 6], pos_copy_ptr[iw * 6 + 1], pos_copy_ptr[iw * 6 + 2], psi_iw_ptr,
+                          sposet_padded_size, mKK_ptr, offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr,
+                          myKcart_padded_size, first_spo_local, nComplexBands_local, index);
+      }
+  }
+
+  for (int iw = 0; iw < num_pos; ++iw) // after the offload region: copy results_scratch into the caller's vectors
+  {
+    auto* restrict results_iw_ptr = results_scratch_ptr + sposet_padded_size * iw * 5; // field k starts at k * sposet_padded_size
+    ValueVector& psi_v(psi_v_list[iw]);
+    GradVector& dpsi_v(dpsi_v_list[iw]);
+    ValueVector& d2psi_v(d2psi_v_list[iw]);
+    for (size_t i = 0; i < requested_orb_size; i++)
+    {
+      psi_v[i]     = results_iw_ptr[i];
+      dpsi_v[i][0] = results_iw_ptr[i + sposet_padded_size];
+      dpsi_v[i][1] = results_iw_ptr[i + sposet_padded_size * 2];
+      dpsi_v[i][2] = results_iw_ptr[i + sposet_padded_size * 3];
+      d2psi_v[i]   = results_iw_ptr[i + sposet_padded_size * 4];
     }
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::mw_evaluateVGL(
-    const RefVectorWithLeader<SPOSetT<VT>>& sa_list,
-    const RefVectorWithLeader<ParticleSetT<VT>>& P_list, int iat,
-    const RefVector<ValueVector>& psi_v_list,
-    const RefVector<GradVector>& dpsi_v_list,
-    const RefVector<ValueVector>& d2psi_v_list) const
+template<typename ST, typename VT> // packs each walker's active position, then calls evaluateVGLMultiPos on the leader
+void SplineC2ROMPTargetT<ST, VT>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<VT>>& sa_list,
+                                                 const RefVectorWithLeader<ParticleSetT<VT>>& P_list,
+                                                 int iat,
+                                                 const RefVector<ValueVector>& psi_v_list,
+                                                 const RefVector<GradVector>& dpsi_v_list,
+                                                 const RefVector<ValueVector>& d2psi_v_list) const
 {
-    assert(this == &sa_list.getLeader());
-    auto& phi_leader = sa_list.template getCastedLeader<SplineC2ROMPTargetT>();
-    auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
-    auto& mw_pos_copy = mw_mem.mw_pos_copy;
-    auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
-    auto& mw_results_scratch = mw_mem.mw_results_scratch;
-    const int nwalkers = sa_list.size();
-    mw_pos_copy.resize(nwalkers * 6);
-
-    // pack particle positions
-    for (int iw = 0; iw < nwalkers; ++iw) {
-        const PointType& r = P_list[iw].activeR(iat);
-        PointType ru(PrimLattice.toUnit_floor(r));
-        mw_pos_copy[iw * 6] = r[0];
-        mw_pos_copy[iw * 6 + 1] = r[1];
-        mw_pos_copy[iw * 6 + 2] = r[2];
-        mw_pos_copy[iw * 6 + 3] = ru[0];
-        mw_pos_copy[iw * 6 + 4] = ru[1];
-        mw_pos_copy[iw * 6 + 5] = ru[2];
-    }
-
-    phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch,
-        mw_results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list);
+  assert(this == &sa_list.getLeader()); // must be invoked on the leader of sa_list
+  auto& phi_leader         = sa_list.template getCastedLeader<SplineC2ROMPTargetT>();
+  auto& mw_mem             = phi_leader.mw_mem_handle_.getResource(); // scratch buffers come from the leader's resource
+  auto& mw_pos_copy        = mw_mem.mw_pos_copy;
+  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
+  auto& mw_results_scratch = mw_mem.mw_results_scratch;
+  const int nwalkers       = sa_list.size();
+  mw_pos_copy.resize(nwalkers * 6); // 6 values per walker
+
+  // pack per walker: r[0..2] with r = P_list[iw].activeR(iat), then ru[0..2] with ru = PrimLattice.toUnit_floor(r)
+  for (int iw = 0; iw < nwalkers; ++iw)
+  {
+    const PointType& r = P_list[iw].activeR(iat);
+    PointType ru(PrimLattice.toUnit_floor(r));
+    mw_pos_copy[iw * 6]     = r[0];
+    mw_pos_copy[iw * 6 + 1] = r[1];
+    mw_pos_copy[iw * 6 + 2] = r[2];
+    mw_pos_copy[iw * 6 + 3] = ru[0];
+    mw_pos_copy[iw * 6 + 4] = ru[1];
+    mw_pos_copy[iw * 6 + 5] = ru[2];
+  }
+
+  phi_leader.evaluateVGLMultiPos(mw_pos_copy, mw_offload_scratch, mw_results_scratch, psi_v_list, dpsi_v_list,
+                                 d2psi_v_list);
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::mw_evaluateVGLandDetRatioGrads(
-    const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<VT>>& P_list, int iat,
-    const std::vector<const ValueType*>& invRow_ptr_list,
-    OffloadMWVGLArray& phi_vgl_v, std::vector<ValueType>& ratios,
-    std::vector<GradType>& grads) const
+template<typename ST, typename VT>
+void SplineC2ROMPTargetT<ST, VT>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
+                                                                 const RefVectorWithLeader<ParticleSetT<VT>>& P_list,
+                                                                 int iat,
+                                                                 const std::vector<const ValueType*>& invRow_ptr_list,
+                                                                 OffloadMWVGLArray& phi_vgl_v,
+                                                                 std::vector<ValueType>& ratios,
+                                                                 std::vector<GradType>& grads) const
 {
-    assert(this == &spo_list.getLeader());
-    auto& phi_leader = spo_list.template getCastedLeader<SplineC2ROMPTargetT>();
-    auto& mw_mem = phi_leader.mw_mem_handle_.getResource();
-    auto& buffer_H2D = mw_mem.buffer_H2D;
-    auto& rg_private = mw_mem.rg_private;
-    auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
-    auto& mw_results_scratch = mw_mem.mw_results_scratch;
-    const int nwalkers = spo_list.size();
-    buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*));
-
-    // pack particle positions and invRow pointers.
-    for (int iw = 0; iw < nwalkers; ++iw) {
-        const PointType& r = P_list[iw].activeR(iat);
-        PointType ru(PrimLattice.toUnit_floor(r));
-        Vector<ST> pos_copy(reinterpret_cast<ST*>(buffer_H2D[iw]), 6);
-
-        pos_copy[0] = r[0];
-        pos_copy[1] = r[1];
-        pos_copy[2] = r[2];
-        pos_copy[3] = ru[0];
-        pos_copy[4] = ru[1];
-        pos_copy[5] = ru[2];
-
-        auto& invRow_ptr = *reinterpret_cast<const ValueType**>(
-            buffer_H2D[iw] + sizeof(ST) * 6);
-        invRow_ptr = invRow_ptr_list[iw];
-    }
-
-    const size_t num_pos = nwalkers;
-    const auto spline_padded_size = myV.size();
-    const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
-    const size_t ChunkSizePerTeam = 512;
-    const int NumTeams = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
-
-    mw_offload_scratch.resize(
-        spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS);
-    // for V(1)G(3)L(1) final result
-    mw_results_scratch.resize(sposet_padded_size * num_pos * 5);
-    // per team ratio and grads
-    rg_private.resize(num_pos, NumTeams * 4);
-
-    // Ye: need to extract sizes and pointers before entering target region
-    const auto* spline_ptr = SplineInst->getSplinePtr();
-    auto* buffer_H2D_ptr = buffer_H2D.data();
-    auto* offload_scratch_ptr = mw_offload_scratch.data();
-    auto* results_scratch_ptr = mw_results_scratch.data();
-    const auto myKcart_padded_size = myKcart->capacity();
-    auto* mKK_ptr = mKK->data();
-    auto* GGt_ptr = GGt_offload->data();
-    auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
-    auto* myKcart_ptr = myKcart->data();
-    auto* phi_vgl_ptr = phi_vgl_v.data();
-    auto* rg_private_ptr = rg_private.data();
-    const size_t buffer_H2D_stride = buffer_H2D.cols();
-    const size_t first_spo_local = this->first_spo;
-    const auto requested_orb_size = phi_vgl_v.size(2);
-    const size_t phi_vgl_stride = num_pos * requested_orb_size;
-    const size_t nComplexBands_local = nComplexBands;
-
-    {
-        ScopedTimer offload(offload_timer_);
-        PRAGMA_OFFLOAD(
-            "omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
+  assert(this == &spo_list.getLeader());
+  auto& phi_leader         = spo_list.template getCastedLeader<SplineC2ROMPTargetT>();
+  auto& mw_mem             = phi_leader.mw_mem_handle_.getResource();
+  auto& buffer_H2D         = mw_mem.buffer_H2D;
+  auto& rg_private         = mw_mem.rg_private;
+  auto& mw_offload_scratch = mw_mem.mw_offload_scratch;
+  auto& mw_results_scratch = mw_mem.mw_results_scratch;
+  const int nwalkers       = spo_list.size();
+  buffer_H2D.resize(nwalkers, sizeof(ST) * 6 + sizeof(ValueType*));
+
+  // pack particle positions and invRow pointers.
+  for (int iw = 0; iw < nwalkers; ++iw)
+  {
+    const PointType& r = P_list[iw].activeR(iat);
+    PointType ru(PrimLattice.toUnit_floor(r));
+    Vector<ST> pos_copy(reinterpret_cast<ST*>(buffer_H2D[iw]), 6);
+
+    pos_copy[0] = r[0];
+    pos_copy[1] = r[1];
+    pos_copy[2] = r[2];
+    pos_copy[3] = ru[0];
+    pos_copy[4] = ru[1];
+    pos_copy[5] = ru[2];
+
+    auto& invRow_ptr = *reinterpret_cast<const ValueType**>(buffer_H2D[iw] + sizeof(ST) * 6);
+    invRow_ptr       = invRow_ptr_list[iw];
+  }
+
+  const size_t num_pos          = nwalkers;
+  const auto spline_padded_size = myV.size();
+  const auto sposet_padded_size = getAlignedSize<TT>(this->OrbitalSetSize);
+  const size_t ChunkSizePerTeam = 512;
+  const int NumTeams            = (myV.size() + ChunkSizePerTeam - 1) / ChunkSizePerTeam;
+
+  mw_offload_scratch.resize(spline_padded_size * num_pos * SoAFields3D::NUM_FIELDS);
+  // for V(1)G(3)L(1) final result
+  mw_results_scratch.resize(sposet_padded_size * num_pos * 5);
+  // per team ratio and grads
+  rg_private.resize(num_pos, NumTeams * 4);
+
+  // Ye: need to extract sizes and pointers before entering target region
+  const auto* spline_ptr           = SplineInst->getSplinePtr();
+  auto* buffer_H2D_ptr             = buffer_H2D.data();
+  auto* offload_scratch_ptr        = mw_offload_scratch.data();
+  auto* results_scratch_ptr        = mw_results_scratch.data();
+  const auto myKcart_padded_size   = myKcart->capacity();
+  auto* mKK_ptr                    = mKK->data();
+  auto* GGt_ptr                    = GGt_offload->data();
+  auto* PrimLattice_G_ptr          = PrimLattice_G_offload->data();
+  auto* myKcart_ptr                = myKcart->data();
+  auto* phi_vgl_ptr                = phi_vgl_v.data();
+  auto* rg_private_ptr             = rg_private.data();
+  const size_t buffer_H2D_stride   = buffer_H2D.cols();
+  const size_t first_spo_local     = this->first_spo;
+  const auto requested_orb_size    = phi_vgl_v.size(2);
+  const size_t phi_vgl_stride      = num_pos * requested_orb_size;
+  const size_t nComplexBands_local = nComplexBands;
+
+  {
+    ScopedTimer offload(offload_timer_);
+    PRAGMA_OFFLOAD("omp target teams distribute collapse(2) num_teams(NumTeams*num_pos) \
                     map(always, to: buffer_H2D_ptr[:buffer_H2D.size()]) \
                     map(always, from: rg_private_ptr[0:rg_private.size()])")
-        for (int iw = 0; iw < num_pos; iw++)
-            for (int team_id = 0; team_id < NumTeams; team_id++) {
-                const size_t first = ChunkSizePerTeam * team_id;
-                const size_t last = omptarget::min(
-                    first + ChunkSizePerTeam, spline_padded_size);
-
-                auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr +
-                    spline_padded_size * iw * SoAFields3D::NUM_FIELDS;
-                auto* restrict psi_iw_ptr =
-                    results_scratch_ptr + sposet_padded_size * iw * 5;
-                const auto* restrict pos_iw_ptr = reinterpret_cast<ST*>(
-                    buffer_H2D_ptr + buffer_H2D_stride * iw);
-                const auto* restrict invRow_iw_ptr =
-                    *reinterpret_cast<ValueType**>(buffer_H2D_ptr +
-                        buffer_H2D_stride * iw + sizeof(ST) * 6);
-
-                int ix, iy, iz;
-                ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4],
-                    d2c[4];
-                spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3],
-                    pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b, c, da, db,
-                    dc, d2a, d2b, d2c);
-
-                const ST G[9] = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1],
-                    PrimLattice_G_ptr[2], PrimLattice_G_ptr[3],
-                    PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
-                    PrimLattice_G_ptr[6], PrimLattice_G_ptr[7],
-                    PrimLattice_G_ptr[8]};
-                const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3],
-                    GGt_ptr[2] + GGt_ptr[6], GGt_ptr[4],
-                    GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
-
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = 0; index < last - first; index++) {
-                    spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz,
-                        first + index, a, b, c, da, db, dc, d2a, d2b, d2c,
-                        offload_scratch_iw_ptr + first + index,
-                        spline_padded_size);
-                    const int output_index = first + index;
-                    offload_scratch_iw_ptr[spline_padded_size *
-                            SoAFields3D::LAPL +
-                        output_index] =
-                        SymTrace(offload_scratch_iw_ptr[spline_padded_size *
-                                         SoAFields3D::HESS00 +
-                                     output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS01 +
-                                output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS02 +
-                                output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS11 +
-                                output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS12 +
-                                output_index],
-                            offload_scratch_iw_ptr[spline_padded_size *
-                                    SoAFields3D::HESS22 +
-                                output_index],
-                            symGGt);
-                }
-                const size_t first_cplx = first / 2;
-                const size_t last_cplx = last / 2;
-                PRAGMA_OFFLOAD("omp parallel for")
-                for (int index = first_cplx; index < last_cplx; index++)
-                    C2R::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2],
-                        psi_iw_ptr, sposet_padded_size, mKK_ptr,
-                        offload_scratch_iw_ptr, spline_padded_size, G,
-                        myKcart_ptr, myKcart_padded_size, first_spo_local,
-                        nComplexBands_local, index);
-
-                ValueType* restrict psi = psi_iw_ptr;
-                ValueType* restrict dpsi_x = psi_iw_ptr + sposet_padded_size;
-                ValueType* restrict dpsi_y =
-                    psi_iw_ptr + sposet_padded_size * 2;
-                ValueType* restrict dpsi_z =
-                    psi_iw_ptr + sposet_padded_size * 3;
-                ValueType* restrict d2psi = psi_iw_ptr + sposet_padded_size * 4;
-
-                ValueType* restrict out_phi =
-                    phi_vgl_ptr + iw * requested_orb_size;
-                ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride;
-                ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride;
-                ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride;
-                ValueType* restrict out_d2phi = out_dphi_z + phi_vgl_stride;
-
-                const size_t first_real = first_cplx +
-                    omptarget::min(nComplexBands_local, first_cplx);
-                const size_t last_real = omptarget::min(
-                    last_cplx + omptarget::min(nComplexBands_local, last_cplx),
-                    requested_orb_size);
-                ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0);
-                PRAGMA_OFFLOAD("omp parallel for \
+    for (int iw = 0; iw < num_pos; iw++)
+      for (int team_id = 0; team_id < NumTeams; team_id++)
+      {
+        const size_t first = ChunkSizePerTeam * team_id;
+        const size_t last  = omptarget::min(first + ChunkSizePerTeam, spline_padded_size);
+
+        auto* restrict offload_scratch_iw_ptr = offload_scratch_ptr + spline_padded_size * iw * SoAFields3D::NUM_FIELDS;
+        auto* restrict psi_iw_ptr             = results_scratch_ptr + sposet_padded_size * iw * 5;
+        const auto* restrict pos_iw_ptr       = reinterpret_cast<ST*>(buffer_H2D_ptr + buffer_H2D_stride * iw);
+        const auto* restrict invRow_iw_ptr =
+            *reinterpret_cast<ValueType**>(buffer_H2D_ptr + buffer_H2D_stride * iw + sizeof(ST) * 6);
+
+        int ix, iy, iz;
+        ST a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
+        spline2::computeLocationAndFractional(spline_ptr, pos_iw_ptr[3], pos_iw_ptr[4], pos_iw_ptr[5], ix, iy, iz, a, b,
+                                              c, da, db, dc, d2a, d2b, d2c);
+
+        const ST G[9]      = {PrimLattice_G_ptr[0], PrimLattice_G_ptr[1], PrimLattice_G_ptr[2],
+                              PrimLattice_G_ptr[3], PrimLattice_G_ptr[4], PrimLattice_G_ptr[5],
+                              PrimLattice_G_ptr[6], PrimLattice_G_ptr[7], PrimLattice_G_ptr[8]};
+        const ST symGGt[6] = {GGt_ptr[0], GGt_ptr[1] + GGt_ptr[3], GGt_ptr[2] + GGt_ptr[6],
+                              GGt_ptr[4], GGt_ptr[5] + GGt_ptr[7], GGt_ptr[8]};
+
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = 0; index < last - first; index++)
+        {
+          spline2offload::evaluate_vgh_impl_v2(spline_ptr, ix, iy, iz, first + index, a, b, c, da, db, dc, d2a, d2b,
+                                               d2c, offload_scratch_iw_ptr + first + index, spline_padded_size);
+          const int output_index = first + index;
+          offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::LAPL + output_index] =
+              SymTrace(offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS00 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS01 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS02 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS11 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS12 + output_index],
+                       offload_scratch_iw_ptr[spline_padded_size * SoAFields3D::HESS22 + output_index], symGGt);
+        }
+        const size_t first_cplx = first / 2;
+        const size_t last_cplx  = last / 2;
+        PRAGMA_OFFLOAD("omp parallel for")
+        for (int index = first_cplx; index < last_cplx; index++)
+          C2R::assign_vgl(pos_iw_ptr[0], pos_iw_ptr[1], pos_iw_ptr[2], psi_iw_ptr, sposet_padded_size, mKK_ptr,
+                          offload_scratch_iw_ptr, spline_padded_size, G, myKcart_ptr, myKcart_padded_size,
+                          first_spo_local, nComplexBands_local, index);
+
+        ValueType* restrict psi    = psi_iw_ptr;
+        ValueType* restrict dpsi_x = psi_iw_ptr + sposet_padded_size;
+        ValueType* restrict dpsi_y = psi_iw_ptr + sposet_padded_size * 2;
+        ValueType* restrict dpsi_z = psi_iw_ptr + sposet_padded_size * 3;
+        ValueType* restrict d2psi  = psi_iw_ptr + sposet_padded_size * 4;
+
+        ValueType* restrict out_phi    = phi_vgl_ptr + iw * requested_orb_size;
+        ValueType* restrict out_dphi_x = out_phi + phi_vgl_stride;
+        ValueType* restrict out_dphi_y = out_dphi_x + phi_vgl_stride;
+        ValueType* restrict out_dphi_z = out_dphi_y + phi_vgl_stride;
+        ValueType* restrict out_d2phi  = out_dphi_z + phi_vgl_stride;
+
+        const size_t first_real = first_cplx + omptarget::min(nComplexBands_local, first_cplx);
+        const size_t last_real =
+            omptarget::min(last_cplx + omptarget::min(nComplexBands_local, last_cplx), requested_orb_size);
+        ValueType ratio(0), grad_x(0), grad_y(0), grad_z(0);
+        PRAGMA_OFFLOAD("omp parallel for \
                         reduction(+: ratio, grad_x, grad_y, grad_z)")
-                for (size_t j = first_real; j < last_real; j++) {
-                    out_phi[j] = psi[j];
-                    out_dphi_x[j] = dpsi_x[j];
-                    out_dphi_y[j] = dpsi_y[j];
-                    out_dphi_z[j] = dpsi_z[j];
-                    out_d2phi[j] = d2psi[j];
-
-                    ratio += psi[j] * invRow_iw_ptr[j];
-                    grad_x += dpsi_x[j] * invRow_iw_ptr[j];
-                    grad_y += dpsi_y[j] * invRow_iw_ptr[j];
-                    grad_z += dpsi_z[j] * invRow_iw_ptr[j];
-                }
-
-                rg_private_ptr[(iw * NumTeams + team_id) * 4] = ratio;
-                rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x;
-                rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y;
-                rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z;
-            }
-    }
-
-    for (int iw = 0; iw < num_pos; iw++) {
-        ValueType ratio(0);
-        for (int team_id = 0; team_id < NumTeams; team_id++)
-            ratio += rg_private[iw][team_id * 4];
-        ratios[iw] = ratio;
-
-        ValueType grad_x(0), grad_y(0), grad_z(0);
-        for (int team_id = 0; team_id < NumTeams; team_id++) {
-            grad_x += rg_private[iw][team_id * 4 + 1];
-            grad_y += rg_private[iw][team_id * 4 + 2];
-            grad_z += rg_private[iw][team_id * 4 + 3];
+        for (size_t j = first_real; j < last_real; j++)
+        {
+          out_phi[j]    = psi[j];
+          out_dphi_x[j] = dpsi_x[j];
+          out_dphi_y[j] = dpsi_y[j];
+          out_dphi_z[j] = dpsi_z[j];
+          out_d2phi[j]  = d2psi[j];
+
+          ratio += psi[j] * invRow_iw_ptr[j];
+          grad_x += dpsi_x[j] * invRow_iw_ptr[j];
+          grad_y += dpsi_y[j] * invRow_iw_ptr[j];
+          grad_z += dpsi_z[j] * invRow_iw_ptr[j];
         }
-        grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio};
+
+        rg_private_ptr[(iw * NumTeams + team_id) * 4]     = ratio;
+        rg_private_ptr[(iw * NumTeams + team_id) * 4 + 1] = grad_x;
+        rg_private_ptr[(iw * NumTeams + team_id) * 4 + 2] = grad_y;
+        rg_private_ptr[(iw * NumTeams + team_id) * 4 + 3] = grad_z;
+      }
+  }
+
+  for (int iw = 0; iw < num_pos; iw++)
+  {
+    ValueType ratio(0);
+    for (int team_id = 0; team_id < NumTeams; team_id++)
+      ratio += rg_private[iw][team_id * 4];
+    ratios[iw] = ratio;
+
+    ValueType grad_x(0), grad_y(0), grad_z(0);
+    for (int team_id = 0; team_id < NumTeams; team_id++)
+    {
+      grad_x += rg_private[iw][team_id * 4 + 1];
+      grad_y += rg_private[iw][team_id * 4 + 2];
+      grad_z += rg_private[iw][team_id * 4 + 3];
     }
+    grads[iw] = GradType{grad_x / ratio, grad_y / ratio, grad_z / ratio};
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::assign_vgh(const PointType& r, ValueVector& psi,
-    GradVector& dpsi, HessVector& grad_grad_psi, int first, int last) const
+template<typename ST, typename VT> // assign_vgh: fills psi, dpsi, grad_grad_psi for entries [first, last) from myV, myG, myH
+void SplineC2ROMPTargetT<ST, VT>::assign_vgh(const PointType& r,
+                                             ValueVector& psi,
+                                             GradVector& dpsi,
+                                             HessVector& grad_grad_psi,
+                                             int first,
+                                             int last) const
 {
-    // protect last
-    last = last > this->kPoints.size() ? this->kPoints.size() : last;
-
-    const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1),
-             g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
-             g11 = PrimLattice.G(4), g12 = PrimLattice.G(5),
-             g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
-             g22 = PrimLattice.G(8);
-    const ST x = r[0], y = r[1], z = r[2];
-
-    const ST* restrict k0 = myKcart->data(0);
-    const ST* restrict k1 = myKcart->data(1);
-    const ST* restrict k2 = myKcart->data(2);
-
-    const ST* restrict g0 = myG.data(0);
-    const ST* restrict g1 = myG.data(1);
-    const ST* restrict g2 = myG.data(2);
-    const ST* restrict h00 = myH.data(0);
-    const ST* restrict h01 = myH.data(1);
-    const ST* restrict h02 = myH.data(2);
-    const ST* restrict h11 = myH.data(3);
-    const ST* restrict h12 = myH.data(4);
-    const ST* restrict h22 = myH.data(5);
+  // protect last: clamp it to this->kPoints.size() so the loops below never index past that count
+  last = last > this->kPoints.size() ? this->kPoints.size() : last;
+
+  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+           g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+           g22 = PrimLattice.G(8);
+  const ST x = r[0], y = r[1], z = r[2];
+
+  const ST* restrict k0 = myKcart->data(0);
+  const ST* restrict k1 = myKcart->data(1);
+  const ST* restrict k2 = myKcart->data(2);
+
+  const ST* restrict g0  = myG.data(0);
+  const ST* restrict g1  = myG.data(1);
+  const ST* restrict g2  = myG.data(2);
+  const ST* restrict h00 = myH.data(0);
+  const ST* restrict h01 = myH.data(1);
+  const ST* restrict h02 = myH.data(2);
+  const ST* restrict h11 = myH.data(3);
+  const ST* restrict h12 = myH.data(4);
+  const ST* restrict h22 = myH.data(5);
 
 #pragma omp simd
-    for (size_t j = first; j < std::min(nComplexBands, last); j++) {
-        int jr = j << 1;
-        int ji = jr + 1;
-
-        const ST kX = k0[j];
-        const ST kY = k1[j];
-        const ST kZ = k2[j];
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-
-        // phase
-        ST s, c;
-        omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
-
-        // dot(PrimLattice.G,myG[j])
-        const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
-        const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
-        const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
-
-        const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
-        const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
-        const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
-
-        // \f$\nabla \psi_r + {\bf k}\psi_i\f$
-        const ST gX_r = dX_r + val_i * kX;
-        const ST gY_r = dY_r + val_i * kY;
-        const ST gZ_r = dZ_r + val_i * kZ;
-        const ST gX_i = dX_i - val_r * kX;
-        const ST gY_i = dY_i - val_r * kY;
-        const ST gZ_i = dZ_i - val_r * kZ;
-
-        const size_t psiIndex = this->first_spo + jr;
-
-        psi[psiIndex] = c * val_r - s * val_i;
-        dpsi[psiIndex][0] = c * gX_r - s * gX_i;
-        dpsi[psiIndex][1] = c * gY_r - s * gY_i;
-        dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
-
-        psi[psiIndex + 1] = c * val_i + s * val_r;
-        dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
-        dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
-        dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
-
-        const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g00, g01, g02, g00, g01, g02) +
-            kX * (gX_i + dX_i);
-        const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g00, g01, g02, g10, g11, g12) +
-            kX * (gY_i + dY_i);
-        const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g00, g01, g02, g20, g21, g22) +
-            kX * (gZ_i + dZ_i);
-        const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g10, g11, g12, g00, g01, g02) +
-            kY * (gX_i + dX_i);
-        const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g10, g11, g12, g10, g11, g12) +
-            kY * (gY_i + dY_i);
-        const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g10, g11, g12, g20, g21, g22) +
-            kY * (gZ_i + dZ_i);
-        const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g20, g21, g22, g00, g01, g02) +
-            kZ * (gX_i + dX_i);
-        const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g20, g21, g22, g10, g11, g12) +
-            kZ * (gY_i + dY_i);
-        const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g20, g21, g22, g20, g21, g22) +
-            kZ * (gZ_i + dZ_i);
-
-        const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g00, g01, g02, g00, g01, g02) -
-            kX * (gX_r + dX_r);
-        const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g00, g01, g02, g10, g11, g12) -
-            kX * (gY_r + dY_r);
-        const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g00, g01, g02, g20, g21, g22) -
-            kX * (gZ_r + dZ_r);
-        const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g10, g11, g12, g00, g01, g02) -
-            kY * (gX_r + dX_r);
-        const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g10, g11, g12, g10, g11, g12) -
-            kY * (gY_r + dY_r);
-        const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g10, g11, g12, g20, g21, g22) -
-            kY * (gZ_r + dZ_r);
-        const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g20, g21, g22, g00, g01, g02) -
-            kZ * (gX_r + dX_r);
-        const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g20, g21, g22, g10, g11, g12) -
-            kZ * (gY_r + dY_r);
-        const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g20, g21, g22, g20, g21, g22) -
-            kZ * (gZ_r + dZ_r);
-
-        grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
-        grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
-        grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
-        grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i;
-        grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
-        grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
-        grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i;
-        grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i;
-        grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
-
-        grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r;
-        grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r;
-        grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r;
-        grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r;
-        grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r;
-        grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r;
-        grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r;
-        grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r;
-        grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r;
-    }
+  for (size_t j = first; j < std::min(nComplexBands, last); j++) // entries below nComplexBands: two output slots each
+  {
+    int jr = j << 1; // 2*j: position of the real component in myV/myG/myH
+    int ji = jr + 1; // 2*j + 1: position of the imaginary component
+
+    const ST kX    = k0[j];
+    const ST kY    = k1[j];
+    const ST kZ    = k2[j];
+    const ST val_r = myV[jr];
+    const ST val_i = myV[ji];
+
+    // phase: sincos of -(k . r) with k = (kX, kY, kZ) and r = (x, y, z)
+    ST s, c;
+    omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    // dot(PrimLattice.G,myG[j])
+    const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const ST gX_r = dX_r + val_i * kX;
+    const ST gY_r = dY_r + val_i * kY;
+    const ST gZ_r = dZ_r + val_i * kZ;
+    const ST gX_i = dX_i - val_r * kX;
+    const ST gY_i = dY_i - val_r * kY;
+    const ST gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = this->first_spo + jr; // slots psiIndex and psiIndex + 1 belong to entry j
+
+    psi[psiIndex]     = c * val_r - s * val_i; // Re[(val_r + i*val_i) * (c + i*s)]
+    dpsi[psiIndex][0] = c * gX_r - s * gX_i;
+    dpsi[psiIndex][1] = c * gY_r - s * gY_i;
+    dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
+
+    psi[psiIndex + 1]     = c * val_i + s * val_r; // Im[(val_r + i*val_i) * (c + i*s)]
+    dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
+    dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
+    dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
+
+    const ST h_xx_r = // NOTE(review): v_m_v is defined elsewhere; presumably (G row)^T * H * (G row) -- confirm
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
+    const ST h_xy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
+    const ST h_xz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
+    const ST h_yx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
+    const ST h_yy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
+    const ST h_yz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
+    const ST h_zx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
+    const ST h_zy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
+    const ST h_zz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
+
+    const ST h_xx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
+    const ST h_xy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
+    const ST h_xz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
+    const ST h_yx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
+    const ST h_yy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
+    const ST h_yz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
+    const ST h_zx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
+    const ST h_zy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
+    const ST h_zz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
+
+    grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
+    grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
+    grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
+    grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i;
+    grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
+    grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
+    grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i;
+    grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i;
+    grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
+
+    grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r;
+    grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r;
+    grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r;
+    grad_grad_psi[psiIndex + 1][3] = c * h_yx_i + s * h_yx_r;
+    grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r;
+    grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r;
+    grad_grad_psi[psiIndex + 1][6] = c * h_zx_i + s * h_zx_r;
+    grad_grad_psi[psiIndex + 1][7] = c * h_zy_i + s * h_zy_r;
+    grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r;
+  }
 
 #pragma omp simd
-    for (size_t j = std::max(nComplexBands, first); j < last; j++) {
-        int jr = j << 1;
-        int ji = jr + 1;
-
-        const ST kX = k0[j];
-        const ST kY = k1[j];
-        const ST kZ = k2[j];
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-
-        // phase
-        ST s, c;
-        omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
-
-        // dot(PrimLattice.G,myG[j])
-        const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
-        const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
-        const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
-
-        const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
-        const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
-        const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
-
-        // \f$\nabla \psi_r + {\bf k}\psi_i\f$
-        const ST gX_r = dX_r + val_i * kX;
-        const ST gY_r = dY_r + val_i * kY;
-        const ST gZ_r = dZ_r + val_i * kZ;
-        const ST gX_i = dX_i - val_r * kX;
-        const ST gY_i = dY_i - val_r * kY;
-        const ST gZ_i = dZ_i - val_r * kZ;
-
-        const size_t psiIndex = this->first_spo + nComplexBands + j;
-
-        psi[psiIndex] = c * val_r - s * val_i;
-        dpsi[psiIndex][0] = c * gX_r - s * gX_i;
-        dpsi[psiIndex][1] = c * gY_r - s * gY_i;
-        dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
-
-        const ST h_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g00, g01, g02, g00, g01, g02) +
-            kX * (gX_i + dX_i);
-        const ST h_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g00, g01, g02, g10, g11, g12) +
-            kX * (gY_i + dY_i);
-        const ST h_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g00, g01, g02, g20, g21, g22) +
-            kX * (gZ_i + dZ_i);
-        const ST h_yx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g10, g11, g12, g00, g01, g02) +
-            kY * (gX_i + dX_i);
-        const ST h_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g10, g11, g12, g10, g11, g12) +
-            kY * (gY_i + dY_i);
-        const ST h_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g10, g11, g12, g20, g21, g22) +
-            kY * (gZ_i + dZ_i);
-        const ST h_zx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g20, g21, g22, g00, g01, g02) +
-            kZ * (gX_i + dX_i);
-        const ST h_zy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g20, g21, g22, g10, g11, g12) +
-            kZ * (gY_i + dY_i);
-        const ST h_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-                              h22[jr], g20, g21, g22, g20, g21, g22) +
-            kZ * (gZ_i + dZ_i);
-
-        const ST h_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g00, g01, g02, g00, g01, g02) -
-            kX * (gX_r + dX_r);
-        const ST h_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g00, g01, g02, g10, g11, g12) -
-            kX * (gY_r + dY_r);
-        const ST h_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g00, g01, g02, g20, g21, g22) -
-            kX * (gZ_r + dZ_r);
-        const ST h_yx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g10, g11, g12, g00, g01, g02) -
-            kY * (gX_r + dX_r);
-        const ST h_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g10, g11, g12, g10, g11, g12) -
-            kY * (gY_r + dY_r);
-        const ST h_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g10, g11, g12, g20, g21, g22) -
-            kY * (gZ_r + dZ_r);
-        const ST h_zx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g20, g21, g22, g00, g01, g02) -
-            kZ * (gX_r + dX_r);
-        const ST h_zy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g20, g21, g22, g10, g11, g12) -
-            kZ * (gY_r + dY_r);
-        const ST h_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-                              h22[ji], g20, g21, g22, g20, g21, g22) -
-            kZ * (gZ_r + dZ_r);
-
-        grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
-        grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
-        grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
-        grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i;
-        grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
-        grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
-        grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i;
-        grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i;
-        grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
-    }
+  for (size_t j = std::max(nComplexBands, first); j < last; j++) // entries from nComplexBands on: one output slot each
+  {
+    int jr = j << 1;
+    int ji = jr + 1;
+
+    const ST kX    = k0[j];
+    const ST kY    = k1[j];
+    const ST kZ    = k2[j];
+    const ST val_r = myV[jr];
+    const ST val_i = myV[ji];
+
+    // phase: sincos of -(k . r), same as in the first loop
+    ST s, c;
+    omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    // dot(PrimLattice.G,myG[j])
+    const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const ST gX_r = dX_r + val_i * kX;
+    const ST gY_r = dY_r + val_i * kY;
+    const ST gZ_r = dZ_r + val_i * kZ;
+    const ST gX_i = dX_i - val_r * kX;
+    const ST gY_i = dY_i - val_r * kY;
+    const ST gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = this->first_spo + nComplexBands + j; // skips the 2 * nComplexBands slots used by the first loop
+
+    psi[psiIndex]     = c * val_r - s * val_i; // only the c*re - s*im combination is stored here
+    dpsi[psiIndex][0] = c * gX_r - s * gX_i;
+    dpsi[psiIndex][1] = c * gY_r - s * gY_i;
+    dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
+
+    const ST h_xx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02) + kX * (gX_i + dX_i);
+    const ST h_xy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12) + kX * (gY_i + dY_i);
+    const ST h_xz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22) + kX * (gZ_i + dZ_i);
+    const ST h_yx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g00, g01, g02) + kY * (gX_i + dX_i);
+    const ST h_yy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12) + kY * (gY_i + dY_i);
+    const ST h_yz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22) + kY * (gZ_i + dZ_i);
+    const ST h_zx_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g00, g01, g02) + kZ * (gX_i + dX_i);
+    const ST h_zy_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g10, g11, g12) + kZ * (gY_i + dY_i);
+    const ST h_zz_r =
+        v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22) + kZ * (gZ_i + dZ_i);
+
+    const ST h_xx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02) - kX * (gX_r + dX_r);
+    const ST h_xy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12) - kX * (gY_r + dY_r);
+    const ST h_xz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22) - kX * (gZ_r + dZ_r);
+    const ST h_yx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g00, g01, g02) - kY * (gX_r + dX_r);
+    const ST h_yy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12) - kY * (gY_r + dY_r);
+    const ST h_yz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22) - kY * (gZ_r + dZ_r);
+    const ST h_zx_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g00, g01, g02) - kZ * (gX_r + dX_r);
+    const ST h_zy_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g10, g11, g12) - kZ * (gY_r + dY_r);
+    const ST h_zz_i =
+        v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22) - kZ * (gZ_r + dZ_r);
+
+    grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
+    grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
+    grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
+    grad_grad_psi[psiIndex][3] = c * h_yx_r - s * h_yx_i;
+    grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
+    grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
+    grad_grad_psi[psiIndex][6] = c * h_zx_r - s * h_zx_i;
+    grad_grad_psi[psiIndex][7] = c * h_zy_r - s * h_zy_i;
+    grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::evaluateVGH(const ParticleSetT<VT>& P,
-    const int iat, ValueVector& psi, GradVector& dpsi,
-    HessVector& grad_grad_psi)
+template<typename ST, typename VT> // evaluateVGH: fills psi, dpsi and grad_grad_psi for particle iat at P.activeR(iat)
+void SplineC2ROMPTargetT<ST, VT>::evaluateVGH(const ParticleSetT<VT>& P,
+                                              const int iat,
+                                              ValueVector& psi,
+                                              GradVector& dpsi,
+                                              HessVector& grad_grad_psi)
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r)); // NOTE(review): presumably r in reduced lattice coordinates -- confirm
 #pragma omp parallel
-    {
-        int first, last;
-        FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(),
-            omp_get_thread_num(), first, last);
+  {
+    int first, last; // this thread's share of myV, set by FairDivideAligned below
+    FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
 
-        spline2::evaluate3d_vgh(
-            SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
-        assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2);
-    }
+    spline2::evaluate3d_vgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, first, last);
+    assign_vgh(r, psi, dpsi, grad_grad_psi, first / 2, last / 2); // assign_vgh reads myV at 2*j and 2*j + 1, hence the halving
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::assign_vghgh(const PointType& r, ValueVector& psi,
-    GradVector& dpsi, HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi,
-    int first, int last) const
+template<typename ST, typename VT>
+void SplineC2ROMPTargetT<ST, VT>::assign_vghgh(const PointType& r,
+                                               ValueVector& psi,
+                                               GradVector& dpsi,
+                                               HessVector& grad_grad_psi,
+                                               GGGVector& grad_grad_grad_psi,
+                                               int first,
+                                               int last) const
 {
-    // protect last
-    last = last < 0 ? this->kPoints.size() :
-                      (last > this->kPoints.size() ? this->kPoints.size() : last);
-
-    const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1),
-             g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
-             g11 = PrimLattice.G(4), g12 = PrimLattice.G(5),
-             g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
-             g22 = PrimLattice.G(8);
-    const ST x = r[0], y = r[1], z = r[2];
-
-    const ST* restrict k0 = myKcart->data(0);
-    const ST* restrict k1 = myKcart->data(1);
-    const ST* restrict k2 = myKcart->data(2);
-
-    const ST* restrict g0 = myG.data(0);
-    const ST* restrict g1 = myG.data(1);
-    const ST* restrict g2 = myG.data(2);
-    const ST* restrict h00 = myH.data(0);
-    const ST* restrict h01 = myH.data(1);
-    const ST* restrict h02 = myH.data(2);
-    const ST* restrict h11 = myH.data(3);
-    const ST* restrict h12 = myH.data(4);
-    const ST* restrict h22 = myH.data(5);
-
-    const ST* restrict gh000 = mygH.data(0);
-    const ST* restrict gh001 = mygH.data(1);
-    const ST* restrict gh002 = mygH.data(2);
-    const ST* restrict gh011 = mygH.data(3);
-    const ST* restrict gh012 = mygH.data(4);
-    const ST* restrict gh022 = mygH.data(5);
-    const ST* restrict gh111 = mygH.data(6);
-    const ST* restrict gh112 = mygH.data(7);
-    const ST* restrict gh122 = mygH.data(8);
-    const ST* restrict gh222 = mygH.data(9);
+  // protect last
+  last = last < 0 ? this->kPoints.size() : (last > this->kPoints.size() ? this->kPoints.size() : last);
+
+  const ST g00 = PrimLattice.G(0), g01 = PrimLattice.G(1), g02 = PrimLattice.G(2), g10 = PrimLattice.G(3),
+           g11 = PrimLattice.G(4), g12 = PrimLattice.G(5), g20 = PrimLattice.G(6), g21 = PrimLattice.G(7),
+           g22 = PrimLattice.G(8);
+  const ST x = r[0], y = r[1], z = r[2];
+
+  const ST* restrict k0 = myKcart->data(0);
+  const ST* restrict k1 = myKcart->data(1);
+  const ST* restrict k2 = myKcart->data(2);
+
+  const ST* restrict g0  = myG.data(0);
+  const ST* restrict g1  = myG.data(1);
+  const ST* restrict g2  = myG.data(2);
+  const ST* restrict h00 = myH.data(0);
+  const ST* restrict h01 = myH.data(1);
+  const ST* restrict h02 = myH.data(2);
+  const ST* restrict h11 = myH.data(3);
+  const ST* restrict h12 = myH.data(4);
+  const ST* restrict h22 = myH.data(5);
+
+  const ST* restrict gh000 = mygH.data(0);
+  const ST* restrict gh001 = mygH.data(1);
+  const ST* restrict gh002 = mygH.data(2);
+  const ST* restrict gh011 = mygH.data(3);
+  const ST* restrict gh012 = mygH.data(4);
+  const ST* restrict gh022 = mygH.data(5);
+  const ST* restrict gh111 = mygH.data(6);
+  const ST* restrict gh112 = mygH.data(7);
+  const ST* restrict gh122 = mygH.data(8);
+  const ST* restrict gh222 = mygH.data(9);
 
 // SIMD doesn't work quite right yet.  Comment out until further debugging.
 #pragma omp simd
-    for (size_t j = first; j < std::min(nComplexBands, last); j++) {
-        int jr = j << 1;
-        int ji = jr + 1;
-
-        const ST kX = k0[j];
-        const ST kY = k1[j];
-        const ST kZ = k2[j];
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-
-        // phase
-        ST s, c;
-        omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
-
-        // dot(PrimLattice.G,myG[j])
-        const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
-        const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
-        const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
-
-        const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
-        const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
-        const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
-
-        // \f$\nabla \psi_r + {\bf k}\psi_i\f$
-        const ST gX_r = dX_r + val_i * kX;
-        const ST gY_r = dY_r + val_i * kY;
-        const ST gZ_r = dZ_r + val_i * kZ;
-        const ST gX_i = dX_i - val_r * kX;
-        const ST gY_i = dY_i - val_r * kY;
-        const ST gZ_i = dZ_i - val_r * kZ;
-
-        const size_t psiIndex = this->first_spo + jr;
-        psi[psiIndex] = c * val_r - s * val_i;
-        dpsi[psiIndex][0] = c * gX_r - s * gX_i;
-        dpsi[psiIndex][1] = c * gY_r - s * gY_i;
-        dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
-
-        psi[psiIndex + 1] = c * val_i + s * val_r;
-        dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
-        dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
-        dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
-
-        // intermediates for computation of hessian. \partial_i \partial_j phi
-        // in cartesian coordinates.
-        const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g00, g01, g02, g00, g01, g02);
-        const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g00, g01, g02, g10, g11, g12);
-        const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g00, g01, g02, g20, g21, g22);
-        const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g10, g11, g12, g10, g11, g12);
-        const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g10, g11, g12, g20, g21, g22);
-        const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g20, g21, g22, g20, g21, g22);
-
-        const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g00, g01, g02, g00, g01, g02);
-        const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g00, g01, g02, g10, g11, g12);
-        const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g00, g01, g02, g20, g21, g22);
-        const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g10, g11, g12, g10, g11, g12);
-        const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g10, g11, g12, g20, g21, g22);
-        const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g20, g21, g22, g20, g21, g22);
-
-        const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
-        const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
-        const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
-        const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
-        const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
-        const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
-
-        const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
-        const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
-        const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
-        const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
-        const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
-        const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
-
-        grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
-        grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
-        grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
-        grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i;
-        grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
-        grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
-        grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i;
-        grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i;
-        grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
-
-        grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r;
-        grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r;
-        grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r;
-        grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r;
-        grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r;
-        grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r;
-        grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r;
-        grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r;
-        grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r;
-
-        // These are the real and imaginary components of the third SPO
-        // derivative.  _xxx denotes
-        //  third derivative w.r.t. x, _xyz, a derivative with resepect to x,y,
-        //  and z, and so on.
-
-        const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
-        const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
-        const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
-        const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
-        const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
-        const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
-        const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
-        const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
-        const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
-        const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
-
-        const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
-        const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
-        const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
-        const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
-        const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
-        const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
-        const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
-        const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
-        const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
-        const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
-
-        // Here is where we build up the components of the physical hessian
-        // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
-        const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r -
-            kX * kX * kX * val_i;
-        const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i +
-            kX * kX * kX * val_r;
-        const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) -
-            (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
-        const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) -
-            (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
-        const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) -
-            (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
-        const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) -
-            (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
-        const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) -
-            (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
-        const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) -
-            (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
-        const ST gh_xyz_r = f3_xyz_r +
-            (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
-            (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) -
-            kX * kY * kZ * val_i;
-        const ST gh_xyz_i = f3_xyz_i -
-            (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
-            (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) +
-            kX * kY * kZ * val_r;
-        const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) -
-            (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
-        const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) -
-            (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
-        const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r -
-            kY * kY * kY * val_i;
-        const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i +
-            kY * kY * kY * val_r;
-        const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) -
-            (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
-        const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) -
-            (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
-        const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) -
-            (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
-        const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) -
-            (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
-        const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r -
-            kZ * kZ * kZ * val_i;
-        const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i +
-            kZ * kZ * kZ * val_r;
-
-        grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i;
-        grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i;
-        grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i;
-        grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i;
-        grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i;
-        grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i;
-        grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i;
-
-        grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i;
-        grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i;
-        grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i;
-        grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i;
-        grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i;
-        grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i;
-        grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i;
-
-        grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i;
-        grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i;
-        grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i;
-        grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i;
-        grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i;
-        grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i;
-        grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i;
-
-        grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r;
-        grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r;
-        grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r;
-        grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r;
-        grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r;
-        grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r;
-        grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r;
-        grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r;
-        grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r;
-
-        grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r;
-        grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r;
-        grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r;
-        grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r;
-        grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r;
-        grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r;
-        grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r;
-        grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r;
-        grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r;
-
-        grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r;
-        grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r;
-        grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r;
-        grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r;
-        grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r;
-        grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * gh_yzz_r;
-        grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r;
-        grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r;
-        grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r;
-    }
+  for (size_t j = first; j < std::min(nComplexBands, last); j++)
+  {
+    int jr = j << 1;
+    int ji = jr + 1;
+
+    const ST kX    = k0[j];
+    const ST kY    = k1[j];
+    const ST kZ    = k2[j];
+    const ST val_r = myV[jr];
+    const ST val_i = myV[ji];
+
+    // phase
+    ST s, c;
+    omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    // dot(PrimLattice.G,myG[j])
+    const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const ST gX_r = dX_r + val_i * kX;
+    const ST gY_r = dY_r + val_i * kY;
+    const ST gZ_r = dZ_r + val_i * kZ;
+    const ST gX_i = dX_i - val_r * kX;
+    const ST gY_i = dY_i - val_r * kY;
+    const ST gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = this->first_spo + jr;
+    psi[psiIndex]         = c * val_r - s * val_i;
+    dpsi[psiIndex][0]     = c * gX_r - s * gX_i;
+    dpsi[psiIndex][1]     = c * gY_r - s * gY_i;
+    dpsi[psiIndex][2]     = c * gZ_r - s * gZ_i;
+
+    psi[psiIndex + 1]     = c * val_i + s * val_r;
+    dpsi[psiIndex + 1][0] = c * gX_i + s * gX_r;
+    dpsi[psiIndex + 1][1] = c * gY_i + s * gY_r;
+    dpsi[psiIndex + 1][2] = c * gZ_i + s * gZ_r;
+
+    // intermediates for computation of hessian. \partial_i \partial_j phi
+    // in cartesian coordinates.
+    const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
+    const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
+    const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
+    const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
+    const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
+    const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
+
+    const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
+    const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
+    const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
+    const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
+    const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
+    const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
+
+    const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
+    const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
+    const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
+    const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
+    const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
+    const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
+
+    const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
+    const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
+    const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
+    const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
+    const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
+    const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
+
+    grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
+    grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
+    grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
+    grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i;
+    grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
+    grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
+    grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i;
+    grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i;
+    grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
+
+    grad_grad_psi[psiIndex + 1][0] = c * h_xx_i + s * h_xx_r;
+    grad_grad_psi[psiIndex + 1][1] = c * h_xy_i + s * h_xy_r;
+    grad_grad_psi[psiIndex + 1][2] = c * h_xz_i + s * h_xz_r;
+    grad_grad_psi[psiIndex + 1][3] = c * h_xy_i + s * h_xy_r;
+    grad_grad_psi[psiIndex + 1][4] = c * h_yy_i + s * h_yy_r;
+    grad_grad_psi[psiIndex + 1][5] = c * h_yz_i + s * h_yz_r;
+    grad_grad_psi[psiIndex + 1][6] = c * h_xz_i + s * h_xz_r;
+    grad_grad_psi[psiIndex + 1][7] = c * h_yz_i + s * h_yz_r;
+    grad_grad_psi[psiIndex + 1][8] = c * h_zz_i + s * h_zz_r;
+
+    // These are the real and imaginary components of the third SPO
+    // derivative.  _xxx denotes
+    //  third derivative w.r.t. x, _xyz, a derivative with respect to x,y,
+    //  and z, and so on.
+
+    const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    // Here is where we build up the components of the physical hessian
+    // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r))
+    const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
+    const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
+    const ST gh_xxy_r =
+        f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
+    const ST gh_xxy_i =
+        f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
+    const ST gh_xxz_r =
+        f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
+    const ST gh_xxz_i =
+        f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
+    const ST gh_xyy_r =
+        f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
+    const ST gh_xyy_i =
+        f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
+    const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
+        (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
+    const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
+        (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
+    const ST gh_xzz_r =
+        f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
+    const ST gh_xzz_i =
+        f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
+    const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
+    const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
+    const ST gh_yyz_r =
+        f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
+    const ST gh_yyz_i =
+        f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
+    const ST gh_yzz_r =
+        f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
+    const ST gh_yzz_i =
+        f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
+    const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
+    const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
+
+    grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i;
+    grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i;
+    grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i;
+    grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i;
+    grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i;
+    grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i;
+    grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i;
+
+    grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i;
+    grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i;
+    grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i;
+    grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i;
+    grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i;
+    grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i;
+    grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i;
+
+    grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i;
+    grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i;
+    grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i;
+    grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i;
+    grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i;
+    grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i;
+    grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i;
+
+    grad_grad_grad_psi[psiIndex + 1][0][0] = c * gh_xxx_i + s * gh_xxx_r;
+    grad_grad_grad_psi[psiIndex + 1][0][1] = c * gh_xxy_i + s * gh_xxy_r;
+    grad_grad_grad_psi[psiIndex + 1][0][2] = c * gh_xxz_i + s * gh_xxz_r;
+    grad_grad_grad_psi[psiIndex + 1][0][3] = c * gh_xxy_i + s * gh_xxy_r;
+    grad_grad_grad_psi[psiIndex + 1][0][4] = c * gh_xyy_i + s * gh_xyy_r;
+    grad_grad_grad_psi[psiIndex + 1][0][5] = c * gh_xyz_i + s * gh_xyz_r;
+    grad_grad_grad_psi[psiIndex + 1][0][6] = c * gh_xxz_i + s * gh_xxz_r;
+    grad_grad_grad_psi[psiIndex + 1][0][7] = c * gh_xyz_i + s * gh_xyz_r;
+    grad_grad_grad_psi[psiIndex + 1][0][8] = c * gh_xzz_i + s * gh_xzz_r;
+
+    grad_grad_grad_psi[psiIndex + 1][1][0] = c * gh_xxy_i + s * gh_xxy_r;
+    grad_grad_grad_psi[psiIndex + 1][1][1] = c * gh_xyy_i + s * gh_xyy_r;
+    grad_grad_grad_psi[psiIndex + 1][1][2] = c * gh_xyz_i + s * gh_xyz_r;
+    grad_grad_grad_psi[psiIndex + 1][1][3] = c * gh_xyy_i + s * gh_xyy_r;
+    grad_grad_grad_psi[psiIndex + 1][1][4] = c * gh_yyy_i + s * gh_yyy_r;
+    grad_grad_grad_psi[psiIndex + 1][1][5] = c * gh_yyz_i + s * gh_yyz_r;
+    grad_grad_grad_psi[psiIndex + 1][1][6] = c * gh_xyz_i + s * gh_xyz_r;
+    grad_grad_grad_psi[psiIndex + 1][1][7] = c * gh_yyz_i + s * gh_yyz_r;
+    grad_grad_grad_psi[psiIndex + 1][1][8] = c * gh_yzz_i + s * gh_yzz_r;
+
+    grad_grad_grad_psi[psiIndex + 1][2][0] = c * gh_xxz_i + s * gh_xxz_r;
+    grad_grad_grad_psi[psiIndex + 1][2][1] = c * gh_xyz_i + s * gh_xyz_r;
+    grad_grad_grad_psi[psiIndex + 1][2][2] = c * gh_xzz_i + s * gh_xzz_r;
+    grad_grad_grad_psi[psiIndex + 1][2][3] = c * gh_xyz_i + s * gh_xyz_r;
+    grad_grad_grad_psi[psiIndex + 1][2][4] = c * gh_yyz_i + s * gh_yyz_r;
+    grad_grad_grad_psi[psiIndex + 1][2][5] = c * gh_yzz_i + s * gh_yzz_r;
+    grad_grad_grad_psi[psiIndex + 1][2][6] = c * gh_xzz_i + s * gh_xzz_r;
+    grad_grad_grad_psi[psiIndex + 1][2][7] = c * gh_yzz_i + s * gh_yzz_r;
+    grad_grad_grad_psi[psiIndex + 1][2][8] = c * gh_zzz_i + s * gh_zzz_r;
+  }
 #pragma omp simd
-    for (size_t j = std::max(nComplexBands, first); j < last; j++) {
-        int jr = j << 1;
-        int ji = jr + 1;
-
-        const ST kX = k0[j];
-        const ST kY = k1[j];
-        const ST kZ = k2[j];
-        const ST val_r = myV[jr];
-        const ST val_i = myV[ji];
-
-        // phase
-        ST s, c;
-        omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
-
-        // dot(PrimLattice.G,myG[j])
-        const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
-        const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
-        const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
-
-        const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
-        const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
-        const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
-
-        // \f$\nabla \psi_r + {\bf k}\psi_i\f$
-        const ST gX_r = dX_r + val_i * kX;
-        const ST gY_r = dY_r + val_i * kY;
-        const ST gZ_r = dZ_r + val_i * kZ;
-        const ST gX_i = dX_i - val_r * kX;
-        const ST gY_i = dY_i - val_r * kY;
-        const ST gZ_i = dZ_i - val_r * kZ;
-
-        const size_t psiIndex = this->first_spo + nComplexBands + j;
-        psi[psiIndex] = c * val_r - s * val_i;
-        dpsi[psiIndex][0] = c * gX_r - s * gX_i;
-        dpsi[psiIndex][1] = c * gY_r - s * gY_i;
-        dpsi[psiIndex][2] = c * gZ_r - s * gZ_i;
-
-        // intermediates for computation of hessian. \partial_i \partial_j phi
-        // in cartesian coordinates.
-        const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g00, g01, g02, g00, g01, g02);
-        const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g00, g01, g02, g10, g11, g12);
-        const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g00, g01, g02, g20, g21, g22);
-        const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g10, g11, g12, g10, g11, g12);
-        const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g10, g11, g12, g20, g21, g22);
-        const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr],
-            h22[jr], g20, g21, g22, g20, g21, g22);
-
-        const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g00, g01, g02, g00, g01, g02);
-        const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g00, g01, g02, g10, g11, g12);
-        const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g00, g01, g02, g20, g21, g22);
-        const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g10, g11, g12, g10, g11, g12);
-        const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g10, g11, g12, g20, g21, g22);
-        const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji],
-            h22[ji], g20, g21, g22, g20, g21, g22);
-
-        const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
-        const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
-        const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
-        const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
-        const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
-        const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
-
-        const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
-        const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
-        const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
-        const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
-        const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
-        const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
-
-        grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
-        grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
-        grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
-        grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i;
-        grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
-        grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
-        grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i;
-        grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i;
-        grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
-
-        // These are the real and imaginary components of the third SPO
-        // derivative.  _xxx denotes
-        //  third derivative w.r.t. x, _xyz, a derivative with resepect to x,y,
-        //  and z, and so on.
-
-        const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
-        const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
-        const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
-        const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
-        const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
-        const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
-        const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
-        const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
-        const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
-        const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr],
-            gh011[jr], gh012[jr], gh022[jr], gh111[jr], gh112[jr], gh122[jr],
-            gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
-
-        const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
-        const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
-        const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
-        const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
-        const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
-        const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
-        const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
-        const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
-        const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
-        const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji],
-            gh011[ji], gh012[ji], gh022[ji], gh111[ji], gh112[ji], gh122[ji],
-            gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
-
-        // Here is where we build up the components of the physical hessian
-        // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r)
-        const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r -
-            kX * kX * kX * val_i;
-        const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i +
-            kX * kX * kX * val_r;
-        const ST gh_xxy_r = f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) -
-            (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
-        const ST gh_xxy_i = f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) -
-            (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
-        const ST gh_xxz_r = f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) -
-            (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
-        const ST gh_xxz_i = f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) -
-            (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
-        const ST gh_xyy_r = f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) -
-            (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
-        const ST gh_xyy_i = f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) -
-            (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
-        const ST gh_xyz_r = f3_xyz_r +
-            (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
-            (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) -
-            kX * kY * kZ * val_i;
-        const ST gh_xyz_i = f3_xyz_i -
-            (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
-            (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) +
-            kX * kY * kZ * val_r;
-        const ST gh_xzz_r = f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) -
-            (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
-        const ST gh_xzz_i = f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) -
-            (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
-        const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r -
-            kY * kY * kY * val_i;
-        const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i +
-            kY * kY * kY * val_r;
-        const ST gh_yyz_r = f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) -
-            (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
-        const ST gh_yyz_i = f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) -
-            (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
-        const ST gh_yzz_r = f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) -
-            (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
-        const ST gh_yzz_i = f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) -
-            (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
-        const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r -
-            kZ * kZ * kZ * val_i;
-        const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i +
-            kZ * kZ * kZ * val_r;
-        //[x][xx] //These are the unique entries
-        grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i;
-        grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i;
-        grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i;
-        grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i;
-        grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i;
-        grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i;
-        grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i;
-
-        grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i;
-        grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i;
-        grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i;
-        grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i;
-        grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i;
-        grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i;
-        grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i;
-
-        grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i;
-        grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i;
-        grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i;
-        grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i;
-        grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i;
-        grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i;
-        grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i;
-        grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i;
-    }
+  for (size_t j = std::max(nComplexBands, first); j < last; j++)
+  {
+    int jr = j << 1;
+    int ji = jr + 1;
+
+    const ST kX    = k0[j];
+    const ST kY    = k1[j];
+    const ST kZ    = k2[j];
+    const ST val_r = myV[jr];
+    const ST val_i = myV[ji];
+
+    // phase
+    ST s, c;
+    omptarget::sincos(-(x * kX + y * kY + z * kZ), &s, &c);
+
+    // dot(PrimLattice.G,myG[j])
+    const ST dX_r = g00 * g0[jr] + g01 * g1[jr] + g02 * g2[jr];
+    const ST dY_r = g10 * g0[jr] + g11 * g1[jr] + g12 * g2[jr];
+    const ST dZ_r = g20 * g0[jr] + g21 * g1[jr] + g22 * g2[jr];
+
+    const ST dX_i = g00 * g0[ji] + g01 * g1[ji] + g02 * g2[ji];
+    const ST dY_i = g10 * g0[ji] + g11 * g1[ji] + g12 * g2[ji];
+    const ST dZ_i = g20 * g0[ji] + g21 * g1[ji] + g22 * g2[ji];
+
+    // \f$\nabla \psi_r + {\bf k}\psi_i\f$
+    const ST gX_r = dX_r + val_i * kX;
+    const ST gY_r = dY_r + val_i * kY;
+    const ST gZ_r = dZ_r + val_i * kZ;
+    const ST gX_i = dX_i - val_r * kX;
+    const ST gY_i = dY_i - val_r * kY;
+    const ST gZ_i = dZ_i - val_r * kZ;
+
+    const size_t psiIndex = this->first_spo + nComplexBands + j;
+    psi[psiIndex]         = c * val_r - s * val_i;
+    dpsi[psiIndex][0]     = c * gX_r - s * gX_i;
+    dpsi[psiIndex][1]     = c * gY_r - s * gY_i;
+    dpsi[psiIndex][2]     = c * gZ_r - s * gZ_i;
+
+    // intermediates for computation of hessian. \partial_i \partial_j phi
+    // in cartesian coordinates.
+    const ST f_xx_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g00, g01, g02);
+    const ST f_xy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g10, g11, g12);
+    const ST f_xz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g00, g01, g02, g20, g21, g22);
+    const ST f_yy_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g10, g11, g12);
+    const ST f_yz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g10, g11, g12, g20, g21, g22);
+    const ST f_zz_r = v_m_v(h00[jr], h01[jr], h02[jr], h11[jr], h12[jr], h22[jr], g20, g21, g22, g20, g21, g22);
+
+    const ST f_xx_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g00, g01, g02);
+    const ST f_xy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g10, g11, g12);
+    const ST f_xz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g00, g01, g02, g20, g21, g22);
+    const ST f_yy_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g10, g11, g12);
+    const ST f_yz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g10, g11, g12, g20, g21, g22);
+    const ST f_zz_i = v_m_v(h00[ji], h01[ji], h02[ji], h11[ji], h12[ji], h22[ji], g20, g21, g22, g20, g21, g22);
+
+    const ST h_xx_r = f_xx_r + 2 * kX * dX_i - kX * kX * val_r;
+    const ST h_xy_r = f_xy_r + (kX * dY_i + kY * dX_i) - kX * kY * val_r;
+    const ST h_xz_r = f_xz_r + (kX * dZ_i + kZ * dX_i) - kX * kZ * val_r;
+    const ST h_yy_r = f_yy_r + 2 * kY * dY_i - kY * kY * val_r;
+    const ST h_yz_r = f_yz_r + (kY * dZ_i + kZ * dY_i) - kY * kZ * val_r;
+    const ST h_zz_r = f_zz_r + 2 * kZ * dZ_i - kZ * kZ * val_r;
+
+    const ST h_xx_i = f_xx_i - 2 * kX * dX_r - kX * kX * val_i;
+    const ST h_xy_i = f_xy_i - (kX * dY_r + kY * dX_r) - kX * kY * val_i;
+    const ST h_xz_i = f_xz_i - (kX * dZ_r + kZ * dX_r) - kX * kZ * val_i;
+    const ST h_yy_i = f_yy_i - 2 * kY * dY_r - kY * kY * val_i;
+    const ST h_yz_i = f_yz_i - (kZ * dY_r + kY * dZ_r) - kZ * kY * val_i;
+    const ST h_zz_i = f_zz_i - 2 * kZ * dZ_r - kZ * kZ * val_i;
+
+    grad_grad_psi[psiIndex][0] = c * h_xx_r - s * h_xx_i;
+    grad_grad_psi[psiIndex][1] = c * h_xy_r - s * h_xy_i;
+    grad_grad_psi[psiIndex][2] = c * h_xz_r - s * h_xz_i;
+    grad_grad_psi[psiIndex][3] = c * h_xy_r - s * h_xy_i;
+    grad_grad_psi[psiIndex][4] = c * h_yy_r - s * h_yy_i;
+    grad_grad_psi[psiIndex][5] = c * h_yz_r - s * h_yz_i;
+    grad_grad_psi[psiIndex][6] = c * h_xz_r - s * h_xz_i;
+    grad_grad_psi[psiIndex][7] = c * h_yz_r - s * h_yz_i;
+    grad_grad_psi[psiIndex][8] = c * h_zz_r - s * h_zz_i;
+
+    // These are the real and imaginary components of the third SPO
+    // derivative.  _xxx denotes
+    //  third derivative w.r.t. x, _xyz, a derivative with respect to x,y,
+    //  and z, and so on.
+
+    const ST f3_xxx_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const ST f3_xxy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const ST f3_xxz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const ST f3_xyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const ST f3_xyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const ST f3_xzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const ST f3_yyy_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const ST f3_yyz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const ST f3_yzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const ST f3_zzz_r = t3_contract(gh000[jr], gh001[jr], gh002[jr], gh011[jr], gh012[jr], gh022[jr], gh111[jr],
+                                    gh112[jr], gh122[jr], gh222[jr], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    const ST f3_xxx_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g00, g01, g02);
+    const ST f3_xxy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g10, g11, g12);
+    const ST f3_xxz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g00, g01, g02, g20, g21, g22);
+    const ST f3_xyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g10, g11, g12);
+    const ST f3_xyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g10, g11, g12, g20, g21, g22);
+    const ST f3_xzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g00, g01, g02, g20, g21, g22, g20, g21, g22);
+    const ST f3_yyy_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g10, g11, g12);
+    const ST f3_yyz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g10, g11, g12, g20, g21, g22);
+    const ST f3_yzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g10, g11, g12, g20, g21, g22, g20, g21, g22);
+    const ST f3_zzz_i = t3_contract(gh000[ji], gh001[ji], gh002[ji], gh011[ji], gh012[ji], gh022[ji], gh111[ji],
+                                    gh112[ji], gh122[ji], gh222[ji], g20, g21, g22, g20, g21, g22, g20, g21, g22);
+
+    // Here is where we build up the components of the physical hessian
+    // gradient, namely, d^3/dx^3(e^{-ik*r}\phi(r))
+    const ST gh_xxx_r = f3_xxx_r + 3 * kX * f_xx_i - 3 * kX * kX * dX_r - kX * kX * kX * val_i;
+    const ST gh_xxx_i = f3_xxx_i - 3 * kX * f_xx_r - 3 * kX * kX * dX_i + kX * kX * kX * val_r;
+    const ST gh_xxy_r =
+        f3_xxy_r + (kY * f_xx_i + 2 * kX * f_xy_i) - (kX * kX * dY_r + 2 * kX * kY * dX_r) - kX * kX * kY * val_i;
+    const ST gh_xxy_i =
+        f3_xxy_i - (kY * f_xx_r + 2 * kX * f_xy_r) - (kX * kX * dY_i + 2 * kX * kY * dX_i) + kX * kX * kY * val_r;
+    const ST gh_xxz_r =
+        f3_xxz_r + (kZ * f_xx_i + 2 * kX * f_xz_i) - (kX * kX * dZ_r + 2 * kX * kZ * dX_r) - kX * kX * kZ * val_i;
+    const ST gh_xxz_i =
+        f3_xxz_i - (kZ * f_xx_r + 2 * kX * f_xz_r) - (kX * kX * dZ_i + 2 * kX * kZ * dX_i) + kX * kX * kZ * val_r;
+    const ST gh_xyy_r =
+        f3_xyy_r + (2 * kY * f_xy_i + kX * f_yy_i) - (2 * kX * kY * dY_r + kY * kY * dX_r) - kX * kY * kY * val_i;
+    const ST gh_xyy_i =
+        f3_xyy_i - (2 * kY * f_xy_r + kX * f_yy_r) - (2 * kX * kY * dY_i + kY * kY * dX_i) + kX * kY * kY * val_r;
+    const ST gh_xyz_r = f3_xyz_r + (kX * f_yz_i + kY * f_xz_i + kZ * f_xy_i) -
+        (kX * kY * dZ_r + kY * kZ * dX_r + kZ * kX * dY_r) - kX * kY * kZ * val_i;
+    const ST gh_xyz_i = f3_xyz_i - (kX * f_yz_r + kY * f_xz_r + kZ * f_xy_r) -
+        (kX * kY * dZ_i + kY * kZ * dX_i + kZ * kX * dY_i) + kX * kY * kZ * val_r;
+    const ST gh_xzz_r =
+        f3_xzz_r + (2 * kZ * f_xz_i + kX * f_zz_i) - (2 * kX * kZ * dZ_r + kZ * kZ * dX_r) - kX * kZ * kZ * val_i;
+    const ST gh_xzz_i =
+        f3_xzz_i - (2 * kZ * f_xz_r + kX * f_zz_r) - (2 * kX * kZ * dZ_i + kZ * kZ * dX_i) + kX * kZ * kZ * val_r;
+    const ST gh_yyy_r = f3_yyy_r + 3 * kY * f_yy_i - 3 * kY * kY * dY_r - kY * kY * kY * val_i;
+    const ST gh_yyy_i = f3_yyy_i - 3 * kY * f_yy_r - 3 * kY * kY * dY_i + kY * kY * kY * val_r;
+    const ST gh_yyz_r =
+        f3_yyz_r + (kZ * f_yy_i + 2 * kY * f_yz_i) - (kY * kY * dZ_r + 2 * kY * kZ * dY_r) - kY * kY * kZ * val_i;
+    const ST gh_yyz_i =
+        f3_yyz_i - (kZ * f_yy_r + 2 * kY * f_yz_r) - (kY * kY * dZ_i + 2 * kY * kZ * dY_i) + kY * kY * kZ * val_r;
+    const ST gh_yzz_r =
+        f3_yzz_r + (2 * kZ * f_yz_i + kY * f_zz_i) - (2 * kY * kZ * dZ_r + kZ * kZ * dY_r) - kY * kZ * kZ * val_i;
+    const ST gh_yzz_i =
+        f3_yzz_i - (2 * kZ * f_yz_r + kY * f_zz_r) - (2 * kY * kZ * dZ_i + kZ * kZ * dY_i) + kY * kZ * kZ * val_r;
+    const ST gh_zzz_r = f3_zzz_r + 3 * kZ * f_zz_i - 3 * kZ * kZ * dZ_r - kZ * kZ * kZ * val_i;
+    const ST gh_zzz_i = f3_zzz_i - 3 * kZ * f_zz_r - 3 * kZ * kZ * dZ_i + kZ * kZ * kZ * val_r;
+    //[x][xx] //These are the unique entries
+    grad_grad_grad_psi[psiIndex][0][0] = c * gh_xxx_r - s * gh_xxx_i;
+    grad_grad_grad_psi[psiIndex][0][1] = c * gh_xxy_r - s * gh_xxy_i;
+    grad_grad_grad_psi[psiIndex][0][2] = c * gh_xxz_r - s * gh_xxz_i;
+    grad_grad_grad_psi[psiIndex][0][3] = c * gh_xxy_r - s * gh_xxy_i;
+    grad_grad_grad_psi[psiIndex][0][4] = c * gh_xyy_r - s * gh_xyy_i;
+    grad_grad_grad_psi[psiIndex][0][5] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][0][6] = c * gh_xxz_r - s * gh_xxz_i;
+    grad_grad_grad_psi[psiIndex][0][7] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][0][8] = c * gh_xzz_r - s * gh_xzz_i;
+
+    grad_grad_grad_psi[psiIndex][1][0] = c * gh_xxy_r - s * gh_xxy_i;
+    grad_grad_grad_psi[psiIndex][1][1] = c * gh_xyy_r - s * gh_xyy_i;
+    grad_grad_grad_psi[psiIndex][1][2] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][1][3] = c * gh_xyy_r - s * gh_xyy_i;
+    grad_grad_grad_psi[psiIndex][1][4] = c * gh_yyy_r - s * gh_yyy_i;
+    grad_grad_grad_psi[psiIndex][1][5] = c * gh_yyz_r - s * gh_yyz_i;
+    grad_grad_grad_psi[psiIndex][1][6] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][1][7] = c * gh_yyz_r - s * gh_yyz_i;
+    grad_grad_grad_psi[psiIndex][1][8] = c * gh_yzz_r - s * gh_yzz_i;
+
+    grad_grad_grad_psi[psiIndex][2][0] = c * gh_xxz_r - s * gh_xxz_i;
+    grad_grad_grad_psi[psiIndex][2][1] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][2][2] = c * gh_xzz_r - s * gh_xzz_i;
+    grad_grad_grad_psi[psiIndex][2][3] = c * gh_xyz_r - s * gh_xyz_i;
+    grad_grad_grad_psi[psiIndex][2][4] = c * gh_yyz_r - s * gh_yyz_i;
+    grad_grad_grad_psi[psiIndex][2][5] = c * gh_yzz_r - s * gh_yzz_i;
+    grad_grad_grad_psi[psiIndex][2][6] = c * gh_xzz_r - s * gh_xzz_i;
+    grad_grad_grad_psi[psiIndex][2][7] = c * gh_yzz_r - s * gh_yzz_i;
+    grad_grad_grad_psi[psiIndex][2][8] = c * gh_zzz_r - s * gh_zzz_i;
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::evaluateVGHGH(const ParticleSetT<VT>& P,
-    const int iat, ValueVector& psi, GradVector& dpsi,
-    HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi)
+template<typename ST, typename VT> // evaluateVGHGH: evaluate the spline at particle iat, then call assign_vghgh
+void SplineC2ROMPTargetT<ST, VT>::evaluateVGHGH(const ParticleSetT<VT>& P,
+                                                const int iat, // index passed to P.activeR
+                                                ValueVector& psi, // forwarded to assign_vghgh
+                                                GradVector& dpsi, // forwarded to assign_vghgh
+                                                HessVector& grad_grad_psi, // forwarded to assign_vghgh
+                                                GGGVector& grad_grad_grad_psi) // forwarded to assign_vghgh
 {
-    const PointType& r = P.activeR(iat);
-    PointType ru(PrimLattice.toUnit_floor(r));
+  const PointType& r = P.activeR(iat);
+  PointType ru(PrimLattice.toUnit_floor(r)); // presumably r in reduced (unit-cell) coordinates -- confirm
 #pragma omp parallel
-    {
-        int first, last;
-        FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(),
-            omp_get_thread_num(), first, last);
-
-        spline2::evaluate3d_vghgh(
-            SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
-        assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2,
-            last / 2);
-    }
+  {
+    int first, last; // this thread's range over myV, set by FairDivideAligned below
+    FairDivideAligned(myV.size(), getAlignment<ST>(), omp_get_num_threads(), omp_get_thread_num(), first, last);
+
+    spline2::evaluate3d_vghgh(SplineInst->getSplinePtr(), ru, myV, myG, myH, mygH, first, last);
+    assign_vghgh(r, psi, dpsi, grad_grad_psi, grad_grad_grad_psi, first / 2, last / 2); // halved: presumably myV interleaves re/im -- confirm
+  }
 }
 
-template <typename ST, typename VT>
-void
-SplineC2ROMPTargetT<ST, VT>::evaluate_notranspose(const ParticleSetT<VT>& P,
-    int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet,
-    ValueMatrix& d2logdet)
+template<typename ST, typename VT>
+void SplineC2ROMPTargetT<ST, VT>::evaluate_notranspose(const ParticleSetT<VT>& P,
+                                                       int first,
+                                                       int last,
+                                                       ValueMatrix& logdet,
+                                                       GradMatrix& dlogdet,
+                                                       ValueMatrix& d2logdet)
 {
-    // chunk the [first, last) loop into blocks to save temporary memory usage
-    const int block_size = 16;
-
-    // reference vectors refer to the rows of matrices
-    std::vector<ValueVector> multi_psi_v;
-    std::vector<GradVector> multi_dpsi_v;
-    std::vector<ValueVector> multi_d2psi_v;
-    RefVector<ValueVector> psi_v_list;
-    RefVector<GradVector> dpsi_v_list;
-    RefVector<ValueVector> d2psi_v_list;
-
-    multi_psi_v.reserve(block_size);
-    multi_dpsi_v.reserve(block_size);
-    multi_d2psi_v.reserve(block_size);
-    psi_v_list.reserve(block_size);
-    dpsi_v_list.reserve(block_size);
-    d2psi_v_list.reserve(block_size);
-
-    for (int iat = first, i = 0; iat < last;
-         iat += block_size, i += block_size) {
-        const int actual_block_size = std::min(last - iat, block_size);
-        multi_pos_copy.resize(actual_block_size * 6);
-        multi_psi_v.clear();
-        multi_dpsi_v.clear();
-        multi_d2psi_v.clear();
-        psi_v_list.clear();
-        dpsi_v_list.clear();
-        d2psi_v_list.clear();
-
-        for (int ipos = 0; ipos < actual_block_size; ++ipos) {
-            // pack particle positions
-            const PointType& r = P.activeR(iat + ipos);
-            PointType ru(PrimLattice.toUnit_floor(r));
-            multi_pos_copy[ipos * 6] = r[0];
-            multi_pos_copy[ipos * 6 + 1] = r[1];
-            multi_pos_copy[ipos * 6 + 2] = r[2];
-            multi_pos_copy[ipos * 6 + 3] = ru[0];
-            multi_pos_copy[ipos * 6 + 4] = ru[1];
-            multi_pos_copy[ipos * 6 + 5] = ru[2];
-
-            multi_psi_v.emplace_back(logdet[i + ipos], this->OrbitalSetSize);
-            multi_dpsi_v.emplace_back(dlogdet[i + ipos], this->OrbitalSetSize);
-            multi_d2psi_v.emplace_back(d2logdet[i + ipos], this->OrbitalSetSize);
-
-            psi_v_list.push_back(multi_psi_v[ipos]);
-            dpsi_v_list.push_back(multi_dpsi_v[ipos]);
-            d2psi_v_list.push_back(multi_d2psi_v[ipos]);
-        }
-
-        evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch,
-            psi_v_list, dpsi_v_list, d2psi_v_list);
+  // chunk the [first, last) loop into blocks to save temporary memory usage
+  const int block_size = 16;
+
+  // reference vectors refer to the rows of matrices
+  std::vector<ValueVector> multi_psi_v;
+  std::vector<GradVector> multi_dpsi_v;
+  std::vector<ValueVector> multi_d2psi_v;
+  RefVector<ValueVector> psi_v_list;
+  RefVector<GradVector> dpsi_v_list;
+  RefVector<ValueVector> d2psi_v_list;
+
+  multi_psi_v.reserve(block_size);
+  multi_dpsi_v.reserve(block_size);
+  multi_d2psi_v.reserve(block_size);
+  psi_v_list.reserve(block_size);
+  dpsi_v_list.reserve(block_size);
+  d2psi_v_list.reserve(block_size);
+
+  for (int iat = first, i = 0; iat < last; iat += block_size, i += block_size)
+  {
+    const int actual_block_size = std::min(last - iat, block_size);
+    multi_pos_copy.resize(actual_block_size * 6);
+    multi_psi_v.clear();
+    multi_dpsi_v.clear();
+    multi_d2psi_v.clear();
+    psi_v_list.clear();
+    dpsi_v_list.clear();
+    d2psi_v_list.clear();
+
+    for (int ipos = 0; ipos < actual_block_size; ++ipos)
+    {
+      // pack particle positions
+      const PointType& r = P.activeR(iat + ipos);
+      PointType ru(PrimLattice.toUnit_floor(r));
+      multi_pos_copy[ipos * 6]     = r[0];
+      multi_pos_copy[ipos * 6 + 1] = r[1];
+      multi_pos_copy[ipos * 6 + 2] = r[2];
+      multi_pos_copy[ipos * 6 + 3] = ru[0];
+      multi_pos_copy[ipos * 6 + 4] = ru[1];
+      multi_pos_copy[ipos * 6 + 5] = ru[2];
+
+      multi_psi_v.emplace_back(logdet[i + ipos], this->OrbitalSetSize);
+      multi_dpsi_v.emplace_back(dlogdet[i + ipos], this->OrbitalSetSize);
+      multi_d2psi_v.emplace_back(d2logdet[i + ipos], this->OrbitalSetSize);
+
+      psi_v_list.push_back(multi_psi_v[ipos]);
+      dpsi_v_list.push_back(multi_dpsi_v[ipos]);
+      d2psi_v_list.push_back(multi_d2psi_v[ipos]);
     }
+
+    evaluateVGLMultiPos(multi_pos_copy, offload_scratch, results_scratch, psi_v_list, dpsi_v_list, d2psi_v_list);
+  }
 }
 
 template class SplineC2ROMPTargetT<float, float>;
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h
index 0d3aef1f2d0..bf785a03926 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2ROMPTargetT.h
@@ -42,339 +42,292 @@ namespace qmcplusplus
  * orbital. All the output orbitals are real (C2R). The maximal number of output
  * orbitals is OrbitalSetSize.
  */
-template <typename ST, typename VT>
+template<typename ST, typename VT>
 class SplineC2ROMPTargetT : public BsplineSetT<VT>
 {
 public:
-    using SplineType = typename bspline_traits<ST, 3>::SplineType;
-    using BCType = typename bspline_traits<ST, 3>::BCType;
-    using DataType = ST;
-    using PointType = TinyVector<ST, 3>;
-    using SingleSplineType = UBspline_3d_d;
-    // types for evaluation results
-    using TT = typename BsplineSetT<VT>::ValueType;
-    using typename BsplineSetT<VT>::ValueType;
-    using typename BsplineSetT<VT>::GradType;
-    using typename BsplineSetT<VT>::GGGVector;
-    using typename BsplineSetT<VT>::GradVector;
-    using typename BsplineSetT<VT>::GradMatrix;
-    using typename BsplineSetT<VT>::HessVector;
-    using typename BsplineSetT<VT>::ValueVector;
-    using typename BsplineSetT<VT>::ValueMatrix;
-    using typename BsplineSetT<VT>::OffloadMWVGLArray;
-
-    using vContainer_type = Vector<ST, aligned_allocator<ST>>;
-    using gContainer_type = VectorSoaContainer<ST, 3>;
-    using hContainer_type = VectorSoaContainer<ST, 6>;
-    using ghContainer_type = VectorSoaContainer<ST, 10>;
-
-    template <typename DT>
-    using OffloadVector = Vector<DT, OffloadAllocator<DT>>;
-    template <typename DT>
-    using OffloadPosVector = VectorSoaContainer<DT, 3, OffloadAllocator<DT>>;
+  using SplineType       = typename bspline_traits<ST, 3>::SplineType;
+  using BCType           = typename bspline_traits<ST, 3>::BCType;
+  using DataType         = ST;
+  using PointType        = TinyVector<ST, 3>;
+  using SingleSplineType = UBspline_3d_d;
+  // types for evaluation results
+  using TT = typename BsplineSetT<VT>::ValueType;
+  using typename BsplineSetT<VT>::ValueType;
+  using typename BsplineSetT<VT>::GradType;
+  using typename BsplineSetT<VT>::GGGVector;
+  using typename BsplineSetT<VT>::GradVector;
+  using typename BsplineSetT<VT>::GradMatrix;
+  using typename BsplineSetT<VT>::HessVector;
+  using typename BsplineSetT<VT>::ValueVector;
+  using typename BsplineSetT<VT>::ValueMatrix;
+  using typename BsplineSetT<VT>::OffloadMWVGLArray;
+
+  using vContainer_type  = Vector<ST, aligned_allocator<ST>>;
+  using gContainer_type  = VectorSoaContainer<ST, 3>;
+  using hContainer_type  = VectorSoaContainer<ST, 6>;
+  using ghContainer_type = VectorSoaContainer<ST, 10>;
+
+  template<typename DT>
+  using OffloadVector = Vector<DT, OffloadAllocator<DT>>;
+  template<typename DT>
+  using OffloadPosVector = VectorSoaContainer<DT, 3, OffloadAllocator<DT>>;
 
 private:
-    /// timer for offload portion
-    NewTimer& offload_timer_;
-    /// primitive cell
-    CrystalLattice<ST, 3> PrimLattice;
-    ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
-    /// CartesianUnit, e.g. Hessian
-    Tensor<ST, 3> GGt;
-    /// number of complex bands
-    int nComplexBands;
-    /// multi bspline set
-    std::shared_ptr<
-        MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>>
-        SplineInst;
-
-    std::shared_ptr<OffloadVector<ST>> mKK;
-    std::shared_ptr<OffloadPosVector<ST>> myKcart;
-    std::shared_ptr<OffloadVector<ST>> GGt_offload;
-    std::shared_ptr<OffloadVector<ST>> PrimLattice_G_offload;
-
-    ResourceHandle<SplineOMPTargetMultiWalkerMem<ST, TT>> mw_mem_handle_;
-
-    /// team private ratios for reduction, numVP x numTeams
-    Matrix<TT, OffloadPinnedAllocator<TT>> ratios_private;
-    /// offload scratch space, dynamically resized to the maximal need
-    Vector<ST, OffloadPinnedAllocator<ST>> offload_scratch;
-    /// result scratch space, dynamically resized to the maximal need
-    Vector<TT, OffloadPinnedAllocator<TT>> results_scratch;
-    /// psiinv and position scratch space, used to avoid allocation on the fly
-    /// and faster transfer
-    Vector<TT, OffloadPinnedAllocator<TT>> psiinv_pos_copy;
-    /// position scratch space, used to avoid allocation on the fly and faster
-    /// transfer
-    Vector<ST, OffloadPinnedAllocator<ST>> multi_pos_copy;
-
-    void
-    evaluateVGLMultiPos(
-        const Vector<ST, OffloadPinnedAllocator<ST>>& multi_pos_copy,
-        Vector<ST, OffloadPinnedAllocator<ST>>& offload_scratch,
-        Vector<TT, OffloadPinnedAllocator<TT>>& results_scratch,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list) const;
+  /// timer for offload portion
+  NewTimer& offload_timer_;
+  /// primitive cell
+  CrystalLattice<ST, 3> PrimLattice;
+  ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
+  /// CartesianUnit, e.g. Hessian
+  Tensor<ST, 3> GGt;
+  /// number of complex bands
+  int nComplexBands;
+  /// multi bspline set
+  std::shared_ptr<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>> SplineInst;
+
+  std::shared_ptr<OffloadVector<ST>> mKK;
+  std::shared_ptr<OffloadPosVector<ST>> myKcart;
+  std::shared_ptr<OffloadVector<ST>> GGt_offload;
+  std::shared_ptr<OffloadVector<ST>> PrimLattice_G_offload;
+
+  ResourceHandle<SplineOMPTargetMultiWalkerMem<ST, TT>> mw_mem_handle_;
+
+  /// team private ratios for reduction, numVP x numTeams
+  Matrix<TT, OffloadPinnedAllocator<TT>> ratios_private;
+  /// offload scratch space, dynamically resized to the maximal need
+  Vector<ST, OffloadPinnedAllocator<ST>> offload_scratch;
+  /// result scratch space, dynamically resized to the maximal need
+  Vector<TT, OffloadPinnedAllocator<TT>> results_scratch;
+  /// psiinv and position scratch space, used to avoid allocation on the fly
+  /// and faster transfer
+  Vector<TT, OffloadPinnedAllocator<TT>> psiinv_pos_copy;
+  /// position scratch space, used to avoid allocation on the fly and faster
+  /// transfer
+  Vector<ST, OffloadPinnedAllocator<ST>> multi_pos_copy;
+
+  void evaluateVGLMultiPos(const Vector<ST, OffloadPinnedAllocator<ST>>& multi_pos_copy,
+                           Vector<ST, OffloadPinnedAllocator<ST>>& offload_scratch,
+                           Vector<TT, OffloadPinnedAllocator<TT>>& results_scratch,
+                           const RefVector<ValueVector>& psi_v_list,
+                           const RefVector<GradVector>& dpsi_v_list,
+                           const RefVector<ValueVector>& d2psi_v_list) const;
 
 protected:
-    /// intermediate result vectors
-    vContainer_type myV;
-    vContainer_type myL;
-    gContainer_type myG;
-    hContainer_type myH;
-    ghContainer_type mygH;
+  /// intermediate result vectors
+  vContainer_type myV;
+  vContainer_type myL;
+  gContainer_type myG;
+  hContainer_type myH;
+  ghContainer_type mygH;
 
 public:
-    SplineC2ROMPTargetT(const std::string& my_name) :
-        BsplineSetT<VT>(my_name),
-        offload_timer_(
-            createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)),
+  SplineC2ROMPTargetT(const std::string& my_name)
+      : BsplineSetT<VT>(my_name),
+        offload_timer_(createGlobalTimer("SplineC2ROMPTarget::offload", timer_level_fine)),
         nComplexBands(0),
         GGt_offload(std::make_shared<OffloadVector<ST>>(9)),
         PrimLattice_G_offload(std::make_shared<OffloadVector<ST>>(9))
-    {
-    }
-
-    SplineC2ROMPTargetT(const SplineC2ROMPTargetT& in);
-
-    virtual std::string
-    getClassName() const override
-    {
-        return "SplineC2ROMPTarget";
-    }
-    virtual std::string
-    getKeyword() const override
-    {
-        return "SplineC2R";
-    }
-    bool
-    isComplex() const override
-    {
-        return true;
-    };
-    virtual bool
-    isOMPoffload() const override
-    {
-        return true;
-    }
-
-    void
-    createResource(ResourceCollection& collection) const override
-    {
-        auto resource_index = collection.addResource(
-            std::make_unique<SplineOMPTargetMultiWalkerMem<ST, TT>>());
-    }
-
-    void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<VT>>& spo_list) const override
-    {
-        assert(this == &spo_list.getLeader());
-        auto& phi_leader =
-            spo_list.template getCastedLeader<SplineC2ROMPTargetT>();
-        phi_leader.mw_mem_handle_ =
-            collection.lendResource<SplineOMPTargetMultiWalkerMem<ST, TT>>();
-    }
-
-    void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<VT>>& spo_list) const override
-    {
-        assert(this == &spo_list.getLeader());
-        auto& phi_leader =
-            spo_list.template getCastedLeader<SplineC2ROMPTargetT>();
-        collection.takebackResource(phi_leader.mw_mem_handle_);
-    }
-
-    std::unique_ptr<SPOSetT<VT>>
-    makeClone() const override
-    {
-        return std::make_unique<SplineC2ROMPTargetT>(*this);
-    }
-
-    inline void
-    resizeStorage(size_t n, size_t nvals)
-    {
-        this->init_base(n);
-        size_t npad = getAlignedSize<ST>(2 * n);
-        myV.resize(npad);
-        myG.resize(npad);
-        myL.resize(npad);
-        myH.resize(npad);
-        mygH.resize(npad);
-    }
-
-    void
-    bcast_tables(Communicate* comm)
-    {
-        chunked_bcast(comm, SplineInst->getSplinePtr());
-    }
-
-    void
-    gather_tables(Communicate* comm)
-    {
-        if (comm->size() == 1)
-            return;
-        const int Nbands = this->kPoints.size();
-        const int Nbandgroups = comm->size();
-        this->offset.resize(Nbandgroups + 1, 0);
-        FairDivideLow(Nbands, Nbandgroups, this->offset);
-
-        for (size_t ib = 0; ib < this->offset.size(); ib++)
-            this->offset[ib] = this->offset[ib] * 2;
-        gatherv(comm, SplineInst->getSplinePtr(),
-            SplineInst->getSplinePtr()->z_stride, this->offset);
-    }
-
-    template <typename GT, typename BCT>
-    void
-    create_spline(GT& xyz_g, BCT& xyz_bc)
-    {
-        resize_kpoints();
-        SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>,
-            OffloadAllocator<SplineType>>>();
-        SplineInst->create(xyz_g, xyz_bc, myV.size());
-
-        app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20)
-                  << " MB allocated "
-                  << "for the coefficients in 3D spline orbital representation"
-                  << std::endl;
-    }
-
-    /// this routine can not be called from threaded region
-    void
-    finalizeConstruction() override
-    {
-        // map the SplineInst->getSplinePtr() structure to GPU
-        auto* MultiSpline = SplineInst->getSplinePtr();
-        auto* restrict coefs = MultiSpline->coefs;
-        // attach pointers on the device to achieve deep copy
-        PRAGMA_OFFLOAD("omp target \
+  {}
+
+  SplineC2ROMPTargetT(const SplineC2ROMPTargetT& in);
+
+  virtual std::string getClassName() const override { return "SplineC2ROMPTarget"; }
+  virtual std::string getKeyword() const override { return "SplineC2R"; }
+  bool isComplex() const override { return true; };
+  virtual bool isOMPoffload() const override { return true; }
+
+  void createResource(ResourceCollection& collection) const override
+  {
+    auto resource_index = collection.addResource(std::make_unique<SplineOMPTargetMultiWalkerMem<ST, TT>>());
+  }
+
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<VT>>& spo_list) const override
+  {
+    assert(this == &spo_list.getLeader());
+    auto& phi_leader          = spo_list.template getCastedLeader<SplineC2ROMPTargetT>();
+    phi_leader.mw_mem_handle_ = collection.lendResource<SplineOMPTargetMultiWalkerMem<ST, TT>>();
+  }
+
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<VT>>& spo_list) const override
+  {
+    assert(this == &spo_list.getLeader());
+    auto& phi_leader = spo_list.template getCastedLeader<SplineC2ROMPTargetT>();
+    collection.takebackResource(phi_leader.mw_mem_handle_);
+  }
+
+  std::unique_ptr<SPOSetT<VT>> makeClone() const override { return std::make_unique<SplineC2ROMPTargetT>(*this); }
+
+  inline void resizeStorage(size_t n, size_t nvals)
+  {
+    this->init_base(n);
+    size_t npad = getAlignedSize<ST>(2 * n);
+    myV.resize(npad);
+    myG.resize(npad);
+    myL.resize(npad);
+    myH.resize(npad);
+    mygH.resize(npad);
+  }
+
+  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
+
+  void gather_tables(Communicate* comm)
+  {
+    if (comm->size() == 1)
+      return;
+    const int Nbands      = this->kPoints.size();
+    const int Nbandgroups = comm->size();
+    this->offset.resize(Nbandgroups + 1, 0);
+    FairDivideLow(Nbands, Nbandgroups, this->offset);
+
+    for (size_t ib = 0; ib < this->offset.size(); ib++)
+      this->offset[ib] = this->offset[ib] * 2;
+    gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset);
+  }
+
+  template<typename GT, typename BCT>
+  void create_spline(GT& xyz_g, BCT& xyz_bc)
+  {
+    resize_kpoints();
+    SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>>();
+    SplineInst->create(xyz_g, xyz_bc, myV.size());
+
+    app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
+              << "for the coefficients in 3D spline orbital representation" << std::endl;
+  }
+
+  /// this routine can not be called from threaded region
+  void finalizeConstruction() override
+  {
+    // map the SplineInst->getSplinePtr() structure to GPU
+    auto* MultiSpline    = SplineInst->getSplinePtr();
+    auto* restrict coefs = MultiSpline->coefs;
+    // attach pointers on the device to achieve deep copy
+    PRAGMA_OFFLOAD("omp target \
                 map(always, to: MultiSpline[0:1], \
                     coefs[0:MultiSpline->coefs_size])")
-        {
-            MultiSpline->coefs = coefs;
-        }
-
-        // transfer static data to GPU
-        auto* mKK_ptr = mKK->data();
-        PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])")
-        auto* myKcart_ptr = myKcart->data();
-        PRAGMA_OFFLOAD(
-            "omp target update to(myKcart_ptr[0:myKcart->capacity()*3])")
-        for (size_t i = 0; i < 9; i++) {
-            (*GGt_offload)[i] = GGt[i];
-            (*PrimLattice_G_offload)[i] = PrimLattice.G[i];
-        }
-        auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
-        PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])")
-        auto* GGt_ptr = GGt_offload->data();
-        PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])")
+    {
+      MultiSpline->coefs = coefs;
     }
 
-    inline void
-    flush_zero()
+    // transfer static data to GPU
+    auto* mKK_ptr = mKK->data();
+    PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])")
+    auto* myKcart_ptr = myKcart->data();
+    PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])")
+    for (size_t i = 0; i < 9; i++)
     {
-        SplineInst->flush_zero();
+      (*GGt_offload)[i]           = GGt[i];
+      (*PrimLattice_G_offload)[i] = PrimLattice.G[i];
     }
-
-    /** remap kPoints to pack the double copy */
-    inline void
-    resize_kpoints()
+    auto* PrimLattice_G_ptr = PrimLattice_G_offload->data();
+    PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])")
+    auto* GGt_ptr = GGt_offload->data();
+    PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])")
+  }
+
+  inline void flush_zero() { SplineInst->flush_zero(); }
+
+  /** remap kPoints to pack the double copy */
+  inline void resize_kpoints()
+  {
+    nComplexBands = this->remap_kpoints();
+    const int nk  = this->kPoints.size();
+    mKK           = std::make_shared<OffloadVector<ST>>(nk);
+    myKcart       = std::make_shared<OffloadPosVector<ST>>(nk);
+    for (size_t i = 0; i < nk; ++i)
     {
-        nComplexBands = this->remap_kpoints();
-        const int nk = this->kPoints.size();
-        mKK = std::make_shared<OffloadVector<ST>>(nk);
-        myKcart = std::make_shared<OffloadPosVector<ST>>(nk);
-        for (size_t i = 0; i < nk; ++i) {
-            (*mKK)[i] = -dot(this->kPoints[i], this->kPoints[i]);
-            (*myKcart)(i) = this->kPoints[i];
-        }
+      (*mKK)[i]     = -dot(this->kPoints[i], this->kPoints[i]);
+      (*myKcart)(i) = this->kPoints[i];
     }
+  }
 
-    void
-    set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i,
-        int twist, int ispline, int level);
+  void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level);
 
-    bool
-    read_splines(hdf_archive& h5f);
+  bool read_splines(hdf_archive& h5f);
 
-    bool
-    write_splines(hdf_archive& h5f);
+  bool write_splines(hdf_archive& h5f);
 
-    void
-    assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi,
-        int first, int last) const;
+  void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
 
-    virtual void
-    evaluateValue(
-        const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
+  virtual void evaluateValue(const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
 
-    virtual void
-    evaluateDetRatios(const VirtualParticleSetT<VT>& VP, ValueVector& psi,
-        const ValueVector& psiinv, std::vector<ValueType>& ratios) override;
+  virtual void evaluateDetRatios(const VirtualParticleSetT<VT>& VP,
+                                 ValueVector& psi,
+                                 const ValueVector& psiinv,
+                                 std::vector<ValueType>& ratios) override;
 
-    virtual void
-    mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
-        const RefVectorWithLeader<const VirtualParticleSetT<VT>>& vp_list,
-        const RefVector<ValueVector>& psi_list,
-        const std::vector<const ValueType*>& invRow_ptr_list,
-        std::vector<std::vector<ValueType>>& ratios_list) const override;
+  virtual void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
+                                    const RefVectorWithLeader<const VirtualParticleSetT<VT>>& vp_list,
+                                    const RefVector<ValueVector>& psi_list,
+                                    const std::vector<const ValueType*>& invRow_ptr_list,
+                                    std::vector<std::vector<ValueType>>& ratios_list) const override;
 
-    /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
+  /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
      * cartesian
      */
-    void
-    assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        ValueVector& d2psi);
-
-    virtual void
-    evaluateVGL(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
-
-    virtual void
-    mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<VT>>& sa_list,
-        const RefVectorWithLeader<ParticleSetT<VT>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list) const override;
-
-    virtual void
-    mw_evaluateVGLandDetRatioGrads(
-        const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<VT>>& P_list, int iat,
-        const std::vector<const ValueType*>& invRow_ptr_list,
-        OffloadMWVGLArray& phi_vgl_v, std::vector<ValueType>& ratios,
-        std::vector<GradType>& grads) const override;
-
-    void
-    assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, int first, int last) const;
-
-    virtual void
-    evaluateVGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi) override;
-
-    void
-    assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0,
-        int last = -1) const;
-
-    virtual void
-    evaluateVGHGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi,
-        GGGVector& grad_grad_grad_psi) override;
-
-    virtual void
-    evaluate_notranspose(const ParticleSetT<VT>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override;
-
-    template <class BSPLINESPO>
-    friend class SplineSetReaderT;
-    template <typename>
-    friend class BsplineReaderBaseT;
+  void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  virtual void evaluateVGL(const ParticleSetT<VT>& P,
+                           const int iat,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           ValueVector& d2psi) override;
+
+  virtual void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<VT>>& sa_list,
+                              const RefVectorWithLeader<ParticleSetT<VT>>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list) const override;
+
+  virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<VT>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSetT<VT>>& P_list,
+                                              int iat,
+                                              const std::vector<const ValueType*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<ValueType>& ratios,
+                                              std::vector<GradType>& grads) const override;
+
+  void assign_vgh(const PointType& r,
+                  ValueVector& psi,
+                  GradVector& dpsi,
+                  HessVector& grad_grad_psi,
+                  int first,
+                  int last) const;
+
+  virtual void evaluateVGH(const ParticleSetT<VT>& P,
+                           const int iat,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           HessVector& grad_grad_psi) override;
+
+  void assign_vghgh(const PointType& r,
+                    ValueVector& psi,
+                    GradVector& dpsi,
+                    HessVector& grad_grad_psi,
+                    GGGVector& grad_grad_grad_psi,
+                    int first = 0,
+                    int last  = -1) const;
+
+  virtual void evaluateVGHGH(const ParticleSetT<VT>& P,
+                             const int iat,
+                             ValueVector& psi,
+                             GradVector& dpsi,
+                             HessVector& grad_grad_psi,
+                             GGGVector& grad_grad_grad_psi) override;
+
+  virtual void evaluate_notranspose(const ParticleSetT<VT>& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    ValueMatrix& d2logdet) override;
+
+  template<class BSPLINESPO>
+  friend class SplineSetReaderT;
+  template<typename>
+  friend class BsplineReaderBaseT;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp
index fa81ebc01f1..600c75c672c 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.cpp
@@ -4,16 +4,13 @@
 //
 // Copyright (c) 2019 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@intel.com, University of
-//                    Illinois at Urbana-Champaign Ye Luo, yeluo@anl.gov,
-//                    Argonne National Laboratory Anouar Benali, benali@anl.gov,
-//                    Argonne National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@intel.com, University of Illinois at Urbana-Champaign
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Anouar Benali, benali@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "SplineC2RT.h"
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h
index b7cf9e109dd..e69dacebaaa 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2RT.h
@@ -4,16 +4,13 @@
 //
 // Copyright (c) 2019 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@intel.com, University of
-//                    Illinois at Urbana-Champaign Ye Luo, yeluo@anl.gov,
-//                    Argonne National Laboratory Anouar Benali, benali@anl.gov,
-//                    Argonne National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@intel.com, University of Illinois at Urbana-Champaign
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Anouar Benali, benali@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 /** @file
@@ -44,212 +41,180 @@ namespace qmcplusplus
  * orbital. All the output orbitals are real (C2R). The maximal number of output
  * orbitals is OrbitalSetSize.
  */
-template <typename ST, typename VT>
+template<typename ST, typename VT>
 class SplineC2RT : public BsplineSetT<VT>
 {
 public:
-    using SplineType = typename bspline_traits<ST, 3>::SplineType;
-    using BCType = typename bspline_traits<ST, 3>::BCType;
-    using DataType = ST;
-    using PointType = TinyVector<ST, 3>;
-    using SingleSplineType = UBspline_3d_d;
-    // types for evaluation results
-    using TT = typename BsplineSetT<VT>::ValueType;
-    using typename BsplineSetT<VT>::GGGVector;
-    using typename BsplineSetT<VT>::GradVector;
-    using typename BsplineSetT<VT>::HessVector;
-    using typename BsplineSetT<VT>::ValueVector;
-
-    using vContainer_type = Vector<ST, aligned_allocator<ST>>;
-    using gContainer_type = VectorSoaContainer<ST, 3>;
-    using hContainer_type = VectorSoaContainer<ST, 6>;
-    using ghContainer_type = VectorSoaContainer<ST, 10>;
+  using SplineType       = typename bspline_traits<ST, 3>::SplineType;
+  using BCType           = typename bspline_traits<ST, 3>::BCType;
+  using DataType         = ST;
+  using PointType        = TinyVector<ST, 3>;
+  using SingleSplineType = UBspline_3d_d;
+  // types for evaluation results
+  using TT = typename BsplineSetT<VT>::ValueType;
+  using typename BsplineSetT<VT>::GGGVector;
+  using typename BsplineSetT<VT>::GradVector;
+  using typename BsplineSetT<VT>::HessVector;
+  using typename BsplineSetT<VT>::ValueVector;
+
+  using vContainer_type  = Vector<ST, aligned_allocator<ST>>;
+  using gContainer_type  = VectorSoaContainer<ST, 3>;
+  using hContainer_type  = VectorSoaContainer<ST, 6>;
+  using ghContainer_type = VectorSoaContainer<ST, 10>;
 
 private:
-    /// primitive cell
-    CrystalLattice<ST, 3> PrimLattice;
-    ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
-    ///CartesianUnit, e.g. Hessian
-    Tensor<ST, 3> GGt;
-    /// number of complex bands
-    int nComplexBands;
-    /// multi bspline set
-    std::shared_ptr<MultiBspline<ST>> SplineInst;
-
-    vContainer_type mKK;
-    VectorSoaContainer<ST, 3> myKcart;
-
-    /// thread private ratios for reduction when using nested threading, numVP x
-    /// numThread
-    Matrix<TT> ratios_private;
+  /// primitive cell
+  CrystalLattice<ST, 3> PrimLattice;
+  ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
+  ///CartesianUnit, e.g. Hessian
+  Tensor<ST, 3> GGt;
+  /// number of complex bands
+  int nComplexBands;
+  /// multi bspline set
+  std::shared_ptr<MultiBspline<ST>> SplineInst;
+
+  vContainer_type mKK;
+  VectorSoaContainer<ST, 3> myKcart;
+
+  /// thread private ratios for reduction when using nested threading, numVP x
+  /// numThread
+  Matrix<TT> ratios_private;
 
 protected:
-    /// intermediate result vectors
-    vContainer_type myV;
-    vContainer_type myL;
-    gContainer_type myG;
-    hContainer_type myH;
-    ghContainer_type mygH;
+  /// intermediate result vectors
+  vContainer_type myV;
+  vContainer_type myL;
+  gContainer_type myG;
+  hContainer_type myH;
+  ghContainer_type mygH;
 
 public:
-    SplineC2RT(const std::string& my_name) :
-        BsplineSetT<VT>(my_name),
-        nComplexBands(0)
+  SplineC2RT(const std::string& my_name) : BsplineSetT<VT>(my_name), nComplexBands(0) {}
+
+  SplineC2RT(const SplineC2RT& in);
+  virtual std::string getClassName() const override { return "SplineC2R"; }
+  virtual std::string getKeyword() const override { return "SplineC2R"; }
+  bool isComplex() const override { return true; }
+
+  std::unique_ptr<SPOSetT<VT>> makeClone() const override { return std::make_unique<SplineC2RT>(*this); }
+
+  inline void resizeStorage(size_t n, size_t nvals)
+  {
+    this->init_base(n);
+    size_t npad = getAlignedSize<ST>(2 * n);
+    myV.resize(npad);
+    myG.resize(npad);
+    myL.resize(npad);
+    myH.resize(npad);
+    mygH.resize(npad);
+  }
+
+  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
+
+  void gather_tables(Communicate* comm)
+  {
+    if (comm->size() == 1)
+      return;
+    const int Nbands      = this->kPoints.size();
+    const int Nbandgroups = comm->size();
+    this->offset.resize(Nbandgroups + 1, 0);
+    FairDivideLow(Nbands, Nbandgroups, this->offset);
+
+    for (size_t ib = 0; ib < this->offset.size(); ib++)
+      this->offset[ib] = this->offset[ib] * 2;
+    gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset);
+  }
+
+  template<typename GT, typename BCT>
+  void create_spline(GT& xyz_g, BCT& xyz_bc)
+  {
+    resize_kpoints();
+    SplineInst = std::make_shared<MultiBspline<ST>>();
+    SplineInst->create(xyz_g, xyz_bc, myV.size());
+
+    app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
+              << "for the coefficients in 3D spline orbital representation" << std::endl;
+  }
+
+  inline void flush_zero() { SplineInst->flush_zero(); }
+
+  /** remap kPoints to pack the double copy, then rebuild mKK (-k.k) and myKcart, one entry per k-point */
+  inline void resize_kpoints()
+  {
+    nComplexBands   = this->remap_kpoints();
+    const size_t nk = this->kPoints.size(); // keep the unsigned size: no narrowing, no signed/unsigned compare
+    mKK.resize(nk);
+    myKcart.resize(nk);
+    for (size_t i = 0; i < nk; ++i)
     {
+      mKK[i]     = -dot(this->kPoints[i], this->kPoints[i]);
+      myKcart(i) = this->kPoints[i];
     }
+  }
 
-    SplineC2RT(const SplineC2RT& in);
-    virtual std::string
-    getClassName() const override
-    {
-        return "SplineC2R";
-    }
-    virtual std::string
-    getKeyword() const override
-    {
-        return "SplineC2R";
-    }
-    bool
-    isComplex() const override
-    {
-        return true;
-    };
-
-    std::unique_ptr<SPOSetT<VT>>
-    makeClone() const override
-    {
-        return std::make_unique<SplineC2RT>(*this);
-    }
-
-    inline void
-    resizeStorage(size_t n, size_t nvals)
-    {
-        this->init_base(n);
-        size_t npad = getAlignedSize<ST>(2 * n);
-        myV.resize(npad);
-        myG.resize(npad);
-        myL.resize(npad);
-        myH.resize(npad);
-        mygH.resize(npad);
-    }
-
-    void
-    bcast_tables(Communicate* comm)
-    {
-        chunked_bcast(comm, SplineInst->getSplinePtr());
-    }
-
-    void
-    gather_tables(Communicate* comm)
-    {
-        if (comm->size() == 1)
-            return;
-        const int Nbands = this->kPoints.size();
-        const int Nbandgroups = comm->size();
-        this->offset.resize(Nbandgroups + 1, 0);
-        FairDivideLow(Nbands, Nbandgroups, this->offset);
-
-        for (size_t ib = 0; ib < this->offset.size(); ib++)
-            this->offset[ib] = this->offset[ib] * 2;
-        gatherv(comm, SplineInst->getSplinePtr(),
-            SplineInst->getSplinePtr()->z_stride, this->offset);
-    }
-
-    template <typename GT, typename BCT>
-    void
-    create_spline(GT& xyz_g, BCT& xyz_bc)
-    {
-        resize_kpoints();
-        SplineInst = std::make_shared<MultiBspline<ST>>();
-        SplineInst->create(xyz_g, xyz_bc, myV.size());
-
-        app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20)
-                  << " MB allocated "
-                  << "for the coefficients in 3D spline orbital representation"
-                  << std::endl;
-    }
-
-    inline void
-    flush_zero()
-    {
-        SplineInst->flush_zero();
-    }
-
-    /** remap kPoints to pack the double copy */
-    inline void
-    resize_kpoints()
-    {
-        nComplexBands = this->remap_kpoints();
-        const int nk = this->kPoints.size();
-        mKK.resize(nk);
-        myKcart.resize(nk);
-        for (size_t i = 0; i < nk; ++i) {
-            mKK[i] = -dot(this->kPoints[i], this->kPoints[i]);
-            myKcart(i) = this->kPoints[i];
-        }
-    }
-
-    void
-    set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i,
-        int twist, int ispline, int level);
+  void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level);
 
-    bool
-    read_splines(hdf_archive& h5f);
+  bool read_splines(hdf_archive& h5f);
 
-    bool
-    write_splines(hdf_archive& h5f);
+  bool write_splines(hdf_archive& h5f);
 
-    void
-    assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi,
-        int first, int last) const;
+  void assign_v(const PointType& r, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
 
-    void
-    evaluateValue(
-        const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
+  void evaluateValue(const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
 
-    void
-    evaluateDetRatios(const VirtualParticleSetT<VT>& VP, ValueVector& psi,
-        const ValueVector& psiinv, std::vector<TT>& ratios) override;
+  void evaluateDetRatios(const VirtualParticleSetT<VT>& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<TT>& ratios) override;
 
-    /** assign_vgl
+  /** assign_vgl
      */
-    void
-    assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        ValueVector& d2psi, int first, int last) const;
+  void assign_vgl(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last)
+      const;
 
-    /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
+  /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
      * cartesian
      */
-    void
-    assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        ValueVector& d2psi);
-
-    void
-    evaluateVGL(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
-
-    void
-    assign_vgh(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, int first, int last) const;
-
-    void
-    evaluateVGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi) override;
-
-    void
-    assign_vghgh(const PointType& r, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0,
-        int last = -1) const;
-
-    void
-    evaluateVGHGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi,
-        GGGVector& grad_grad_grad_psi) override;
-
-    template <class BSPLINESPO>
-    friend class SplineSetReaderT;
-    template <typename>
-    friend class BsplineReaderBaseT;
+  void assign_vgl_from_l(const PointType& r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  void evaluateVGL(const ParticleSetT<VT>& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   ValueVector& d2psi) override;
+
+  void assign_vgh(const PointType& r,
+                  ValueVector& psi,
+                  GradVector& dpsi,
+                  HessVector& grad_grad_psi,
+                  int first,
+                  int last) const;
+
+  void evaluateVGH(const ParticleSetT<VT>& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) override;
+
+  void assign_vghgh(const PointType& r,
+                    ValueVector& psi,
+                    GradVector& dpsi,
+                    HessVector& grad_grad_psi,
+                    GGGVector& grad_grad_grad_psi,
+                    int first = 0,
+                    int last  = -1) const;
+
+  void evaluateVGHGH(const ParticleSetT<VT>& P,
+                     const int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) override;
+
+  template<class BSPLINESPO>
+  friend class SplineSetReaderT;
+  template<typename>
+  friend class BsplineReaderBaseT;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp
index 8b73c0aa0a2..389093fd755 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.cpp
@@ -4,15 +4,12 @@
 //
 // Copyright (c) 2019 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory Ye Luo,
-//                    yeluo@anl.gov, Argonne National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "SplineR2RT.h"
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h
index 88265ffbec5..0b0f8222a11 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2RT.h
@@ -4,15 +4,12 @@
 //
 // Copyright (c) 2019 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory Ye Luo,
-//                    yeluo@anl.gov, Argonne National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_SPLINE_R2RT_H
@@ -33,95 +30,72 @@ namespace qmcplusplus
  * Requires temporage storage and multiplication of the sign of the real part of
  * the phase Internal storage ST type arrays are aligned and padded.
  */
-template <typename ST, typename VT>
+template<typename ST, typename VT>
 class SplineR2RT : public BsplineSetT<VT>
 {
 public:
-    using SplineType = typename bspline_traits<ST, 3>::SplineType;
-    using BCType = typename bspline_traits<ST, 3>::BCType;
-    using DataType = ST;
-    using RealType = typename SPOSetT<VT>::RealType;
-    using IndexType = typename SPOSetT<VT>::IndexType;
-    using FullPrecValueType = double;
-    using PointType = TinyVector<ST, 3>;
-    using SingleSplineType = UBspline_3d_d;
-
-    // types for evaluation results
-    using TT = typename BsplineSetT<VT>::ValueType;
-    using GGGVector = typename BsplineSetT<VT>::GGGVector;
-    using ValueMatrix = typename BsplineSetT<VT>::ValueMatrix;
-    using GradVector = typename BsplineSetT<VT>::GradVector;
-    using HessVector = typename BsplineSetT<VT>::HessVector;
-    using ValueVector = typename BsplineSetT<VT>::ValueVector;
-
-    using vContainer_type = Vector<ST, aligned_allocator<ST>>;
-    using gContainer_type = VectorSoaContainer<ST, 3>;
-    using hContainer_type = VectorSoaContainer<ST, 6>;
-    using ghContainer_type = VectorSoaContainer<ST, 10>;
+  using SplineType        = typename bspline_traits<ST, 3>::SplineType;
+  using BCType            = typename bspline_traits<ST, 3>::BCType;
+  using DataType          = ST;
+  using RealType          = typename SPOSetT<VT>::RealType;
+  using IndexType         = typename SPOSetT<VT>::IndexType;
+  using FullPrecValueType = double;
+  using PointType         = TinyVector<ST, 3>;
+  using SingleSplineType  = UBspline_3d_d;
+
+  // types for evaluation results
+  using TT          = typename BsplineSetT<VT>::ValueType;
+  using GGGVector   = typename BsplineSetT<VT>::GGGVector;
+  using ValueMatrix = typename BsplineSetT<VT>::ValueMatrix;
+  using GradVector  = typename BsplineSetT<VT>::GradVector;
+  using HessVector  = typename BsplineSetT<VT>::HessVector;
+  using ValueVector = typename BsplineSetT<VT>::ValueVector;
+
+  using vContainer_type  = Vector<ST, aligned_allocator<ST>>;
+  using gContainer_type  = VectorSoaContainer<ST, 3>;
+  using hContainer_type  = VectorSoaContainer<ST, 6>;
+  using ghContainer_type = VectorSoaContainer<ST, 10>;
 
 private:
-    bool IsGamma;
-    ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
-    /// CartesianUnit, e.g. Hessian
-    Tensor<ST, 3> GGt;
-    /// multi bspline set
-    std::shared_ptr<MultiBspline<ST>> SplineInst;
+  bool IsGamma;
+  ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to
+  /// CartesianUnit, e.g. Hessian
+  Tensor<ST, 3> GGt;
+  /// multi bspline set
+  std::shared_ptr<MultiBspline<ST>> SplineInst;
 
-    /// Copy of original splines for orbital rotation
-    std::shared_ptr<std::vector<ST>> coef_copy_;
+  /// Copy of original splines for orbital rotation
+  std::shared_ptr<std::vector<ST>> coef_copy_;
 
-    /// thread private ratios for reduction when using nested threading, numVP x
-    /// numThread
-    Matrix<TT> ratios_private;
+  /// thread private ratios for reduction when using nested threading, numVP x
+  /// numThread
+  Matrix<TT> ratios_private;
 
 protected:
-    /// primitive cell
-    CrystalLattice<ST, 3> PrimLattice;
-    /// intermediate result vectors
-    vContainer_type myV;
-    vContainer_type myL;
-    gContainer_type myG;
-    hContainer_type myH;
-    ghContainer_type mygH;
+  /// primitive cell
+  CrystalLattice<ST, 3> PrimLattice;
+  /// intermediate result vectors
+  vContainer_type myV;
+  vContainer_type myL;
+  gContainer_type myG;
+  hContainer_type myH;
+  ghContainer_type mygH;
 
 public:
-    SplineR2RT(const std::string& my_name) : BsplineSetT<VT>(my_name)
-    {
-    }
-
-    SplineR2RT(const SplineR2RT& in);
-    virtual std::string
-    getClassName() const override
-    {
-        return "SplineR2RT";
-    }
-    virtual std::string
-    getKeyword() const override
-    {
-        return "SplineR2RT";
-    }
-    bool
-    isComplex() const override
-    {
-        return false;
-    };
-    bool
-    isRotationSupported() const override
-    {
-        return true;
-    }
-
-    std::unique_ptr<SPOSetT<VT>>
-    makeClone() const override
-    {
-        return std::make_unique<SplineR2RT<ST, VT>>(*this);
-    }
-
-    /// Store an original copy of the spline coefficients for orbital rotation
-    void
-    storeParamsBeforeRotation() override;
-
-    /*
+  SplineR2RT(const std::string& my_name) : BsplineSetT<VT>(my_name) {}
+
+  SplineR2RT(const SplineR2RT& in);
+  virtual std::string getClassName() const override { return "SplineR2RT"; }
+  virtual std::string getKeyword() const override { return "SplineR2RT"; }
+  bool isComplex() const override { return false; }
+  bool isRotationSupported() const override { return true; }
+
+  std::unique_ptr<SPOSetT<VT>> makeClone() const override { return std::make_unique<SplineR2RT<ST, VT>>(*this); }
+
+  /// Store an original copy of the spline coefficients for orbital rotation
+  void storeParamsBeforeRotation() override;
+
+  /*
        Implements orbital rotations via [1,2].
        Should be called by RotatedSPOs::apply_rotation()
 
@@ -134,139 +108,120 @@ class SplineR2RT : public BsplineSetT<VT>
        [2] Toulouse & Umrigar, JCP 126, (2007)
        [3] Townsend et al., PRB 102, (2020)
     */
-    void
-    applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override;
-
-    inline void
-    resizeStorage(size_t n, size_t nvals)
-    {
-        this->init_base(n);
-        const size_t npad = getAlignedSize<ST>(n);
-        this->myV.resize(npad);
-        this->myG.resize(npad);
-        this->myL.resize(npad);
-        this->myH.resize(npad);
-        this->mygH.resize(npad);
-
-        IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) &&
-            (this->HalfG[2] == 0));
-    }
-
-    void
-    bcast_tables(Communicate* comm)
-    {
-        chunked_bcast(comm, SplineInst->getSplinePtr());
-    }
-
-    void
-    gather_tables(Communicate* comm)
-    {
-        if (comm->size() == 1)
-            return;
-        const int Nbands = this->kPoints.size();
-        const int Nbandgroups = comm->size();
-        this->offset.resize(Nbandgroups + 1, 0);
-        FairDivideLow(Nbands, Nbandgroups, this->offset);
-        gatherv(comm, SplineInst->getSplinePtr(),
-            SplineInst->getSplinePtr()->z_stride, this->offset);
-    }
-
-    template <typename GT, typename BCT>
-    void
-    create_spline(GT& xyz_g, BCT& xyz_bc)
-    {
-        GGt = dot(transpose(PrimLattice.G), PrimLattice.G);
-        SplineInst = std::make_shared<MultiBspline<ST>>();
-        SplineInst->create(xyz_g, xyz_bc, myV.size());
-
-        app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20)
-                  << " MB allocated "
-                  << "for the coefficients in 3D spline orbital representation"
-                  << std::endl;
-    }
-
-    inline void
-    flush_zero()
-    {
-        SplineInst->flush_zero();
-    }
-
-    void
-    set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i,
-        int twist, int ispline, int level);
-
-    bool
-    read_splines(hdf_archive& h5f);
-
-    bool
-    write_splines(hdf_archive& h5f);
-
-    /** convert position in PrimLattice unit and return sign */
-    inline int
-    convertPos(const PointType& r, PointType& ru)
-    {
-        ru = PrimLattice.toUnit(r);
-        int bc_sign = 0;
-        for (int i = 0; i < this->D; i++)
-            if (-std::numeric_limits<ST>::epsilon() < ru[i] && ru[i] < 0)
-                ru[i] = ST(0.0);
-            else {
-                ST img = std::floor(ru[i]);
-                ru[i] -= img;
-                bc_sign += this->HalfG[i] * (int)img;
-            }
-        return bc_sign;
-    }
-
-    void
-    assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi,
-        int first, int last) const;
-
-    void
-    evaluateValue(
-        const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
-
-    void
-    evaluateDetRatios(const VirtualParticleSetT<VT>& VP, ValueVector& psi,
-        const ValueVector& psiinv, std::vector<TT>& ratios) override;
-
-    void
-    assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi,
-        ValueVector& d2psi, int first, int last) const;
-
-    /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
+  void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) override;
+
+  inline void resizeStorage(size_t n, size_t nvals)
+  {
+    this->init_base(n);
+    const size_t npad = getAlignedSize<ST>(n);
+    this->myV.resize(npad);
+    this->myG.resize(npad);
+    this->myL.resize(npad);
+    this->myH.resize(npad);
+    this->mygH.resize(npad);
+
+    IsGamma = ((this->HalfG[0] == 0) && (this->HalfG[1] == 0) && (this->HalfG[2] == 0));
+  }
+
+  void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); }
+
+  void gather_tables(Communicate* comm)
+  {
+    if (comm->size() == 1)
+      return;
+    const int Nbands      = this->kPoints.size();
+    const int Nbandgroups = comm->size();
+    this->offset.resize(Nbandgroups + 1, 0);
+    FairDivideLow(Nbands, Nbandgroups, this->offset);
+    gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, this->offset);
+  }
+
+  template<typename GT, typename BCT>
+  void create_spline(GT& xyz_g, BCT& xyz_bc)
+  {
+    GGt        = dot(transpose(PrimLattice.G), PrimLattice.G);
+    SplineInst = std::make_shared<MultiBspline<ST>>();
+    SplineInst->create(xyz_g, xyz_bc, myV.size());
+
+    app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated "
+              << "for the coefficients in 3D spline orbital representation" << std::endl;
+  }
+
+  inline void flush_zero() { SplineInst->flush_zero(); }
+
+  void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level);
+
+  bool read_splines(hdf_archive& h5f);
+
+  bool write_splines(hdf_archive& h5f);
+
+  /** convert r to PrimLattice units in ru, reduced by its floor per axis; return sum of HalfG[i] * image */
+  inline int convertPos(const PointType& r, PointType& ru)
+  {
+    ru          = PrimLattice.toUnit(r);
+    int bc_sign = 0;
+    for (int i = 0; i < this->D; i++)
+      if (-std::numeric_limits<ST>::epsilon() < ru[i] && ru[i] < 0)
+        ru[i] = ST(0.0); // within epsilon below zero: snap to 0 instead of wrapping to ~1 via floor() == -1
+      else
+      {
+        const ST img = std::floor(ru[i]); // integer image shift along axis i
+        ru[i] -= img;
+        bc_sign += this->HalfG[i] * static_cast<int>(img);
+      }
+    return bc_sign;
+  }
+
+  void assign_v(int bc_sign, const vContainer_type& myV, ValueVector& psi, int first, int last) const;
+
+  void evaluateValue(const ParticleSetT<VT>& P, const int iat, ValueVector& psi) override;
+
+  void evaluateDetRatios(const VirtualParticleSetT<VT>& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<TT>& ratios) override;
+
+  void assign_vgl(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, int first, int last) const;
+
+  /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in
      * cartesian
      */
-    void
-    assign_vgl_from_l(
-        int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
-
-    void
-    evaluateVGL(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
-
-    void
-    assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, int first, int last) const;
-
-    void
-    evaluateVGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi) override;
-
-    void
-    assign_vghgh(int bc_sign, ValueVector& psi, GradVector& dpsi,
-        HessVector& grad_grad_psi, GGGVector& grad_grad_grad_psi, int first = 0,
-        int last = -1) const;
-
-    void
-    evaluateVGHGH(const ParticleSetT<VT>& P, const int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi,
-        GGGVector& grad_grad_grad_psi) override;
-
-    template <class BSPLINESPO>
-    friend class SplineSetReaderT;
-    template <typename>
-    friend class BsplineReaderBaseT;
+  void assign_vgl_from_l(int bc_sign, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  void evaluateVGL(const ParticleSetT<VT>& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   ValueVector& d2psi) override;
+
+  void assign_vgh(int bc_sign, ValueVector& psi, GradVector& dpsi, HessVector& grad_grad_psi, int first, int last)
+      const;
+
+  void evaluateVGH(const ParticleSetT<VT>& P,
+                   const int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) override;
+
+  void assign_vghgh(int bc_sign,
+                    ValueVector& psi,
+                    GradVector& dpsi,
+                    HessVector& grad_grad_psi,
+                    GGGVector& grad_grad_grad_psi,
+                    int first = 0,
+                    int last  = -1) const;
+
+  void evaluateVGHGH(const ParticleSetT<VT>& P,
+                     const int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) override;
+
+  template<class BSPLINESPO>
+  friend class SplineSetReaderT;
+  template<typename>
+  friend class BsplineReaderBaseT;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h
index 3fa31272e17..5150ee72811 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReaderT.h
@@ -4,16 +4,13 @@
 //
 // Copyright (c) 2019 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Ye Luo, yeluo@anl.gov,
-//                    Argonne National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory Jeongnim
-//                    Kim, jeongnim.kim@inte.com, Intel Corp.
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_SPLINESET_READERT_H
@@ -31,296 +28,268 @@ namespace qmcplusplus
 {
 /** General SplineSetReader to handle any unitcell
  */
-template <typename SA>
+template<typename SA>
 class SplineSetReaderT : public BsplineReaderBaseT<typename SA::ValueType>
 {
 public:
-    using splineset_t = SA;
-    using DataType = typename splineset_t::DataType;
-    using SplineType = typename splineset_t::SplineType;
-    using ValueType = typename splineset_t::ValueType;
+  using splineset_t = SA;
+  using DataType    = typename splineset_t::DataType;
+  using SplineType  = typename splineset_t::SplineType;
+  using ValueType   = typename splineset_t::ValueType;
 
-    Array<std::complex<double>, 3> FFTbox;
-    Array<double, 3> splineData_r, splineData_i;
-    double rotate_phase_r, rotate_phase_i;
-    UBspline_3d_d* spline_r;
-    UBspline_3d_d* spline_i;
-    splineset_t* bspline;
-    fftw_plan FFTplan;
+  Array<std::complex<double>, 3> FFTbox;
+  Array<double, 3> splineData_r, splineData_i;
+  double rotate_phase_r, rotate_phase_i;
+  UBspline_3d_d* spline_r;
+  UBspline_3d_d* spline_i;
+  splineset_t* bspline;
+  fftw_plan FFTplan;
 
-    SplineSetReaderT(EinsplineSetBuilderT<ValueType>* e) :
-        BsplineReaderBaseT<ValueType>(e),
-        spline_r(nullptr),
-        spline_i(nullptr),
-        bspline(nullptr),
-        FFTplan(nullptr)
-    {
-    }
+  SplineSetReaderT(EinsplineSetBuilderT<ValueType>* e)
+      : BsplineReaderBaseT<ValueType>(e), spline_r(nullptr), spline_i(nullptr), bspline(nullptr), FFTplan(nullptr)
+  {}
 
-    ~SplineSetReaderT() override
-    {
-        clear();
-    }
+  ~SplineSetReaderT() override { clear(); }
 
-    void
-    clear()
-    {
-        einspline::destroy(spline_r);
-        einspline::destroy(spline_i);
-        if (FFTplan != nullptr)
-            fftw_destroy_plan(FFTplan);
-        FFTplan = nullptr;
-    }
+  void clear()
+  {
+    einspline::destroy(spline_r);
+    einspline::destroy(spline_i);
+    if (FFTplan != nullptr)
+      fftw_destroy_plan(FFTplan);
+    FFTplan = nullptr;
+  }
 
-    // set info for Hybrid
-    virtual void
-    initialize_hybridrep_atomic_centers()
-    {
-    }
-    // transform cG to radial functions
-    virtual void
-    create_atomic_centers_Gspace(Vector<std::complex<double>>& cG,
-        Communicate& band_group_comm, int iorb)
-    {
-    }
+  // set info for Hybrid
+  virtual void initialize_hybridrep_atomic_centers() {}
+  // transform cG to radial functions
+  virtual void create_atomic_centers_Gspace(Vector<std::complex<double>>& cG, Communicate& band_group_comm, int iorb) {}
 
-    std::unique_ptr<SPOSetT<ValueType>>
-    create_spline_set(const std::string& my_name, int spin,
-        const BandInfoGroup& bandgroup) override
-    {
-        ReportEngine PRE("SplineSetReader", "create_spline_set(spin,SPE*)");
-        // Timer c_prep, c_unpack,c_fft, c_phase, c_spline, c_newphase, c_h5,
-        // c_init; double t_prep=0.0, t_unpack=0.0, t_fft=0.0, t_phase=0.0,
-        // t_spline=0.0, t_newphase=0.0, t_h5=0.0, t_init=0.0;
-        bspline = new splineset_t(my_name);
-        app_log() << "  ClassName = " << bspline->getClassName() << std::endl;
-        if (bspline->isComplex())
-            app_log() << "  Using complex einspline table" << std::endl;
-        else
-            app_log() << "  Using real einspline table" << std::endl;
+  std::unique_ptr<SPOSetT<ValueType>> create_spline_set(const std::string& my_name,
+                                                        int spin,
+                                                        const BandInfoGroup& bandgroup) override
+  {
+    ReportEngine PRE("SplineSetReader", "create_spline_set(spin,SPE*)");
+    // Timer c_prep, c_unpack, c_fft, c_phase, c_spline, c_newphase, c_h5, c_init;
+    // double t_prep=0.0, t_unpack=0.0, t_fft=0.0, t_phase=0.0,
+    //        t_spline=0.0, t_newphase=0.0, t_h5=0.0, t_init=0.0;
+    bspline = new splineset_t(my_name);
+    app_log() << "  ClassName = " << bspline->getClassName() << std::endl;
+    if (bspline->isComplex())
+      app_log() << "  Using complex einspline table" << std::endl;
+    else
+      app_log() << "  Using real einspline table" << std::endl;
 
-        // set info for Hybrid
-        this->initialize_hybridrep_atomic_centers();
+    // set info for Hybrid
+    this->initialize_hybridrep_atomic_centers();
 
-        // baseclass handles twists
-        this->check_twists(bspline, bandgroup);
+    // baseclass handles twists
+    this->check_twists(bspline, bandgroup);
 
-        Ugrid xyz_grid[3];
+    Ugrid xyz_grid[3];
 
-        typename splineset_t::BCType xyz_bc[3];
-        bool havePsig = this->set_grid(bspline->HalfG, xyz_grid, xyz_bc);
-        if (!havePsig)
-            this->myComm->barrier_and_abort(
-                "SplineSetReader needs psi_g. Set precision=\"double\".");
-        bspline->create_spline(xyz_grid, xyz_bc);
+    typename splineset_t::BCType xyz_bc[3];
+    bool havePsig = this->set_grid(bspline->HalfG, xyz_grid, xyz_bc);
+    if (!havePsig)
+      this->myComm->barrier_and_abort("SplineSetReader needs psi_g. Set precision=\"double\".");
+    bspline->create_spline(xyz_grid, xyz_bc);
 
-        std::ostringstream oo;
-        oo << bandgroup.myName << ".g" << this->MeshSize[0] << "x"
-           << this->MeshSize[1] << "x" << this->MeshSize[2] << ".h5";
+    std::ostringstream oo;
+    oo << bandgroup.myName << ".g" << this->MeshSize[0] << "x" << this->MeshSize[1] << "x" << this->MeshSize[2]
+       << ".h5";
 
-        const std::string splinefile(oo.str());
-        bool root = (this->myComm->rank() == 0);
-        int foundspline = 0;
-        Timer now;
-        if (root) {
-            now.restart();
-            hdf_archive h5f(this->myComm);
-            foundspline = h5f.open(splinefile, H5F_ACC_RDONLY);
-            if (foundspline) {
-                std::string aname("none");
-                foundspline = h5f.readEntry(aname, "class_name");
-                foundspline =
-                    (aname.find(bspline->getKeyword()) != std::string::npos);
-            }
-            if (foundspline) {
-                int sizeD = 0;
-                foundspline = h5f.readEntry(sizeD, "sizeof");
-                foundspline = (sizeD == sizeof(DataType));
-            }
-            if (foundspline) {
-                foundspline = bspline->read_splines(h5f);
-                if (foundspline)
-                    app_log() << "  Successfully restored coefficients from "
-                              << splinefile << ". The reading time is "
-                              << now.elapsed() << " sec." << std::endl;
-            }
-            h5f.close();
-        }
-        this->myComm->bcast(foundspline);
-        if (foundspline) {
-            now.restart();
-            bspline->bcast_tables(this->myComm);
-            app_log() << "  SplineSetReader bcast the full table "
-                      << now.elapsed() << " sec." << std::endl;
-            app_log().flush();
-        }
-        else {
-            bspline->flush_zero();
-
-            int nx = this->MeshSize[0];
-            int ny = this->MeshSize[1];
-            int nz = this->MeshSize[2];
-            if (havePsig) // perform FFT using FFTW
-            {
-                FFTbox.resize(nx, ny, nz);
-                FFTplan = fftw_plan_dft_3d(nx, ny, nz,
-                    reinterpret_cast<fftw_complex*>(FFTbox.data()),
-                    reinterpret_cast<fftw_complex*>(FFTbox.data()), +1,
-                    FFTW_ESTIMATE);
-                splineData_r.resize(nx, ny, nz);
-                if (bspline->isComplex())
-                    splineData_i.resize(nx, ny, nz);
+    const std::string splinefile(oo.str());
+    bool root       = (this->myComm->rank() == 0);
+    int foundspline = 0;
+    Timer now;
+    if (root)
+    {
+      now.restart();
+      hdf_archive h5f(this->myComm);
+      foundspline = h5f.open(splinefile, H5F_ACC_RDONLY);
+      if (foundspline)
+      {
+        std::string aname("none");
+        foundspline = h5f.readEntry(aname, "class_name");
+        foundspline = (aname.find(bspline->getKeyword()) != std::string::npos);
+      }
+      if (foundspline)
+      {
+        int sizeD   = 0;
+        foundspline = h5f.readEntry(sizeD, "sizeof");
+        foundspline = (sizeD == sizeof(DataType));
+      }
+      if (foundspline)
+      {
+        foundspline = bspline->read_splines(h5f);
+        if (foundspline)
+          app_log() << "  Successfully restored coefficients from " << splinefile << ". The reading time is "
+                    << now.elapsed() << " sec." << std::endl;
+      }
+      h5f.close();
+    }
+    this->myComm->bcast(foundspline);
+    if (foundspline)
+    {
+      now.restart();
+      bspline->bcast_tables(this->myComm);
+      app_log() << "  SplineSetReader bcast the full table " << now.elapsed() << " sec." << std::endl;
+      app_log().flush();
+    }
+    else
+    {
+      bspline->flush_zero();
 
-                TinyVector<double, 3> start(0.0);
-                TinyVector<double, 3> end(1.0);
-                spline_r = einspline::create(
-                    spline_r, start, end, this->MeshSize, bspline->HalfG);
-                if (bspline->isComplex())
-                    spline_i = einspline::create(
-                        spline_i, start, end, this->MeshSize, bspline->HalfG);
+      int nx = this->MeshSize[0];
+      int ny = this->MeshSize[1];
+      int nz = this->MeshSize[2];
+      if (havePsig) // perform FFT using FFTW
+      {
+        FFTbox.resize(nx, ny, nz);
+        FFTplan = fftw_plan_dft_3d(nx, ny, nz, reinterpret_cast<fftw_complex*>(FFTbox.data()),
+                                   reinterpret_cast<fftw_complex*>(FFTbox.data()), +1, FFTW_ESTIMATE);
+        splineData_r.resize(nx, ny, nz);
+        if (bspline->isComplex())
+          splineData_i.resize(nx, ny, nz);
 
-                now.restart();
-                initialize_spline_pio_gather(spin, bandgroup);
-                app_log() << "  SplineSetReader initialize_spline_pio "
-                          << now.elapsed() << " sec" << std::endl;
+        TinyVector<double, 3> start(0.0);
+        TinyVector<double, 3> end(1.0);
+        spline_r = einspline::create(spline_r, start, end, this->MeshSize, bspline->HalfG);
+        if (bspline->isComplex())
+          spline_i = einspline::create(spline_i, start, end, this->MeshSize, bspline->HalfG);
 
-                fftw_destroy_plan(FFTplan);
-                FFTplan = NULL;
-            }
-            else // why, don't know
-                initialize_spline_psi_r(spin, bandgroup);
-            if (this->saveSplineCoefs && root) {
-                now.restart();
-                hdf_archive h5f;
-                h5f.create(splinefile);
-                std::string classname = bspline->getClassName();
-                h5f.write(classname, "class_name");
-                int sizeD = sizeof(DataType);
-                h5f.write(sizeD, "sizeof");
-                bspline->write_splines(h5f);
-                h5f.close();
-                app_log() << "  Stored spline coefficients in " << splinefile
-                          << " for potential reuse. The writing time is "
-                          << now.elapsed() << " sec." << std::endl;
-            }
-        }
+        now.restart();
+        initialize_spline_pio_gather(spin, bandgroup);
+        app_log() << "  SplineSetReader initialize_spline_pio " << now.elapsed() << " sec" << std::endl;
 
-        clear();
-        return std::unique_ptr<SPOSetT<ValueType>>{bspline};
+        fftw_destroy_plan(FFTplan);
+        FFTplan = NULL;
+      }
+      else // why, don't know
+        initialize_spline_psi_r(spin, bandgroup);
+      if (this->saveSplineCoefs && root)
+      {
+        now.restart();
+        hdf_archive h5f;
+        h5f.create(splinefile);
+        std::string classname = bspline->getClassName();
+        h5f.write(classname, "class_name");
+        int sizeD = sizeof(DataType);
+        h5f.write(sizeD, "sizeof");
+        bspline->write_splines(h5f);
+        h5f.close();
+        app_log() << "  Stored spline coefficients in " << splinefile << " for potential reuse. The writing time is "
+                  << now.elapsed() << " sec." << std::endl;
+      }
     }
 
-    /** fft and spline cG
+    clear();
+    return std::unique_ptr<SPOSetT<ValueType>>{bspline};
+  }
+
+  /** fft and spline cG
      * @param cG psi_g to be processed
      * @param ti twist index
      * @param iorb orbital index
      *
      * Perform FFT and spline to spline_r and spline_i
      */
-    inline void
-    fft_spline(Vector<std::complex<double>>& cG, int ti)
+  inline void fft_spline(Vector<std::complex<double>>& cG, int ti)
+  {
+    unpack4fftw(cG, this->mybuilder->Gvecs[0], this->MeshSize, FFTbox);
+    fftw_execute(FFTplan);
+    if (bspline->isComplex())
     {
-        unpack4fftw(cG, this->mybuilder->Gvecs[0], this->MeshSize, FFTbox);
-        fftw_execute(FFTplan);
-        if (bspline->isComplex()) {
-            if (this->rotate)
-                fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i,
-                    this->mybuilder->primcell_kpoints[ti], rotate_phase_r,
-                    rotate_phase_i);
-            else {
-                split_real_components_c2c(FFTbox, splineData_r, splineData_i);
-                rotate_phase_r = 1.0;
-                rotate_phase_i = 0.0;
-            }
-            einspline::set(spline_r, splineData_r.data());
-            einspline::set(spline_i, splineData_i.data());
-        }
-        else {
-            fix_phase_rotate_c2r(FFTbox, splineData_r,
-                this->mybuilder->primcell_kpoints[ti], rotate_phase_r,
-                rotate_phase_i);
-            einspline::set(spline_r, splineData_r.data());
-        }
+      if (this->rotate)
+        fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, this->mybuilder->primcell_kpoints[ti], rotate_phase_r,
+                             rotate_phase_i);
+      else
+      {
+        split_real_components_c2c(FFTbox, splineData_r, splineData_i);
+        rotate_phase_r = 1.0;
+        rotate_phase_i = 0.0;
+      }
+      einspline::set(spline_r, splineData_r.data());
+      einspline::set(spline_i, splineData_i.data());
     }
+    else
+    {
+      fix_phase_rotate_c2r(FFTbox, splineData_r, this->mybuilder->primcell_kpoints[ti], rotate_phase_r, rotate_phase_i);
+      einspline::set(spline_r, splineData_r.data());
+    }
+  }
 
-    /** initialize the splines
+  /** initialize the splines
      */
-    void
-    initialize_spline_pio_gather(int spin, const BandInfoGroup& bandgroup)
-    {
-        // distribute bands over processor groups
-        int Nbands = bandgroup.getNumDistinctOrbitals();
-        const int Nprocs = this->myComm->size();
-        const int Nbandgroups = std::min(Nbands, Nprocs);
-        Communicate band_group_comm(*this->myComm, Nbandgroups);
-        std::vector<int> band_groups(Nbandgroups + 1, 0);
-        FairDivideLow(Nbands, Nbandgroups, band_groups);
-        int iorb_first = band_groups[band_group_comm.getGroupID()];
-        int iorb_last = band_groups[band_group_comm.getGroupID() + 1];
+  void initialize_spline_pio_gather(int spin, const BandInfoGroup& bandgroup)
+  {
+    // distribute bands over processor groups
+    int Nbands            = bandgroup.getNumDistinctOrbitals();
+    const int Nprocs      = this->myComm->size();
+    const int Nbandgroups = std::min(Nbands, Nprocs);
+    Communicate band_group_comm(*this->myComm, Nbandgroups);
+    std::vector<int> band_groups(Nbandgroups + 1, 0);
+    FairDivideLow(Nbands, Nbandgroups, band_groups);
+    int iorb_first = band_groups[band_group_comm.getGroupID()];
+    int iorb_last  = band_groups[band_group_comm.getGroupID() + 1];
 
-        app_log() << "Start transforming plane waves to 3D B-Splines."
-                  << std::endl;
-        hdf_archive h5f(&band_group_comm, false);
-        Vector<std::complex<double>> cG(this->mybuilder->Gvecs[0].size());
-        const std::vector<BandInfo>& cur_bands = bandgroup.myBands;
-        if (band_group_comm.isGroupLeader())
-            h5f.open(this->mybuilder->H5FileName, H5F_ACC_RDONLY);
-        for (int iorb = iorb_first; iorb < iorb_last; iorb++) {
-            if (band_group_comm.isGroupLeader()) {
-                int iorb_h5 = bspline->BandIndexMap[iorb];
-                int ti = cur_bands[iorb_h5].TwistIndex;
-                std::string s =
-                    this->psi_g_path(ti, spin, cur_bands[iorb_h5].BandIndex);
-                if (!h5f.readEntry(cG, s)) {
-                    std::ostringstream msg;
-                    msg << "SplineSetReader Failed to read band(s) from h5 "
-                           "file. "
-                        << "Attempted dataset " << s << " with " << cG.size()
-                        << " complex numbers." << std::endl;
-                    throw std::runtime_error(msg.str());
-                }
-                double total_norm = compute_norm(cG);
-                if ((this->checkNorm) &&
-                    (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE)) {
-                    std::ostringstream msg;
-                    msg << "SplineSetReader The orbital " << iorb_h5
-                        << " has a wrong norm " << total_norm
-                        << ", computed from plane wave coefficients!"
-                        << std::endl
-                        << "This may indicate a problem with the HDF5 library "
-                           "versions used "
-                        << "during wavefunction conversion or read."
-                        << std::endl;
-                    throw std::runtime_error(msg.str());
-                }
-                fft_spline(cG, ti);
-                bspline->set_spline(
-                    spline_r, spline_i, cur_bands[iorb_h5].TwistIndex, iorb, 0);
-            }
-            this->create_atomic_centers_Gspace(cG, band_group_comm, iorb);
+    app_log() << "Start transforming plane waves to 3D B-Splines." << std::endl;
+    hdf_archive h5f(&band_group_comm, false);
+    Vector<std::complex<double>> cG(this->mybuilder->Gvecs[0].size());
+    const std::vector<BandInfo>& cur_bands = bandgroup.myBands;
+    if (band_group_comm.isGroupLeader())
+      h5f.open(this->mybuilder->H5FileName, H5F_ACC_RDONLY);
+    for (int iorb = iorb_first; iorb < iorb_last; iorb++)
+    {
+      if (band_group_comm.isGroupLeader())
+      {
+        int iorb_h5   = bspline->BandIndexMap[iorb];
+        int ti        = cur_bands[iorb_h5].TwistIndex;
+        std::string s = this->psi_g_path(ti, spin, cur_bands[iorb_h5].BandIndex);
+        if (!h5f.readEntry(cG, s))
+        {
+          std::ostringstream msg;
+          msg << "SplineSetReader Failed to read band(s) from h5 "
+                 "file. "
+              << "Attempted dataset " << s << " with " << cG.size() << " complex numbers." << std::endl;
+          throw std::runtime_error(msg.str());
         }
-
-        this->myComm->barrier();
-        Timer now;
-        if (band_group_comm.isGroupLeader()) {
-            now.restart();
-            bspline->gather_tables(band_group_comm.getGroupLeaderComm());
-            app_log() << "  Time to gather the table = " << now.elapsed()
-                      << std::endl;
+        double total_norm = compute_norm(cG);
+        if ((this->checkNorm) && (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE))
+        {
+          std::ostringstream msg;
+          msg << "SplineSetReader The orbital " << iorb_h5 << " has a wrong norm " << total_norm
+              << ", computed from plane wave coefficients!" << std::endl
+              << "This may indicate a problem with the HDF5 library "
+                 "versions used "
+              << "during wavefunction conversion or read." << std::endl;
+          throw std::runtime_error(msg.str());
         }
-        now.restart();
-        bspline->bcast_tables(this->myComm);
-        app_log() << "  Time to bcast the table = " << now.elapsed()
-                  << std::endl;
+        fft_spline(cG, ti);
+        bspline->set_spline(spline_r, spline_i, cur_bands[iorb_h5].TwistIndex, iorb, 0);
+      }
+      this->create_atomic_centers_Gspace(cG, band_group_comm, iorb);
     }
 
-    void
-    initialize_spline_psi_r(int spin, const BandInfoGroup& bandgroup)
+    this->myComm->barrier();
+    Timer now;
+    if (band_group_comm.isGroupLeader())
     {
-        // old implementation buried in the history
-        this->myComm->barrier_and_abort(
-            "SplineSetReaderP initialize_spline_psi_r "
-            "implementation not finished.");
+      now.restart();
+      bspline->gather_tables(band_group_comm.getGroupLeaderComm());
+      app_log() << "  Time to gather the table = " << now.elapsed() << std::endl;
     }
+    now.restart();
+    bspline->bcast_tables(this->myComm);
+    app_log() << "  Time to bcast the table = " << now.elapsed() << std::endl;
+  }
+
+  void initialize_spline_psi_r(int spin, const BandInfoGroup& bandgroup)
+  {
+    // old implementation buried in the history
+    this->myComm->barrier_and_abort("SplineSetReaderP initialize_spline_psi_r "
+                                    "implementation not finished.");
+  }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp
index 6e3007323b3..35bd3ffa6eb 100644
--- a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.cpp
@@ -4,7 +4,7 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by:
+// File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //
 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h
index 898d8f2a2e4..2f991b228e5 100644
--- a/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h
+++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReaderT.h
@@ -18,42 +18,42 @@
 namespace qmcplusplus
 {
 /// forward declaration
-template <typename T>
+template<typename T>
 class BsplineReaderBaseT;
-template <typename T>
+template<typename T>
 class EinsplineSetBuilderT;
 
 /** create a reader which handles complex (double size real) splines, C2R or C2C
  * case spline storage and computation precision is double
  */
-template <typename T>
-std::unique_ptr<BsplineReaderBaseT<T>>
-createBsplineComplexDoubleT(
-    EinsplineSetBuilderT<T>* e, bool hybrid_rep, const std::string& useGPU);
+template<typename T>
+std::unique_ptr<BsplineReaderBaseT<T>> createBsplineComplexDoubleT(EinsplineSetBuilderT<T>* e,
+                                                                   bool hybrid_rep,
+                                                                   const std::string& useGPU);
 
 /** create a reader which handles complex (double size real) splines, C2R or C2C
  * case spline storage and computation precision is float
  */
-template <typename T>
-std::unique_ptr<BsplineReaderBaseT<T>>
-createBsplineComplexSingleT(
-    EinsplineSetBuilderT<T>* e, bool hybrid_rep, const std::string& useGPU);
+template<typename T>
+std::unique_ptr<BsplineReaderBaseT<T>> createBsplineComplexSingleT(EinsplineSetBuilderT<T>* e,
+                                                                   bool hybrid_rep,
+                                                                   const std::string& useGPU);
 
 /** create a reader which handles real splines, R2R case
  *  spline storage and computation precision is double
  */
-template <typename T>
-std::unique_ptr<BsplineReaderBaseT<T>>
-createBsplineRealDoubleT(
-    EinsplineSetBuilderT<T>* e, bool hybrid_rep, const std::string& useGPU);
+template<typename T>
+std::unique_ptr<BsplineReaderBaseT<T>> createBsplineRealDoubleT(EinsplineSetBuilderT<T>* e,
+                                                                bool hybrid_rep,
+                                                                const std::string& useGPU);
 
 /** create a reader which handles real splines, R2R case
  *  spline storage and computation precision is float
  */
-template <typename T>
-std::unique_ptr<BsplineReaderBaseT<T>>
-createBsplineRealSingleT(
-    EinsplineSetBuilderT<T>* e, bool hybrid_rep, const std::string& useGPU);
+template<typename T>
+std::unique_ptr<BsplineReaderBaseT<T>> createBsplineRealSingleT(EinsplineSetBuilderT<T>* e,
+                                                                bool hybrid_rep,
+                                                                const std::string& useGPU);
 
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.cpp b/src/QMCWaveFunctions/CompositeSPOSetT.cpp
index 51b01d756f0..d9ae09e8466 100644
--- a/src/QMCWaveFunctions/CompositeSPOSetT.cpp
+++ b/src/QMCWaveFunctions/CompositeSPOSetT.cpp
@@ -4,14 +4,11 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "QMCWaveFunctions/CompositeSPOSetT.h"
@@ -33,182 +30,187 @@ namespace MatrixOperators
  * @todo smater and more efficient matrix, move up for others
  * The columns [0,M1) are inserted into [offset_c,offset_c+M1).
  */
-template <typename MAT1, typename MAT2>
-inline void
-insert_columns(const MAT1& small, MAT2& big, int offset_c)
+template<typename MAT1, typename MAT2>
+inline void insert_columns(const MAT1& small, MAT2& big, int offset_c)
 {
-    const int c = small.cols();
-    for (int i = 0; i < small.rows(); ++i)
-        std::copy(small[i], small[i] + c, big[i] + offset_c);
+  const int c = small.cols();
+  for (int i = 0; i < small.rows(); ++i)
+    std::copy(small[i], small[i] + c, big[i] + offset_c);
 }
 } // namespace MatrixOperators
 
-template <typename T>
-CompositeSPOSetT<T>::CompositeSPOSetT(const std::string& my_name) :
-    SPOSetT<T>(my_name)
+template<typename T>
+CompositeSPOSetT<T>::CompositeSPOSetT(const std::string& my_name) : SPOSetT<T>(my_name)
 {
-    this->OrbitalSetSize = 0;
-    component_offsets.reserve(4);
+  this->OrbitalSetSize = 0;
+  component_offsets.reserve(4);
 }
 
-template <typename T>
-CompositeSPOSetT<T>::CompositeSPOSetT(const CompositeSPOSetT<T>& other) :
-    SPOSetT<T>(other)
+template<typename T>
+CompositeSPOSetT<T>::CompositeSPOSetT(const CompositeSPOSetT<T>& other) : SPOSetT<T>(other)
 {
-    for (auto& element : other.components) {
-        this->add(element->makeClone());
-    }
+  for (auto& element : other.components)
+  {
+    this->add(element->makeClone());
+  }
 }
 
-template <typename T>
+template<typename T>
 CompositeSPOSetT<T>::~CompositeSPOSetT() = default;
 
-template <typename T>
-void
-CompositeSPOSetT<T>::add(std::unique_ptr<SPOSetT<T>> component)
+template<typename T>
+void CompositeSPOSetT<T>::add(std::unique_ptr<SPOSetT<T>> component)
 {
-    if (components.empty())
-        component_offsets.push_back(0); // add 0
+  if (components.empty())
+    component_offsets.push_back(0); // add 0
 
-    int norbs = component->size();
-    components.push_back(std::move(component));
-    component_values.emplace_back(norbs);
-    component_gradients.emplace_back(norbs);
-    component_laplacians.emplace_back(norbs);
+  int norbs = component->size();
+  components.push_back(std::move(component));
+  component_values.emplace_back(norbs);
+  component_gradients.emplace_back(norbs);
+  component_laplacians.emplace_back(norbs);
 
-    this->OrbitalSetSize += norbs;
-    component_offsets.push_back(this->OrbitalSetSize);
+  this->OrbitalSetSize += norbs;
+  component_offsets.push_back(this->OrbitalSetSize);
 }
 
-template <typename T>
-void
-CompositeSPOSetT<T>::report()
+template<typename T>
+void CompositeSPOSetT<T>::report()
 {
-    app_log() << "CompositeSPOSetT" << std::endl;
-    app_log() << "  ncomponents = " << components.size() << std::endl;
-    app_log() << "  components" << std::endl;
-    for (int i = 0; i < components.size(); ++i) {
-        app_log() << "    " << i << std::endl;
-        components[i]->basic_report("      ");
-    }
+  app_log() << "CompositeSPOSetT" << std::endl;
+  app_log() << "  ncomponents = " << components.size() << std::endl;
+  app_log() << "  components" << std::endl;
+  for (int i = 0; i < components.size(); ++i)
+  {
+    app_log() << "    " << i << std::endl;
+    components[i]->basic_report("      ");
+  }
 }
 
-template <typename T>
-std::unique_ptr<SPOSetT<T>>
-CompositeSPOSetT<T>::makeClone() const
+template<typename T>
+std::unique_ptr<SPOSetT<T>> CompositeSPOSetT<T>::makeClone() const
 {
-    return std::make_unique<CompositeSPOSetT<T>>(*this);
+  return std::make_unique<CompositeSPOSetT<T>>(*this);
 }
 
-template <typename T>
-void
-CompositeSPOSetT<T>::evaluateValue(
-    const ParticleSetT<T>& P, int iat, ValueVector& psi)
+template<typename T>
+void CompositeSPOSetT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-    int n = 0;
-    for (int c = 0; c < components.size(); ++c) {
-        SPOSetT<T>& component = *components[c];
-        ValueVector& values = component_values[c];
-        component.evaluateValue(P, iat, values);
-        std::copy(values.begin(), values.end(), psi.begin() + n);
-        n += component.size();
-    }
+  int n = 0;
+  for (int c = 0; c < components.size(); ++c)
+  {
+    SPOSetT<T>& component = *components[c];
+    ValueVector& values   = component_values[c];
+    component.evaluateValue(P, iat, values);
+    std::copy(values.begin(), values.end(), psi.begin() + n);
+    n += component.size();
+  }
 }
 
-template <typename T>
-void
-CompositeSPOSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
-    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<typename T>
+void CompositeSPOSetT<T>::evaluateVGL(const ParticleSetT<T>& P,
+                                      int iat,
+                                      ValueVector& psi,
+                                      GradVector& dpsi,
+                                      ValueVector& d2psi)
 {
-    int n = 0;
-    for (int c = 0; c < components.size(); ++c) {
-        SPOSetT<T>& component = *components[c];
-        ValueVector& values = component_values[c];
-        GradVector& gradients = component_gradients[c];
-        ValueVector& laplacians = component_laplacians[c];
-        component.evaluateVGL(P, iat, values, gradients, laplacians);
-        std::copy(values.begin(), values.end(), psi.begin() + n);
-        std::copy(gradients.begin(), gradients.end(), dpsi.begin() + n);
-        std::copy(laplacians.begin(), laplacians.end(), d2psi.begin() + n);
-        n += component.size();
-    }
+  int n = 0;
+  for (int c = 0; c < components.size(); ++c)
+  {
+    SPOSetT<T>& component   = *components[c];
+    ValueVector& values     = component_values[c];
+    GradVector& gradients   = component_gradients[c];
+    ValueVector& laplacians = component_laplacians[c];
+    component.evaluateVGL(P, iat, values, gradients, laplacians);
+    std::copy(values.begin(), values.end(), psi.begin() + n);
+    std::copy(gradients.begin(), gradients.end(), dpsi.begin() + n);
+    std::copy(laplacians.begin(), laplacians.end(), d2psi.begin() + n);
+    n += component.size();
+  }
 }
 
-template <typename T>
-void
-CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
+template<typename T>
+void CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                               int first,
+                                               int last,
+                                               ValueMatrix& logdet,
+                                               GradMatrix& dlogdet,
+                                               ValueMatrix& d2logdet)
 {
-    const int nat = last - first;
-    for (int c = 0; c < components.size(); ++c) {
-        int norb = components[c]->size();
-        ValueMatrix v(nat, norb);
-        GradMatrix g(nat, norb);
-        ValueMatrix l(nat, norb);
-        components[c]->evaluate_notranspose(P, first, last, v, g, l);
-        int n = component_offsets[c];
-        MatrixOperators::insert_columns(v, logdet, n);
-        MatrixOperators::insert_columns(g, dlogdet, n);
-        MatrixOperators::insert_columns(l, d2logdet, n);
-    }
+  const int nat = last - first;
+  for (int c = 0; c < components.size(); ++c)
+  {
+    int norb = components[c]->size();
+    ValueMatrix v(nat, norb);
+    GradMatrix g(nat, norb);
+    ValueMatrix l(nat, norb);
+    components[c]->evaluate_notranspose(P, first, last, v, g, l);
+    int n = component_offsets[c];
+    MatrixOperators::insert_columns(v, logdet, n);
+    MatrixOperators::insert_columns(g, dlogdet, n);
+    MatrixOperators::insert_columns(l, d2logdet, n);
+  }
 }
 
-template <typename T>
-void
-CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet,
-    HessMatrix& grad_grad_logdet)
+template<typename T>
+void CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                               int first,
+                                               int last,
+                                               ValueMatrix& logdet,
+                                               GradMatrix& dlogdet,
+                                               HessMatrix& grad_grad_logdet)
 {
-    const int nat = last - first;
-    for (int c = 0; c < components.size(); ++c) {
-        int norb = components[c]->size();
-        ValueMatrix v(nat, norb);
-        GradMatrix g(nat, norb);
-        HessMatrix h(nat, norb);
-        components[c]->evaluate_notranspose(P, first, last, v, g, h);
-        int n = component_offsets[c];
-        MatrixOperators::insert_columns(v, logdet, n);
-        MatrixOperators::insert_columns(g, dlogdet, n);
-        MatrixOperators::insert_columns(h, grad_grad_logdet, n);
-    }
+  const int nat = last - first;
+  for (int c = 0; c < components.size(); ++c)
+  {
+    int norb = components[c]->size();
+    ValueMatrix v(nat, norb);
+    GradMatrix g(nat, norb);
+    HessMatrix h(nat, norb);
+    components[c]->evaluate_notranspose(P, first, last, v, g, h);
+    int n = component_offsets[c];
+    MatrixOperators::insert_columns(v, logdet, n);
+    MatrixOperators::insert_columns(g, dlogdet, n);
+    MatrixOperators::insert_columns(h, grad_grad_logdet, n);
+  }
 }
 
-template <typename T>
-void
-CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet,
-    HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet)
+template<typename T>
+void CompositeSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                               int first,
+                                               int last,
+                                               ValueMatrix& logdet,
+                                               GradMatrix& dlogdet,
+                                               HessMatrix& grad_grad_logdet,
+                                               GGGMatrix& grad_grad_grad_logdet)
 {
-    not_implemented(
-        "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)");
+  not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)");
 }
 
-template <typename T>
-std::unique_ptr<SPOSetT<T>>
-CompositeSPOSetBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
+template<typename T>
+std::unique_ptr<SPOSetT<T>> CompositeSPOSetBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 {
-    std::vector<std::string> spolist;
-    putContent(spolist, cur);
-    if (spolist.empty()) {
-        return nullptr;
-    }
-
-    auto spo_now = std::make_unique<CompositeSPOSetT<T>>(
-        getXMLAttributeValue(cur, "name"));
-    for (int i = 0; i < spolist.size(); ++i) {
-        const SPOSetT<T>* spo = sposet_builder_factory_.getSPOSet(spolist[i]);
-        if (spo)
-            spo_now->add(spo->makeClone());
-    }
-    return (spo_now->size()) ? std::unique_ptr<SPOSetT<T>>{std::move(spo_now)} :
-                               nullptr;
+  std::vector<std::string> spolist;
+  putContent(spolist, cur);
+  if (spolist.empty())
+  {
+    return nullptr;
+  }
+
+  auto spo_now = std::make_unique<CompositeSPOSetT<T>>(getXMLAttributeValue(cur, "name"));
+  for (int i = 0; i < spolist.size(); ++i)
+  {
+    const SPOSetT<T>* spo = sposet_builder_factory_.getSPOSet(spolist[i]);
+    if (spo)
+      spo_now->add(spo->makeClone());
+  }
+  return (spo_now->size()) ? std::unique_ptr<SPOSetT<T>>{std::move(spo_now)} : nullptr;
 }
 
-template <typename T>
-std::unique_ptr<SPOSetT<T>>
-CompositeSPOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input)
+template<typename T>
+std::unique_ptr<SPOSetT<T>> CompositeSPOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input)
 {
-    return createSPOSetFromXML(cur);
+  return createSPOSetFromXML(cur);
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/CompositeSPOSetT.h b/src/QMCWaveFunctions/CompositeSPOSetT.h
index ec597a7eb4e..242c82b1e9e 100644
--- a/src/QMCWaveFunctions/CompositeSPOSetT.h
+++ b/src/QMCWaveFunctions/CompositeSPOSetT.h
@@ -4,14 +4,11 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_COMPOSITE_SPOSETT_H
@@ -24,109 +21,97 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class CompositeSPOSetT : public SPOSetT<T>
 {
 public:
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using HessMatrix = typename SPOSetT<T>::HessMatrix;
-    using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
-
-    /// component SPOSets
-    std::vector<std::unique_ptr<SPOSetT<T>>> components;
-    /// temporary storage for values
-    std::vector<ValueVector> component_values;
-    /// temporary storage for gradients
-    std::vector<GradVector> component_gradients;
-    /// temporary storage for laplacians
-    std::vector<ValueVector> component_laplacians;
-    /// store the precomputed offsets
-    std::vector<int> component_offsets;
-
-    CompositeSPOSetT(const std::string& my_name);
-    /**
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using GGGMatrix   = typename SPOSetT<T>::GGGMatrix;
+
+  /// component SPOSets
+  std::vector<std::unique_ptr<SPOSetT<T>>> components;
+  /// temporary storage for values
+  std::vector<ValueVector> component_values;
+  /// temporary storage for gradients
+  std::vector<GradVector> component_gradients;
+  /// temporary storage for laplacians
+  std::vector<ValueVector> component_laplacians;
+  /// store the precomputed offsets
+  std::vector<int> component_offsets;
+
+  CompositeSPOSetT(const std::string& my_name);
+  /**
      * @TODO: do we want template copy constructor
      * (i.e., copy from other with different type argument)?
      */
-    CompositeSPOSetT(const CompositeSPOSetT& other);
-    ~CompositeSPOSetT() override;
-
-    std::string
-    getClassName() const override
-    {
-        return "CompositeSPOSetT";
-    }
-
-    /// add a sposet component to this composite sposet
-    void
-    add(std::unique_ptr<SPOSetT<T>> component);
-
-    /// print out component info
-    void
-    report();
-
-    // SPOSet interface methods
-    /// size is determined by component sposets and nothing else
-    inline void
-    setOrbitalSetSize(int norbs) override
-    {
-    }
-
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const override;
-
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
-
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
-
-    /// unimplemented functions call this to abort
-    inline void
-    not_implemented(const std::string& method)
-    {
-        APP_ABORT("CompositeSPOSetT::" + method + " has not been implemented");
-    }
-
-    // methods to be implemented in the future (possibly)
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override;
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        HessMatrix& ddlogdet) override;
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet,
-        GGGMatrix& dddlogdet) override;
+  CompositeSPOSetT(const CompositeSPOSetT& other);
+  ~CompositeSPOSetT() override;
+
+  std::string getClassName() const override { return "CompositeSPOSetT"; }
+
+  /// add a sposet component to this composite sposet
+  void add(std::unique_ptr<SPOSetT<T>> component);
+
+  /// print out component info
+  void report();
+
+  // SPOSet interface methods
+  /// size is determined by component sposets and nothing else
+  inline void setOrbitalSetSize(int norbs) override {}
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  /// unimplemented functions call this to abort
+  inline void not_implemented(const std::string& method)
+  {
+    APP_ABORT("CompositeSPOSetT::" + method + " has not been implemented");
+  }
+
+  // methods to be implemented in the future (possibly)
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& ddlogdet) override;
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& ddlogdet,
+                            GGGMatrix& dddlogdet) override;
 };
 
-template <typename T>
+template<typename T>
 class CompositeSPOSetBuilderT : public SPOSetBuilderT<T>
 {
 public:
-    CompositeSPOSetBuilderT(
-        Communicate* comm, const SPOSetBuilderFactoryT<T>& factory) :
-        SPOSetBuilderT<T>("Composite", comm),
-        sposet_builder_factory_(factory)
-    {
-    }
-
-    // SPOSetBuilder interface
-    std::unique_ptr<SPOSetT<T>>
-    createSPOSetFromXML(xmlNodePtr cur) override;
-
-    std::unique_ptr<SPOSetT<T>>
-    createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override;
-
-    /// reference to the sposet_builder_factory
-    const SPOSetBuilderFactoryT<T>& sposet_builder_factory_;
+  CompositeSPOSetBuilderT(Communicate* comm, const SPOSetBuilderFactoryT<T>& factory)
+      : SPOSetBuilderT<T>("Composite", comm), sposet_builder_factory_(factory)
+  {}
+
+  // SPOSetBuilder interface
+  std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) override;
+
+  std::unique_ptr<SPOSetT<T>> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override;
+
+  /// reference to the sposet_builder_factory
+  const SPOSetBuilderFactoryT<T>& sposet_builder_factory_;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp
index 41a1ff2076d..3b3ed9ef7b9 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderT.cpp
@@ -1,3 +1,19 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//
+// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
 
 #include "QMCWaveFunctions/EinsplineSetBuilderT.h"
 
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderT.h b/src/QMCWaveFunctions/EinsplineSetBuilderT.h
index cb5643839fe..e2f13e79d38 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderT.h
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderT.h
@@ -4,19 +4,15 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
-// Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jaron T. Krogel,
-//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Jeongnim
-//                    Kim, jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Ye Luo, yeluo@anl.gov, Argonne National
-//                    Laboratory Raymond Clay III, j.k.rofling@gmail.com,
-//                    Lawrence Livermore National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at
-// Urbana-Champaign
+// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 /** @file EinsplineSetBuilder.h
diff --git a/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h
index c55ef7fd627..26cc29e5594 100644
--- a/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h
+++ b/src/QMCWaveFunctions/EinsplineSpinorSetBuilderT.h
@@ -32,7 +32,7 @@ class EinsplineSpinorSetBuilderT : public EinsplineSetBuilderT<T>
 {
   using ParticleSet = ParticleSetT<T>;
   using SPOSet      = SPOSetT<T>;
-  using PSetMap = std::map<std::string, const std::unique_ptr<ParticleSet>>;
+  using PSetMap     = std::map<std::string, const std::unique_ptr<ParticleSet>>;
 
 public:
   ///constructor
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h
index 95eb8b6c2a0..ff396122eb2 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilder.h
@@ -1,3 +1,19 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
 #ifndef QMCPLUSPLUS_FREE_ORBITAL_BUILDER_H
 #define QMCPLUSPLUS_FREE_ORBITAL_BUILDER_H
 
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp
index b028eb039b5..df2a9168370 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.cpp
@@ -1,3 +1,19 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
 #include "OhmmsData/AttributeSet.h"
 #include "LongRange/StructFact.h"
 #include "LongRange/KContainerT.h"
@@ -6,12 +22,12 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 FreeOrbitalBuilderT<T>::FreeOrbitalBuilderT(ParticleSetT<T>& els, Communicate* comm, xmlNodePtr cur)
     : SPOSetBuilderT<T>("PW", comm), targetPtcl(els)
 {}
 
-template <typename T>
+template<typename T>
 std::unique_ptr<SPOSetT<T>> FreeOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 {
   int norb = -1;
@@ -89,7 +105,7 @@ std::unique_ptr<SPOSetT<T>> FreeOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodeP
   return sposet;
 }
 
-template <typename T>
+template<typename T>
 bool FreeOrbitalBuilderT<T>::in_list(const int j, const std::vector<int> l)
 {
   for (int i = 0; i < l.size(); i++)
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h
index 06e4b730a3c..f408692ea81 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalBuilderT.h
@@ -1,3 +1,19 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers.
+//
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
 #ifndef QMCPLUSPLUS_FREE_ORBITAL_BUILDERT_H
 #define QMCPLUSPLUS_FREE_ORBITAL_BUILDERT_H
 
@@ -5,12 +21,12 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class FreeOrbitalBuilderT : public SPOSetBuilderT<T>
 {
 public:
-    using RealType = typename SPOSetBuilderT<T>::RealType;
-    using PosType = typename SPOSetBuilderT<T>::PosType;
+  using RealType = typename SPOSetBuilderT<T>::RealType;
+  using PosType  = typename SPOSetBuilderT<T>::PosType;
 
   FreeOrbitalBuilderT(ParticleSetT<T>& els, Communicate* comm, xmlNodePtr cur);
   ~FreeOrbitalBuilderT() {}
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
index bc4bec54085..1a3285d1237 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.cpp
@@ -4,20 +4,15 @@
 //
 // Copyright (c) 2022 QMCPACK developers.
 //
-// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-// National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
-//                    Ridge National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory Yubo
-//                    "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
-//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National
-//                    Laboratory
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "FreeOrbitalT.h"
@@ -25,643 +20,677 @@
 namespace qmcplusplus
 {
 
-template <class T>
-void
-FreeOrbitalT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
-    ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec)
+template<class T>
+void FreeOrbitalT<T>::evaluateVGL(const ParticleSetT<T>& P,
+                                  int iat,
+                                  ValueVector& pvec,
+                                  GradVector& dpvec,
+                                  ValueVector& d2pvec)
+{}
+
+template<>
+void FreeOrbitalT<float>::evaluateVGL(const ParticleSetT<float>& P,
+                                      int iat,
+                                      ValueVector& pvec,
+                                      GradVector& dpvec,
+                                      ValueVector& d2pvec)
 {
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+    const int j2 = 2 * ik;
+    const int j1 = j2 - 1;
+    pvec[j1]     = coskr;
+    pvec[j2]     = sinkr;
+    dpvec[j1]    = -sinkr * kvecs[ik];
+    dpvec[j2]    = coskr * kvecs[ik];
+    d2pvec[j1]   = k2neg[ik] * coskr;
+    d2pvec[j2]   = k2neg[ik] * sinkr;
+  }
+  pvec[0]   = 1.0;
+  dpvec[0]  = 0.0;
+  d2pvec[0] = 0.0;
 }
 
-template <>
-void
-FreeOrbitalT<float>::evaluateVGL(const ParticleSetT<float>& P, int iat,
-    ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec)
+template<>
+void FreeOrbitalT<double>::evaluateVGL(const ParticleSetT<double>& P,
+                                       int iat,
+                                       ValueVector& pvec,
+                                       GradVector& dpvec,
+                                       ValueVector& d2pvec)
 {
-    const PosType& r = P.activeR(iat);
-    RealType sinkr, coskr;
-    for (int ik = mink; ik < maxk; ik++) {
-        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-        const int j2 = 2 * ik;
-        const int j1 = j2 - 1;
-        pvec[j1] = coskr;
-        pvec[j2] = sinkr;
-        dpvec[j1] = -sinkr * kvecs[ik];
-        dpvec[j2] = coskr * kvecs[ik];
-        d2pvec[j1] = k2neg[ik] * coskr;
-        d2pvec[j2] = k2neg[ik] * sinkr;
-    }
-    pvec[0] = 1.0;
-    dpvec[0] = 0.0;
-    d2pvec[0] = 0.0;
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+    const int j2 = 2 * ik;
+    const int j1 = j2 - 1;
+    pvec[j1]     = coskr;
+    pvec[j2]     = sinkr;
+    dpvec[j1]    = -sinkr * kvecs[ik];
+    dpvec[j2]    = coskr * kvecs[ik];
+    d2pvec[j1]   = k2neg[ik] * coskr;
+    d2pvec[j2]   = k2neg[ik] * sinkr;
+  }
+  pvec[0]   = 1.0;
+  dpvec[0]  = 0.0;
+  d2pvec[0] = 0.0;
 }
 
-template <>
-void
-FreeOrbitalT<double>::evaluateVGL(const ParticleSetT<double>& P, int iat,
-    ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec)
+template<>
+void FreeOrbitalT<std::complex<float>>::evaluateVGL(const ParticleSetT<std::complex<float>>& P,
+                                                    int iat,
+                                                    ValueVector& pvec,
+                                                    GradVector& dpvec,
+                                                    ValueVector& d2pvec)
 {
-    const PosType& r = P.activeR(iat);
-    RealType sinkr, coskr;
-    for (int ik = mink; ik < maxk; ik++) {
-        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-        const int j2 = 2 * ik;
-        const int j1 = j2 - 1;
-        pvec[j1] = coskr;
-        pvec[j2] = sinkr;
-        dpvec[j1] = -sinkr * kvecs[ik];
-        dpvec[j2] = coskr * kvecs[ik];
-        d2pvec[j1] = k2neg[ik] * coskr;
-        d2pvec[j2] = k2neg[ik] * sinkr;
-    }
-    pvec[0] = 1.0;
-    dpvec[0] = 0.0;
-    d2pvec[0] = 0.0;
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+
+    pvec[ik]   = ValueType(coskr, sinkr);
+    dpvec[ik]  = ValueType(-sinkr, coskr) * kvecs[ik];
+    d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr);
+  }
 }
 
-template <>
-void
-FreeOrbitalT<std::complex<float>>::evaluateVGL(
-    const ParticleSetT<std::complex<float>>& P, int iat, ValueVector& pvec,
-    GradVector& dpvec, ValueVector& d2pvec)
+template<>
+void FreeOrbitalT<std::complex<double>>::evaluateVGL(const ParticleSetT<std::complex<double>>& P,
+                                                     int iat,
+                                                     ValueVector& pvec,
+                                                     GradVector& dpvec,
+                                                     ValueVector& d2pvec)
 {
-    const PosType& r = P.activeR(iat);
-    RealType sinkr, coskr;
-    for (int ik = mink; ik < maxk; ik++) {
-        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-        pvec[ik] = ValueType(coskr, sinkr);
-        dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik];
-        d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr);
-    }
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+
+    pvec[ik]   = ValueType(coskr, sinkr);
+    dpvec[ik]  = ValueType(-sinkr, coskr) * kvecs[ik];
+    d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr);
+  }
 }
 
-template <>
-void
-FreeOrbitalT<std::complex<double>>::evaluateVGL(
-    const ParticleSetT<std::complex<double>>& P, int iat, ValueVector& pvec,
-    GradVector& dpvec, ValueVector& d2pvec)
+template<>
+void FreeOrbitalT<float>::evaluateValue(const ParticleSetT<float>& P, int iat, ValueVector& pvec)
 {
-    const PosType& r = P.activeR(iat);
-    RealType sinkr, coskr;
-    for (int ik = mink; ik < maxk; ik++) {
-        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-        pvec[ik] = ValueType(coskr, sinkr);
-        dpvec[ik] = ValueType(-sinkr, coskr) * kvecs[ik];
-        d2pvec[ik] = ValueType(k2neg[ik] * coskr, k2neg[ik] * sinkr);
-    }
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+    const int j2 = 2 * ik;
+    const int j1 = j2 - 1;
+    pvec[j1]     = coskr;
+    pvec[j2]     = sinkr;
+  }
+  pvec[0] = 1.0;
 }
 
-template <>
-void
-FreeOrbitalT<float>::evaluateValue(
-    const ParticleSetT<float>& P, int iat, ValueVector& pvec)
+template<>
+void FreeOrbitalT<double>::evaluateValue(const ParticleSetT<double>& P, int iat, ValueVector& pvec)
 {
-    const PosType& r = P.activeR(iat);
-    RealType sinkr, coskr;
-    for (int ik = mink; ik < maxk; ik++) {
-        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-        const int j2 = 2 * ik;
-        const int j1 = j2 - 1;
-        pvec[j1] = coskr;
-        pvec[j2] = sinkr;
-    }
-    pvec[0] = 1.0;
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+    const int j2 = 2 * ik;
+    const int j1 = j2 - 1;
+    pvec[j1]     = coskr;
+    pvec[j2]     = sinkr;
+  }
+  pvec[0] = 1.0;
 }
 
-template <>
-void
-FreeOrbitalT<double>::evaluateValue(
-    const ParticleSetT<double>& P, int iat, ValueVector& pvec)
+template<>
+void FreeOrbitalT<std::complex<float>>::evaluateValue(const ParticleSetT<std::complex<float>>& P,
+                                                      int iat,
+                                                      ValueVector& pvec)
 {
-    const PosType& r = P.activeR(iat);
-    RealType sinkr, coskr;
-    for (int ik = mink; ik < maxk; ik++) {
-        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-        const int j2 = 2 * ik;
-        const int j1 = j2 - 1;
-        pvec[j1] = coskr;
-        pvec[j2] = sinkr;
-    }
-    pvec[0] = 1.0;
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+    pvec[ik] = std::complex<float>(coskr, sinkr);
+  }
 }
 
-template <>
-void
-FreeOrbitalT<std::complex<float>>::evaluateValue(
-    const ParticleSetT<std::complex<float>>& P, int iat, ValueVector& pvec)
+template<>
+void FreeOrbitalT<std::complex<double>>::evaluateValue(const ParticleSetT<std::complex<double>>& P,
+                                                       int iat,
+                                                       ValueVector& pvec)
 {
-    const PosType& r = P.activeR(iat);
-    RealType sinkr, coskr;
-    for (int ik = mink; ik < maxk; ik++) {
-        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-        pvec[ik] = std::complex<float>(coskr, sinkr);
-    }
+  const PosType& r = P.activeR(iat);
+  RealType sinkr, coskr;
+  for (int ik = mink; ik < maxk; ik++)
+  {
+    sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+    pvec[ik] = std::complex<double>(coskr, sinkr);
+  }
 }
 
-template <>
-void
-FreeOrbitalT<std::complex<double>>::evaluateValue(
-    const ParticleSetT<std::complex<double>>& P, int iat, ValueVector& pvec)
+template<class T>
+void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& phi,
+                                           GradMatrix& dphi,
+                                           HessMatrix& d2phi_mat)
+{}
+
+template<>
+void FreeOrbitalT<float>::evaluate_notranspose(const ParticleSetT<float>& P,
+                                               int first,
+                                               int last,
+                                               ValueMatrix& phi,
+                                               GradMatrix& dphi,
+                                               HessMatrix& d2phi_mat)
 {
+  RealType sinkr, coskr;
+  float phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], this->OrbitalSetSize);
+    GradVector dp(dphi[i], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
     const PosType& r = P.activeR(iat);
-    RealType sinkr, coskr;
-    for (int ik = mink; ik < maxk; ik++) {
-        sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-        pvec[ik] = std::complex<double>(coskr, sinkr);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+      const int j2 = 2 * ik;
+      const int j1 = j2 - 1;
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * kvecs[ik];
+      dp[j2]       = coskr * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j1](lb, la) = hess[j1](la, lb);
+          hess[j2](lb, la) = hess[j2](la, lb);
+        }
+      }
     }
+    p[0]    = 1.0;
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+  }
 }
 
-template <class T>
-void
-FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat)
+template<>
+void FreeOrbitalT<double>::evaluate_notranspose(const ParticleSetT<double>& P,
+                                                int first,
+                                                int last,
+                                                ValueMatrix& phi,
+                                                GradMatrix& dphi,
+                                                HessMatrix& d2phi_mat)
 {
-}
+  RealType sinkr, coskr;
+  double phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], this->OrbitalSetSize);
+    GradVector dp(dphi[i], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
 
-template <>
-void
-FreeOrbitalT<float>::evaluate_notranspose(const ParticleSetT<float>& P,
-    int first, int last, ValueMatrix& phi, GradMatrix& dphi,
-    HessMatrix& d2phi_mat)
-{
-    RealType sinkr, coskr;
-    float phi_of_r;
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        ValueVector p(phi[i], this->OrbitalSetSize);
-        GradVector dp(dphi[i], this->OrbitalSetSize);
-        HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
-
-        const PosType& r = P.activeR(iat);
-        for (int ik = mink; ik < maxk; ik++) {
-            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-            const int j2 = 2 * ik;
-            const int j1 = j2 - 1;
-            p[j1] = coskr;
-            p[j2] = sinkr;
-            dp[j1] = -sinkr * kvecs[ik];
-            dp[j2] = coskr * kvecs[ik];
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
-                hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
-                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
-                    hess[j1](la, lb) =
-                        -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[j2](la, lb) =
-                        -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[j1](lb, la) = hess[j1](la, lb);
-                    hess[j2](lb, la) = hess[j2](la, lb);
-                }
-            }
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+      const int j2 = 2 * ik;
+      const int j1 = j2 - 1;
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * kvecs[ik];
+      dp[j2]       = coskr * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j2](la, lb) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j1](lb, la) = hess[j1](la, lb);
+          hess[j2](lb, la) = hess[j2](la, lb);
         }
-        p[0] = 1.0;
-        dp[0] = 0.0;
-        hess[0] = 0.0;
+      }
     }
+    p[0]    = 1.0;
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+  }
 }
 
-template <>
-void
-FreeOrbitalT<double>::evaluate_notranspose(const ParticleSetT<double>& P,
-    int first, int last, ValueMatrix& phi, GradMatrix& dphi,
-    HessMatrix& d2phi_mat)
+template<>
+void FreeOrbitalT<std::complex<float>>::evaluate_notranspose(const ParticleSetT<std::complex<float>>& P,
+                                                             int first,
+                                                             int last,
+                                                             ValueMatrix& phi,
+                                                             GradMatrix& dphi,
+                                                             HessMatrix& d2phi_mat)
 {
-    RealType sinkr, coskr;
-    double phi_of_r;
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        ValueVector p(phi[i], this->OrbitalSetSize);
-        GradVector dp(dphi[i], this->OrbitalSetSize);
-        HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
-
-        const PosType& r = P.activeR(iat);
-        for (int ik = mink; ik < maxk; ik++) {
-            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-            const int j2 = 2 * ik;
-            const int j1 = j2 - 1;
-            p[j1] = coskr;
-            p[j2] = sinkr;
-            dp[j1] = -sinkr * kvecs[ik];
-            dp[j2] = coskr * kvecs[ik];
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
-                hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
-                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
-                    hess[j1](la, lb) =
-                        -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[j2](la, lb) =
-                        -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[j1](lb, la) = hess[j1](la, lb);
-                    hess[j2](lb, la) = hess[j2](la, lb);
-                }
-            }
-        }
-        p[0] = 1.0;
-        dp[0] = 0.0;
-        hess[0] = 0.0;
-    }
-}
+  RealType sinkr, coskr;
+  std::complex<float> phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], this->OrbitalSetSize);
+    GradVector dp(dphi[i], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
 
-template <>
-void
-FreeOrbitalT<std::complex<float>>::evaluate_notranspose(
-    const ParticleSetT<std::complex<float>>& P, int first, int last,
-    ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat)
-{
-    RealType sinkr, coskr;
-    std::complex<float> phi_of_r;
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        ValueVector p(phi[i], this->OrbitalSetSize);
-        GradVector dp(dphi[i], this->OrbitalSetSize);
-        HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
-
-        const PosType& r = P.activeR(iat);
-        for (int ik = mink; ik < maxk; ik++) {
-            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-            phi_of_r = std::complex<float>(coskr, sinkr);
-            p[ik] = phi_of_r;
-
-            dp[ik] = std::complex<float>(-sinkr, coskr) * kvecs[ik];
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                hess[ik](la, la) =
-                    -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
-                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
-                    hess[ik](la, lb) =
-                        -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[ik](lb, la) = hess[ik](la, lb);
-                }
-            }
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+
+      phi_of_r = std::complex<float>(coskr, sinkr);
+      p[ik]    = phi_of_r;
+
+      dp[ik] = std::complex<float>(-sinkr, coskr) * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[ik](lb, la) = hess[ik](la, lb);
         }
+      }
     }
+  }
 }
 
-template <>
-void
-FreeOrbitalT<std::complex<double>>::evaluate_notranspose(
-    const ParticleSetT<std::complex<double>>& P, int first, int last,
-    ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat)
+template<>
+void FreeOrbitalT<std::complex<double>>::evaluate_notranspose(const ParticleSetT<std::complex<double>>& P,
+                                                              int first,
+                                                              int last,
+                                                              ValueMatrix& phi,
+                                                              GradMatrix& dphi,
+                                                              HessMatrix& d2phi_mat)
 {
-    RealType sinkr, coskr;
-    std::complex<double> phi_of_r;
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        ValueVector p(phi[i], this->OrbitalSetSize);
-        GradVector dp(dphi[i], this->OrbitalSetSize);
-        HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
-
-        const PosType& r = P.activeR(iat);
-        for (int ik = mink; ik < maxk; ik++) {
-            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-
-            phi_of_r = std::complex<double>(coskr, sinkr);
-            p[ik] = phi_of_r;
-
-            dp[ik] = std::complex<double>(-sinkr, coskr) * kvecs[ik];
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                hess[ik](la, la) =
-                    -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
-                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
-                    hess[ik](la, lb) =
-                        -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[ik](lb, la) = hess[ik](la, lb);
-                }
-            }
+  RealType sinkr, coskr;
+  std::complex<double> phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], this->OrbitalSetSize);
+    GradVector dp(dphi[i], this->OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], this->OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+
+      phi_of_r = std::complex<double>(coskr, sinkr);
+      p[ik]    = phi_of_r;
+
+      dp[ik] = std::complex<double>(-sinkr, coskr) * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[ik](lb, la) = hess[ik](la, lb);
         }
+      }
     }
+  }
 }
 
-template <class T>
-void
-FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat,
-    GGGMatrix& d3phi_mat)
+template<class T>
+void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& phi,
+                                           GradMatrix& dphi,
+                                           HessMatrix& d2phi_mat,
+                                           GGGMatrix& d3phi_mat)
+{}
+
+template<>
+void FreeOrbitalT<float>::evaluate_notranspose(const ParticleSetT<float>& P,
+                                               int first,
+                                               int last,
+                                               ValueMatrix& phi,
+                                               GradMatrix& dphi,
+                                               HessMatrix& d2phi_mat,
+                                               GGGMatrix& d3phi_mat)
 {
-}
+  RealType sinkr, coskr;
+  ValueType phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], OrbitalSetSize);
+    GradVector dp(dphi[i], OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
 
-template <>
-void
-FreeOrbitalT<float>::evaluate_notranspose(const ParticleSetT<float>& P,
-    int first, int last, ValueMatrix& phi, GradMatrix& dphi,
-    HessMatrix& d2phi_mat, GGGMatrix& d3phi_mat)
-{
-    RealType sinkr, coskr;
-    ValueType phi_of_r;
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        ValueVector p(phi[i], OrbitalSetSize);
-        GradVector dp(dphi[i], OrbitalSetSize);
-        HessVector hess(d2phi_mat[i], OrbitalSetSize);
-        GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
-
-        const PosType& r = P.activeR(iat);
-        for (int ik = mink; ik < maxk; ik++) {
-            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-            const int j2 = 2 * ik;
-            const int j1 = j2 - 1;
-            p[j1] = coskr;
-            p[j2] = sinkr;
-            dp[j1] = -sinkr * kvecs[ik];
-            dp[j2] = coskr * kvecs[ik];
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
-                hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
-                ggg[j1][la](la, la) =
-                    sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
-                ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] *
-                    (kvecs[ik])[la] * (kvecs[ik])[la];
-                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
-                    hess[j1](la, lb) =
-                        -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[j2](la, lb) =
-                        -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[j1](lb, la) = hess[j1](la, lb);
-                    hess[j2](lb, la) = hess[j2](la, lb);
-                    ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] *
-                        (kvecs[ik])[lb] * (kvecs[ik])[la];
-                    ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] *
-                        (kvecs[ik])[lb] * (kvecs[ik])[la];
-                    ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
-                    ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
-                    ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
-                    ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
-                    ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] *
-                        (kvecs[ik])[lb] * (kvecs[ik])[lb];
-                    ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] *
-                        (kvecs[ik])[lb] * (kvecs[ik])[lb];
-                    ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
-                    ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
-                    ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
-                    ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
-                    for (int lc = lb + 1; lc < OHMMS_DIM; lc++) {
-                        ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] *
-                            (kvecs[ik])[lb] * (kvecs[ik])[lc];
-                        ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] *
-                            (kvecs[ik])[lb] * (kvecs[ik])[lc];
-                        ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
-                        ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
-                        ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
-                        ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
-                        ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
-                        ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
-                        ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
-                        ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
-                        ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
-                        ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
-                    }
-                }
-            }
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+      const int j2 = 2 * ik;
+      const int j1 = j2 - 1;
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * kvecs[ik];
+      dp[j2]       = coskr * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        hess[j2](la, la)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+        ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j2](la, lb)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j1](lb, la)    = hess[j1](la, lb);
+          hess[j2](lb, la)    = hess[j2](la, lb);
+          ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
+          ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
+          ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
+          ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
+          ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
+          ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
+          ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
+          ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
+          ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
+          ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
+          for (int lc = lb + 1; lc < OHMMS_DIM; lc++)
+          {
+            ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
+            ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
+            ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
+          }
         }
-
-        p[0] = 1.0;
-        dp[0] = 0.0;
-        hess[0] = 0.0;
-        ggg[0] = 0.0;
+      }
     }
+
+    p[0]    = 1.0;
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+    ggg[0]  = 0.0;
+  }
 }
 
-template <>
-void
-FreeOrbitalT<double>::evaluate_notranspose(const ParticleSetT<double>& P,
-    int first, int last, ValueMatrix& phi, GradMatrix& dphi,
-    HessMatrix& d2phi_mat, GGGMatrix& d3phi_mat)
+template<>
+void FreeOrbitalT<double>::evaluate_notranspose(const ParticleSetT<double>& P,
+                                                int first,
+                                                int last,
+                                                ValueMatrix& phi,
+                                                GradMatrix& dphi,
+                                                HessMatrix& d2phi_mat,
+                                                GGGMatrix& d3phi_mat)
 {
-    RealType sinkr, coskr;
-    ValueType phi_of_r;
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        ValueVector p(phi[i], OrbitalSetSize);
-        GradVector dp(dphi[i], OrbitalSetSize);
-        HessVector hess(d2phi_mat[i], OrbitalSetSize);
-        GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
-
-        const PosType& r = P.activeR(iat);
-        for (int ik = mink; ik < maxk; ik++) {
-            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-            const int j2 = 2 * ik;
-            const int j1 = j2 - 1;
-            p[j1] = coskr;
-            p[j2] = sinkr;
-            dp[j1] = -sinkr * kvecs[ik];
-            dp[j2] = coskr * kvecs[ik];
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                hess[j1](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
-                hess[j2](la, la) = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
-                ggg[j1][la](la, la) =
-                    sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
-                ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] *
-                    (kvecs[ik])[la] * (kvecs[ik])[la];
-                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
-                    hess[j1](la, lb) =
-                        -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[j2](la, lb) =
-                        -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[j1](lb, la) = hess[j1](la, lb);
-                    hess[j2](lb, la) = hess[j2](la, lb);
-                    ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] *
-                        (kvecs[ik])[lb] * (kvecs[ik])[la];
-                    ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] *
-                        (kvecs[ik])[lb] * (kvecs[ik])[la];
-                    ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
-                    ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
-                    ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
-                    ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
-                    ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] *
-                        (kvecs[ik])[lb] * (kvecs[ik])[lb];
-                    ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] *
-                        (kvecs[ik])[lb] * (kvecs[ik])[lb];
-                    ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
-                    ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
-                    ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
-                    ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
-                    for (int lc = lb + 1; lc < OHMMS_DIM; lc++) {
-                        ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] *
-                            (kvecs[ik])[lb] * (kvecs[ik])[lc];
-                        ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] *
-                            (kvecs[ik])[lb] * (kvecs[ik])[lc];
-                        ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
-                        ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
-                        ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
-                        ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
-                        ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
-                        ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
-                        ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
-                        ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
-                        ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
-                        ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
-                    }
-                }
-            }
-        }
+  RealType sinkr, coskr;
+  ValueType phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], OrbitalSetSize);
+    GradVector dp(dphi[i], OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
 
-        p[0] = 1.0;
-        dp[0] = 0.0;
-        hess[0] = 0.0;
-        ggg[0] = 0.0;
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+      const int j2 = 2 * ik;
+      const int j1 = j2 - 1;
+      p[j1]        = coskr;
+      p[j2]        = sinkr;
+      dp[j1]       = -sinkr * kvecs[ik];
+      dp[j2]       = coskr * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[j1](la, la)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        hess[j2](la, la)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[la];
+        ggg[j1][la](la, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+        ggg[j2][la](la, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[j1](la, lb)    = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j2](la, lb)    = -sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[j1](lb, la)    = hess[j1](la, lb);
+          hess[j2](lb, la)    = hess[j2](la, lb);
+          ggg[j1][la](lb, la) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
+          ggg[j2][la](lb, la) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[la];
+          ggg[j1][la](la, lb) = ggg[j1][la](lb, la);
+          ggg[j2][la](la, lb) = ggg[j2][la](lb, la);
+          ggg[j1][lb](la, la) = ggg[j1][la](lb, la);
+          ggg[j2][lb](la, la) = ggg[j2][la](lb, la);
+          ggg[j1][la](lb, lb) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
+          ggg[j2][la](lb, lb) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lb];
+          ggg[j1][lb](la, lb) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](la, lb) = ggg[j2][la](lb, lb);
+          ggg[j1][lb](lb, la) = ggg[j1][la](lb, lb);
+          ggg[j2][lb](lb, la) = ggg[j2][la](lb, lb);
+          for (int lc = lb + 1; lc < OHMMS_DIM; lc++)
+          {
+            ggg[j1][la](lb, lc) = sinkr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
+            ggg[j2][la](lb, lc) = -coskr * (kvecs[ik])[la] * (kvecs[ik])[lb] * (kvecs[ik])[lc];
+            ggg[j1][la](lc, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][la](lc, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](la, lc) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](la, lc) = ggg[j2][la](lb, lc);
+            ggg[j1][lb](lc, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lb](lc, la) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](la, lb) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](la, lb) = ggg[j2][la](lb, lc);
+            ggg[j1][lc](lb, la) = ggg[j1][la](lb, lc);
+            ggg[j2][lc](lb, la) = ggg[j2][la](lb, lc);
+          }
+        }
+      }
     }
+
+    p[0]    = 1.0;
+    dp[0]   = 0.0;
+    hess[0] = 0.0;
+    ggg[0]  = 0.0;
+  }
 }
 
-template <>
-void
-FreeOrbitalT<std::complex<float>>::evaluate_notranspose(
-    const ParticleSetT<std::complex<float>>& P, int first, int last,
-    ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat,
-    GGGMatrix& d3phi_mat)
+template<>
+void FreeOrbitalT<std::complex<float>>::evaluate_notranspose(const ParticleSetT<std::complex<float>>& P,
+                                                             int first,
+                                                             int last,
+                                                             ValueMatrix& phi,
+                                                             GradMatrix& dphi,
+                                                             HessMatrix& d2phi_mat,
+                                                             GGGMatrix& d3phi_mat)
 {
-    RealType sinkr, coskr;
-    ValueType phi_of_r;
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        ValueVector p(phi[i], OrbitalSetSize);
-        GradVector dp(dphi[i], OrbitalSetSize);
-        HessVector hess(d2phi_mat[i], OrbitalSetSize);
-        GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
-
-        const PosType& r = P.activeR(iat);
-        for (int ik = mink; ik < maxk; ik++) {
-            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-            const ValueType compi(0, 1);
-            phi_of_r = ValueType(coskr, sinkr);
-            p[ik] = phi_of_r;
-            dp[ik] = compi * phi_of_r * kvecs[ik];
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                hess[ik](la, la) =
-                    -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
-                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
-                    hess[ik](la, lb) =
-                        -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[ik](lb, la) = hess[ik](la, lb);
-                }
-            }
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik];
-            }
+  RealType sinkr, coskr;
+  ValueType phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], OrbitalSetSize);
+    GradVector dp(dphi[i], OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+      const ValueType compi(0, 1);
+      phi_of_r = ValueType(coskr, sinkr);
+      p[ik]    = phi_of_r;
+      dp[ik]   = compi * phi_of_r * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[ik](lb, la) = hess[ik](la, lb);
         }
+      }
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik];
+      }
     }
+  }
 }
 
-template <>
-void
-FreeOrbitalT<std::complex<double>>::evaluate_notranspose(
-    const ParticleSetT<std::complex<double>>& P, int first, int last,
-    ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat,
-    GGGMatrix& d3phi_mat)
+template<>
+void FreeOrbitalT<std::complex<double>>::evaluate_notranspose(const ParticleSetT<std::complex<double>>& P,
+                                                              int first,
+                                                              int last,
+                                                              ValueMatrix& phi,
+                                                              GradMatrix& dphi,
+                                                              HessMatrix& d2phi_mat,
+                                                              GGGMatrix& d3phi_mat)
 {
-    RealType sinkr, coskr;
-    ValueType phi_of_r;
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        ValueVector p(phi[i], OrbitalSetSize);
-        GradVector dp(dphi[i], OrbitalSetSize);
-        HessVector hess(d2phi_mat[i], OrbitalSetSize);
-        GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
-
-        const PosType& r = P.activeR(iat);
-        for (int ik = mink; ik < maxk; ik++) {
-            sincos(dot(kvecs[ik], r), &sinkr, &coskr);
-            const ValueType compi(0, 1);
-            phi_of_r = ValueType(coskr, sinkr);
-            p[ik] = phi_of_r;
-            dp[ik] = compi * phi_of_r * kvecs[ik];
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                hess[ik](la, la) =
-                    -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
-                for (int lb = la + 1; lb < OHMMS_DIM; lb++) {
-                    hess[ik](la, lb) =
-                        -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
-                    hess[ik](lb, la) = hess[ik](la, lb);
-                }
-            }
-            for (int la = 0; la < OHMMS_DIM; la++) {
-                ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik];
-            }
+  RealType sinkr, coskr;
+  ValueType phi_of_r;
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], OrbitalSetSize);
+    GradVector dp(dphi[i], OrbitalSetSize);
+    HessVector hess(d2phi_mat[i], OrbitalSetSize);
+    GGGVector ggg(d3phi_mat[i], OrbitalSetSize);
+
+    const PosType& r = P.activeR(iat);
+    for (int ik = mink; ik < maxk; ik++)
+    {
+      sincos(dot(kvecs[ik], r), &sinkr, &coskr);
+      const ValueType compi(0, 1);
+      phi_of_r = ValueType(coskr, sinkr);
+      p[ik]    = phi_of_r;
+      dp[ik]   = compi * phi_of_r * kvecs[ik];
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        hess[ik](la, la) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[la];
+        for (int lb = la + 1; lb < OHMMS_DIM; lb++)
+        {
+          hess[ik](la, lb) = -phi_of_r * (kvecs[ik])[la] * (kvecs[ik])[lb];
+          hess[ik](lb, la) = hess[ik](la, lb);
         }
+      }
+      for (int la = 0; la < OHMMS_DIM; la++)
+      {
+        ggg[ik][la] = compi * (kvecs[ik])[la] * hess[ik];
+      }
     }
+  }
 }
 
 // generic implementation
 
-template <class T>
+template<class T>
 FreeOrbitalT<T>::~FreeOrbitalT()
+{}
+
+template<class T>
+void FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                           int first,
+                                           int last,
+                                           ValueMatrix& phi,
+                                           GradMatrix& dphi,
+                                           ValueMatrix& d2phi)
 {
-}
-
-template <class T>
-void
-FreeOrbitalT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& phi, GradMatrix& dphi, ValueMatrix& d2phi)
-{
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        ValueVector p(phi[i], this->OrbitalSetSize);
-        GradVector dp(dphi[i], this->OrbitalSetSize);
-        ValueVector d2p(d2phi[i], this->OrbitalSetSize);
-        evaluateVGL(P, iat, p, dp, d2p);
-    }
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
+    ValueVector p(phi[i], this->OrbitalSetSize);
+    GradVector dp(dphi[i], this->OrbitalSetSize);
+    ValueVector d2p(d2phi[i], this->OrbitalSetSize);
+    evaluateVGL(P, iat, p, dp, d2p);
+  }
 }
 
 // Explicit template specialization
-template <>
-FreeOrbitalT<float>::FreeOrbitalT(
-    const std::string& my_name, const std::vector<PosType>& kpts_cart) :
-    SPOSetT<float>(my_name),
-    kvecs(kpts_cart),
-    mink(1), // treat k=0 as special case
-    maxk(kpts_cart.size()),
-    k2neg(maxk)
+template<>
+FreeOrbitalT<float>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<float>(my_name),
+      kvecs(kpts_cart),
+      mink(1), // treat k=0 as special case
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
 {
-    this->OrbitalSetSize =
-        2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
-    for (int ik = 0; ik < maxk; ik++)
-        k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
+  this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
 }
 
-template <>
-FreeOrbitalT<double>::FreeOrbitalT(
-    const std::string& my_name, const std::vector<PosType>& kpts_cart) :
-    SPOSetT<double>(my_name),
-    kvecs(kpts_cart),
-    mink(1), // treat k=0 as special case
-    maxk(kpts_cart.size()),
-    k2neg(maxk)
+template<>
+FreeOrbitalT<double>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<double>(my_name),
+      kvecs(kpts_cart),
+      mink(1), // treat k=0 as special case
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
 {
-    this->OrbitalSetSize =
-        2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
-    for (int ik = 0; ik < maxk; ik++)
-        k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
+  this->OrbitalSetSize = 2 * maxk - 1; // k=0 has no (cos, sin) split, SPOSet member
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
 }
 
-template <>
-FreeOrbitalT<std::complex<float>>::FreeOrbitalT(
-    const std::string& my_name, const std::vector<PosType>& kpts_cart) :
-    SPOSetT<std::complex<float>>(my_name),
-    kvecs(kpts_cart),
-    mink(0), // treat k=0 as special case
-    maxk(kpts_cart.size()),
-    k2neg(maxk)
+template<>
+FreeOrbitalT<std::complex<float>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<std::complex<float>>(my_name),
+      kvecs(kpts_cart),
+      mink(0), // start at k index 0: complex orbitals evaluate every k, no k=0 special case
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
 {
-    this->OrbitalSetSize = maxk; // SPOSet member
-    for (int ik = 0; ik < maxk; ik++)
-        k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
+  this->OrbitalSetSize = maxk; // SPOSet member
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
 }
 
-template <>
-FreeOrbitalT<std::complex<double>>::FreeOrbitalT(
-    const std::string& my_name, const std::vector<PosType>& kpts_cart) :
-    SPOSetT<std::complex<double>>(my_name),
-    kvecs(kpts_cart),
-    mink(0), // treat k=0 as special case
-    maxk(kpts_cart.size()),
-    k2neg(maxk)
+template<>
+FreeOrbitalT<std::complex<double>>::FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart)
+    : SPOSetT<std::complex<double>>(my_name),
+      kvecs(kpts_cart),
+      mink(0), // start at k index 0: complex orbitals evaluate every k, no k=0 special case
+      maxk(kpts_cart.size()),
+      k2neg(maxk)
 {
-    this->OrbitalSetSize = maxk; // SPOSet member
-    for (int ik = 0; ik < maxk; ik++)
-        k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
+  this->OrbitalSetSize = maxk; // SPOSet member
+  for (int ik = 0; ik < maxk; ik++)
+    k2neg[ik] = -dot(kvecs[ik], kvecs[ik]);
 }
 
-template <class T>
-void
-FreeOrbitalT<T>::report(const std::string& pad) const
+template<class T>
+void FreeOrbitalT<T>::report(const std::string& pad) const
 {
-    app_log() << pad << "FreeOrbital report" << std::endl;
-    for (int ik = 0; ik < kvecs.size(); ik++) {
-        app_log() << pad << ik << " " << kvecs[ik] << std::endl;
-    }
-    app_log() << pad << "end FreeOrbital report" << std::endl;
-    app_log().flush();
+  app_log() << pad << "FreeOrbital report" << std::endl;
+  for (int ik = 0; ik < kvecs.size(); ik++)
+  {
+    app_log() << pad << ik << " " << kvecs[ik] << std::endl;
+  }
+  app_log() << pad << "end FreeOrbital report" << std::endl;
+  app_log().flush();
 }
 
 template class FreeOrbitalT<float>;
diff --git a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
index 18e8899cca3..aec297d68a9 100644
--- a/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
+++ b/src/QMCWaveFunctions/ElectronGas/FreeOrbitalT.h
@@ -4,20 +4,15 @@
 //
 // Copyright (c) 2022 QMCPACK developers.
 //
-// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-// National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
-//                    Ridge National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory Yubo
-//                    "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
-//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National
-//                    Laboratory
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Yubo "Paul" Yang, yubo.paul.yang@gmail.com, CCQ @ Flatiron
+//                    William F Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_FREE_ORBITALT_H
@@ -27,74 +22,66 @@
 
 namespace qmcplusplus
 {
-template <class T>
+template<class T>
 class FreeOrbitalT : public SPOSetT<T>
 {
 public:
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using HessVector = typename SPOSetT<T>::HessVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using HessMatrix = typename SPOSetT<T>::HessMatrix;
-    using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
-    using RealType = typename SPOSetT<T>::RealType;
-    using PosType = typename SPOSetT<T>::PosType;
-    using ValueType = typename SPOSetT<T>::ValueType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using HessVector  = typename SPOSetT<T>::HessVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix  = typename SPOSetT<T>::HessMatrix;
+  using GGGMatrix   = typename SPOSetT<T>::GGGMatrix;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using PosType     = typename SPOSetT<T>::PosType;
+  using ValueType   = typename SPOSetT<T>::ValueType;
 
-    FreeOrbitalT(
-        const std::string& my_name, const std::vector<PosType>& kpts_cart);
-    ~FreeOrbitalT();
+  FreeOrbitalT(const std::string& my_name, const std::vector<PosType>& kpts_cart);
+  ~FreeOrbitalT();
 
-    inline std::string
-    getClassName() const final
-    {
-        return "FreeOrbital";
-    }
+  inline std::string getClassName() const final { return "FreeOrbital"; }
 
-    // phi[i][j] is phi_j(r_i), i.e. electron i in orbital j
-    //  i \in [first, last)
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& phi, GradMatrix& dphi, ValueMatrix& d2phi) final;
+  // phi[i][j] is phi_j(r_iat) with iat = first + i, i.e. electron iat in orbital j
+  //  iat \in [first, last)
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& phi,
+                            GradMatrix& dphi,
+                            ValueMatrix& d2phi) final;
 
-    // plug r_i into all orbitals
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int i, ValueVector& pvec,
-        GradVector& dpvec, ValueVector& d2pvec) final;
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& pvec) final;
+  // plug r_i into all orbitals
+  void evaluateVGL(const ParticleSetT<T>& P, int i, ValueVector& pvec, GradVector& dpvec, ValueVector& d2pvec) final;
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& pvec) final;
 
-    // hessian matrix is needed by backflow
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat) final;
+  // hessian matrix is needed by backflow
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& phi,
+                            GradMatrix& dphi,
+                            HessMatrix& d2phi_mat) final;
 
-    // derivative of hessian is needed to optimize backflow
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& phi, GradMatrix& dphi, HessMatrix& d2phi_mat,
-        GGGMatrix& d3phi_mat) override;
+  // derivative of hessian is needed to optimize backflow
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& phi,
+                            GradMatrix& dphi,
+                            HessMatrix& d2phi_mat,
+                            GGGMatrix& d3phi_mat) override;
 
-    void
-    report(const std::string& pad) const override;
-    // ---- begin required overrides
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const final
-    {
-        return std::make_unique<FreeOrbitalT<T>>(*this);
-    }
-    void
-    setOrbitalSetSize(int norbs) final
-    {
-        throw std::runtime_error("not implemented");
-    }
-    // required overrides end ----
+  void report(const std::string& pad) const override;
+  // ---- begin required overrides
+  std::unique_ptr<SPOSetT<T>> makeClone() const final { return std::make_unique<FreeOrbitalT<T>>(*this); }
+  void setOrbitalSetSize(int norbs) final { throw std::runtime_error("not implemented"); }
+  // required overrides end ----
 private:
-    const std::vector<PosType> kvecs; // kvecs vectors
-    const int mink; // minimum k index
-    const int maxk; // maximum number of kvecs vectors
-    std::vector<RealType> k2neg; // minus kvecs^2
+  const std::vector<PosType> kvecs; // k vectors (copied from kpts_cart)
+  const int mink;                   // first k index looped over
+  const int maxk;                   // number of k vectors (kpts_cart.size())
+  std::vector<RealType> k2neg;      // minus |k|^2 for each k vector
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
index 7c309d5b873..fc727f07784 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.cpp
@@ -4,13 +4,10 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National
-//                    Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "SHOSetBuilderT.h"
@@ -22,190 +19,188 @@
 
 namespace qmcplusplus
 {
-template <class T>
-SHOSetBuilderT<T>::SHOSetBuilderT(ParticleSetT<T>& P, Communicate* comm) :
-    SPOSetBuilderT<T>("SHO", comm),
-    Ps(P)
+template<class T>
+SHOSetBuilderT<T>::SHOSetBuilderT(ParticleSetT<T>& P, Communicate* comm) : SPOSetBuilderT<T>("SHO", comm), Ps(P)
 {
-    this->ClassName = "SHOSetBuilderT";
-    this->legacy = false;
-    app_log() << "Constructing SHOSetBuilderT" << std::endl;
-    reset();
+  this->ClassName = "SHOSetBuilderT";
+  this->legacy    = false;
+  app_log() << "Constructing SHOSetBuilderT" << std::endl;
+  reset();
 }
 
-template <class T>
+template<class T>
 SHOSetBuilderT<T>::~SHOSetBuilderT() = default;
 
-template <class T>
-void
-SHOSetBuilderT<T>::reset()
+template<class T>
+void SHOSetBuilderT<T>::reset()
 {
-    nstates = 0;
-    mass = -1.0;
-    energy = -1.0;
-    length = -1.0;
-    center = 0.0;
+  nstates = 0;
+  mass    = -1.0;
+  energy  = -1.0;
+  length  = -1.0;
+  center  = 0.0;
 }
 
-template <class T>
-std::unique_ptr<SPOSetT<T>>
-SHOSetBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
+template<class T>
+std::unique_ptr<SPOSetT<T>> SHOSetBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 {
-    APP_ABORT("SHOSetBuilderT::createSPOSetFromXML  SHOSetBuilder should not "
-              "use legacy interface");
+  APP_ABORT("SHOSetBuilderT::createSPOSetFromXML  SHOSetBuilder should not "
+            "use legacy interface");
 
-    app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl;
+  app_log() << "SHOSetBuilderT::createSHOSet(xml) " << std::endl;
 
-    SPOSetInputInfo input(cur);
+  SPOSetInputInfo input(cur);
 
-    return createSPOSet(cur, input);
+  return createSPOSet(cur, input);
 }
 
-template <class T>
-std::unique_ptr<SPOSetT<T>>
-SHOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input)
+template<class T>
+std::unique_ptr<SPOSetT<T>> SHOSetBuilderT<T>::createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input)
 {
-    app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl;
-    reset();
-
-    // read parameters
-    std::string spo_name = "sho";
-    OhmmsAttributeSet attrib;
-    attrib.add(spo_name, "name");
-    attrib.add(spo_name, "id");
-    attrib.add(mass, "mass");
-    attrib.add(energy, "energy");
-    attrib.add(energy, "frequency");
-    attrib.add(length, "length");
-    attrib.add(center, "center");
-    attrib.add(nstates, "size");
-    attrib.put(cur);
-
-    if (energy < 0.0)
-        energy = 1.0;
-    if (mass < 0.0 && length < 0.0)
-        length = 1.0;
-    if (mass < 0.0)
-        mass = 1.0 / (energy * length * length);
-    else if (length < 0.0)
-        length = 1.0 / std::sqrt(mass * energy);
-
-    // initialize states and/or adjust basis
-    int smax = -1;
-    if (input.has_index_info)
-        smax = std::max(smax, input.max_index());
-    if (input.has_energy_info) {
-        smax = std::max(smax, (int)std::ceil(input.max_energy() / energy));
-    }
-    if (smax < 0)
-        APP_ABORT("SHOSetBuilderT::Initialize\n  invalid basis size");
-    update_basis_states(smax);
-
-    // create sho state request
-    indices_t& indices = input.get_indices(this->states);
-    std::vector<SHOState*> sho_states;
-    for (int i = 0; i < indices.size(); ++i)
-        sho_states.push_back(basis_states[indices[i]]);
-
-    // make the sposet
-    auto sho =
-        std::make_unique<SHOSetT<T>>(spo_name, length, center, sho_states);
-
-    sho->report("  ");
-    return sho;
+  app_log() << "SHOSetBuilderT::createSHOSet(indices) " << std::endl;
+  reset();
+
+  // read parameters
+  std::string spo_name = "sho";
+  OhmmsAttributeSet attrib;
+  attrib.add(spo_name, "name");
+  attrib.add(spo_name, "id");
+  attrib.add(mass, "mass");
+  attrib.add(energy, "energy");
+  attrib.add(energy, "frequency");
+  attrib.add(length, "length");
+  attrib.add(center, "center");
+  attrib.add(nstates, "size");
+  attrib.put(cur);
+
+  if (energy < 0.0)
+    energy = 1.0;
+  if (mass < 0.0 && length < 0.0)
+    length = 1.0;
+  if (mass < 0.0)
+    mass = 1.0 / (energy * length * length);
+  else if (length < 0.0)
+    length = 1.0 / std::sqrt(mass * energy);
+
+  // initialize states and/or adjust basis
+  int smax = -1;
+  if (input.has_index_info)
+    smax = std::max(smax, input.max_index());
+  if (input.has_energy_info)
+  {
+    smax = std::max(smax, (int)std::ceil(input.max_energy() / energy));
+  }
+  if (smax < 0)
+    APP_ABORT("SHOSetBuilderT::Initialize\n  invalid basis size");
+  update_basis_states(smax);
+
+  // create sho state request
+  indices_t& indices = input.get_indices(this->states);
+  std::vector<SHOState*> sho_states;
+  for (int i = 0; i < indices.size(); ++i)
+    sho_states.push_back(basis_states[indices[i]]);
+
+  // make the sposet
+  auto sho = std::make_unique<SHOSetT<T>>(spo_name, length, center, sho_states);
+
+  sho->report("  ");
+  return sho;
 }
 
-template <class T>
-void
-SHOSetBuilderT<T>::update_basis_states(int smax)
+template<class T>
+void SHOSetBuilderT<T>::update_basis_states(int smax)
 {
-    int states_required = smax - basis_states.size() + 1;
-    if (states_required > 0) {
-        RealType N = smax + 1;
-        if (QMCTraits::DIM == 1)
-            nmax = smax;
-        else if (QMCTraits::DIM == 2)
-            nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) - 1.5);
-        else if (QMCTraits::DIM == 3) {
-            RealType f = std::exp(1.0 / 3.0 *
-                std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.)));
-            nmax = std::ceil(f / 3. + 1. / f - 2.);
-        }
+  int states_required = smax - basis_states.size() + 1;
+  if (states_required > 0)
+  {
+    RealType N = smax + 1;
+    if (QMCTraits::DIM == 1)
+      nmax = smax;
+    else if (QMCTraits::DIM == 2)
+      nmax = std::ceil(.5 * std::sqrt(8. * N + 1.) - 1.5);
+    else if (QMCTraits::DIM == 3)
+    {
+      RealType f = std::exp(1.0 / 3.0 * std::log(81. * N + 3. * std::sqrt(729. * N * N - 3.)));
+      nmax       = std::ceil(f / 3. + 1. / f - 2.);
+    }
+    else
+      APP_ABORT("SHOSetBuilderT::update_basis_states  dimensions other "
+                "than 1, 2, or 3 are not supported");
+    int ndim                     = nmax + 1;
+    ind_dims[QMCTraits::DIM - 1] = 1;
+    for (int d = QMCTraits::DIM - 2; d > -1; --d)
+      ind_dims[d] = ind_dims[d + 1] * ndim;
+    int s    = 0;
+    int ntot = pow(ndim, QMCTraits::DIM);
+    TinyVector<int, QMCTraits::DIM> qnumber;
+    for (int m = 0; m < ntot; ++m)
+    {
+      int n    = 0; // principal quantum number
+      int nrem = m;
+      for (int d = 0; d < QMCTraits::DIM; ++d)
+      {
+        int i = nrem / ind_dims[d];
+        nrem -= i * ind_dims[d];
+        qnumber[d] = i;
+        n += i;
+      }
+      if (n <= nmax)
+      {
+        SHOState* st;
+        if (s < basis_states.size())
+          st = basis_states[s];
         else
-            APP_ABORT("SHOSetBuilderT::update_basis_states  dimensions other "
-                      "than 1, 2, or 3 are not supported");
-        int ndim = nmax + 1;
-        ind_dims[QMCTraits::DIM - 1] = 1;
-        for (int d = QMCTraits::DIM - 2; d > -1; --d)
-            ind_dims[d] = ind_dims[d + 1] * ndim;
-        int s = 0;
-        int ntot = pow(ndim, QMCTraits::DIM);
-        TinyVector<int, QMCTraits::DIM> qnumber;
-        for (int m = 0; m < ntot; ++m) {
-            int n = 0; // principal quantum number
-            int nrem = m;
-            for (int d = 0; d < QMCTraits::DIM; ++d) {
-                int i = nrem / ind_dims[d];
-                nrem -= i * ind_dims[d];
-                qnumber[d] = i;
-                n += i;
-            }
-            if (n <= nmax) {
-                SHOState* st;
-                if (s < basis_states.size())
-                    st = basis_states[s];
-                else {
-                    st = new SHOState();
-                    basis_states.add(st);
-                }
-                RealType e = energy * (n + .5 * QMCTraits::DIM);
-                st->set(qnumber, e);
-                s++;
-            }
+        {
+          st = new SHOState();
+          basis_states.add(st);
         }
-        basis_states.energy_sort(1e-6, true);
+        RealType e = energy * (n + .5 * QMCTraits::DIM);
+        st->set(qnumber, e);
+        s++;
+      }
     }
-
-    // reset energy scale even if no states need to be added
-    for (int i = 0; i < basis_states.size(); ++i) {
-        SHOState& state = *basis_states[i];
-        const TinyVector<int, QMCTraits::DIM>& qnumber = state.quantum_number;
-        int n = 0;
-        for (int d = 0; d < QMCTraits::DIM; ++d)
-            n += qnumber[d];
-        state.energy = energy * (n + .5 * QMCTraits::DIM);
-    }
-
-    // somewhat redundant, but necessary
-    this->clear_states(0);
-    this->states[0]->finish(basis_states.states);
-
-    if (basis_states.size() <= smax)
-        APP_ABORT("SHOSetBuilderT::update_basis_states  failed to make enough "
-                  "states");
+    basis_states.energy_sort(1e-6, true);
+  }
+
+  // reset energy scale even if no states need to be added
+  for (int i = 0; i < basis_states.size(); ++i)
+  {
+    SHOState& state                                = *basis_states[i];
+    const TinyVector<int, QMCTraits::DIM>& qnumber = state.quantum_number;
+    int n                                          = 0;
+    for (int d = 0; d < QMCTraits::DIM; ++d)
+      n += qnumber[d];
+    state.energy = energy * (n + .5 * QMCTraits::DIM);
+  }
+
+  // somewhat redundant, but necessary
+  this->clear_states(0);
+  this->states[0]->finish(basis_states.states);
+
+  if (basis_states.size() <= smax)
+    APP_ABORT("SHOSetBuilderT::update_basis_states  failed to make enough "
+              "states");
 }
 
-template <class T>
-void
-SHOSetBuilderT<T>::report(const std::string& pad)
+template<class T>
+void SHOSetBuilderT<T>::report(const std::string& pad)
 {
-    app_log() << pad << "SHOSetBuilderT report" << std::endl;
-    app_log() << pad << "  dimension = " << QMCTraits::DIM << std::endl;
-    app_log() << pad << "  mass      = " << mass << std::endl;
-    app_log() << pad << "  frequency = " << energy << std::endl;
-    app_log() << pad << "  energy    = " << energy << std::endl;
-    app_log() << pad << "  length    = " << length << std::endl;
-    app_log() << pad << "  center    = " << center << std::endl;
-    app_log() << pad << "  nstates   = " << nstates << std::endl;
-    app_log() << pad << "  nmax      = " << nmax << std::endl;
-    app_log() << pad << "  ind_dims  = " << ind_dims << std::endl;
-    app_log() << pad << "  # basis states = " << basis_states.size()
-              << std::endl;
-    app_log() << pad << "  basis_states" << std::endl;
-    for (int s = 0; s < basis_states.size(); ++s)
-        basis_states[s]->report(pad + "  " + int2string(s) + " ");
-    app_log() << pad << "end SHOSetBuilderT report" << std::endl;
-    app_log().flush();
+  app_log() << pad << "SHOSetBuilderT report" << std::endl;
+  app_log() << pad << "  dimension = " << QMCTraits::DIM << std::endl;
+  app_log() << pad << "  mass      = " << mass << std::endl;
+  app_log() << pad << "  frequency = " << energy << std::endl;
+  app_log() << pad << "  energy    = " << energy << std::endl;
+  app_log() << pad << "  length    = " << length << std::endl;
+  app_log() << pad << "  center    = " << center << std::endl;
+  app_log() << pad << "  nstates   = " << nstates << std::endl;
+  app_log() << pad << "  nmax      = " << nmax << std::endl;
+  app_log() << pad << "  ind_dims  = " << ind_dims << std::endl;
+  app_log() << pad << "  # basis states = " << basis_states.size() << std::endl;
+  app_log() << pad << "  basis_states" << std::endl;
+  for (int s = 0; s < basis_states.size(); ++s)
+    basis_states[s]->report(pad + "  " + int2string(s) + " ");
+  app_log() << pad << "end SHOSetBuilderT report" << std::endl;
+  app_log().flush();
 }
 
 #ifndef QMC_COMPLEX
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
index 96237ab55ee..6ad6e373356 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetBuilderT.h
@@ -4,13 +4,10 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National
-//                    Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_SHO_BASIS_BUILDERT_H
@@ -22,48 +19,43 @@
 
 namespace qmcplusplus
 {
-template <class T>
+template<class T>
 class SHOSetBuilderT : public SPOSetBuilderT<T>
 {
 public:
-    using RealType = typename SPOSetT<T>::RealType;
-    using PosType = typename SPOSetT<T>::PosType;
-    using indices_t = typename SPOSetBuilderT<T>::indices_t;
+  using RealType  = typename SPOSetT<T>::RealType;
+  using PosType   = typename SPOSetT<T>::PosType;
+  using indices_t = typename SPOSetBuilderT<T>::indices_t;
 
-    ParticleSetT<T>& Ps;
+  ParticleSetT<T>& Ps;
 
-    RealType length;
-    RealType mass;
-    RealType energy;
-    PosType center;
+  RealType length;
+  RealType mass;
+  RealType energy;
+  PosType center;
 
-    int nstates;
-    int nmax;
-    TinyVector<int, QMCTraits::DIM> ind_dims;
+  int nstates;
+  int nmax;
+  TinyVector<int, QMCTraits::DIM> ind_dims;
 
-    SPOSetInfoSimple<SHOState> basis_states;
+  SPOSetInfoSimple<SHOState> basis_states;
 
-    // construction/destruction
-    SHOSetBuilderT(ParticleSetT<T>& P, Communicate* comm);
+  // construction/destruction
+  SHOSetBuilderT(ParticleSetT<T>& P, Communicate* comm);
 
-    ~SHOSetBuilderT() override;
+  ~SHOSetBuilderT() override;
 
-    // reset parameters
-    void
-    reset();
+  // reset parameters
+  void reset();
 
-    // SPOSetBuilder interface
-    std::unique_ptr<SPOSetT<T>>
-    createSPOSetFromXML(xmlNodePtr cur) override;
+  // SPOSetBuilder interface
+  std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) override;
 
-    std::unique_ptr<SPOSetT<T>>
-    createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override;
+  std::unique_ptr<SPOSetT<T>> createSPOSet(xmlNodePtr cur, SPOSetInputInfo& input) override;
 
-    // local functions
-    void
-    update_basis_states(int smax);
-    void
-    report(const std::string& pad = "");
+  // local functions
+  void update_basis_states(int smax);
+  void report(const std::string& pad = "");
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
index 1286b07393f..7e393375a5e 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.cpp
@@ -4,13 +4,10 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National
-//                    Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "SHOSetT.h"
@@ -19,537 +16,553 @@
 
 namespace qmcplusplus
 {
-template <typename T>
-SHOSetT<T>::SHOSetT(const std::string& my_name, RealType l, PosType c,
-    const std::vector<SHOState*>& sho_states) :
-    SPOSetT<T>(my_name),
-    length(l),
-    center(c)
+template<typename T>
+SHOSetT<T>::SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector<SHOState*>& sho_states)
+    : SPOSetT<T>(my_name), length(l), center(c)
 {
-    state_info.resize(sho_states.size());
-    for (int s = 0; s < sho_states.size(); ++s)
-        state_info[s] = *sho_states[s];
-    initialize();
+  state_info.resize(sho_states.size());
+  for (int s = 0; s < sho_states.size(); ++s)
+    state_info[s] = *sho_states[s];
+  initialize();
 }
 
-template <typename T>
-void
-SHOSetT<T>::initialize()
+template<typename T>
+void SHOSetT<T>::initialize()
 {
-    using std::sqrt;
+  using std::sqrt;
 
-    this->OrbitalSetSize = state_info.size();
+  this->OrbitalSetSize = state_info.size();
 
-    qn_max = -1;
-    for (int s = 0; s < state_info.size(); ++s)
-        for (int d = 0; d < QMCTraits::DIM; ++d)
-            qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]);
-    qn_max += 1;
-
-    nmax = -1;
+  qn_max = -1;
+  for (int s = 0; s < state_info.size(); ++s)
     for (int d = 0; d < QMCTraits::DIM; ++d)
-        nmax = std::max(nmax, qn_max[d]);
-
-    prefactors.resize(nmax);
-    hermite.resize(QMCTraits::DIM, nmax);
-    bvalues.resize(QMCTraits::DIM, nmax);
-
-    if (nmax > 0) {
-        prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
-        for (int n = 1; n < nmax; ++n)
-            prefactors[n] = prefactors[n - 1] / sqrt(2. * n);
-    }
+      qn_max[d] = std::max(qn_max[d], state_info[s].quantum_number[d]);
+  qn_max += 1;
+
+  nmax = -1;
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+    nmax = std::max(nmax, qn_max[d]);
+
+  prefactors.resize(nmax);
+  hermite.resize(QMCTraits::DIM, nmax);
+  bvalues.resize(QMCTraits::DIM, nmax);
+
+  if (nmax > 0)
+  {
+    prefactors[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
+    for (int n = 1; n < nmax; ++n)
+      prefactors[n] = prefactors[n - 1] / sqrt(2. * n);
+  }
 }
 
-template <typename T>
+template<typename T>
 SHOSetT<T>::~SHOSetT() = default;
 
-template <typename T>
-std::unique_ptr<SPOSetT<T>>
-SHOSetT<T>::makeClone() const
+template<typename T>
+std::unique_ptr<SPOSetT<T>> SHOSetT<T>::makeClone() const
 {
-    return std::make_unique<SHOSetT<T>>(*this);
+  return std::make_unique<SHOSetT<T>>(*this);
 }
 
-template <typename T>
-void
-SHOSetT<T>::report(const std::string& pad) const
+template<typename T>
+void SHOSetT<T>::report(const std::string& pad) const
 {
-    app_log() << pad << "SHOSet report" << std::endl;
-    app_log() << pad << "  length    = " << length << std::endl;
-    app_log() << pad << "  center    = " << center << std::endl;
-    app_log() << pad << "  nmax      = " << nmax << std::endl;
-    app_log() << pad << "  qn_max    = " << qn_max << std::endl;
-    app_log() << pad << "  # states  = " << state_info.size() << std::endl;
-    app_log() << pad << "  states" << std::endl;
-    for (int s = 0; s < state_info.size(); ++s)
-        state_info[s].sho_report(pad + "    " + int2string(s) + " ");
-    app_log() << pad << "end SHOSet report" << std::endl;
-    app_log().flush();
+  app_log() << pad << "SHOSet report" << std::endl;
+  app_log() << pad << "  length    = " << length << std::endl;
+  app_log() << pad << "  center    = " << center << std::endl;
+  app_log() << pad << "  nmax      = " << nmax << std::endl;
+  app_log() << pad << "  qn_max    = " << qn_max << std::endl;
+  app_log() << pad << "  # states  = " << state_info.size() << std::endl;
+  app_log() << pad << "  states" << std::endl;
+  for (int s = 0; s < state_info.size(); ++s)
+    state_info[s].sho_report(pad + "    " + int2string(s) + " ");
+  app_log() << pad << "end SHOSet report" << std::endl;
+  app_log().flush();
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
+template<typename T>
+void SHOSetT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-    const PosType& r(P.activeR(iat));
-    ValueVector p(&psi[0], this->size());
-    evaluate_v(r, p);
+  const PosType& r(P.activeR(iat));
+  ValueVector p(&psi[0], this->size());
+  evaluate_v(r, p);
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-    GradVector& dpsi, ValueVector& d2psi)
+template<typename T>
+void SHOSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-    const PosType& r(P.activeR(iat));
-    ValueVector p(&psi[0], this->size());
-    GradVector dp(&dpsi[0], this->size());
-    ValueVector d2p(&d2psi[0], this->size());
-    evaluate_vgl(r, p, dp, d2p);
+  const PosType& r(P.activeR(iat));
+  ValueVector p(&psi[0], this->size());
+  GradVector dp(&dpsi[0], this->size());
+  ValueVector d2p(&d2psi[0], this->size());
+  evaluate_vgl(r, p, dp, d2p);
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-    ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
+template<typename T>
+void SHOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                      int first,
+                                      int last,
+                                      ValueMatrix& logdet,
+                                      GradMatrix& dlogdet,
+                                      ValueMatrix& d2logdet)
 {
-    for (int iat = first, i = 0; iat < last; ++iat, ++i) {
-        ValueVector p(logdet[i], this->size());
-        GradVector dp(dlogdet[i], this->size());
-        ValueVector d2p(d2logdet[i], this->size());
-        evaluate_vgl(P.R[iat], p, dp, d2p);
-    }
+  for (int iat = first, i = 0; iat < last; ++iat, ++i)
+  {
+    ValueVector p(logdet[i], this->size());
+    GradVector dp(dlogdet[i], this->size());
+    ValueVector d2p(d2logdet[i], this->size());
+    evaluate_vgl(P.R[iat], p, dp, d2p);
+  }
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_v(PosType r, ValueVector& psi)
+template<typename T>
+void SHOSetT<T>::evaluate_v(PosType r, ValueVector& psi)
 {
-    PosType x = (r - center) / length;
-    evaluate_hermite(x);
-    evaluate_d0(x, psi);
+  PosType x = (r - center) / length;
+  evaluate_hermite(x);
+  evaluate_d0(x, psi);
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_vgl(
-    PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<typename T>
+void SHOSetT<T>::evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-    PosType x = (r - center) / length;
-    evaluate_hermite(x);
-    evaluate_d0(x, psi);
-    evaluate_d1(x, psi, dpsi);
-    evaluate_d2(x, psi, d2psi);
+  PosType x = (r - center) / length;
+  evaluate_hermite(x);
+  evaluate_d0(x, psi);
+  evaluate_d1(x, psi, dpsi);
+  evaluate_d2(x, psi, d2psi);
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_hermite(const PosType& xpos)
+template<typename T>
+void SHOSetT<T>::evaluate_hermite(const PosType& xpos)
 {
-    for (int d = 0; d < QMCTraits::DIM; ++d) {
-        int nh = qn_max[d];
-        if (nh > 0) {
-            RealType x = xpos[d];
-            hermite(d, 0) = 1.0;
-            RealType Hnm2 = 0.0;
-            RealType Hnm1 = 1.0;
-            for (int n = 1; n < nh; ++n) {
-                RealType Hn = 2 * (x * Hnm1 - (n - 1) * Hnm2);
-                hermite(d, n) = Hn;
-                Hnm2 = Hnm1;
-                Hnm1 = Hn;
-            }
-        }
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    int nh = qn_max[d];
+    if (nh > 0)
+    {
+      RealType x    = xpos[d];
+      hermite(d, 0) = 1.0;
+      RealType Hnm2 = 0.0;
+      RealType Hnm1 = 1.0;
+      for (int n = 1; n < nh; ++n)
+      {
+        RealType Hn   = 2 * (x * Hnm1 - (n - 1) * Hnm2);
+        hermite(d, n) = Hn;
+        Hnm2          = Hnm1;
+        Hnm1          = Hn;
+      }
     }
+  }
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_d0(const PosType& xpos, ValueVector& psi)
+template<typename T>
+void SHOSetT<T>::evaluate_d0(const PosType& xpos, ValueVector& psi)
 {
-    using std::exp;
-    for (int d = 0; d < QMCTraits::DIM; ++d) {
-        RealType x = xpos[d];
-        RealType g = exp(-.5 * x * x);
-        for (int n = 0; n < qn_max[d]; ++n) {
-            bvalues(d, n) = prefactors[n] * g * hermite(d, n);
-        }
-    }
-    for (int s = 0; s < state_info.size(); ++s) {
-        const SHOState& state = state_info[s];
-        RealType phi = 1.0;
-        for (int d = 0; d < QMCTraits::DIM; ++d)
-            phi *= bvalues(d, state.quantum_number[d]);
-        psi[s] = phi;
+  using std::exp;
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    RealType x = xpos[d];
+    RealType g = exp(-.5 * x * x);
+    for (int n = 0; n < qn_max[d]; ++n)
+    {
+      bvalues(d, n) = prefactors[n] * g * hermite(d, n);
     }
+  }
+  for (int s = 0; s < state_info.size(); ++s)
+  {
+    const SHOState& state = state_info[s];
+    RealType phi          = 1.0;
+    for (int d = 0; d < QMCTraits::DIM; ++d)
+      phi *= bvalues(d, state.quantum_number[d]);
+    psi[s] = phi;
+  }
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi)
+template<typename T>
+void SHOSetT<T>::evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi)
 {
-    RealType ol = 1.0 / length;
-    for (int d = 0; d < QMCTraits::DIM; ++d) {
-        RealType x = xpos[d];
-        RealType Hnm1 = 0.0;
-        for (int n = 0; n < qn_max[d]; ++n) {
-            RealType Hn = hermite(d, n);
-            bvalues(d, n) = (-x + 2 * n * Hnm1 / Hn) * ol;
-            Hnm1 = Hn;
-        }
-    }
-    for (int s = 0; s < state_info.size(); ++s) {
-        const SHOState& state = state_info[s];
-        TinyVector<T, QMCTraits::DIM> dphi;
-        for (int d = 0; d < QMCTraits::DIM; ++d)
-            dphi[d] = bvalues(d, state.quantum_number[d]);
-        dphi *= psi[s];
-        dpsi[s] = dphi;
+  RealType ol = 1.0 / length;
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    RealType x    = xpos[d];
+    RealType Hnm1 = 0.0;
+    for (int n = 0; n < qn_max[d]; ++n)
+    {
+      RealType Hn   = hermite(d, n);
+      bvalues(d, n) = (-x + 2 * n * Hnm1 / Hn) * ol;
+      Hnm1          = Hn;
     }
+  }
+  for (int s = 0; s < state_info.size(); ++s)
+  {
+    const SHOState& state = state_info[s];
+    TinyVector<T, QMCTraits::DIM> dphi;
+    for (int d = 0; d < QMCTraits::DIM; ++d)
+      dphi[d] = bvalues(d, state.quantum_number[d]);
+    dphi *= psi[s];
+    dpsi[s] = dphi;
+  }
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_d2(
-    const PosType& xpos, ValueVector& psi, ValueVector& d2psi)
+template<typename T>
+void SHOSetT<T>::evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi)
 {
-    RealType ol2 = 1.0 / (length * length);
-    for (int d = 0; d < QMCTraits::DIM; ++d) {
-        RealType x = xpos[d];
-        RealType x2 = x * x;
-        for (int n = 0; n < qn_max[d]; ++n) {
-            bvalues(d, n) = (-1.0 + x2 - 2 * n) * ol2;
-        }
-    }
-    for (int s = 0; s < state_info.size(); ++s) {
-        const SHOState& state = state_info[s];
-        T d2phi = 0.0;
-        for (int d = 0; d < QMCTraits::DIM; ++d)
-            d2phi += bvalues(d, state.quantum_number[d]);
-        d2phi *= psi[s];
-        d2psi[s] = d2phi;
+  RealType ol2 = 1.0 / (length * length);
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    RealType x  = xpos[d];
+    RealType x2 = x * x;
+    for (int n = 0; n < qn_max[d]; ++n)
+    {
+      bvalues(d, n) = (-1.0 + x2 - 2 * n) * ol2;
     }
+  }
+  for (int s = 0; s < state_info.size(); ++s)
+  {
+    const SHOState& state = state_info[s];
+    T d2phi               = 0.0;
+    for (int d = 0; d < QMCTraits::DIM; ++d)
+      d2phi += bvalues(d, state.quantum_number[d]);
+    d2phi *= psi[s];
+    d2psi[s] = d2phi;
+  }
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_check(
-    PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<typename T>
+void SHOSetT<T>::evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-    using std::exp;
-    using std::sqrt;
-
-    evaluate_vgl(r, psi, dpsi, d2psi);
-
-    const int N = 6;
-    RealType H[N], dH[N], d2H[N], pre[N];
-    RealType p[N], dp[N], d2p[N];
-
-    pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
-    for (int n = 1; n < N; ++n)
-        pre[n] = pre[n - 1] / sqrt(2. * n);
-
-    for (int d = 0; d < QMCTraits::DIM; ++d) {
-        RealType x = (r[d] - center[d]) / length;
-        RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x,
-                 x5 = x * x * x * x * x;
-        H[0] = 1;
-        dH[0] = 0;
-        d2H[0] = 0;
-        H[1] = 2 * x;
-        dH[1] = 2;
-        d2H[1] = 0;
-        H[2] = 4 * x2 - 2;
-        dH[2] = 8 * x;
-        d2H[2] = 8;
-        H[3] = 8 * x3 - 12 * x;
-        dH[3] = 24 * x2 - 12;
-        d2H[3] = 48 * x;
-        H[4] = 16 * x4 - 48 * x2 + 12;
-        dH[4] = 64 * x3 - 96 * x;
-        d2H[4] = 192 * x2 - 96;
-        H[5] = 32 * x5 - 160 * x3 + 120 * x;
-        dH[5] = 160 * x4 - 480 * x2 + 120;
-        d2H[5] = 640 * x3 - 960 * x;
-        RealType g = exp(-x2 / 2);
-        for (int n = 0; n < N; ++n) {
-            p[n] = pre[n] * g * H[n];
-            dp[n] = pre[n] * g * (-x * H[n] + dH[n]);
-            d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]);
-        }
-        app_log() << "eval check dim = " << d << "  x = " << x << std::endl;
-        app_log() << "  hermite check" << std::endl;
-        for (int n = 0; n < qn_max[d]; ++n) {
-            app_log() << "    " << n << " " << H[n] << std::endl;
-            app_log() << "    " << n << " " << hermite(d, n) << std::endl;
-        }
-        app_log() << "  phi d0 check" << std::endl;
-        for (int n = 0; n < qn_max[d]; ++n) {
-            app_log() << "    " << n << " " << p[n] << std::endl;
-            app_log() << "    " << n << " " << d0_values(d, n) << std::endl;
-        }
-        app_log() << "  phi d1 check" << std::endl;
-        for (int n = 0; n < qn_max[d]; ++n) {
-            app_log() << "    " << n << " " << dp[n] / p[n] << std::endl;
-            app_log() << "    " << n << " " << d1_values(d, n) << std::endl;
-        }
-        app_log() << "  phi d2 check" << std::endl;
-        for (int n = 0; n < qn_max[d]; ++n) {
-            app_log() << "    " << n << " " << d2p[n] / p[n] << std::endl;
-            app_log() << "    " << n << " " << d2_values(d, n) << std::endl;
-        }
+  using std::exp;
+  using std::sqrt;
+
+  evaluate_vgl(r, psi, dpsi, d2psi);
+
+  const int N = 6;
+  RealType H[N], dH[N], d2H[N], pre[N];
+  RealType p[N], dp[N], d2p[N];
+
+  pre[0] = 1.0 / (sqrt(sqrt(M_PI) * length));
+  for (int n = 1; n < N; ++n)
+    pre[n] = pre[n - 1] / sqrt(2. * n);
+
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    RealType x  = (r[d] - center[d]) / length;
+    RealType x2 = x * x, x3 = x * x * x, x4 = x * x * x * x, x5 = x * x * x * x * x;
+    H[0]       = 1;
+    dH[0]      = 0;
+    d2H[0]     = 0;
+    H[1]       = 2 * x;
+    dH[1]      = 2;
+    d2H[1]     = 0;
+    H[2]       = 4 * x2 - 2;
+    dH[2]      = 8 * x;
+    d2H[2]     = 8;
+    H[3]       = 8 * x3 - 12 * x;
+    dH[3]      = 24 * x2 - 12;
+    d2H[3]     = 48 * x;
+    H[4]       = 16 * x4 - 48 * x2 + 12;
+    dH[4]      = 64 * x3 - 96 * x;
+    d2H[4]     = 192 * x2 - 96;
+    H[5]       = 32 * x5 - 160 * x3 + 120 * x;
+    dH[5]      = 160 * x4 - 480 * x2 + 120;
+    d2H[5]     = 640 * x3 - 960 * x;
+    RealType g = exp(-x2 / 2);
+    for (int n = 0; n < N; ++n)
+    {
+      p[n]   = pre[n] * g * H[n];
+      dp[n]  = pre[n] * g * (-x * H[n] + dH[n]);
+      d2p[n] = pre[n] * g * ((x2 - 1) * H[n] - 2 * x * dH[n] + d2H[n]);
+    }
+    app_log() << "eval check dim = " << d << "  x = " << x << std::endl;
+    app_log() << "  hermite check" << std::endl;
+    for (int n = 0; n < qn_max[d]; ++n)
+    {
+      app_log() << "    " << n << " " << H[n] << std::endl;
+      app_log() << "    " << n << " " << hermite(d, n) << std::endl;
     }
+    app_log() << "  phi d0 check" << std::endl;
+    for (int n = 0; n < qn_max[d]; ++n)
+    {
+      app_log() << "    " << n << " " << p[n] << std::endl;
+      app_log() << "    " << n << " " << d0_values(d, n) << std::endl;
+    }
+    app_log() << "  phi d1 check" << std::endl;
+    for (int n = 0; n < qn_max[d]; ++n)
+    {
+      app_log() << "    " << n << " " << dp[n] / p[n] << std::endl;
+      app_log() << "    " << n << " " << d1_values(d, n) << std::endl;
+    }
+    app_log() << "  phi d2 check" << std::endl;
+    for (int n = 0; n < qn_max[d]; ++n)
+    {
+      app_log() << "    " << n << " " << d2p[n] / p[n] << std::endl;
+      app_log() << "    " << n << " " << d2_values(d, n) << std::endl;
+    }
+  }
 }
 
-template <typename T>
-void
-SHOSetT<T>::test_derivatives()
+template<typename T>
+void SHOSetT<T>::test_derivatives()
 {
-    int n = 3;
-    PosType c = 5.123;
-    PosType L = 1.0;
-    PosType drg = L / n;
-    PosType dr = L / 1000;
-    int nphi = state_info.size();
-
-    PosType o2dr, odr2;
-
-    ValueVector vpsi, vpsitmp;
-    GradVector vdpsi, vdpsin;
-    ValueVector vd2psi, vd2psin;
-
-    vpsi.resize(nphi);
-    vdpsi.resize(nphi);
-    vd2psi.resize(nphi);
-
-    vpsitmp.resize(nphi);
-    vdpsin.resize(nphi);
-    vd2psin.resize(nphi);
-
-    ValueVector psi(&vpsi[0], this->size());
-    GradVector dpsi(&vdpsi[0], this->size());
-    ValueVector d2psi(&vd2psi[0], this->size());
-
-    ValueVector psitmp(&vpsitmp[0], this->size());
-    GradVector dpsin(&vdpsin[0], this->size());
-    ValueVector d2psin(&vd2psin[0], this->size());
-
-    app_log() << " loading dr" << std::endl;
-
-    RealType odr2sum = 0.0;
-    for (int d = 0; d < QMCTraits::DIM; ++d) {
-        RealType odr = 1.0 / dr[d];
-        o2dr[d] = .5 * odr;
-        odr2[d] = odr * odr;
-        odr2sum += odr2[d];
-    }
+  int n       = 3;
+  PosType c   = 5.123;
+  PosType L   = 1.0;
+  PosType drg = L / n;
+  PosType dr  = L / 1000;
+  int nphi    = state_info.size();
+
+  PosType o2dr, odr2;
+
+  ValueVector vpsi, vpsitmp;
+  GradVector vdpsi, vdpsin;
+  ValueVector vd2psi, vd2psin;
+
+  vpsi.resize(nphi);
+  vdpsi.resize(nphi);
+  vd2psi.resize(nphi);
+
+  vpsitmp.resize(nphi);
+  vdpsin.resize(nphi);
+  vd2psin.resize(nphi);
+
+  ValueVector psi(&vpsi[0], this->size());
+  GradVector dpsi(&vdpsi[0], this->size());
+  ValueVector d2psi(&vd2psi[0], this->size());
+
+  ValueVector psitmp(&vpsitmp[0], this->size());
+  GradVector dpsin(&vdpsin[0], this->size());
+  ValueVector d2psin(&vd2psin[0], this->size());
+
+  app_log() << " loading dr" << std::endl;
+
+  RealType odr2sum = 0.0;
+  for (int d = 0; d < QMCTraits::DIM; ++d)
+  {
+    RealType odr = 1.0 / dr[d];
+    o2dr[d]      = .5 * odr;
+    odr2[d]      = odr * odr;
+    odr2sum += odr2[d];
+  }
+
+  app_log() << "SHOSet::test_derivatives" << std::endl;
+
+  const SimulationCellT<T> simulation_cell;
+  ParticleSetT<T> Ps(simulation_cell);
+
+  int p = 0;
+  PosType r, rtmp;
+  for (int i = 0; i < n; ++i)
+  {
+    r[0] = c[0] + i * drg[0];
+    for (int j = 0; j < n; ++j)
+    {
+      r[1] = c[1] + j * drg[1];
+      for (int k = 0; k < n; ++k)
+      {
+        r[2] = c[2] + k * drg[2];
+
+        evaluate_vgl(r, psi, dpsi, d2psi);
+
+        for (int m = 0; m < nphi; ++m)
+          d2psin[m] = -2 * odr2sum * psi[m];
+        for (int d = 0; d < QMCTraits::DIM; ++d)
+        {
+          rtmp = r;
+          rtmp[d] += dr[d];
+          evaluate_v(rtmp, psitmp);
+          for (int m = 0; m < nphi; ++m)
+          {
+            T phi       = psitmp[m];
+            dpsin[m][d] = phi * o2dr[d];
+            d2psin[m] += phi * odr2[d];
+          }
+          rtmp = r;
+          rtmp[d] -= dr[d];
+          evaluate_v(rtmp, psitmp);
+          for (int m = 0; m < nphi; ++m)
+          {
+            T phi = psitmp[m];
+            dpsin[m][d] -= phi * o2dr[d];
+            d2psin[m] += phi * odr2[d];
+          }
+        }
 
-    app_log() << "SHOSet::test_derivatives" << std::endl;
-
-    const SimulationCellT<T> simulation_cell;
-    ParticleSetT<T> Ps(simulation_cell);
-
-    int p = 0;
-    PosType r, rtmp;
-    for (int i = 0; i < n; ++i) {
-        r[0] = c[0] + i * drg[0];
-        for (int j = 0; j < n; ++j) {
-            r[1] = c[1] + j * drg[1];
-            for (int k = 0; k < n; ++k) {
-                r[2] = c[2] + k * drg[2];
-
-                evaluate_vgl(r, psi, dpsi, d2psi);
-
-                for (int m = 0; m < nphi; ++m)
-                    d2psin[m] = -2 * odr2sum * psi[m];
-                for (int d = 0; d < QMCTraits::DIM; ++d) {
-                    rtmp = r;
-                    rtmp[d] += dr[d];
-                    evaluate_v(rtmp, psitmp);
-                    for (int m = 0; m < nphi; ++m) {
-                        T phi = psitmp[m];
-                        dpsin[m][d] = phi * o2dr[d];
-                        d2psin[m] += phi * odr2[d];
-                    }
-                    rtmp = r;
-                    rtmp[d] -= dr[d];
-                    evaluate_v(rtmp, psitmp);
-                    for (int m = 0; m < nphi; ++m) {
-                        T phi = psitmp[m];
-                        dpsin[m][d] -= phi * o2dr[d];
-                        d2psin[m] += phi * odr2[d];
-                    }
-                }
-
-                RealType dphi_diff = 0.0;
-                RealType d2phi_diff = 0.0;
-                for (int m = 0; m < nphi; ++m)
-                    for (int d = 0; d < QMCTraits::DIM; ++d)
-                        dphi_diff = std::max<RealType>(dphi_diff,
-                            std::abs(dpsi[m][d] - dpsin[m][d]) /
-                                std::abs(dpsin[m][d]));
-                for (int m = 0; m < nphi; ++m)
-                    d2phi_diff = std::max<RealType>(d2phi_diff,
-                        std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m]));
-                app_log() << "  " << p << " " << dphi_diff << " " << d2phi_diff
-                          << std::endl;
-                app_log() << "    derivatives" << std::endl;
-                for (int m = 0; m < nphi; ++m) {
-                    std::string qn = "";
-                    for (int d = 0; d < QMCTraits::DIM; ++d)
-                        qn += int2string(state_info[m].quantum_number[d]) + " ";
-                    app_log() << "    " << qn;
-                    for (int d = 0; d < QMCTraits::DIM; ++d)
-                        app_log() << real(dpsi[m][d]) << " ";
-                    app_log() << std::endl;
-                    app_log() << "    " << qn;
-                    for (int d = 0; d < QMCTraits::DIM; ++d)
-                        app_log() << real(dpsin[m][d]) << " ";
-                    app_log() << std::endl;
-                }
-                app_log() << "    laplacians" << std::endl;
-                PosType x = r / length;
-                for (int m = 0; m < nphi; ++m) {
-                    std::string qn = "";
-                    for (int d = 0; d < QMCTraits::DIM; ++d)
-                        qn += int2string(state_info[m].quantum_number[d]) + " ";
-                    app_log()
-                        << "    " << qn << real(d2psi[m] / psi[m]) << std::endl;
-                    app_log() << "    " << qn << real(d2psin[m] / psi[m])
-                              << std::endl;
-                }
-                p++;
-            }
+        RealType dphi_diff  = 0.0;
+        RealType d2phi_diff = 0.0;
+        for (int m = 0; m < nphi; ++m)
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            dphi_diff = std::max<RealType>(dphi_diff, std::abs(dpsi[m][d] - dpsin[m][d]) / std::abs(dpsin[m][d]));
+        for (int m = 0; m < nphi; ++m)
+          d2phi_diff = std::max<RealType>(d2phi_diff, std::abs(d2psi[m] - d2psin[m]) / std::abs(d2psin[m]));
+        app_log() << "  " << p << " " << dphi_diff << " " << d2phi_diff << std::endl;
+        app_log() << "    derivatives" << std::endl;
+        for (int m = 0; m < nphi; ++m)
+        {
+          std::string qn = "";
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            qn += int2string(state_info[m].quantum_number[d]) + " ";
+          app_log() << "    " << qn;
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            app_log() << real(dpsi[m][d]) << " ";
+          app_log() << std::endl;
+          app_log() << "    " << qn;
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            app_log() << real(dpsin[m][d]) << " ";
+          app_log() << std::endl;
         }
+        app_log() << "    laplacians" << std::endl;
+        PosType x = r / length;
+        for (int m = 0; m < nphi; ++m)
+        {
+          std::string qn = "";
+          for (int d = 0; d < QMCTraits::DIM; ++d)
+            qn += int2string(state_info[m].quantum_number[d]) + " ";
+          app_log() << "    " << qn << real(d2psi[m] / psi[m]) << std::endl;
+          app_log() << "    " << qn << real(d2psin[m] / psi[m]) << std::endl;
+        }
+        p++;
+      }
     }
+  }
 
-    app_log() << "end SHOSet::test_derivatives" << std::endl;
+  app_log() << "end SHOSet::test_derivatives" << std::endl;
 }
 
-template <typename T>
-void
-SHOSetT<T>::test_overlap()
+template<typename T>
+void SHOSetT<T>::test_overlap()
 {
-    app_log() << "SHOSet::test_overlap" << std::endl;
+  app_log() << "SHOSet::test_overlap" << std::endl;
 
-    // linear
-    int d = 0;
+  // linear
+  int d = 0;
 
-    app_log() << "  length = " << length << std::endl;
-    app_log() << "  prefactors" << std::endl;
-    for (int n = 0; n < qn_max[d]; ++n)
-        app_log() << "    " << n << " " << prefactors[n] << std::endl;
+  app_log() << "  length = " << length << std::endl;
+  app_log() << "  prefactors" << std::endl;
+  for (int n = 0; n < qn_max[d]; ++n)
+    app_log() << "    " << n << " " << prefactors[n] << std::endl;
+
+  app_log() << "  1d overlap" << std::endl;
 
-    app_log() << "  1d overlap" << std::endl;
+  ValueVector vpsi;
+  vpsi.resize(this->size());
+  ValueVector psi(&vpsi[0], this->size());
 
-    ValueVector vpsi;
-    vpsi.resize(this->size());
-    ValueVector psi(&vpsi[0], this->size());
+  double xmax = 4.0;
+  double dx   = .1;
+  double dr   = length * dx;
 
-    double xmax = 4.0;
-    double dx = .1;
-    double dr = length * dx;
+  int nphi = qn_max[d];
+  Array<double, 2> omat;
+  omat.resize(nphi, nphi);
+  for (int i = 0; i < nphi; ++i)
+    for (int j = 0; j < nphi; ++j)
+      omat(i, j) = 0.0;
+
+  PosType xp = 0.0;
+  for (double x = -xmax; x < xmax; x += dx)
+  {
+    xp[d] = x;
+    evaluate_hermite(xp);
+    evaluate_d0(xp, psi);
 
-    int nphi = qn_max[d];
-    Array<double, 2> omat;
-    omat.resize(nphi, nphi);
     for (int i = 0; i < nphi; ++i)
-        for (int j = 0; j < nphi; ++j)
-            omat(i, j) = 0.0;
+      for (int j = 0; j < nphi; ++j)
+        omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr;
+  }
 
-    PosType xp = 0.0;
-    for (double x = -xmax; x < xmax; x += dx) {
-        xp[d] = x;
+  for (int i = 0; i < nphi; ++i)
+  {
+    app_log() << std::endl;
+    for (int j = 0; j < nphi; ++j)
+      app_log() << omat(i, j) << " ";
+  }
+  app_log() << std::endl;
+
+  // volumetric
+  app_log() << "  3d overlap" << std::endl;
+  double dV = dr * dr * dr;
+  nphi      = this->size();
+  omat.resize(nphi, nphi);
+  for (int i = 0; i < nphi; ++i)
+    for (int j = 0; j < nphi; ++j)
+      omat(i, j) = 0.0;
+  for (double x = -xmax; x < xmax; x += dx)
+    for (double y = -xmax; y < xmax; y += dx)
+      for (double z = -xmax; z < xmax; z += dx)
+      {
+        xp[0] = x;
+        xp[1] = y;
+        xp[2] = z;
         evaluate_hermite(xp);
         evaluate_d0(xp, psi);
 
         for (int i = 0; i < nphi; ++i)
-            for (int j = 0; j < nphi; ++j)
-                omat(i, j) += bvalues(d, i) * bvalues(d, j) * dr;
-    }
-
-    for (int i = 0; i < nphi; ++i) {
-        app_log() << std::endl;
-        for (int j = 0; j < nphi; ++j)
-            app_log() << omat(i, j) << " ";
-    }
-    app_log() << std::endl;
-
-    // volumetric
-    app_log() << "  3d overlap" << std::endl;
-    double dV = dr * dr * dr;
-    nphi = this->size();
-    omat.resize(nphi, nphi);
-    for (int i = 0; i < nphi; ++i)
-        for (int j = 0; j < nphi; ++j)
-            omat(i, j) = 0.0;
-    for (double x = -xmax; x < xmax; x += dx)
-        for (double y = -xmax; y < xmax; y += dx)
-            for (double z = -xmax; z < xmax; z += dx) {
-                xp[0] = x;
-                xp[1] = y;
-                xp[2] = z;
-                evaluate_hermite(xp);
-                evaluate_d0(xp, psi);
-
-                for (int i = 0; i < nphi; ++i)
-                    for (int j = 0; j < nphi; ++j)
-                        omat(i, j) += std::abs(psi[i] * psi[j]) * dV;
-            }
-    for (int i = 0; i < nphi; ++i) {
-        app_log() << std::endl;
-        for (int j = 0; j < nphi; ++j)
-            app_log() << omat(i, j) << " ";
-    }
+          for (int j = 0; j < nphi; ++j)
+            omat(i, j) += std::abs(psi[i] * psi[j]) * dV;
+      }
+  for (int i = 0; i < nphi; ++i)
+  {
     app_log() << std::endl;
+    for (int j = 0; j < nphi; ++j)
+      app_log() << omat(i, j) << " ";
+  }
+  app_log() << std::endl;
 
-    app_log() << "end SHOSet::test_overlap" << std::endl;
+  app_log() << "end SHOSet::test_overlap" << std::endl;
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last,
-    GGGMatrix& grad_grad_grad_logdet)
+template<typename T>
+void SHOSetT<T>::evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last, GGGMatrix& grad_grad_grad_logdet)
 {
-    not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)");
+  not_implemented("evaluateThirdDeriv(P,first,last,dddlogdet)");
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-    ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet)
+template<typename T>
+void SHOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                      int first,
+                                      int last,
+                                      ValueMatrix& logdet,
+                                      GradMatrix& dlogdet,
+                                      HessMatrix& grad_grad_logdet)
 {
-    not_implemented(
-        "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)");
+  not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet)");
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-    ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
-    GGGMatrix& grad_grad_grad_logdet)
+template<typename T>
+void SHOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                      int first,
+                                      int last,
+                                      ValueMatrix& logdet,
+                                      GradMatrix& dlogdet,
+                                      HessMatrix& grad_grad_logdet,
+                                      GGGMatrix& grad_grad_grad_logdet)
 {
-    not_implemented(
-        "evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)");
+  not_implemented("evaluate_notranspose(P,first,last,logdet,dlogdet,ddlogdet,dddlogdet)");
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-    const ParticleSetT<T>& source, int iat_src, GradMatrix& gradphi)
+template<typename T>
+void SHOSetT<T>::evaluateGradSource(const ParticleSetT<T>& P,
+                                    int first,
+                                    int last,
+                                    const ParticleSetT<T>& source,
+                                    int iat_src,
+                                    GradMatrix& gradphi)
 {
-    not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)");
+  not_implemented("evaluateGradSource(P,first,last,source,iat,dphi)");
 }
 
-template <typename T>
-void
-SHOSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-    const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
-    HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi)
+template<typename T>
+void SHOSetT<T>::evaluateGradSource(const ParticleSetT<T>& P,
+                                    int first,
+                                    int last,
+                                    const ParticleSetT<T>& source,
+                                    int iat_src,
+                                    GradMatrix& grad_phi,
+                                    HessMatrix& grad_grad_phi,
+                                    GradMatrix& grad_lapl_phi)
 {
-    not_implemented(
-        "evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)");
+  not_implemented("evaluateGradSource(P,first,last,source,iat,dphi,ddphi,dd2phi)");
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
index d8e89e9e0ec..5202609ca4a 100644
--- a/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
+++ b/src/QMCWaveFunctions/HarmonicOscillator/SHOSetT.h
@@ -4,13 +4,10 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National
-//                    Laboratory
+// File developed by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-// Laboratory
+// File created by: Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_SHOSETT_H
@@ -23,153 +20,133 @@ namespace qmcplusplus
 {
 struct SHOState : public SPOInfo
 {
-    TinyVector<int, QMCTraits::DIM> quantum_number;
-
-    SHOState()
-    {
-        quantum_number = -1;
-        energy = 0.0;
-    }
-
-    ~SHOState() override
-    {
-    }
-
-    inline void
-    set(TinyVector<int, QMCTraits::DIM> qn, RealType e)
-    {
-        quantum_number = qn;
-        energy = e;
-    }
-
-    inline void
-    sho_report(const std::string& pad = "") const
-    {
-        app_log() << pad << "qn=" << quantum_number << "  e=" << energy
-                  << std::endl;
-    }
+  TinyVector<int, QMCTraits::DIM> quantum_number;
+
+  SHOState()
+  {
+    quantum_number = -1;
+    energy         = 0.0;
+  }
+
+  ~SHOState() override {}
+
+  inline void set(TinyVector<int, QMCTraits::DIM> qn, RealType e)
+  {
+    quantum_number = qn;
+    energy         = e;
+  }
+
+  inline void sho_report(const std::string& pad = "") const
+  {
+    app_log() << pad << "qn=" << quantum_number << "  e=" << energy << std::endl;
+  }
 };
 
-template <typename T>
+template<typename T>
 class SHOSetT : public SPOSetT<T>
 {
 public:
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using value_type = typename ValueMatrix::value_type;
-    using grad_type = typename GradMatrix::value_type;
-    using RealType = typename SPOSetT<T>::RealType;
-    using PosType = TinyVector<RealType, QMCTraits::DIM>;
-    using HessType = typename OrbitalSetTraits<T>::HessType;
-    using HessMatrix = typename OrbitalSetTraits<T>::HessMatrix;
-    using GGGType = TinyVector<HessType, OHMMS_DIM>;
-    using GGGVector = Vector<GGGType>;
-    using GGGMatrix = Matrix<GGGType>;
-
-    RealType length;
-    PosType center;
-
-    int nmax;
-    TinyVector<int, QMCTraits::DIM> qn_max;
-    std::vector<SHOState> state_info;
-    std::vector<RealType> prefactors;
-    Array<RealType, 2> hermite;
-    Array<RealType, 2> bvalues;
-    Array<RealType, 2> d0_values;
-    Array<RealType, 2> d1_values;
-    Array<RealType, 2> d2_values;
-
-    // construction/destruction
-    SHOSetT(const std::string& my_name, RealType l, PosType c,
-        const std::vector<SHOState*>& sho_states);
-
-    ~SHOSetT() override;
-
-    std::string
-    getClassName() const override
-    {
-        return "SHOSet";
-    }
-
-    void
-    initialize();
-
-    // SPOSet interface methods
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const override;
-
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
-
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
-
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override;
-
-    // local functions
-    void
-    evaluate_v(PosType r, ValueVector& psi);
-    void
-    evaluate_vgl(
-        PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
-    void
-    evaluate_hermite(const PosType& xpos);
-    void
-    evaluate_d0(const PosType& xpos, ValueVector& psi);
-    void
-    evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi);
-    void
-    evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi);
-    void
-    report(const std::string& pad = "") const override;
-    void
-    test_derivatives();
-    void
-    test_overlap();
-    void
-    evaluate_check(
-        PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
-
-    // empty methods
-    /// number of orbitals is determined only by initial request
-    inline void
-    setOrbitalSetSize(int norbs) override
-    {
-    }
-
-    /// unimplemented functions call this to abort
-    inline void
-    not_implemented(const std::string& method)
-    {
-        APP_ABORT("SHOSet::" + method + " has not been implemented.");
-    }
-
-    // methods to be implemented in the future (possibly)
-    void
-    evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last,
-        GGGMatrix& dddlogdet) override;
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        HessMatrix& ddlogdet) override;
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& ddlogdet,
-        GGGMatrix& dddlogdet) override;
-    void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src,
-        GradMatrix& gradphi) override;
-    void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src, GradMatrix& dphi,
-        HessMatrix& ddphi, GradMatrix& dlapl_phi) override;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using value_type  = typename ValueMatrix::value_type;
+  using grad_type   = typename GradMatrix::value_type;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using PosType     = TinyVector<RealType, QMCTraits::DIM>;
+  using HessType    = typename OrbitalSetTraits<T>::HessType;
+  using HessMatrix  = typename OrbitalSetTraits<T>::HessMatrix;
+  using GGGType     = TinyVector<HessType, OHMMS_DIM>;
+  using GGGVector   = Vector<GGGType>;
+  using GGGMatrix   = Matrix<GGGType>;
+
+  RealType length;
+  PosType center;
+
+  int nmax;
+  TinyVector<int, QMCTraits::DIM> qn_max;
+  std::vector<SHOState> state_info;
+  std::vector<RealType> prefactors;
+  Array<RealType, 2> hermite;
+  Array<RealType, 2> bvalues;
+  Array<RealType, 2> d0_values;
+  Array<RealType, 2> d1_values;
+  Array<RealType, 2> d2_values;
+
+  // construction/destruction
+  SHOSetT(const std::string& my_name, RealType l, PosType c, const std::vector<SHOState*>& sho_states);
+
+  ~SHOSetT() override;
+
+  std::string getClassName() const override { return "SHOSet"; }
+
+  void initialize();
+
+  // SPOSet interface methods
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+  // local functions
+  void evaluate_v(PosType r, ValueVector& psi);
+  void evaluate_vgl(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+  void evaluate_hermite(const PosType& xpos);
+  void evaluate_d0(const PosType& xpos, ValueVector& psi);
+  void evaluate_d1(const PosType& xpos, ValueVector& psi, GradVector& dpsi);
+  void evaluate_d2(const PosType& xpos, ValueVector& psi, ValueVector& d2psi);
+  void report(const std::string& pad = "") const override;
+  void test_derivatives();
+  void test_overlap();
+  void evaluate_check(PosType r, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
+
+  // empty methods
+  /// number of orbitals is determined only by initial request
+  inline void setOrbitalSetSize(int norbs) override {}
+
+  /// unimplemented functions call this to abort
+  inline void not_implemented(const std::string& method)
+  {
+    APP_ABORT("SHOSet::" + method + " has not been implemented.");
+  }
+
+  // methods to be implemented in the future (possibly)
+  void evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last, GGGMatrix& dddlogdet) override;
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& ddlogdet) override;
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& ddlogdet,
+                            GGGMatrix& dddlogdet) override;
+  void evaluateGradSource(const ParticleSetT<T>& P,
+                          int first,
+                          int last,
+                          const ParticleSetT<T>& source,
+                          int iat_src,
+                          GradMatrix& gradphi) override;
+  void evaluateGradSource(const ParticleSetT<T>& P,
+                          int first,
+                          int last,
+                          const ParticleSetT<T>& source,
+                          int iat_src,
+                          GradMatrix& dphi,
+                          HessMatrix& ddphi,
+                          GradMatrix& dlapl_phi) override;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
index 022d6db4a50..f84d483749d 100644
--- a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
+++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
@@ -1,20 +1,16 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source
-// License. See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jaron T. Krogel,
-//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Mark A.
-//                    Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-//                    National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "AOBasisBuilderT.h"
@@ -30,894 +26,877 @@
 
 namespace qmcplusplus
 {
-template <typename COT>
-AOBasisBuilderT<COT>::AOBasisBuilderT(
-    const std::string& eName, Communicate* comm) :
-    MPIObjectBase(comm),
-    addsignforM(false),
-    expandlm(GAUSSIAN_EXPAND),
-    Morder("gaussian"),
-    sph("default"),
-    basisType("Numerical"),
-    elementType(eName),
-    Normalized("yes")
+template<typename COT>
+AOBasisBuilderT<COT>::AOBasisBuilderT(const std::string& eName, Communicate* comm)
+    : MPIObjectBase(comm),
+      addsignforM(false),
+      expandlm(GAUSSIAN_EXPAND),
+      Morder("gaussian"),
+      sph("default"),
+      basisType("Numerical"),
+      elementType(eName),
+      Normalized("yes")
 {
-    // mmorales: for "Cartesian Gaussian", m is an integer that maps
-    //           the component to Gamess notation, see
-    //           Numerics/CartesianTensor.h
-    nlms_id["n"] = q_n;
-    nlms_id["l"] = q_l;
-    nlms_id["m"] = q_m;
-    nlms_id["s"] = q_s;
+  // mmorales: for "Cartesian Gaussian", m is an integer that maps
+  //           the component to Gamess notation, see
+  //           Numerics/CartesianTensor.h
+  nlms_id["n"] = q_n;
+  nlms_id["l"] = q_l;
+  nlms_id["m"] = q_m;
+  nlms_id["s"] = q_s;
 }
 
-template <class COT>
-bool
-AOBasisBuilderT<COT>::put(xmlNodePtr cur)
+template<class COT>
+bool AOBasisBuilderT<COT>::put(xmlNodePtr cur)
 {
-    ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)");
-    // Register valid attributes attributes
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(basisType, "type");
-    aAttrib.add(sph, "angular");
-    aAttrib.add(addsignforM, "expM");
-    aAttrib.add(Morder, "expandYlm");
-    aAttrib.add(Normalized, "normalized");
-    aAttrib.put(cur);
-    PRE.echo(cur);
-    if (sph == "spherical")
-        addsignforM = 1; // include (-1)^m
-
-    if (Morder == "gaussian")
-        expandlm = GAUSSIAN_EXPAND;
-    else if (Morder == "natural")
-        expandlm = NATURAL_EXPAND;
-    else if (Morder == "no")
-        expandlm = DONOT_EXPAND;
-    else if (Morder == "pyscf") {
-        expandlm = MOD_NATURAL_EXPAND;
-        addsignforM = 1;
-        if (sph != "spherical") {
-            myComm->barrier_and_abort(
-                " Error: expandYlm='pyscf' only compatible with "
-                "angular='spherical'. Aborting.\n");
-        }
-    }
-
-    if (sph == "cartesian" || Morder == "Gamess") {
-        expandlm = CARTESIAN_EXPAND;
-        addsignforM = 0;
-    }
-
-    if (Morder == "Dirac") {
-        expandlm = DIRAC_CARTESIAN_EXPAND;
-        addsignforM = 0;
-        if (sph != "cartesian")
-            myComm->barrier_and_abort(
-                " Error: expandYlm='Dirac' only compatible with "
-                "angular='cartesian'. Aborting\n");
+  ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)");
+  // Register valid attributes
+  OhmmsAttributeSet aAttrib;
+  aAttrib.add(basisType, "type");
+  aAttrib.add(sph, "angular");
+  aAttrib.add(addsignforM, "expM");
+  aAttrib.add(Morder, "expandYlm");
+  aAttrib.add(Normalized, "normalized");
+  aAttrib.put(cur);
+  PRE.echo(cur);
+  if (sph == "spherical")
+    addsignforM = 1; // include (-1)^m
+
+  if (Morder == "gaussian")
+    expandlm = GAUSSIAN_EXPAND;
+  else if (Morder == "natural")
+    expandlm = NATURAL_EXPAND;
+  else if (Morder == "no")
+    expandlm = DONOT_EXPAND;
+  else if (Morder == "pyscf")
+  {
+    expandlm    = MOD_NATURAL_EXPAND;
+    addsignforM = 1;
+    if (sph != "spherical")
+    {
+      myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with "
+                                "angular='spherical'. Aborting.\n");
     }
-
-    // Numerical basis is a special case
-    if (basisType == "Numerical")
-        myComm->barrier_and_abort(
-            "Purely numerical atomic orbitals are not supported any longer.");
-
-    return true;
+  }
+
+  if (sph == "cartesian" || Morder == "Gamess")
+  {
+    expandlm    = CARTESIAN_EXPAND;
+    addsignforM = 0;
+  }
+
+  if (Morder == "Dirac")
+  {
+    expandlm    = DIRAC_CARTESIAN_EXPAND;
+    addsignforM = 0;
+    if (sph != "cartesian")
+      myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with "
+                                "angular='cartesian'. Aborting\n");
+  }
+
+  // Numerical basis is a special case
+  if (basisType == "Numerical")
+    myComm->barrier_and_abort("Purely numerical atomic orbitals are not supported any longer.");
+
+  return true;
 }
 
-template <class COT>
-bool
-AOBasisBuilderT<COT>::putH5(hdf_archive& hin)
+template<class COT>
+bool AOBasisBuilderT<COT>::putH5(hdf_archive& hin)
 {
-    ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)");
-    std::string CenterID, basisName;
-
-    if (myComm->rank() == 0) {
-        hin.read(sph, "angular");
-        hin.read(CenterID, "elementType");
-        hin.read(Normalized, "normalized");
-        hin.read(Morder, "expandYlm");
-        hin.read(basisName, "name");
+  ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)");
+  std::string CenterID, basisName;
+
+  if (myComm->rank() == 0)
+  {
+    hin.read(sph, "angular");
+    hin.read(CenterID, "elementType");
+    hin.read(Normalized, "normalized");
+    hin.read(Morder, "expandYlm");
+    hin.read(basisName, "name");
+  }
+
+  myComm->bcast(sph);
+  myComm->bcast(Morder);
+  myComm->bcast(CenterID);
+  myComm->bcast(Normalized);
+  myComm->bcast(basisName);
+  myComm->bcast(basisType);
+  myComm->bcast(addsignforM);
+
+  if (sph == "spherical")
+    addsignforM = 1; // include (-1)^m
+
+  if (Morder == "gaussian")
+    expandlm = GAUSSIAN_EXPAND;
+  else if (Morder == "natural")
+    expandlm = NATURAL_EXPAND;
+  else if (Morder == "no")
+    expandlm = DONOT_EXPAND;
+  else if (Morder == "pyscf")
+  {
+    expandlm    = MOD_NATURAL_EXPAND;
+    addsignforM = 1;
+    if (sph != "spherical")
+    {
+      myComm->barrier_and_abort(" Error: expandYlm='pyscf' only compatible with "
+                                "angular='spherical'. Aborting.\n");
     }
-
-    myComm->bcast(sph);
-    myComm->bcast(Morder);
-    myComm->bcast(CenterID);
-    myComm->bcast(Normalized);
-    myComm->bcast(basisName);
-    myComm->bcast(basisType);
-    myComm->bcast(addsignforM);
-
-    if (sph == "spherical")
-        addsignforM = 1; // include (-1)^m
-
-    if (Morder == "gaussian")
-        expandlm = GAUSSIAN_EXPAND;
-    else if (Morder == "natural")
-        expandlm = NATURAL_EXPAND;
-    else if (Morder == "no")
-        expandlm = DONOT_EXPAND;
-    else if (Morder == "pyscf") {
-        expandlm = MOD_NATURAL_EXPAND;
-        addsignforM = 1;
-        if (sph != "spherical") {
-            myComm->barrier_and_abort(
-                " Error: expandYlm='pyscf' only compatible with "
-                "angular='spherical'. Aborting.\n");
-        }
-    }
-
-    if (sph == "cartesian" || Morder == "Gamess") {
-        expandlm = CARTESIAN_EXPAND;
-        addsignforM = 0;
-    }
-
-    if (Morder == "Dirac") {
-        expandlm = DIRAC_CARTESIAN_EXPAND;
-        addsignforM = 0;
-        if (sph != "cartesian")
-            myComm->barrier_and_abort(
-                " Error: expandYlm='Dirac' only compatible with "
-                "angular='cartesian'. Aborting\n");
-    }
-    app_log() << R"(<input node="atomicBasisSet" name=")" << basisName
-              << "\" expandYlm=\"" << Morder << "\" angular=\"" << sph
-              << "\" elementType=\"" << CenterID << "\" normalized=\""
-              << Normalized << "\" type=\"" << basisType << "\" expM=\""
-              << addsignforM << "\" />" << std::endl;
-
-    return true;
+  }
+
+  if (sph == "cartesian" || Morder == "Gamess")
+  {
+    expandlm    = CARTESIAN_EXPAND;
+    addsignforM = 0;
+  }
+
+  if (Morder == "Dirac")
+  {
+    expandlm    = DIRAC_CARTESIAN_EXPAND;
+    addsignforM = 0;
+    if (sph != "cartesian")
+      myComm->barrier_and_abort(" Error: expandYlm='Dirac' only compatible with "
+                                "angular='cartesian'. Aborting\n");
+  }
+  app_log() << R"(<input node="atomicBasisSet" name=")" << basisName << "\" expandYlm=\"" << Morder << "\" angular=\""
+            << sph << "\" elementType=\"" << CenterID << "\" normalized=\"" << Normalized << "\" type=\"" << basisType
+            << "\" expM=\"" << addsignforM << "\" />" << std::endl;
+
+  return true;
 }
 
-template <typename COT>
-std::unique_ptr<COT>
-AOBasisBuilderT<COT>::createAOSet(xmlNodePtr cur)
+template<typename COT>
+std::unique_ptr<COT> AOBasisBuilderT<COT>::createAOSet(xmlNodePtr cur)
 {
-    ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)");
-    app_log() << "  AO BasisSet for " << elementType << "\n";
-
-    if (expandlm != CARTESIAN_EXPAND) {
-        if (addsignforM)
-            app_log() << "   Spherical Harmonics contain (-1)^m factor"
-                      << std::endl;
-        else
-            app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor"
-                      << std::endl;
+  ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)");
+  app_log() << "  AO BasisSet for " << elementType << "\n";
+
+  if (expandlm != CARTESIAN_EXPAND)
+  {
+    if (addsignforM)
+      app_log() << "   Spherical Harmonics contain (-1)^m factor" << std::endl;
+    else
+      app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor" << std::endl;
+  }
+
+  switch (expandlm)
+  {
+  case (GAUSSIAN_EXPAND):
+    app_log() << "   Angular momentum m expanded according to Gaussian" << std::endl;
+    break;
+  case (NATURAL_EXPAND):
+    app_log() << "   Angular momentum m expanded as -l, ... ,l" << std::endl;
+    break;
+  case (MOD_NATURAL_EXPAND):
+    app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
+                 "exception of L=1 (1,-1,0)"
+              << std::endl;
+    break;
+  case (CARTESIAN_EXPAND):
+    app_log() << "   Angular momentum expanded in cartesian functions x^lx "
+                 "y^ly z^lz according to Gamess"
+              << std::endl;
+    break;
+  case (DIRAC_CARTESIAN_EXPAND):
+    app_log() << "   Angular momentum expanded in cartesian functions in "
+                 "DIRAC ordering"
+              << std::endl;
+    break;
+  default:
+    app_log() << "   Angular momentum m is explicitly given." << std::endl;
+  }
+
+  QuantumNumberType nlms;
+  std::string rnl;
+  int Lmax(0); // maximum angular momentum of this center
+  int num(0);  // the number of localized basis functions of this center
+  // process the basic property: maximum angular momentum, the number of basis
+  // functions to be added
+  std::vector<xmlNodePtr> radGroup;
+  xmlNodePtr cur1 = cur->xmlChildrenNode;
+  xmlNodePtr gptr = 0;
+  while (cur1 != NULL)
+  {
+    std::string cname1((const char*)(cur1->name));
+    if (cname1 == "basisGroup")
+    {
+      radGroup.push_back(cur1);
+      const int l = std::stoi(getXMLAttributeValue(cur1, "l"));
+      Lmax        = std::max(Lmax, l);
+      // expect that only Rnl is given
+      if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND)
+        num += (l + 1) * (l + 2) / 2;
+      else if (expandlm)
+        num += 2 * l + 1;
+      else
+        num++;
     }
-
-    switch (expandlm) {
-    case (GAUSSIAN_EXPAND):
-        app_log() << "   Angular momentum m expanded according to Gaussian"
-                  << std::endl;
-        break;
-    case (NATURAL_EXPAND):
-        app_log() << "   Angular momentum m expanded as -l, ... ,l"
-                  << std::endl;
-        break;
-    case (MOD_NATURAL_EXPAND):
-        app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
-                     "exception of L=1 (1,-1,0)"
-                  << std::endl;
-        break;
-    case (CARTESIAN_EXPAND):
-        app_log() << "   Angular momentum expanded in cartesian functions x^lx "
-                     "y^ly z^lz according to Gamess"
-                  << std::endl;
-        break;
-    case (DIRAC_CARTESIAN_EXPAND):
-        app_log() << "   Angular momentum expanded in cartesian functions in "
-                     "DIRAC ordering"
-                  << std::endl;
-        break;
-    default:
-        app_log() << "   Angular momentum m is explicitly given." << std::endl;
+    else if (cname1 == "grid")
+    {
+      gptr = cur1;
     }
-
-    QuantumNumberType nlms;
-    std::string rnl;
-    int Lmax(0); // maxmimum angular momentum of this center
-    int num(0); // the number of localized basis functions of this center
-    // process the basic property: maximun angular momentum, the number of basis
-    // functions to be added
-    std::vector<xmlNodePtr> radGroup;
-    xmlNodePtr cur1 = cur->xmlChildrenNode;
-    xmlNodePtr gptr = 0;
-    while (cur1 != NULL) {
-        std::string cname1((const char*)(cur1->name));
-        if (cname1 == "basisGroup") {
-            radGroup.push_back(cur1);
-            const int l = std::stoi(getXMLAttributeValue(cur1, "l"));
-            Lmax = std::max(Lmax, l);
-            // expect that only Rnl is given
-            if (expandlm == CARTESIAN_EXPAND ||
-                expandlm == DIRAC_CARTESIAN_EXPAND)
-                num += (l + 1) * (l + 2) / 2;
-            else if (expandlm)
-                num += 2 * l + 1;
-            else
-                num++;
-        }
-        else if (cname1 == "grid") {
-            gptr = cur1;
+    cur1 = cur1->next;
+  }
+
+  // create a new set of atomic orbitals sharing a center with (Lmax, num)
+  // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
+  auto aos = std::make_unique<COT>(Lmax, addsignforM);
+  aos->LM.resize(num);
+  aos->NL.resize(num);
+
+  // Now, add distinct Radial Orbitals and (l,m) channels
+  RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
+  radFuncBuilder.Normalized = (Normalized == "yes");
+  radFuncBuilder.addGrid(gptr, basisType); // assign a radial grid for the new center
+  std::vector<xmlNodePtr>::iterator it(radGroup.begin());
+  std::vector<xmlNodePtr>::iterator it_end(radGroup.end());
+  std::vector<int> all_nl;
+  while (it != it_end)
+  {
+    cur1           = (*it);
+    xmlAttrPtr att = cur1->properties;
+    while (att != NULL)
+    {
+      std::string aname((const char*)(att->name));
+      if (aname == "rid" || aname == "id")
+      // accept id/rid
+      {
+        rnl = (const char*)(att->children->content);
+      }
+      else
+      {
+        std::map<std::string, int>::iterator iit = nlms_id.find(aname);
+        if (iit != nlms_id.end())
+        // valid for n,l,m,s
+        {
+          nlms[(*iit).second] = atoi((const char*)(att->children->content));
         }
-        cur1 = cur1->next;
+      }
+      att = att->next;
     }
-
-    // create a new set of atomic orbitals sharing a center with (Lmax, num)
-    // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
-    auto aos = std::make_unique<COT>(Lmax, addsignforM);
-    aos->LM.resize(num);
-    aos->NL.resize(num);
-
-    // Now, add distinct Radial Orbitals and (l,m) channels
-    RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
-    radFuncBuilder.Normalized = (Normalized == "yes");
-    radFuncBuilder.addGrid(
-        gptr, basisType); // assign a radial grid for the new center
-    std::vector<xmlNodePtr>::iterator it(radGroup.begin());
-    std::vector<xmlNodePtr>::iterator it_end(radGroup.end());
-    std::vector<int> all_nl;
-    while (it != it_end) {
-        cur1 = (*it);
-        xmlAttrPtr att = cur1->properties;
-        while (att != NULL) {
-            std::string aname((const char*)(att->name));
-            if (aname == "rid" || aname == "id")
-            // accept id/rid
-            {
-                rnl = (const char*)(att->children->content);
-            }
-            else {
-                std::map<std::string, int>::iterator iit = nlms_id.find(aname);
-                if (iit != nlms_id.end())
-                // valid for n,l,m,s
-                {
-                    nlms[(*iit).second] =
-                        atoi((const char*)(att->children->content));
-                }
-            }
-            att = att->next;
-        }
-        // add Ylm channels
-        app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " "
-                  << nlms[2] << " " << nlms[3] << std::endl;
-        std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
-        if (rnl_it == RnlID.end()) {
-            int nl = aos->RnlID.size();
-            if (radFuncBuilder.addRadialOrbital(cur1, basisType, nlms))
-                RnlID[rnl] = nl;
-            all_nl.push_back(nl);
-        }
-        else {
-            all_nl.push_back((*rnl_it).second);
-        }
-        ++it;
+    // add Ylm channels
+    app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " << nlms[2] << " " << nlms[3] << std::endl;
+    std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
+    if (rnl_it == RnlID.end())
+    {
+      int nl = aos->RnlID.size();
+      if (radFuncBuilder.addRadialOrbital(cur1, basisType, nlms))
+        RnlID[rnl] = nl;
+      all_nl.push_back(nl);
     }
-
-    if (expandYlm(aos.get(), all_nl, expandlm) != num)
-        myComm->barrier_and_abort(
-            "expandYlm doesn't match the number of basis.");
-    radFuncBuilder.finalize();
-    // aos->Rmax can be set small
-    // aos->setRmax(0);
-    aos->setBasisSetSize(-1);
-    app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax()
-              << std::endl
-              << "   Number of Radial functors = " << aos->RnlID.size()
-              << std::endl
-              << "   Basis size                = " << aos->getBasisSetSize()
-              << "\n\n";
-    return aos;
+    else
+    {
+      all_nl.push_back((*rnl_it).second);
+    }
+    ++it;
+  }
+
+  if (expandYlm(aos.get(), all_nl, expandlm) != num)
+    myComm->barrier_and_abort("expandYlm doesn't match the number of basis.");
+  radFuncBuilder.finalize();
+  // aos->Rmax can be set small
+  // aos->setRmax(0);
+  aos->setBasisSetSize(-1);
+  app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax() << std::endl
+            << "   Number of Radial functors = " << aos->RnlID.size() << std::endl
+            << "   Basis size                = " << aos->getBasisSetSize() << "\n\n";
+  return aos;
 }
 
-template <typename COT>
-std::unique_ptr<COT>
-AOBasisBuilderT<COT>::createAOSetH5(hdf_archive& hin)
+template<typename COT>
+std::unique_ptr<COT> AOBasisBuilderT<COT>::createAOSetH5(hdf_archive& hin)
 {
-    ReportEngine PRE("AOBasisBuilderT:", "createAOSetH5(std::string)");
-    app_log() << "  AO BasisSet for " << elementType << "\n";
-
-    if (expandlm != CARTESIAN_EXPAND) {
-        if (addsignforM)
-            app_log() << "   Spherical Harmonics contain (-1)^m factor"
-                      << std::endl;
-        else
-            app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor"
-                      << std::endl;
+  ReportEngine PRE("AOBasisBuilderT:", "createAOSetH5(std::string)");
+  app_log() << "  AO BasisSet for " << elementType << "\n";
+
+  if (expandlm != CARTESIAN_EXPAND)
+  {
+    if (addsignforM)
+      app_log() << "   Spherical Harmonics contain (-1)^m factor" << std::endl;
+    else
+      app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor" << std::endl;
+  }
+
+  switch (expandlm)
+  {
+  case (GAUSSIAN_EXPAND):
+    app_log() << "   Angular momentum m expanded according to Gaussian" << std::endl;
+    break;
+  case (NATURAL_EXPAND):
+    app_log() << "   Angular momentum m expanded as -l, ... ,l" << std::endl;
+    break;
+  case (MOD_NATURAL_EXPAND):
+    app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
+                 "exception of L=1 (1,-1,0)"
+              << std::endl;
+    break;
+  case (CARTESIAN_EXPAND):
+    app_log() << "   Angular momentum expanded in cartesian functions x^lx "
+                 "y^ly z^lz according to Gamess"
+              << std::endl;
+    break;
+  case (DIRAC_CARTESIAN_EXPAND):
+    app_log() << "   Angular momentum expanded in cartesian functions in "
+                 "DIRAC ordering"
+              << std::endl;
+    break;
+  default:
+    app_log() << "   Angular momentum m is explicitly given." << std::endl;
+  }
+
+  QuantumNumberType nlms;
+  std::string rnl;
+  int Lmax(0); // maximum angular momentum of this center
+  int num(0);  // the number of localized basis functions of this center
+
+  int numbasisgroups(0);
+  if (myComm->rank() == 0)
+  {
+    if (!hin.readEntry(numbasisgroups, "NbBasisGroups"))
+      PRE.error("Could not read NbBasisGroups in H5; Probably Corrupt H5 file", true);
+  }
+  myComm->bcast(numbasisgroups);
+
+  for (int i = 0; i < numbasisgroups; i++)
+  {
+    std::string basisGroupID = "basisGroup" + std::to_string(i);
+    int l(0);
+    if (myComm->rank() == 0)
+    {
+      hin.push(basisGroupID);
+      hin.read(l, "l");
+      hin.pop();
     }
-
-    switch (expandlm) {
-    case (GAUSSIAN_EXPAND):
-        app_log() << "   Angular momentum m expanded according to Gaussian"
-                  << std::endl;
-        break;
-    case (NATURAL_EXPAND):
-        app_log() << "   Angular momentum m expanded as -l, ... ,l"
-                  << std::endl;
-        break;
-    case (MOD_NATURAL_EXPAND):
-        app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
-                     "exception of L=1 (1,-1,0)"
-                  << std::endl;
-        break;
-    case (CARTESIAN_EXPAND):
-        app_log() << "   Angular momentum expanded in cartesian functions x^lx "
-                     "y^ly z^lz according to Gamess"
-                  << std::endl;
-        break;
-    case (DIRAC_CARTESIAN_EXPAND):
-        app_log() << "   Angular momentum expanded in cartesian functions in "
-                     "DIRAC ordering"
-                  << std::endl;
-        break;
-    default:
-        app_log() << "   Angular momentum m is explicitly given." << std::endl;
-    }
-
-    QuantumNumberType nlms;
-    std::string rnl;
-    int Lmax(0); // maxmimum angular momentum of this center
-    int num(0); // the number of localized basis functions of this center
-
-    int numbasisgroups(0);
-    if (myComm->rank() == 0) {
-        if (!hin.readEntry(numbasisgroups, "NbBasisGroups"))
-            PRE.error(
-                "Could not read NbBasisGroups in H5; Probably Corrupt H5 file",
-                true);
+    myComm->bcast(l);
+
+    Lmax = std::max(Lmax, l);
+    // expect that only Rnl is given
+    if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND)
+      num += (l + 1) * (l + 2) / 2;
+    else if (expandlm)
+      num += 2 * l + 1;
+    else
+      num++;
+  }
+
+  // create a new set of atomic orbitals sharing a center with (Lmax, num)
+  // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
+  auto aos = std::make_unique<COT>(Lmax, addsignforM);
+  aos->LM.resize(num);
+  aos->NL.resize(num);
+
+  // Now, add distinct Radial Orbitals and (l,m) channels
+  RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
+  radFuncBuilder.Normalized = (Normalized == "yes");
+  radFuncBuilder.addGridH5(hin); // assign a radial grid for the new center
+  std::vector<int> all_nl;
+  for (int i = 0; i < numbasisgroups; i++)
+  {
+    std::string basisGroupID = "basisGroup" + std::to_string(i);
+    if (myComm->rank() == 0)
+    {
+      hin.push(basisGroupID);
+      hin.read(rnl, "rid");
+      hin.read(nlms[0], "n");
+      hin.read(nlms[1], "l");
     }
-    myComm->bcast(numbasisgroups);
-
-    for (int i = 0; i < numbasisgroups; i++) {
-        std::string basisGroupID = "basisGroup" + std::to_string(i);
-        int l(0);
-        if (myComm->rank() == 0) {
-            hin.push(basisGroupID);
-            hin.read(l, "l");
-            hin.pop();
-        }
-        myComm->bcast(l);
-
-        Lmax = std::max(Lmax, l);
-        // expect that only Rnl is given
-        if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND)
-            num += (l + 1) * (l + 2) / 2;
-        else if (expandlm)
-            num += 2 * l + 1;
-        else
-            num++;
+    myComm->bcast(rnl);
+    myComm->bcast(nlms[0]);
+    myComm->bcast(nlms[1]);
+
+    // add Ylm channels
+    app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " " << nlms[2] << " " << nlms[3] << std::endl;
+    std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
+    if (rnl_it == RnlID.end())
+    {
+      int nl = aos->RnlID.size();
+      if (radFuncBuilder.addRadialOrbitalH5(hin, basisType, nlms))
+        RnlID[rnl] = nl;
+      all_nl.push_back(nl);
     }
-
-    // create a new set of atomic orbitals sharing a center with (Lmax, num)
-    // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
-    auto aos = std::make_unique<COT>(Lmax, addsignforM);
-    aos->LM.resize(num);
-    aos->NL.resize(num);
-
-    // Now, add distinct Radial Orbitals and (l,m) channels
-    RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
-    radFuncBuilder.Normalized = (Normalized == "yes");
-    radFuncBuilder.addGridH5(hin); // assign a radial grid for the new center
-    std::vector<int> all_nl;
-    for (int i = 0; i < numbasisgroups; i++) {
-        std::string basisGroupID = "basisGroup" + std::to_string(i);
-        if (myComm->rank() == 0) {
-            hin.push(basisGroupID);
-            hin.read(rnl, "rid");
-            hin.read(nlms[0], "n");
-            hin.read(nlms[1], "l");
-        }
-        myComm->bcast(rnl);
-        myComm->bcast(nlms[0]);
-        myComm->bcast(nlms[1]);
-
-        // add Ylm channels
-        app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " "
-                  << nlms[2] << " " << nlms[3] << std::endl;
-        std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
-        if (rnl_it == RnlID.end()) {
-            int nl = aos->RnlID.size();
-            if (radFuncBuilder.addRadialOrbitalH5(hin, basisType, nlms))
-                RnlID[rnl] = nl;
-            all_nl.push_back(nl);
-        }
-        else {
-            all_nl.push_back((*rnl_it).second);
-        }
-
-        if (myComm->rank() == 0)
-            hin.pop();
+    else
+    {
+      all_nl.push_back((*rnl_it).second);
     }
 
-    if (expandYlm(aos.get(), all_nl, expandlm) != num)
-        myComm->barrier_and_abort(
-            "expandYlm doesn't match the number of basis.");
-    radFuncBuilder.finalize();
-    // aos->Rmax can be set small
-    // aos->setRmax(0);
-    aos->setBasisSetSize(-1);
-    app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax()
-              << std::endl
-              << "   Number of Radial functors = " << aos->RnlID.size()
-              << std::endl
-              << "   Basis size                = " << aos->getBasisSetSize()
-              << "\n\n";
-    return aos;
+    if (myComm->rank() == 0)
+      hin.pop();
+  }
+
+  if (expandYlm(aos.get(), all_nl, expandlm) != num)
+    myComm->barrier_and_abort("expandYlm doesn't match the number of basis.");
+  radFuncBuilder.finalize();
+  // aos->Rmax can be set small
+  // aos->setRmax(0);
+  aos->setBasisSetSize(-1);
+  app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax() << std::endl
+            << "   Number of Radial functors = " << aos->RnlID.size() << std::endl
+            << "   Basis size                = " << aos->getBasisSetSize() << "\n\n";
+  return aos;
 }
 
-template <typename COT>
-int
-AOBasisBuilderT<COT>::expandYlm(
-    COT* aos, std::vector<int>& all_nl, int expandlm)
+template<typename COT>
+int AOBasisBuilderT<COT>::expandYlm(COT* aos, std::vector<int>& all_nl, int expandlm)
 {
-    int num = 0;
-    if (expandlm == GAUSSIAN_EXPAND) {
-        app_log() << "Expanding Ylm according to Gaussian98" << std::endl;
-        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
-            int l = aos->RnlID[nl][q_l];
-            app_log() << "Adding " << 2 * l + 1
-                      << " spherical orbitals for l= " << l << std::endl;
-            switch (l) {
-            case (0):
-                aos->LM[num] = aos->Ylm.index(0, 0);
-                aos->NL[num] = nl;
-                num++;
-                break;
-            case (1): // px(1),py(-1),pz(0)
-                aos->LM[num] = aos->Ylm.index(1, 1);
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = aos->Ylm.index(1, -1);
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = aos->Ylm.index(1, 0);
-                aos->NL[num] = nl;
-                num++;
-                break;
-            default: // 0,1,-1,2,-2,...,l,-l
-                aos->LM[num] = aos->Ylm.index(l, 0);
-                aos->NL[num] = nl;
-                num++;
-                for (int tm = 1; tm <= l; tm++) {
-                    aos->LM[num] = aos->Ylm.index(l, tm);
-                    aos->NL[num] = nl;
-                    num++;
-                    aos->LM[num] = aos->Ylm.index(l, -tm);
-                    aos->NL[num] = nl;
-                    num++;
-                }
-                break;
-            }
+  int num = 0;
+  if (expandlm == GAUSSIAN_EXPAND)
+  {
+    app_log() << "Expanding Ylm according to Gaussian98" << std::endl;
+    for (int nl = 0; nl < aos->RnlID.size(); nl++)
+    {
+      int l = aos->RnlID[nl][q_l];
+      app_log() << "Adding " << 2 * l + 1 << " spherical orbitals for l= " << l << std::endl;
+      switch (l)
+      {
+      case (0):
+        aos->LM[num] = aos->Ylm.index(0, 0);
+        aos->NL[num] = nl;
+        num++;
+        break;
+      case (1): // px(1),py(-1),pz(0)
+        aos->LM[num] = aos->Ylm.index(1, 1);
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = aos->Ylm.index(1, -1);
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = aos->Ylm.index(1, 0);
+        aos->NL[num] = nl;
+        num++;
+        break;
+      default: // 0,1,-1,2,-2,...,l,-l
+        aos->LM[num] = aos->Ylm.index(l, 0);
+        aos->NL[num] = nl;
+        num++;
+        for (int tm = 1; tm <= l; tm++)
+        {
+          aos->LM[num] = aos->Ylm.index(l, tm);
+          aos->NL[num] = nl;
+          num++;
+          aos->LM[num] = aos->Ylm.index(l, -tm);
+          aos->NL[num] = nl;
+          num++;
         }
+        break;
+      }
     }
-    else if (expandlm == MOD_NATURAL_EXPAND) {
-        app_log()
-            << "Expanding Ylm as L=1 as (1,-1,0) and L>1 as -l,-l+1,...,l-1,l"
-            << std::endl;
-        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
-            int l = aos->RnlID[nl][q_l];
-            app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals"
-                      << std::endl;
-            if (l == 1) {
-                // px(1),py(-1),pz(0)
-                aos->LM[num] = aos->Ylm.index(1, 1);
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = aos->Ylm.index(1, -1);
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = aos->Ylm.index(1, 0);
-                aos->NL[num] = nl;
-                num++;
-            }
-            else {
-                for (int tm = -l; tm <= l; tm++, num++) {
-                    aos->LM[num] = aos->Ylm.index(l, tm);
-                    aos->NL[num] = nl;
-                }
-            }
+  }
+  else if (expandlm == MOD_NATURAL_EXPAND)
+  {
+    app_log() << "Expanding Ylm as L=1 as (1,-1,0) and L>1 as -l,-l+1,...,l-1,l" << std::endl;
+    for (int nl = 0; nl < aos->RnlID.size(); nl++)
+    {
+      int l = aos->RnlID[nl][q_l];
+      app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals" << std::endl;
+      if (l == 1)
+      {
+        // px(1),py(-1),pz(0)
+        aos->LM[num] = aos->Ylm.index(1, 1);
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = aos->Ylm.index(1, -1);
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = aos->Ylm.index(1, 0);
+        aos->NL[num] = nl;
+        num++;
+      }
+      else
+      {
+        for (int tm = -l; tm <= l; tm++, num++)
+        {
+          aos->LM[num] = aos->Ylm.index(l, tm);
+          aos->NL[num] = nl;
         }
+      }
     }
-    else if (expandlm == NATURAL_EXPAND) {
-        app_log() << "Expanding Ylm as -l,-l+1,...,l-1,l" << std::endl;
-        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
-            int l = aos->RnlID[nl][q_l];
-            app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals"
-                      << std::endl;
-            for (int tm = -l; tm <= l; tm++, num++) {
-                aos->LM[num] = aos->Ylm.index(l, tm);
-                aos->NL[num] = nl;
-            }
-        }
+  }
+  else if (expandlm == NATURAL_EXPAND)
+  {
+    app_log() << "Expanding Ylm as -l,-l+1,...,l-1,l" << std::endl;
+    for (int nl = 0; nl < aos->RnlID.size(); nl++)
+    {
+      int l = aos->RnlID[nl][q_l];
+      app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals" << std::endl;
+      for (int tm = -l; tm <= l; tm++, num++)
+      {
+        aos->LM[num] = aos->Ylm.index(l, tm);
+        aos->NL[num] = nl;
+      }
     }
-    else if (expandlm == CARTESIAN_EXPAND) {
-        app_log() << "Expanding Ylm (angular function) according to Gamess "
-                     "using cartesian gaussians"
-                  << std::endl;
-        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
-            int l = aos->RnlID[nl][q_l];
-            app_log() << "Adding " << (l + 1) * (l + 2) / 2
-                      << " cartesian gaussian orbitals for l= " << l
-                      << std::endl;
-            int nbefore = 0;
-            for (int i = 0; i < l; i++)
-                nbefore += (i + 1) * (i + 2) / 2;
-            for (int i = 0; i < (l + 1) * (l + 2) / 2; i++) {
-                aos->LM[num] = nbefore + i;
-                aos->NL[num] = nl;
-                num++;
-            }
-        }
+  }
+  else if (expandlm == CARTESIAN_EXPAND)
+  {
+    app_log() << "Expanding Ylm (angular function) according to Gamess "
+                 "using cartesian gaussians"
+              << std::endl;
+    for (int nl = 0; nl < aos->RnlID.size(); nl++)
+    {
+      int l = aos->RnlID[nl][q_l];
+      app_log() << "Adding " << (l + 1) * (l + 2) / 2 << " cartesian gaussian orbitals for l= " << l << std::endl;
+      int nbefore = 0;
+      for (int i = 0; i < l; i++)
+        nbefore += (i + 1) * (i + 2) / 2;
+      for (int i = 0; i < (l + 1) * (l + 2) / 2; i++)
+      {
+        aos->LM[num] = nbefore + i;
+        aos->NL[num] = nl;
+        num++;
+      }
     }
-    else if (expandlm == DIRAC_CARTESIAN_EXPAND) {
-        app_log() << "Expanding Ylm (angular function) according to DIRAC "
-                     "using cartesian gaussians"
-                  << std::endl;
-        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
-            int l = aos->RnlID[nl][q_l];
-            app_log() << "Adding " << (l + 1) * (l + 2) / 2
-                      << " cartesian gaussian orbitals for l= " << l
-                      << std::endl;
-            int nbefore = 0;
-            for (int i = 0; i < l; i++)
-                nbefore += (i + 1) * (i + 2) / 2;
-            switch (l) {
-            case (0):
-                aos->LM[num] = nbefore + 0;
-                aos->NL[num] = nl;
-                num++;
-                break;
-            case (1):
-                aos->LM[num] = nbefore + 0;
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 1;
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 2;
-                aos->NL[num] = nl;
-                num++;
-                break;
-            case (2):
-                aos->LM[num] = nbefore + 0; // xx
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 3; // xy
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 4; // xz
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 1; // yy
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 5; // yz
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 2; // zz
-                aos->NL[num] = nl;
-                num++;
-                break;
-            case (3):
-                aos->LM[num] = nbefore + 0; // xxx
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 3; // xxy
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 4; // xxz
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 5; // xyy
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 9; // xyz
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 7; // xzz
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 1; // yyy
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 6; // yyz
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 8; // yzz
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 2; // zzz
-                aos->NL[num] = nl;
-                num++;
-                break;
-            case (4):
-                aos->LM[num] = nbefore + 0; // 400
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 3; // 310
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 4; // 301
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 9; // 220
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 12; // 211
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 10; // 202
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 5; // 130
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 13; // 121
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 14; // 112
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 7; // 103
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 1; // 040
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 6; // 031
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 11; // 022
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 8; // 013
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 2; // 004
-                aos->NL[num] = nl;
-                num++;
-                break;
-            case (5):
-                aos->LM[num] = nbefore + 0; // 500
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 3; // 410
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 4; // 401
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 9; // 320
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 15; // 311
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 10; // 302
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 11; // 230
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 18; // 221
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 19; // 212
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 13; // 203
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 5; // 140
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 16; // 131
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 20; // 122
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 17; // 113
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 7; // 104
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 1; // 050
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 6; // 041
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 12; // 032
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 14; // 023
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 8; // 014
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 2; // 005
-                aos->NL[num] = nl;
-                num++;
-                break;
-            case (6):
-                aos->LM[num] = nbefore + 0; // 600
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 3; // 510
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 4; // 501
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 9; // 420
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 15; // 411
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 10; // 402
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 18; // 330
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 21; // 321
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 22; // 312
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 19; // 303
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 11; // 240
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 23; // 231
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 27; // 222
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 25; // 213
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 13; // 204
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 5; // 150
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 16; // 141
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 24; // 132
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 26; // 123
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 17; // 114
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 7; // 105
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 1; // 060
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 6; // 051
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 12; // 042
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 20; // 033
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 14; // 024
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 8; // 015
-                aos->NL[num] = nl;
-                num++;
-                aos->LM[num] = nbefore + 2; // 006
-                aos->NL[num] = nl;
-                num++;
-                break;
-            default:
-                myComm->barrier_and_abort(
-                    "Cartesian Tensor only defined up to Lmax=6. Aborting\n");
-                break;
-            }
-        }
+  }
+  else if (expandlm == DIRAC_CARTESIAN_EXPAND)
+  {
+    app_log() << "Expanding Ylm (angular function) according to DIRAC "
+                 "using cartesian gaussians"
+              << std::endl;
+    for (int nl = 0; nl < aos->RnlID.size(); nl++)
+    {
+      int l = aos->RnlID[nl][q_l];
+      app_log() << "Adding " << (l + 1) * (l + 2) / 2 << " cartesian gaussian orbitals for l= " << l << std::endl;
+      int nbefore = 0;
+      for (int i = 0; i < l; i++)
+        nbefore += (i + 1) * (i + 2) / 2;
+      switch (l)
+      {
+      case (0):
+        aos->LM[num] = nbefore + 0;
+        aos->NL[num] = nl;
+        num++;
+        break;
+      case (1):
+        aos->LM[num] = nbefore + 0;
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 1;
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 2;
+        aos->NL[num] = nl;
+        num++;
+        break;
+      case (2):
+        aos->LM[num] = nbefore + 0; // xx
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 3; // xy
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 4; // xz
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 1; // yy
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 5; // yz
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 2; // zz
+        aos->NL[num] = nl;
+        num++;
+        break;
+      case (3):
+        aos->LM[num] = nbefore + 0; // xxx
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 3; // xxy
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 4; // xxz
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 5; // xyy
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 9; // xyz
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 7; // xzz
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 1; // yyy
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 6; // yyz
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 8; // yzz
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 2; // zzz
+        aos->NL[num] = nl;
+        num++;
+        break;
+      case (4):
+        aos->LM[num] = nbefore + 0; // 400
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 3; // 310
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 4; // 301
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 9; // 220
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 12; // 211
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 10; // 202
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 5; // 130
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 13; // 121
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 14; // 112
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 7; // 103
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 1; // 040
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 6; // 031
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 11; // 022
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 8; // 013
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 2; // 004
+        aos->NL[num] = nl;
+        num++;
+        break;
+      case (5):
+        aos->LM[num] = nbefore + 0; // 500
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 3; // 410
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 4; // 401
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 9; // 320
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 15; // 311
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 10; // 302
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 11; // 230
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 18; // 221
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 19; // 212
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 13; // 203
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 5; // 140
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 16; // 131
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 20; // 122
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 17; // 113
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 7; // 104
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 1; // 050
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 6; // 041
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 12; // 032
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 14; // 023
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 8; // 014
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 2; // 005
+        aos->NL[num] = nl;
+        num++;
+        break;
+      case (6):
+        aos->LM[num] = nbefore + 0; // 600
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 3; // 510
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 4; // 501
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 9; // 420
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 15; // 411
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 10; // 402
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 18; // 330
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 21; // 321
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 22; // 312
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 19; // 303
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 11; // 240
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 23; // 231
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 27; // 222
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 25; // 213
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 13; // 204
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 5; // 150
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 16; // 141
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 24; // 132
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 26; // 123
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 17; // 114
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 7; // 105
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 1; // 060
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 6; // 051
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 12; // 042
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 20; // 033
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 14; // 024
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 8; // 015
+        aos->NL[num] = nl;
+        num++;
+        aos->LM[num] = nbefore + 2; // 006
+        aos->NL[num] = nl;
+        num++;
+        break;
+      default:
+        myComm->barrier_and_abort("Cartesian Tensor only defined up to Lmax=6. Aborting\n");
+        break;
+      }
     }
-    else {
-        for (int ind = 0; ind < all_nl.size(); ind++) {
-            int nl = all_nl[ind];
-            int l = aos->RnlID[nl][q_l];
-            int m = aos->RnlID[nl][q_m];
-            // assign the index for real Spherical Harmonic with (l,m)
-            aos->LM[num] = aos->Ylm.index(l, m);
-            // assign the index for radial orbital with (n,l)
-            aos->NL[num] = nl;
-            // increment number of basis functions
-            num++;
-        }
+  }
+  else
+  {
+    for (int ind = 0; ind < all_nl.size(); ind++)
+    {
+      int nl = all_nl[ind];
+      int l  = aos->RnlID[nl][q_l];
+      int m  = aos->RnlID[nl][q_m];
+      // assign the index for real Spherical Harmonic with (l,m)
+      aos->LM[num] = aos->Ylm.index(l, m);
+      // assign the index for radial orbital with (n,l)
+      aos->NL[num] = nl;
+      // increment number of basis functions
+      num++;
     }
-    return num;
+  }
+  return num;
 }
 
-template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
-    SoaCartesianTensor<double>, double>>;
-template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
-    SoaCartesianTensor<double>, std::complex<double>>>;
-template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
-    SoaCartesianTensor<float>, float>>;
-template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
-    SoaCartesianTensor<float>, std::complex<float>>>;
-
-template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
-    SoaSphericalTensor<double>, double>>;
-template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
-    SoaSphericalTensor<double>, std::complex<double>>>;
-template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
-    SoaSphericalTensor<float>, float>>;
-template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
-    SoaSphericalTensor<float>, std::complex<float>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>, std::complex<float>>>;
 
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
-        SoaCartesianTensor<double>, double>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>, SoaCartesianTensor<double>, double>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
-        SoaCartesianTensor<double>, std::complex<double>>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>, SoaCartesianTensor<double>, std::complex<double>>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
-        SoaCartesianTensor<float>, float>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>, SoaCartesianTensor<float>, float>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
-        SoaCartesianTensor<float>, std::complex<float>>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>, SoaCartesianTensor<float>, std::complex<float>>>;
 
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
-        SoaSphericalTensor<double>, double>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>, SoaSphericalTensor<double>, double>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
-        SoaSphericalTensor<double>, std::complex<double>>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>, SoaSphericalTensor<double>, std::complex<double>>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
-        SoaSphericalTensor<float>, float>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>, SoaSphericalTensor<float>, float>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
-        SoaSphericalTensor<float>, std::complex<float>>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>, SoaSphericalTensor<float>, std::complex<float>>>;
 
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
-        SoaCartesianTensor<double>, double>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>, SoaCartesianTensor<double>, double>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
-        SoaCartesianTensor<double>, std::complex<double>>>;
-template class AOBasisBuilderT<SoaAtomicBasisSetT<
-    MultiFunctorAdapter<SlaterCombo<float>>, SoaCartesianTensor<float>, float>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>, SoaCartesianTensor<double>, std::complex<double>>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
-        SoaCartesianTensor<float>, std::complex<float>>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>, SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>, SoaCartesianTensor<float>, std::complex<float>>>;
 
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
-        SoaSphericalTensor<double>, double>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>, SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>, SoaSphericalTensor<double>, std::complex<double>>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
-        SoaSphericalTensor<double>, std::complex<double>>>;
-template class AOBasisBuilderT<SoaAtomicBasisSetT<
-    MultiFunctorAdapter<SlaterCombo<float>>, SoaSphericalTensor<float>, float>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>, SoaSphericalTensor<float>, float>>;
 template class AOBasisBuilderT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
-        SoaSphericalTensor<float>, std::complex<float>>>;
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>, SoaSphericalTensor<float>, std::complex<float>>>;
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp
index 8ae6df22620..d265cb9959b 100644
--- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp
+++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.cpp
@@ -27,772 +27,794 @@
 
 namespace qmcplusplus
 {
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::splitPhiEta(int center,
-    const std::vector<bool>& corrCenter, LCAOrbitalSetT<T>& Phi,
-    LCAOrbitalSetT<T>& Eta)
+template<typename T>
+void CuspCorrectionConstructionT<T>::splitPhiEta(int center,
+                                                 const std::vector<bool>& corrCenter,
+                                                 LCAOrbitalSetT<T>& Phi,
+                                                 LCAOrbitalSetT<T>& Eta)
 {
-    std::vector<bool> is_s_orbital(Phi.myBasisSet->BasisSetSize, false);
-    std::vector<bool> correct_this_center(corrCenter.size(), false);
-    correct_this_center[center] = corrCenter[center];
-
-    Phi.myBasisSet->queryOrbitalsForSType(correct_this_center, is_s_orbital);
-
-    int nOrbs = Phi.getOrbitalSetSize();
-    int bss = Phi.getBasisSetSize();
-
-    for (int i = 0; i < bss; i++) {
-        if (is_s_orbital[i]) {
-            auto& cref(*(Eta.C));
-            for (int k = 0; k < nOrbs; k++)
-                cref(k, i) = 0.0; // Eta->C(k,i) = 0.0;
-        }
-        else {
-            auto& cref(*(Phi.C));
-            for (int k = 0; k < nOrbs; k++)
-                cref(k, i) = 0.0; // Phi->C(k,i) = 0.0;
-        }
+  std::vector<bool> is_s_orbital(Phi.myBasisSet->BasisSetSize, false);
+  std::vector<bool> correct_this_center(corrCenter.size(), false);
+  correct_this_center[center] = corrCenter[center];
+
+  Phi.myBasisSet->queryOrbitalsForSType(correct_this_center, is_s_orbital);
+
+  int nOrbs = Phi.getOrbitalSetSize();
+  int bss   = Phi.getBasisSetSize();
+
+  for (int i = 0; i < bss; i++)
+  {
+    if (is_s_orbital[i])
+    {
+      auto& cref(*(Eta.C));
+      for (int k = 0; k < nOrbs; k++)
+        cref(k, i) = 0.0; // Eta->C(k,i) = 0.0;
+    }
+    else
+    {
+      auto& cref(*(Phi.C));
+      for (int k = 0; k < nOrbs; k++)
+        cref(k, i) = 0.0; // Phi->C(k,i) = 0.0;
     }
+  }
 }
 
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::removeSTypeOrbitals(
-    const std::vector<bool>& corrCenter, LCAOrbitalSetT<T>& Phi)
+template<typename T>
+void CuspCorrectionConstructionT<T>::removeSTypeOrbitals(const std::vector<bool>& corrCenter, LCAOrbitalSetT<T>& Phi)
 {
-    std::vector<bool> is_s_orbital(Phi.myBasisSet->BasisSetSize, false);
+  std::vector<bool> is_s_orbital(Phi.myBasisSet->BasisSetSize, false);
 
-    Phi.myBasisSet->queryOrbitalsForSType(corrCenter, is_s_orbital);
+  Phi.myBasisSet->queryOrbitalsForSType(corrCenter, is_s_orbital);
 
-    int nOrbs = Phi.getOrbitalSetSize();
-    int bss = Phi.getBasisSetSize();
+  int nOrbs = Phi.getOrbitalSetSize();
+  int bss   = Phi.getBasisSetSize();
 
-    for (int i = 0; i < bss; i++) {
-        if (is_s_orbital[i]) {
-            auto& cref(*(Phi.C));
-            for (int k = 0; k < nOrbs; k++)
-                cref(k, i) = 0.0;
-        }
+  for (int i = 0; i < bss; i++)
+  {
+    if (is_s_orbital[i])
+    {
+      auto& cref(*(Phi.C));
+      for (int k = 0; k < nOrbs; k++)
+        cref(k, i) = 0.0;
     }
+  }
 }
 
 // Will be the corrected value for r < rc and the original wavefunction for r >
 // rc
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::computeRadialPhiBar(ParticleSetT<T>* targetP,
-    ParticleSetT<T>* sourceP, int curOrb_, int curCenter_, SPOSetT<T>* Phi,
-    Vector<RealType>& xgrid, Vector<RealType>& rad_orb,
-    const CuspCorrectionParametersT<T>& data)
+template<typename T>
+void CuspCorrectionConstructionT<T>::computeRadialPhiBar(ParticleSetT<T>* targetP,
+                                                         ParticleSetT<T>* sourceP,
+                                                         int curOrb_,
+                                                         int curCenter_,
+                                                         SPOSetT<T>* Phi,
+                                                         Vector<RealType>& xgrid,
+                                                         Vector<RealType>& rad_orb,
+                                                         const CuspCorrectionParametersT<T>& data)
 {
-    OneMolecularOrbitalT<T> phiMO(targetP, sourceP, Phi);
-    phiMO.changeOrbital(curCenter_, curOrb_);
-    CuspCorrectionT<T> cusp(data);
-
-    for (int i = 0; i < xgrid.size(); i++) {
-        rad_orb[i] = phiBar(cusp, xgrid[i], phiMO);
-    }
+  OneMolecularOrbitalT<T> phiMO(targetP, sourceP, Phi);
+  phiMO.changeOrbital(curCenter_, curOrb_);
+  CuspCorrectionT<T> cusp(data);
+
+  for (int i = 0; i < xgrid.size(); i++)
+  {
+    rad_orb[i] = phiBar(cusp, xgrid[i], phiMO);
+  }
 }
 
 // Get the ideal local energy at one point
 // Eq. 17 in the paper.  Coefficients are taken from the paper.
-template <typename T>
-typename CuspCorrectionConstructionT<T>::RealType
-CuspCorrectionConstructionT<T>::getOneIdealLocalEnergy(
-    RealType r, RealType Z, RealType beta0)
+template<typename T>
+typename CuspCorrectionConstructionT<T>::RealType CuspCorrectionConstructionT<T>::getOneIdealLocalEnergy(RealType r,
+                                                                                                         RealType Z,
+                                                                                                         RealType beta0)
 {
-    RealType beta[7] = {
-        3.25819, -15.0126, 33.7308, -42.8705, 31.2276, -12.1316, 1.94692};
-    RealType idealEL = beta0;
-    RealType r1 = r * r;
-    for (int i = 0; i < 7; i++) {
-        idealEL += beta[i] * r1;
-        r1 *= r;
-    }
-    return idealEL * Z * Z;
+  RealType beta[7] = {3.25819, -15.0126, 33.7308, -42.8705, 31.2276, -12.1316, 1.94692};
+  RealType idealEL = beta0;
+  RealType r1      = r * r;
+  for (int i = 0; i < 7; i++)
+  {
+    idealEL += beta[i] * r1;
+    r1 *= r;
+  }
+  return idealEL * Z * Z;
 }
 
 // Get the ideal local energy for a vector of positions
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::getIdealLocalEnergy(const ValueVector& pos,
-    RealType Z, RealType Rc, RealType ELorigAtRc, ValueVector& ELideal)
+template<typename T>
+void CuspCorrectionConstructionT<T>::getIdealLocalEnergy(const ValueVector& pos,
+                                                         RealType Z,
+                                                         RealType Rc,
+                                                         RealType ELorigAtRc,
+                                                         ValueVector& ELideal)
 {
-    // assert(pos.size() == ELideal.size()
-    RealType beta0 = 0.0;
-    RealType tmp = getOneIdealLocalEnergy(Rc, Z, beta0);
-    beta0 = (ELorigAtRc - tmp) / (Z * Z);
-    for (int i = 0; i < pos.size(); i++) {
-        ELideal[i] = getOneIdealLocalEnergy(pos[i], Z, beta0);
-    }
+  // assert(pos.size() == ELideal.size()
+  RealType beta0 = 0.0;
+  RealType tmp   = getOneIdealLocalEnergy(Rc, Z, beta0);
+  beta0          = (ELorigAtRc - tmp) / (Z * Z);
+  for (int i = 0; i < pos.size(); i++)
+  {
+    ELideal[i] = getOneIdealLocalEnergy(pos[i], Z, beta0);
+  }
 }
 
 // Evaluate constraints. Equations 9-13 in the paper.
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::evalX(RealType valRc, GradType gradRc,
-    ValueType lapRc, RealType Rc, RealType Z, RealType C, RealType valAtZero,
-    RealType eta0, TinyVector<ValueType, 5>& X)
+template<typename T>
+void CuspCorrectionConstructionT<T>::evalX(RealType valRc,
+                                           GradType gradRc,
+                                           ValueType lapRc,
+                                           RealType Rc,
+                                           RealType Z,
+                                           RealType C,
+                                           RealType valAtZero,
+                                           RealType eta0,
+                                           TinyVector<ValueType, 5>& X)
 {
-    X[0] = std::log(std::abs(valRc - C));
-    X[1] = gradRc[0] / (valRc - C);
-    X[2] = (lapRc - 2.0 * gradRc[0] / Rc) / (valRc - C);
-    X[3] = -Z * (valAtZero + eta0) / (valAtZero - C);
-    X[4] = std::log(std::abs(valAtZero - C));
+  X[0] = std::log(std::abs(valRc - C));
+  X[1] = gradRc[0] / (valRc - C);
+  X[2] = (lapRc - 2.0 * gradRc[0] / Rc) / (valRc - C);
+  X[3] = -Z * (valAtZero + eta0) / (valAtZero - C);
+  X[4] = std::log(std::abs(valAtZero - C));
 }
 
 // Compute polynomial coefficients from constraints.  Eq. 14 in the paper.
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::X2alpha(const TinyVector<ValueType, 5>& X,
-    RealType Rc, TinyVector<ValueType, 5>& alpha)
+template<typename T>
+void CuspCorrectionConstructionT<T>::X2alpha(const TinyVector<ValueType, 5>& X,
+                                             RealType Rc,
+                                             TinyVector<ValueType, 5>& alpha)
 {
-    RealType RcInv = 1.0 / Rc, RcInv2 = RcInv * RcInv;
-    alpha[0] = X[4];
-    alpha[1] = X[3];
-    alpha[2] = 6.0 * X[0] * RcInv2 - 3.0 * X[1] * RcInv + X[2] * 0.5 -
-        3.0 * X[3] * RcInv - 6.0 * X[4] * RcInv2 - 0.5 * X[1] * X[1];
-    alpha[3] = -8.0 * X[0] * RcInv2 * RcInv + 5.0 * X[1] * RcInv2 -
-        X[2] * RcInv + 3.0 * X[3] * RcInv2 + 8.0 * X[4] * RcInv2 * RcInv +
-        X[1] * X[1] * RcInv;
-    alpha[4] = 3.0 * X[0] * RcInv2 * RcInv2 - 2.0 * X[1] * RcInv2 * RcInv +
-        0.5 * X[2] * RcInv2 - X[3] * RcInv2 * RcInv -
-        3.0 * X[4] * RcInv2 * RcInv2 - 0.5 * X[1] * X[1] * RcInv2;
+  RealType RcInv = 1.0 / Rc, RcInv2 = RcInv * RcInv;
+  alpha[0] = X[4];
+  alpha[1] = X[3];
+  alpha[2] = 6.0 * X[0] * RcInv2 - 3.0 * X[1] * RcInv + X[2] * 0.5 - 3.0 * X[3] * RcInv - 6.0 * X[4] * RcInv2 -
+      0.5 * X[1] * X[1];
+  alpha[3] = -8.0 * X[0] * RcInv2 * RcInv + 5.0 * X[1] * RcInv2 - X[2] * RcInv + 3.0 * X[3] * RcInv2 +
+      8.0 * X[4] * RcInv2 * RcInv + X[1] * X[1] * RcInv;
+  alpha[4] = 3.0 * X[0] * RcInv2 * RcInv2 - 2.0 * X[1] * RcInv2 * RcInv + 0.5 * X[2] * RcInv2 - X[3] * RcInv2 * RcInv -
+      3.0 * X[4] * RcInv2 * RcInv2 - 0.5 * X[1] * X[1] * RcInv2;
 }
 
 // Eq. 16 in the paper.
-template <typename T>
-typename CuspCorrectionConstructionT<T>::RealType
-CuspCorrectionConstructionT<T>::getZeff(
-    RealType Z, RealType etaAtZero, RealType phiBarAtZero)
+template<typename T>
+typename CuspCorrectionConstructionT<T>::RealType CuspCorrectionConstructionT<T>::getZeff(RealType Z,
+                                                                                          RealType etaAtZero,
+                                                                                          RealType phiBarAtZero)
 {
-    return Z * (1.0 + etaAtZero / phiBarAtZero);
+  return Z * (1.0 + etaAtZero / phiBarAtZero);
 }
 
-template <typename T>
-typename CuspCorrectionConstructionT<T>::RealType
-CuspCorrectionConstructionT<T>::phiBar(
-    const CuspCorrectionT<T>& cusp, RealType r, OneMolecularOrbitalT<T>& phiMO)
+template<typename T>
+typename CuspCorrectionConstructionT<T>::RealType CuspCorrectionConstructionT<T>::phiBar(const CuspCorrectionT<T>& cusp,
+                                                                                         RealType r,
+                                                                                         OneMolecularOrbitalT<T>& phiMO)
 {
-    if (r <= cusp.cparam.Rc)
-        return cusp.cparam.C + cusp.Rr(r);
-    else
-        return phiMO.phi(r);
+  if (r <= cusp.cparam.Rc)
+    return cusp.cparam.C + cusp.Rr(r);
+  else
+    return phiMO.phi(r);
 }
 
 // Compute the effective one-electron local energy at a vector of points.
 // Eq. 15 in the paper for r < Rc.  Normal local energy for R > Rc.
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::getCurrentLocalEnergy(const ValueVector& pos,
-    RealType Zeff, RealType Rc, RealType originalELatRc,
-    CuspCorrectionT<T>& cusp, OneMolecularOrbitalT<T>& phiMO,
-    ValueVector& ELcurr)
+template<typename T>
+void CuspCorrectionConstructionT<T>::getCurrentLocalEnergy(const ValueVector& pos,
+                                                           RealType Zeff,
+                                                           RealType Rc,
+                                                           RealType originalELatRc,
+                                                           CuspCorrectionT<T>& cusp,
+                                                           OneMolecularOrbitalT<T>& phiMO,
+                                                           ValueVector& ELcurr)
 {
-    // assert(pos.size() == ELcurr.size());
-    ValueType val;
-    GradType grad;
-    ValueType lap;
-    phiMO.phi_vgl(Rc, val, grad, lap);
-    RealType dE = originalELatRc - (-0.5 * lap / val - Zeff / Rc);
-    for (int i = 0; i < pos.size(); i++) {
-        RealType r = pos[i];
-        // prevent NaN's if phiBar is zero
-        RealType offset = 1e-12;
-        if (r <= Rc) {
-            RealType dp = cusp.dpr(r);
-            ELcurr[i] = -0.5 * cusp.Rr(r) *
-                    (2.0 * dp / r + cusp.d2pr(r) + dp * dp) /
-                    (offset + phiBar(cusp, r, phiMO)) -
-                Zeff / r + dE;
-        }
-        else {
-            phiMO.phi_vgl(pos[i], val, grad, lap);
-            ELcurr[i] = -0.5 * lap / val - Zeff / r + dE;
-        }
+  // assert(pos.size() == ELcurr.size());
+  ValueType val;
+  GradType grad;
+  ValueType lap;
+  phiMO.phi_vgl(Rc, val, grad, lap);
+  RealType dE = originalELatRc - (-0.5 * lap / val - Zeff / Rc);
+  for (int i = 0; i < pos.size(); i++)
+  {
+    RealType r = pos[i];
+    // prevent NaN's if phiBar is zero
+    RealType offset = 1e-12;
+    if (r <= Rc)
+    {
+      RealType dp = cusp.dpr(r);
+      ELcurr[i]   = -0.5 * cusp.Rr(r) * (2.0 * dp / r + cusp.d2pr(r) + dp * dp) / (offset + phiBar(cusp, r, phiMO)) -
+          Zeff / r + dE;
     }
+    else
+    {
+      phiMO.phi_vgl(pos[i], val, grad, lap);
+      ELcurr[i] = -0.5 * lap / val - Zeff / r + dE;
+    }
+  }
 }
 
 // Return value is local energy at Rc
-template <typename T>
-typename CuspCorrectionConstructionT<T>::RealType
-CuspCorrectionConstructionT<T>::getOriginalLocalEnergy(const ValueVector& pos,
-    RealType Zeff, RealType Rc, OneMolecularOrbitalT<T>& phiMO,
+template<typename T>
+typename CuspCorrectionConstructionT<T>::RealType CuspCorrectionConstructionT<T>::getOriginalLocalEnergy(
+    const ValueVector& pos,
+    RealType Zeff,
+    RealType Rc,
+    OneMolecularOrbitalT<T>& phiMO,
     ValueVector& ELorig)
 {
-    // assert(pos.size() == ELorig.size());
-
-    ValueType val;
-    GradType grad;
-    ValueType lap;
-    for (int i = 0; i < pos.size(); i++) {
-        RealType r = pos[i];
-        phiMO.phi_vgl(r, val, grad, lap);
-        ELorig[i] = -0.5 * lap / val - Zeff / r;
-    }
-
-    phiMO.phi_vgl(Rc, val, grad, lap);
-    return -0.5 * lap / val - Zeff / Rc;
+  // assert(pos.size() == ELorig.size());
+
+  ValueType val;
+  GradType grad;
+  ValueType lap;
+  for (int i = 0; i < pos.size(); i++)
+  {
+    RealType r = pos[i];
+    phiMO.phi_vgl(r, val, grad, lap);
+    ELorig[i] = -0.5 * lap / val - Zeff / r;
+  }
+
+  phiMO.phi_vgl(Rc, val, grad, lap);
+  return -0.5 * lap / val - Zeff / Rc;
 }
 
 // Sum of squares difference between the current local energy and the ideal
 // local energy.
 //  This is the objective function to minimize.
-template <typename T>
-typename CuspCorrectionConstructionT<T>::RealType
-CuspCorrectionConstructionT<T>::getELchi2(
-    const ValueVector& ELcurr, const ValueVector& ELideal)
+template<typename T>
+typename CuspCorrectionConstructionT<T>::RealType CuspCorrectionConstructionT<T>::getELchi2(const ValueVector& ELcurr,
+                                                                                            const ValueVector& ELideal)
 {
-    assert(ELcurr.size() == ELideal.size());
-
-    RealType chi2 = 0.0;
-    for (int i = 0; i < ELcurr.size(); i++) {
-        RealType diff = ELcurr[i] - ELideal[i];
-        chi2 += diff * diff;
-    }
-    return chi2;
+  assert(ELcurr.size() == ELideal.size());
+
+  RealType chi2 = 0.0;
+  for (int i = 0; i < ELcurr.size(); i++)
+  {
+    RealType diff = ELcurr[i] - ELideal[i];
+    chi2 += diff * diff;
+  }
+  return chi2;
 }
 
 //  Compute the chi squared distance given a value for phi at zero.
-template <typename T>
-typename CuspCorrectionConstructionT<T>::RealType
-CuspCorrectionConstructionT<T>::evaluateForPhi0Body(RealType phi0,
-    ValueVector& pos, ValueVector& ELcurr, ValueVector& ELideal,
-    CuspCorrectionT<T>& cusp, OneMolecularOrbitalT<T>& phiMO,
-    ValGradLap phiAtRc, RealType etaAtZero, RealType ELorigAtRc, RealType Z)
+template<typename T>
+typename CuspCorrectionConstructionT<T>::RealType CuspCorrectionConstructionT<T>::evaluateForPhi0Body(
+    RealType phi0,
+    ValueVector& pos,
+    ValueVector& ELcurr,
+    ValueVector& ELideal,
+    CuspCorrectionT<T>& cusp,
+    OneMolecularOrbitalT<T>& phiMO,
+    ValGradLap phiAtRc,
+    RealType etaAtZero,
+    RealType ELorigAtRc,
+    RealType Z)
 {
-    cusp.cparam.sg = phi0 > 0.0 ? 1.0 : -1.0;
-    cusp.cparam.C = (phiAtRc.val * phi0 < 0.0) ? 1.5 * phiAtRc.val : 0.0;
-    TinyVector<ValueType, 5> X;
-    evalX(phiAtRc.val, phiAtRc.grad, phiAtRc.lap, cusp.cparam.Rc, Z,
-        cusp.cparam.C, phi0, etaAtZero, X);
-    X2alpha(X, cusp.cparam.Rc, cusp.cparam.alpha);
-    RealType Zeff = getZeff(Z, etaAtZero, phiBar(cusp, 0.0, phiMO));
-    getCurrentLocalEnergy(
-        pos, Zeff, cusp.cparam.Rc, ELorigAtRc, cusp, phiMO, ELcurr);
-    RealType chi2 = getELchi2(ELcurr, ELideal);
-    return chi2;
+  cusp.cparam.sg = phi0 > 0.0 ? 1.0 : -1.0;
+  cusp.cparam.C  = (phiAtRc.val * phi0 < 0.0) ? 1.5 * phiAtRc.val : 0.0;
+  TinyVector<ValueType, 5> X;
+  evalX(phiAtRc.val, phiAtRc.grad, phiAtRc.lap, cusp.cparam.Rc, Z, cusp.cparam.C, phi0, etaAtZero, X);
+  X2alpha(X, cusp.cparam.Rc, cusp.cparam.alpha);
+  RealType Zeff = getZeff(Z, etaAtZero, phiBar(cusp, 0.0, phiMO));
+  getCurrentLocalEnergy(pos, Zeff, cusp.cparam.Rc, ELorigAtRc, cusp, phiMO, ELcurr);
+  RealType chi2 = getELchi2(ELcurr, ELideal);
+  return chi2;
 }
 
 // Optimize free parameter (value of phi at zero) to minimize distance to ideal
 // local energy. Output is return value and parameter values are in cusp.cparam
-template <typename T>
-typename CuspCorrectionConstructionT<T>::RealType
-CuspCorrectionConstructionT<T>::minimizeForPhiAtZero(CuspCorrectionT<T>& cusp,
-    OneMolecularOrbitalT<T>& phiMO, RealType Z, RealType eta0, ValueVector& pos,
-    ValueVector& ELcurr, ValueVector& ELideal, RealType start_phi0)
+template<typename T>
+typename CuspCorrectionConstructionT<T>::RealType CuspCorrectionConstructionT<T>::minimizeForPhiAtZero(
+    CuspCorrectionT<T>& cusp,
+    OneMolecularOrbitalT<T>& phiMO,
+    RealType Z,
+    RealType eta0,
+    ValueVector& pos,
+    ValueVector& ELcurr,
+    ValueVector& ELideal,
+    RealType start_phi0)
 {
-    ValGradLap vglAtRc;
-    ValueVector tmp_pos(0);
-    ValueVector ELorig(0);
-    RealType Zeff = getZeff(Z, eta0, phiBar(cusp, 0.0, phiMO));
-
-    RealType ELorigAtRc =
-        getOriginalLocalEnergy(tmp_pos, Zeff, cusp.cparam.Rc, phiMO, ELorig);
-    getIdealLocalEnergy(pos, Z, cusp.cparam.Rc, ELorigAtRc, ELideal);
-    phiMO.phi_vgl(cusp.cparam.Rc, vglAtRc.val, vglAtRc.grad, vglAtRc.lap);
-
-    Bracket_min_t<RealType> bracket(start_phi0, 0.0, 0.0, false);
-    try {
-        bracket = bracket_minimum(
-            [&](RealType x) -> RealType {
-                return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO,
-                    vglAtRc, eta0, ELorigAtRc, Z);
-            },
-            start_phi0);
-    }
-    catch (const std::runtime_error& e) {
-        APP_ABORT("Bracketing minimum failed for finding phi0. \n");
-    }
-
-    auto min_res = find_minimum(
+  ValGradLap vglAtRc;
+  ValueVector tmp_pos(0);
+  ValueVector ELorig(0);
+  RealType Zeff = getZeff(Z, eta0, phiBar(cusp, 0.0, phiMO));
+
+  RealType ELorigAtRc = getOriginalLocalEnergy(tmp_pos, Zeff, cusp.cparam.Rc, phiMO, ELorig);
+  getIdealLocalEnergy(pos, Z, cusp.cparam.Rc, ELorigAtRc, ELideal);
+  phiMO.phi_vgl(cusp.cparam.Rc, vglAtRc.val, vglAtRc.grad, vglAtRc.lap);
+
+  Bracket_min_t<RealType> bracket(start_phi0, 0.0, 0.0, false);
+  try
+  {
+    bracket = bracket_minimum(
         [&](RealType x) -> RealType {
-            return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO,
-                vglAtRc, eta0, ELorigAtRc, Z);
+          return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO, vglAtRc, eta0, ELorigAtRc, Z);
         },
-        bracket);
-
-    start_phi0 = min_res.first;
-
-    return min_res.second;
+        start_phi0);
+  }
+  catch (const std::runtime_error& e)
+  {
+    APP_ABORT("Bracketing minimum failed for finding phi0. \n");
+  }
+
+  auto min_res = find_minimum(
+      [&](RealType x) -> RealType {
+        return evaluateForPhi0Body(x, pos, ELcurr, ELideal, cusp, phiMO, vglAtRc, eta0, ELorigAtRc, Z);
+      },
+      bracket);
+
+  start_phi0 = min_res.first;
+
+  return min_res.second;
 }
 
 // Optimize the cutoff radius.  There is an inner loop optimizing for phi0 for
 // each value of Rc. Elcurr and ELideal are expected to have the correct size on
 // input (same size as pos) Output is parameter values in cusp.cparam
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::minimizeForRc(CuspCorrectionT<T>& cusp,
-    OneMolecularOrbitalT<T>& phiMO, RealType Z, RealType Rc_init,
-    RealType Rc_max, RealType eta0, ValueVector& pos, ValueVector& ELcurr,
-    ValueVector& ELideal)
+template<typename T>
+void CuspCorrectionConstructionT<T>::minimizeForRc(CuspCorrectionT<T>& cusp,
+                                                   OneMolecularOrbitalT<T>& phiMO,
+                                                   RealType Z,
+                                                   RealType Rc_init,
+                                                   RealType Rc_max,
+                                                   RealType eta0,
+                                                   ValueVector& pos,
+                                                   ValueVector& ELcurr,
+                                                   ValueVector& ELideal)
 {
-    Bracket_min_t<RealType> bracket(Rc_init, 0.0, 0.0, false);
-    RealType start_phi0 = phiMO.phi(0.0);
-    try {
-        bracket = bracket_minimum(
-            [&](RealType x) -> RealType {
-                cusp.cparam.Rc = x;
-                return minimizeForPhiAtZero(
-                    cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0);
-            },
-            Rc_init, Rc_max);
-    }
-    catch (const std::runtime_error& e) {
-        APP_ABORT("Bracketing minimum failed for finding rc. \n");
-    }
-
-    if (bracket.success) {
-        auto min_res = find_minimum(
-            [&](RealType x) -> RealType {
-                cusp.cparam.Rc = x;
-                return minimizeForPhiAtZero(
-                    cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0);
-            },
-            bracket);
-    }
-    else {
-        cusp.cparam.Rc = bracket.a;
-        minimizeForPhiAtZero(
-            cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0);
-    }
+  Bracket_min_t<RealType> bracket(Rc_init, 0.0, 0.0, false);
+  RealType start_phi0 = phiMO.phi(0.0);
+  try
+  {
+    bracket = bracket_minimum(
+        [&](RealType x) -> RealType {
+          cusp.cparam.Rc = x;
+          return minimizeForPhiAtZero(cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0);
+        },
+        Rc_init, Rc_max);
+  }
+  catch (const std::runtime_error& e)
+  {
+    APP_ABORT("Bracketing minimum failed for finding rc. \n");
+  }
+
+  if (bracket.success)
+  {
+    auto min_res = find_minimum(
+        [&](RealType x) -> RealType {
+          cusp.cparam.Rc = x;
+          return minimizeForPhiAtZero(cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0);
+        },
+        bracket);
+  }
+  else
+  {
+    cusp.cparam.Rc = bracket.a;
+    minimizeForPhiAtZero(cusp, phiMO, Z, eta0, pos, ELcurr, ELideal, start_phi0);
+  }
 }
 
 // Modifies orbital set lcwc
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::applyCuspCorrection(
-    const Matrix<CuspCorrectionParametersT<T>>& info,
-    ParticleSetT<T>& targetPtcl, ParticleSetT<T>& sourcePtcl,
-    LCAOrbitalSetT<T>& lcao, SoaCuspCorrectionT<T>& cusp, const std::string& id)
+template<typename T>
+void CuspCorrectionConstructionT<T>::applyCuspCorrection(const Matrix<CuspCorrectionParametersT<T>>& info,
+                                                         ParticleSetT<T>& targetPtcl,
+                                                         ParticleSetT<T>& sourcePtcl,
+                                                         LCAOrbitalSetT<T>& lcao,
+                                                         SoaCuspCorrectionT<T>& cusp,
+                                                         const std::string& id)
 {
-    const int num_centers = info.rows();
-    const int orbital_set_size = info.cols();
-    using RealType = typename SPOSetT<T>::RealType;
-
-    NewTimer& cuspApplyTimer = createGlobalTimer(
-        "CuspCorrectionConstruction::applyCuspCorrection", timer_level_medium);
-
-    ScopedTimer cuspApplyTimerWrapper(cuspApplyTimer);
-
-    LCAOrbitalSetT<T> phi("phi",
-        std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(
-            lcao.myBasisSet->makeClone()));
-    phi.setOrbitalSetSize(lcao.getOrbitalSetSize());
-
-    LCAOrbitalSetT<T> eta("eta",
-        std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(
-            lcao.myBasisSet->makeClone()));
-    eta.setOrbitalSetSize(lcao.getOrbitalSetSize());
-
-    std::vector<bool> corrCenter(num_centers, "true");
-
-    // What's this grid's lifespan?  Why on the heap?
-    auto radial_grid = std::make_unique<LogGrid<RealType>>();
-    radial_grid->set(0.000001, 100.0, 1001);
-
-    Vector<RealType> xgrid;
-    Vector<RealType> rad_orb;
-    xgrid.resize(radial_grid->size());
-    rad_orb.resize(radial_grid->size());
-    for (int ig = 0; ig < radial_grid->size(); ig++) {
-        xgrid[ig] = radial_grid->r(ig);
-    }
-
-    for (int ic = 0; ic < num_centers; ic++) {
-        *eta.C = *lcao.C;
-        *phi.C = *lcao.C;
-
-        splitPhiEta(ic, corrCenter, phi, eta);
-
-        // loop over MO index - cot must be an array (of len MO size)
-        //   the loop is inside cot - in the multiqunitic
-        auto cot = std::make_unique<CuspCorrectionAtomicBasis<RealType>>();
-        cot->initializeRadialSet(*radial_grid, orbital_set_size);
-        // How is this useful?
-        //  cot->ID.resize(orbital_set_size);
-        //  for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) {
-        //    cot->ID[mo_idx] = mo_idx;
-        //  }
-
-        for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) {
-            computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi,
-                xgrid, rad_orb, info(ic, mo_idx));
-            RealType yprime_i = (rad_orb[1] - rad_orb[0]) /
-                (radial_grid->r(1) - radial_grid->r(0));
-            OneDimQuinticSpline<RealType> radial_spline(
-                radial_grid->makeClone(), rad_orb);
-            radial_spline.spline(0, yprime_i, rad_orb.size() - 1, 0.0);
-            cot->addSpline(mo_idx, radial_spline);
-
-            if (outputManager.isDebugActive()) {
-                // For testing against AoS output
-                // Output phiBar to soaOrbs.downdet.C0.MO0
-                int nElms = 500;
-                RealType dx = info(ic, mo_idx).Rc * 1.2 / nElms;
-                Vector<RealType> pos;
-                Vector<RealType> output_orb;
-                pos.resize(nElms);
-                output_orb.resize(nElms);
-                for (int i = 0; i < nElms; i++) {
-                    pos[i] = (i + 1.0) * dx;
-                }
-                computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi,
-                    pos, output_orb, info(ic, mo_idx));
-                std::string filename = "soaOrbs." + id + ".C" +
-                    std::to_string(ic) + ".MO" + std::to_string(mo_idx);
-                std::cout << "Writing to " << filename << std::endl;
-                std::ofstream out(filename.c_str());
-                out << "# r phiBar(r)" << std::endl;
-                for (int i = 0; i < nElms; i++) {
-                    out << pos[i] << "  " << output_orb[i] << std::endl;
-                }
-                out.close();
-            }
+  const int num_centers      = info.rows();
+  const int orbital_set_size = info.cols();
+  using RealType             = typename SPOSetT<T>::RealType;
+
+  NewTimer& cuspApplyTimer = createGlobalTimer("CuspCorrectionConstruction::applyCuspCorrection", timer_level_medium);
+
+  ScopedTimer cuspApplyTimerWrapper(cuspApplyTimer);
+
+  LCAOrbitalSetT<T> phi("phi", std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(lcao.myBasisSet->makeClone()));
+  phi.setOrbitalSetSize(lcao.getOrbitalSetSize());
+
+  LCAOrbitalSetT<T> eta("eta", std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(lcao.myBasisSet->makeClone()));
+  eta.setOrbitalSetSize(lcao.getOrbitalSetSize());
+
+  std::vector<bool> corrCenter(num_centers, "true");
+
+  // What's this grid's lifespan?  Why on the heap?
+  auto radial_grid = std::make_unique<LogGrid<RealType>>();
+  radial_grid->set(0.000001, 100.0, 1001);
+
+  Vector<RealType> xgrid;
+  Vector<RealType> rad_orb;
+  xgrid.resize(radial_grid->size());
+  rad_orb.resize(radial_grid->size());
+  for (int ig = 0; ig < radial_grid->size(); ig++)
+  {
+    xgrid[ig] = radial_grid->r(ig);
+  }
+
+  for (int ic = 0; ic < num_centers; ic++)
+  {
+    *eta.C = *lcao.C;
+    *phi.C = *lcao.C;
+
+    splitPhiEta(ic, corrCenter, phi, eta);
+
+    // loop over MO index - cot must be an array (of len MO size)
+    //   the loop is inside cot - in the multiqunitic
+    auto cot = std::make_unique<CuspCorrectionAtomicBasis<RealType>>();
+    cot->initializeRadialSet(*radial_grid, orbital_set_size);
+    // How is this useful?
+    //  cot->ID.resize(orbital_set_size);
+    //  for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) {
+    //    cot->ID[mo_idx] = mo_idx;
+    //  }
+
+    for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++)
+    {
+      computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi, xgrid, rad_orb, info(ic, mo_idx));
+      RealType yprime_i = (rad_orb[1] - rad_orb[0]) / (radial_grid->r(1) - radial_grid->r(0));
+      OneDimQuinticSpline<RealType> radial_spline(radial_grid->makeClone(), rad_orb);
+      radial_spline.spline(0, yprime_i, rad_orb.size() - 1, 0.0);
+      cot->addSpline(mo_idx, radial_spline);
+
+      if (outputManager.isDebugActive())
+      {
+        // For testing against AoS output
+        // Output phiBar to soaOrbs.downdet.C0.MO0
+        int nElms   = 500;
+        RealType dx = info(ic, mo_idx).Rc * 1.2 / nElms;
+        Vector<RealType> pos;
+        Vector<RealType> output_orb;
+        pos.resize(nElms);
+        output_orb.resize(nElms);
+        for (int i = 0; i < nElms; i++)
+        {
+          pos[i] = (i + 1.0) * dx;
+        }
+        computeRadialPhiBar(&targetPtcl, &sourcePtcl, mo_idx, ic, &phi, pos, output_orb, info(ic, mo_idx));
+        std::string filename = "soaOrbs." + id + ".C" + std::to_string(ic) + ".MO" + std::to_string(mo_idx);
+        std::cout << "Writing to " << filename << std::endl;
+        std::ofstream out(filename.c_str());
+        out << "# r phiBar(r)" << std::endl;
+        for (int i = 0; i < nElms; i++)
+        {
+          out << pos[i] << "  " << output_orb[i] << std::endl;
         }
-        cusp.add(ic, std::move(cot));
+        out.close();
+      }
     }
-    removeSTypeOrbitals(corrCenter, lcao);
+    cusp.add(ic, std::move(cot));
+  }
+  removeSTypeOrbitals(corrCenter, lcao);
 }
 
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::generateCuspInfo(
-    Matrix<CuspCorrectionParametersT<T>>& info,
-    const ParticleSetT<T>& targetPtcl, const ParticleSetT<T>& sourcePtcl,
-    const LCAOrbitalSetT<T>& lcao, const std::string& id, Communicate& Comm)
+template<typename T>
+void CuspCorrectionConstructionT<T>::generateCuspInfo(Matrix<CuspCorrectionParametersT<T>>& info,
+                                                      const ParticleSetT<T>& targetPtcl,
+                                                      const ParticleSetT<T>& sourcePtcl,
+                                                      const LCAOrbitalSetT<T>& lcao,
+                                                      const std::string& id,
+                                                      Communicate& Comm)
 {
-    const int num_centers = info.rows();
-    const int orbital_set_size = info.cols();
-    using RealType = typename SPOSetT<T>::RealType;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
+  const int num_centers      = info.rows();
+  const int orbital_set_size = info.cols();
+  using RealType             = typename SPOSetT<T>::RealType;
+  using ValueVector          = typename SPOSetT<T>::ValueVector;
 
-    NewTimer& cuspCreateTimer = createGlobalTimer(
-        "CuspCorrectionConstruction::createCuspParameters", timer_level_medium);
-    NewTimer& splitPhiEtaTimer = createGlobalTimer(
-        "CuspCorrectionConstruction::splitPhiEta", timer_level_fine);
-    NewTimer& computeTimer = createGlobalTimer(
-        "CuspCorrectionConstruction::computeCorrection", timer_level_fine);
+  NewTimer& cuspCreateTimer = createGlobalTimer("CuspCorrectionConstruction::createCuspParameters", timer_level_medium);
+  NewTimer& splitPhiEtaTimer = createGlobalTimer("CuspCorrectionConstruction::splitPhiEta", timer_level_fine);
+  NewTimer& computeTimer     = createGlobalTimer("CuspCorrectionConstruction::computeCorrection", timer_level_fine);
 
-    ScopedTimer createCuspTimerWrapper(cuspCreateTimer);
+  ScopedTimer createCuspTimerWrapper(cuspCreateTimer);
 
-    LCAOrbitalSetT<T> phi("phi",
-        std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(
-            lcao.myBasisSet->makeClone()));
-    phi.setOrbitalSetSize(lcao.getOrbitalSetSize());
+  LCAOrbitalSetT<T> phi("phi", std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(lcao.myBasisSet->makeClone()));
+  phi.setOrbitalSetSize(lcao.getOrbitalSetSize());
 
-    LCAOrbitalSetT<T> eta("eta",
-        std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(
-            lcao.myBasisSet->makeClone()));
-    eta.setOrbitalSetSize(lcao.getOrbitalSetSize());
+  LCAOrbitalSetT<T> eta("eta", std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(lcao.myBasisSet->makeClone()));
+  eta.setOrbitalSetSize(lcao.getOrbitalSetSize());
 
-    std::vector<bool> corrCenter(num_centers, "true");
+  std::vector<bool> corrCenter(num_centers, "true");
 
-    using GridType = OneDimGridBase<RealType>;
-    int npts = 500;
+  using GridType = OneDimGridBase<RealType>;
+  int npts       = 500;
 
-    // Parallelize correction of MO's across MPI ranks
-    std::vector<int> offset;
-    FairDivideLow(orbital_set_size, Comm.size(), offset);
+  // Parallelize correction of MO's across MPI ranks
+  std::vector<int> offset;
+  FairDivideLow(orbital_set_size, Comm.size(), offset);
 
-    int start_mo = offset[Comm.rank()];
-    int end_mo = offset[Comm.rank() + 1];
-    app_log()
-        << "  Number of molecular orbitals to compute correction on this rank: "
-        << end_mo - start_mo << std::endl;
+  int start_mo = offset[Comm.rank()];
+  int end_mo   = offset[Comm.rank() + 1];
+  app_log() << "  Number of molecular orbitals to compute correction on this rank: " << end_mo - start_mo << std::endl;
 
 // Specify dynamic scheduling explicitly for load balancing.   Each iteration
 // should take enough time that scheduling overhead is not an issue.
 #pragma omp parallel for schedule(dynamic) collapse(2)
-    for (int center_idx = 0; center_idx < num_centers; center_idx++) {
-        for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) {
-            ParticleSetT<T> localTargetPtcl(targetPtcl);
-            ParticleSetT<T> localSourcePtcl(sourcePtcl);
+  for (int center_idx = 0; center_idx < num_centers; center_idx++)
+  {
+    for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++)
+    {
+      ParticleSetT<T> localTargetPtcl(targetPtcl);
+      ParticleSetT<T> localSourcePtcl(sourcePtcl);
 
-            LCAOrbitalSetT<T> local_phi("local_phi",
-                std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(
-                    phi.myBasisSet->makeClone()));
-            local_phi.setOrbitalSetSize(phi.getOrbitalSetSize());
+      LCAOrbitalSetT<T> local_phi("local_phi",
+                                  std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(phi.myBasisSet->makeClone()));
+      local_phi.setOrbitalSetSize(phi.getOrbitalSetSize());
 
-            LCAOrbitalSetT<T> local_eta("local_eta",
-                std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(
-                    eta.myBasisSet->makeClone()));
-            local_eta.setOrbitalSetSize(eta.getOrbitalSetSize());
+      LCAOrbitalSetT<T> local_eta("local_eta",
+                                  std::unique_ptr<typename LCAOrbitalSetT<T>::basis_type>(eta.myBasisSet->makeClone()));
+      local_eta.setOrbitalSetSize(eta.getOrbitalSetSize());
 
 #pragma omp critical
-            app_log() << "   Working on MO: " << mo_idx
-                      << " Center: " << center_idx << std::endl;
-
-            {
-                ScopedTimer local_timer(splitPhiEtaTimer);
-
-                *local_eta.C = *lcao.C;
-                *local_phi.C = *lcao.C;
-                splitPhiEta(center_idx, corrCenter, local_phi, local_eta);
-            }
-
-            bool corrO = false;
-            auto& cref(*(local_phi.C));
-            for (int ip = 0; ip < cref.cols(); ip++) {
-                if (std::abs(cref(mo_idx, ip)) > 0) {
-                    corrO = true;
-                    break;
-                }
-            }
-
-            if (corrO) {
-                OneMolecularOrbitalT<T> etaMO(
-                    &localTargetPtcl, &localSourcePtcl, &local_eta);
-                etaMO.changeOrbital(center_idx, mo_idx);
-
-                OneMolecularOrbitalT<T> phiMO(
-                    &localTargetPtcl, &localSourcePtcl, &local_phi);
-                phiMO.changeOrbital(center_idx, mo_idx);
-
-                SpeciesSet& tspecies(localSourcePtcl.getSpeciesSet());
-                int iz = tspecies.addAttribute("charge");
-                RealType Z = tspecies(iz, localSourcePtcl.GroupID[center_idx]);
-
-                RealType Rc_max = 0.2;
-                RealType rc = 0.1;
-
-                RealType dx = rc * 1.2 / npts;
-                ValueVector pos(npts);
-                ValueVector ELideal(npts);
-                ValueVector ELcurr(npts);
-                for (int i = 0; i < npts; i++) {
-                    pos[i] = (i + 1.0) * dx;
-                }
-
-                RealType eta0 = etaMO.phi(0.0);
-                ValueVector ELorig(npts);
-                CuspCorrectionT<T> cusp(info(center_idx, mo_idx));
-                {
-                    ScopedTimer local_timer(computeTimer);
-                    minimizeForRc(
-                        cusp, phiMO, Z, rc, Rc_max, eta0, pos, ELcurr, ELideal);
-                }
-                // Update shared object.  Each iteration accesses a different
-                // element and this is an array (no bookkeeping data to update),
-                // so no synchronization is necessary.
-                info(center_idx, mo_idx) = cusp.cparam;
-            }
+      app_log() << "   Working on MO: " << mo_idx << " Center: " << center_idx << std::endl;
+
+      {
+        ScopedTimer local_timer(splitPhiEtaTimer);
+
+        *local_eta.C = *lcao.C;
+        *local_phi.C = *lcao.C;
+        splitPhiEta(center_idx, corrCenter, local_phi, local_eta);
+      }
+
+      bool corrO = false;
+      auto& cref(*(local_phi.C));
+      for (int ip = 0; ip < cref.cols(); ip++)
+      {
+        if (std::abs(cref(mo_idx, ip)) > 0)
+        {
+          corrO = true;
+          break;
+        }
+      }
+
+      if (corrO)
+      {
+        OneMolecularOrbitalT<T> etaMO(&localTargetPtcl, &localSourcePtcl, &local_eta);
+        etaMO.changeOrbital(center_idx, mo_idx);
+
+        OneMolecularOrbitalT<T> phiMO(&localTargetPtcl, &localSourcePtcl, &local_phi);
+        phiMO.changeOrbital(center_idx, mo_idx);
+
+        SpeciesSet& tspecies(localSourcePtcl.getSpeciesSet());
+        int iz     = tspecies.addAttribute("charge");
+        RealType Z = tspecies(iz, localSourcePtcl.GroupID[center_idx]);
+
+        RealType Rc_max = 0.2;
+        RealType rc     = 0.1;
+
+        RealType dx = rc * 1.2 / npts;
+        ValueVector pos(npts);
+        ValueVector ELideal(npts);
+        ValueVector ELcurr(npts);
+        for (int i = 0; i < npts; i++)
+        {
+          pos[i] = (i + 1.0) * dx;
         }
-    }
 
-    for (int root = 0; root < Comm.size(); root++) {
-        int start_mo = offset[root];
-        int end_mo = offset[root + 1];
-        for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) {
-            for (int center_idx = 0; center_idx < num_centers; center_idx++) {
-                broadcastCuspInfo(info(center_idx, mo_idx), Comm, root);
-            }
+        RealType eta0 = etaMO.phi(0.0);
+        ValueVector ELorig(npts);
+        CuspCorrectionT<T> cusp(info(center_idx, mo_idx));
+        {
+          ScopedTimer local_timer(computeTimer);
+          minimizeForRc(cusp, phiMO, Z, rc, Rc_max, eta0, pos, ELcurr, ELideal);
         }
+        // Update shared object.  Each iteration accesses a different
+        // element and this is an array (no bookkeeping data to update),
+        // so no synchronization is necessary.
+        info(center_idx, mo_idx) = cusp.cparam;
+      }
+    }
+  }
+
+  for (int root = 0; root < Comm.size(); root++)
+  {
+    int start_mo = offset[root];
+    int end_mo   = offset[root + 1];
+    for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++)
+    {
+      for (int center_idx = 0; center_idx < num_centers; center_idx++)
+      {
+        broadcastCuspInfo(info(center_idx, mo_idx), Comm, root);
+      }
     }
+  }
 }
 
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::broadcastCuspInfo(
-    CuspCorrectionParametersT<T>& param, Communicate& Comm, int root)
+template<typename T>
+void CuspCorrectionConstructionT<T>::broadcastCuspInfo(CuspCorrectionParametersT<T>& param, Communicate& Comm, int root)
 {
 #ifdef HAVE_MPI
-    std::vector<double> buffer(9);
-    buffer[0] = param.Rc;
-    buffer[1] = param.C;
-    buffer[2] = param.sg;
-    buffer[3] = param.alpha[0];
-    buffer[4] = param.alpha[1];
-    buffer[5] = param.alpha[2];
-    buffer[6] = param.alpha[3];
-    buffer[7] = param.alpha[4];
-    buffer[8] = param.redo;
-
-    Comm.comm.broadcast(buffer.begin(), buffer.end(), root);
-
-    param.Rc = buffer[0];
-    param.C = buffer[1];
-    param.sg = buffer[2];
-    param.alpha[0] = buffer[3];
-    param.alpha[1] = buffer[4];
-    param.alpha[2] = buffer[5];
-    param.alpha[3] = buffer[6];
-    param.alpha[4] = buffer[7];
-    param.redo = buffer[8] == 0.0 ? 0 : 1;
+  std::vector<double> buffer(9);
+  buffer[0] = param.Rc;
+  buffer[1] = param.C;
+  buffer[2] = param.sg;
+  buffer[3] = param.alpha[0];
+  buffer[4] = param.alpha[1];
+  buffer[5] = param.alpha[2];
+  buffer[6] = param.alpha[3];
+  buffer[7] = param.alpha[4];
+  buffer[8] = param.redo;
+
+  Comm.comm.broadcast(buffer.begin(), buffer.end(), root);
+
+  param.Rc       = buffer[0];
+  param.C        = buffer[1];
+  param.sg       = buffer[2];
+  param.alpha[0] = buffer[3];
+  param.alpha[1] = buffer[4];
+  param.alpha[2] = buffer[5];
+  param.alpha[3] = buffer[6];
+  param.alpha[4] = buffer[7];
+  param.redo     = buffer[8] == 0.0 ? 0 : 1;
 #endif
 }
 
-template <typename T>
-bool
-CuspCorrectionConstructionT<T>::readCuspInfo(const std::string& cuspInfoFile,
-    const std::string& objectName, int OrbitalSetSize,
-    Matrix<CuspCorrectionParametersT<T>>& info)
+template<typename T>
+bool CuspCorrectionConstructionT<T>::readCuspInfo(const std::string& cuspInfoFile,
+                                                  const std::string& objectName,
+                                                  int OrbitalSetSize,
+                                                  Matrix<CuspCorrectionParametersT<T>>& info)
 {
-    bool success = true;
-    int ncenter = info.rows();
-    app_log() << "Reading cusp info from : " << cuspInfoFile << std::endl;
-    Libxml2Document adoc;
-    if (!adoc.parse(cuspInfoFile)) {
-        app_log() << "Could not find precomputed cusp data for spo set: "
-                  << objectName << std::endl;
-        app_log() << "Recalculating data.\n";
-        return false;
+  bool success = true;
+  int ncenter  = info.rows();
+  app_log() << "Reading cusp info from : " << cuspInfoFile << std::endl;
+  Libxml2Document adoc;
+  if (!adoc.parse(cuspInfoFile))
+  {
+    app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl;
+    app_log() << "Recalculating data.\n";
+    return false;
+  }
+  xmlNodePtr head = adoc.getRoot();
+  head            = head->children;
+  xmlNodePtr cur  = NULL, ctr;
+  while (head != NULL)
+  {
+    std::string cname(getNodeName(head));
+    if (cname == "sposet")
+    {
+      std::string name;
+      OhmmsAttributeSet spoAttrib;
+      spoAttrib.add(name, "name");
+      spoAttrib.put(head);
+      if (name == objectName)
+      {
+        cur = head;
+        break;
+      }
     }
-    xmlNodePtr head = adoc.getRoot();
-    head = head->children;
-    xmlNodePtr cur = NULL, ctr;
-    while (head != NULL) {
-        std::string cname(getNodeName(head));
-        if (cname == "sposet") {
-            std::string name;
-            OhmmsAttributeSet spoAttrib;
-            spoAttrib.add(name, "name");
-            spoAttrib.put(head);
-            if (name == objectName) {
-                cur = head;
-                break;
-            }
+    head = head->next;
+  }
+  if (cur == NULL)
+  {
+    app_log() << "Could not find precomputed cusp data for spo set: " << objectName << std::endl;
+    app_log() << "Recalculating data.\n";
+    return false;
+  }
+  else
+  {
+    app_log() << "Found precomputed cusp data for spo set: " << objectName << std::endl;
+  }
+  cur = cur->children;
+  while (cur != NULL)
+  {
+    std::string cname(getNodeName(cur));
+    if (cname == "center")
+    {
+      int num = -1;
+      OhmmsAttributeSet Attrib;
+      Attrib.add(num, "num");
+      Attrib.put(cur);
+      if (num < 0 || num >= ncenter)
+      {
+        APP_ABORT("Error with cusp info xml block. incorrect center "
+                  "number. \n");
+      }
+      ctr = cur->children;
+      while (ctr != NULL)
+      {
+        std::string cname(getNodeName(ctr));
+        if (cname == "orbital")
+        {
+          int orb = -1;
+          OhmmsAttributeSet orbAttrib;
+          RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9;
+          orbAttrib.add(orb, "num");
+          orbAttrib.add(a1, "redo");
+          orbAttrib.add(a2, "C");
+          orbAttrib.add(a3, "sg");
+          orbAttrib.add(a4, "rc");
+          orbAttrib.add(a5, "a1");
+          orbAttrib.add(a6, "a2");
+          orbAttrib.add(a7, "a3");
+          orbAttrib.add(a8, "a4");
+          orbAttrib.add(a9, "a5");
+          orbAttrib.put(ctr);
+          if (orb < OrbitalSetSize)
+          {
+            info(num, orb).redo     = a1;
+            info(num, orb).C        = a2;
+            info(num, orb).sg       = a3;
+            info(num, orb).Rc       = a4;
+            info(num, orb).alpha[0] = a5;
+            info(num, orb).alpha[1] = a6;
+            info(num, orb).alpha[2] = a7;
+            info(num, orb).alpha[3] = a8;
+            info(num, orb).alpha[4] = a9;
+          }
         }
-        head = head->next;
-    }
-    if (cur == NULL) {
-        app_log() << "Could not find precomputed cusp data for spo set: "
-                  << objectName << std::endl;
-        app_log() << "Recalculating data.\n";
-        return false;
-    }
-    else {
-        app_log() << "Found precomputed cusp data for spo set: " << objectName
-                  << std::endl;
+        ctr = ctr->next;
+      }
     }
-    cur = cur->children;
-    while (cur != NULL) {
-        std::string cname(getNodeName(cur));
-        if (cname == "center") {
-            int num = -1;
-            OhmmsAttributeSet Attrib;
-            Attrib.add(num, "num");
-            Attrib.put(cur);
-            if (num < 0 || num >= ncenter) {
-                APP_ABORT("Error with cusp info xml block. incorrect center "
-                          "number. \n");
-            }
-            ctr = cur->children;
-            while (ctr != NULL) {
-                std::string cname(getNodeName(ctr));
-                if (cname == "orbital") {
-                    int orb = -1;
-                    OhmmsAttributeSet orbAttrib;
-                    RealType a1(0.0), a2, a3, a4, a5, a6, a7, a8, a9;
-                    orbAttrib.add(orb, "num");
-                    orbAttrib.add(a1, "redo");
-                    orbAttrib.add(a2, "C");
-                    orbAttrib.add(a3, "sg");
-                    orbAttrib.add(a4, "rc");
-                    orbAttrib.add(a5, "a1");
-                    orbAttrib.add(a6, "a2");
-                    orbAttrib.add(a7, "a3");
-                    orbAttrib.add(a8, "a4");
-                    orbAttrib.add(a9, "a5");
-                    orbAttrib.put(ctr);
-                    if (orb < OrbitalSetSize) {
-                        info(num, orb).redo = a1;
-                        info(num, orb).C = a2;
-                        info(num, orb).sg = a3;
-                        info(num, orb).Rc = a4;
-                        info(num, orb).alpha[0] = a5;
-                        info(num, orb).alpha[1] = a6;
-                        info(num, orb).alpha[2] = a7;
-                        info(num, orb).alpha[3] = a8;
-                        info(num, orb).alpha[4] = a9;
-                    }
-                }
-                ctr = ctr->next;
-            }
-        }
-        cur = cur->next;
-    }
-    return success;
+    cur = cur->next;
+  }
+  return success;
 }
 
-template <typename T>
-void
-CuspCorrectionConstructionT<T>::saveCusp(const std::string& filename,
-    const Matrix<CuspCorrectionParametersT<T>>& info, const std::string& id)
+template<typename T>
+void CuspCorrectionConstructionT<T>::saveCusp(const std::string& filename,
+                                              const Matrix<CuspCorrectionParametersT<T>>& info,
+                                              const std::string& id)
 {
-    const int num_centers = info.rows();
-    const int orbital_set_size = info.cols();
-    xmlDocPtr doc = xmlNewDoc((const xmlChar*)"1.0");
-    xmlNodePtr cuspRoot = xmlNewNode(NULL, BAD_CAST "qmcsystem");
-    xmlNodePtr spo = xmlNewNode(NULL, (const xmlChar*)"sposet");
-    xmlNewProp(spo, (const xmlChar*)"name", (const xmlChar*)id.c_str());
-    xmlAddChild(cuspRoot, spo);
-    xmlDocSetRootElement(doc, cuspRoot);
-
-    for (int center_idx = 0; center_idx < num_centers; center_idx++) {
-        xmlNodePtr ctr = xmlNewNode(NULL, (const xmlChar*)"center");
-        std::ostringstream num;
-        num << center_idx;
-        xmlNewProp(
-            ctr, (const xmlChar*)"num", (const xmlChar*)num.str().c_str());
-
-        for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++) {
-            std::ostringstream num0, C, sg, rc, a1, a2, a3, a4, a5;
-            xmlNodePtr orb = xmlNewNode(NULL, (const xmlChar*)"orbital");
-            num0 << mo_idx;
-            xmlNewProp(
-                orb, (const xmlChar*)"num", (const xmlChar*)num0.str().c_str());
-
-            C.setf(std::ios::scientific, std::ios::floatfield);
-            C.precision(14);
-            C << info(center_idx, mo_idx).C;
-            sg.setf(std::ios::scientific, std::ios::floatfield);
-            sg.precision(14);
-            sg << info(center_idx, mo_idx).sg;
-            rc.setf(std::ios::scientific, std::ios::floatfield);
-            rc.precision(14);
-            rc << info(center_idx, mo_idx).Rc;
-            a1.setf(std::ios::scientific, std::ios::floatfield);
-            a1.precision(14);
-            a1 << info(center_idx, mo_idx).alpha[0];
-            a2.setf(std::ios::scientific, std::ios::floatfield);
-            a2.precision(14);
-            a2 << info(center_idx, mo_idx).alpha[1];
-            a3.setf(std::ios::scientific, std::ios::floatfield);
-            a3.precision(14);
-            a3 << info(center_idx, mo_idx).alpha[2];
-            a4.setf(std::ios::scientific, std::ios::floatfield);
-            a4.precision(14);
-            a4 << info(center_idx, mo_idx).alpha[3];
-            a5.setf(std::ios::scientific, std::ios::floatfield);
-            a5.precision(14);
-            a5 << info(center_idx, mo_idx).alpha[4];
-            xmlNewProp(
-                orb, (const xmlChar*)"C", (const xmlChar*)C.str().c_str());
-            xmlNewProp(
-                orb, (const xmlChar*)"sg", (const xmlChar*)sg.str().c_str());
-            xmlNewProp(
-                orb, (const xmlChar*)"rc", (const xmlChar*)rc.str().c_str());
-            xmlNewProp(
-                orb, (const xmlChar*)"a1", (const xmlChar*)a1.str().c_str());
-            xmlNewProp(
-                orb, (const xmlChar*)"a2", (const xmlChar*)a2.str().c_str());
-            xmlNewProp(
-                orb, (const xmlChar*)"a3", (const xmlChar*)a3.str().c_str());
-            xmlNewProp(
-                orb, (const xmlChar*)"a4", (const xmlChar*)a4.str().c_str());
-            xmlNewProp(
-                orb, (const xmlChar*)"a5", (const xmlChar*)a5.str().c_str());
-            xmlAddChild(ctr, orb);
-        }
-        xmlAddChild(spo, ctr);
+  const int num_centers      = info.rows();
+  const int orbital_set_size = info.cols();
+  xmlDocPtr doc              = xmlNewDoc((const xmlChar*)"1.0");
+  xmlNodePtr cuspRoot        = xmlNewNode(NULL, BAD_CAST "qmcsystem");
+  xmlNodePtr spo             = xmlNewNode(NULL, (const xmlChar*)"sposet");
+  xmlNewProp(spo, (const xmlChar*)"name", (const xmlChar*)id.c_str());
+  xmlAddChild(cuspRoot, spo);
+  xmlDocSetRootElement(doc, cuspRoot);
+
+  for (int center_idx = 0; center_idx < num_centers; center_idx++)
+  {
+    xmlNodePtr ctr = xmlNewNode(NULL, (const xmlChar*)"center");
+    std::ostringstream num;
+    num << center_idx;
+    xmlNewProp(ctr, (const xmlChar*)"num", (const xmlChar*)num.str().c_str());
+
+    for (int mo_idx = 0; mo_idx < orbital_set_size; mo_idx++)
+    {
+      std::ostringstream num0, C, sg, rc, a1, a2, a3, a4, a5;
+      xmlNodePtr orb = xmlNewNode(NULL, (const xmlChar*)"orbital");
+      num0 << mo_idx;
+      xmlNewProp(orb, (const xmlChar*)"num", (const xmlChar*)num0.str().c_str());
+
+      C.setf(std::ios::scientific, std::ios::floatfield);
+      C.precision(14);
+      C << info(center_idx, mo_idx).C;
+      sg.setf(std::ios::scientific, std::ios::floatfield);
+      sg.precision(14);
+      sg << info(center_idx, mo_idx).sg;
+      rc.setf(std::ios::scientific, std::ios::floatfield);
+      rc.precision(14);
+      rc << info(center_idx, mo_idx).Rc;
+      a1.setf(std::ios::scientific, std::ios::floatfield);
+      a1.precision(14);
+      a1 << info(center_idx, mo_idx).alpha[0];
+      a2.setf(std::ios::scientific, std::ios::floatfield);
+      a2.precision(14);
+      a2 << info(center_idx, mo_idx).alpha[1];
+      a3.setf(std::ios::scientific, std::ios::floatfield);
+      a3.precision(14);
+      a3 << info(center_idx, mo_idx).alpha[2];
+      a4.setf(std::ios::scientific, std::ios::floatfield);
+      a4.precision(14);
+      a4 << info(center_idx, mo_idx).alpha[3];
+      a5.setf(std::ios::scientific, std::ios::floatfield);
+      a5.precision(14);
+      a5 << info(center_idx, mo_idx).alpha[4];
+      xmlNewProp(orb, (const xmlChar*)"C", (const xmlChar*)C.str().c_str());
+      xmlNewProp(orb, (const xmlChar*)"sg", (const xmlChar*)sg.str().c_str());
+      xmlNewProp(orb, (const xmlChar*)"rc", (const xmlChar*)rc.str().c_str());
+      xmlNewProp(orb, (const xmlChar*)"a1", (const xmlChar*)a1.str().c_str());
+      xmlNewProp(orb, (const xmlChar*)"a2", (const xmlChar*)a2.str().c_str());
+      xmlNewProp(orb, (const xmlChar*)"a3", (const xmlChar*)a3.str().c_str());
+      xmlNewProp(orb, (const xmlChar*)"a4", (const xmlChar*)a4.str().c_str());
+      xmlNewProp(orb, (const xmlChar*)"a5", (const xmlChar*)a5.str().c_str());
+      xmlAddChild(ctr, orb);
     }
+    xmlAddChild(spo, ctr);
+  }
 
-    app_log() << "Saving resulting cusp Info xml block to: " << filename
-              << std::endl;
-    xmlSaveFormatFile(filename.c_str(), doc, 1);
-    xmlFreeDoc(doc);
+  app_log() << "Saving resulting cusp Info xml block to: " << filename << std::endl;
+  xmlSaveFormatFile(filename.c_str(), doc, 1);
+  xmlFreeDoc(doc);
 }
 
 template class CuspCorrectionConstructionT<double>;
diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h
index 497898bfe8e..cee0f559959 100644
--- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h
+++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstructionT.h
@@ -22,132 +22,127 @@ class Communicate;
 namespace qmcplusplus
 {
 
-template <typename T>
+template<typename T>
 class ParticleSetT;
 
-template <typename T>
+template<typename T>
 class OneMolecularOrbitalT
 {
 public:
-    using RealType = typename OrbitalSetTraits<T>::RealType;
-    using ValueType = typename OrbitalSetTraits<T>::ValueType;
-    using GradType = typename OrbitalSetTraits<T>::GradType;
-    using ValueVector = typename OrbitalSetTraits<T>::ValueVector;
-    using GradVector = typename OrbitalSetTraits<T>::GradVector;
-    using SPOSetPtr = SPOSetT<T>*;
-
-    ValueType
-    phi(RealType r)
-    {
-        TinyVector<RealType, 3> dr = 0;
-        dr[0] = r;
-
-        targetPtcl->R[0] = sourcePtcl->R[curCenter];
-        targetPtcl->makeMove(0, dr);
-        Psi1->evaluateValue(*targetPtcl, 0, val1);
-
-        return val1[curOrb];
-    }
-
-    void
-    phi_vgl(RealType r, RealType& val, GradType& grad, RealType& lap)
-    {
-        TinyVector<RealType, 3> dr = 0;
-        dr[0] = r;
-
-        targetPtcl->R[0] = sourcePtcl->R[curCenter];
-        targetPtcl->makeMove(0, dr);
-        Psi1->evaluateVGL(*targetPtcl, 0, val1, grad1, lap1);
-
-        val = val1[curOrb];
-        grad = grad1[curOrb];
-        lap = lap1[curOrb];
-    }
-
-    OneMolecularOrbitalT(
-        ParticleSetT<T>* targetP, ParticleSetT<T>* sourceP, SPOSetPtr Phi) :
-        targetPtcl(targetP),
-        sourcePtcl(sourceP),
-        curOrb(0),
-        curCenter(0)
-    {
-        Psi1 = Phi;
-        int norb = Psi1->getOrbitalSetSize();
-        val1.resize(norb);
-        grad1.resize(norb);
-        lap1.resize(norb);
-    }
-
-    void
-    changeOrbital(int centerIdx, int orbIdx)
-    {
-        curCenter = centerIdx;
-        curOrb = orbIdx;
-    }
+  using RealType    = typename OrbitalSetTraits<T>::RealType;
+  using ValueType   = typename OrbitalSetTraits<T>::ValueType;
+  using GradType    = typename OrbitalSetTraits<T>::GradType;
+  using ValueVector = typename OrbitalSetTraits<T>::ValueVector;
+  using GradVector  = typename OrbitalSetTraits<T>::GradVector;
+  using SPOSetPtr   = SPOSetT<T>*;
+
+  ValueType phi(RealType r)
+  {
+    TinyVector<RealType, 3> dr = 0;
+    dr[0]                      = r;
+
+    targetPtcl->R[0] = sourcePtcl->R[curCenter];
+    targetPtcl->makeMove(0, dr);
+    Psi1->evaluateValue(*targetPtcl, 0, val1);
+
+    return val1[curOrb];
+  }
+
+  void phi_vgl(RealType r, RealType& val, GradType& grad, RealType& lap)
+  {
+    TinyVector<RealType, 3> dr = 0;
+    dr[0]                      = r;
+
+    targetPtcl->R[0] = sourcePtcl->R[curCenter];
+    targetPtcl->makeMove(0, dr);
+    Psi1->evaluateVGL(*targetPtcl, 0, val1, grad1, lap1);
+
+    val  = val1[curOrb];
+    grad = grad1[curOrb];
+    lap  = lap1[curOrb];
+  }
+
+  OneMolecularOrbitalT(ParticleSetT<T>* targetP, ParticleSetT<T>* sourceP, SPOSetPtr Phi)
+      : targetPtcl(targetP), sourcePtcl(sourceP), curOrb(0), curCenter(0)
+  {
+    Psi1     = Phi;
+    int norb = Psi1->getOrbitalSetSize();
+    val1.resize(norb);
+    grad1.resize(norb);
+    lap1.resize(norb);
+  }
+
+  void changeOrbital(int centerIdx, int orbIdx)
+  {
+    curCenter = centerIdx;
+    curOrb    = orbIdx;
+  }
 
 private:
-    /// Temporary storage for real wavefunction values
-    ValueVector val1;
-    GradVector grad1;
-    ValueVector lap1;
+  /// Temporary storage for real wavefunction values
+  ValueVector val1;
+  GradVector grad1;
+  ValueVector lap1;
 
-    /// target ParticleSet
-    ParticleSetT<T>* targetPtcl;
-    /// source ParticleSet
-    ParticleSetT<T>* sourcePtcl;
+  /// target ParticleSet
+  ParticleSetT<T>* targetPtcl;
+  /// source ParticleSet
+  ParticleSetT<T>* sourcePtcl;
 
-    /// Index of orbital
-    int curOrb;
+  /// Index of orbital
+  int curOrb;
 
-    /// Index of atomic center
-    int curCenter;
+  /// Index of atomic center
+  int curCenter;
 
-    SPOSetPtr Psi1;
+  SPOSetPtr Psi1;
 };
 
-template <typename T>
+template<typename T>
 class CuspCorrectionConstructionT
 {
 public:
-    using RealType = typename OrbitalSetTraits<T>::RealType;
-    using ValueType = typename OrbitalSetTraits<T>::ValueType;
-    using ValueVector = typename OrbitalSetTraits<T>::ValueVector;
-    using GradType = typename OrbitalSetTraits<T>::GradType;
-    using GradVector = typename OrbitalSetTraits<T>::GradVector;
-
-    struct ValGradLap
-    {
-        ValueType val;
-        GradType grad;
-        ValueType lap;
-    };
-
-    /// Divide molecular orbital into atomic S-orbitals on this center (phi),
-    /// and everything else (eta).
-    static void
-    splitPhiEta(int center, const std::vector<bool>& corrCenter,
-        LCAOrbitalSetT<T>& phi, LCAOrbitalSetT<T>& eta);
-
-    /// Remove S atomic orbitals from all molecular orbitals on all centers.
-    static void
-    removeSTypeOrbitals(
-        const std::vector<bool>& corrCenter, LCAOrbitalSetT<T>& Phi);
-
-    /// Compute the radial part of the corrected wavefunction
-    static void
-    computeRadialPhiBar(ParticleSetT<T>* targetP, ParticleSetT<T>* sourceP,
-        int curOrb_, int curCenter_, SPOSetT<T>* Phi, Vector<RealType>& xgrid,
-        Vector<RealType>& rad_orb, const CuspCorrectionParametersT<T>& data);
-
-    /** Ideal local energy at one point
+  using RealType    = typename OrbitalSetTraits<T>::RealType;
+  using ValueType   = typename OrbitalSetTraits<T>::ValueType;
+  using ValueVector = typename OrbitalSetTraits<T>::ValueVector;
+  using GradType    = typename OrbitalSetTraits<T>::GradType;
+  using GradVector  = typename OrbitalSetTraits<T>::GradVector;
+
+  struct ValGradLap
+  {
+    ValueType val;
+    GradType grad;
+    ValueType lap;
+  };
+
+  /// Divide molecular orbital into atomic S-orbitals on this center (phi),
+  /// and everything else (eta).
+  static void splitPhiEta(int center,
+                          const std::vector<bool>& corrCenter,
+                          LCAOrbitalSetT<T>& phi,
+                          LCAOrbitalSetT<T>& eta);
+
+  /// Remove S atomic orbitals from all molecular orbitals on all centers.
+  static void removeSTypeOrbitals(const std::vector<bool>& corrCenter, LCAOrbitalSetT<T>& Phi);
+
+  /// Compute the radial part of the corrected wavefunction
+  static void computeRadialPhiBar(ParticleSetT<T>* targetP,
+                                  ParticleSetT<T>* sourceP,
+                                  int curOrb_,
+                                  int curCenter_,
+                                  SPOSetT<T>* Phi,
+                                  Vector<RealType>& xgrid,
+                                  Vector<RealType>& rad_orb,
+                                  const CuspCorrectionParametersT<T>& data);
+
+  /** Ideal local energy at one point
      * @param r  input radial distance
      * @param Z  nuclear charge
      * @param beta0  adjustable parameter to make energy continuous at Rc
      */
-    static RealType
-    getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0);
+  static RealType getOneIdealLocalEnergy(RealType r, RealType Z, RealType beta0);
 
-    /** Ideal local energy at a vector of points
+  /** Ideal local energy at a vector of points
      * @param pos input vector of radial distances
      * @param Z nuclear charge
      * @param Rc cutoff radius where the correction meets the actual orbital
@@ -155,11 +150,13 @@ class CuspCorrectionConstructionT
      * continuous at Rc
      * @param ELideal - output the ideal local energy at pos values
      */
-    static void
-    getIdealLocalEnergy(const ValueVector& pos, RealType Z, RealType Rc,
-        RealType ELorigAtRc, ValueVector& ELideal);
+  static void getIdealLocalEnergy(const ValueVector& pos,
+                                  RealType Z,
+                                  RealType Rc,
+                                  RealType ELorigAtRc,
+                                  ValueVector& ELideal);
 
-    /** Evaluate various orbital quantities that enter as constraints on the
+  /** Evaluate various orbital quantities that enter as constraints on the
      * correction
      * @param valRc  orbital value at Rc
      * @param gradRc  orbital gradient at Rc
@@ -171,33 +168,33 @@ class CuspCorrectionConstructionT
      * @param eta0 value of non-corrected pieces of the orbital at zero
      * @param X output
      */
-    static void
-    evalX(RealType valRc, GradType gradRc, ValueType lapRc, RealType Rc,
-        RealType Z, RealType C, RealType valAtZero, RealType eta0,
-        TinyVector<ValueType, 5>& X);
-
-    /** Convert constraints to polynomial parameters
+  static void evalX(RealType valRc,
+                    GradType gradRc,
+                    ValueType lapRc,
+                    RealType Rc,
+                    RealType Z,
+                    RealType C,
+                    RealType valAtZero,
+                    RealType eta0,
+                    TinyVector<ValueType, 5>& X);
+
+  /** Convert constraints to polynomial parameters
      * @param X input from evalX
      * @param Rc cutoff radius
      * @param alpha output the polynomial parameters for the correction
      */
-    static void
-    X2alpha(const TinyVector<ValueType, 5>& X, RealType Rc,
-        TinyVector<ValueType, 5>& alpha);
+  static void X2alpha(const TinyVector<ValueType, 5>& X, RealType Rc, TinyVector<ValueType, 5>& alpha);
 
-    /** Effective nuclear charge to keep effective local energy finite at zero
+  /** Effective nuclear charge to keep effective local energy finite at zero
      * @param Z nuclear charge
      * @param etaAtZero value of non-S orbitals at this center
      * @param phiBarAtZero value of corrected orbital at zero
      */
-    static RealType
-    getZeff(RealType Z, RealType etaAtZero, RealType phiBarAtZero);
+  static RealType getZeff(RealType Z, RealType etaAtZero, RealType phiBarAtZero);
 
-    static RealType
-    phiBar(const CuspCorrectionT<T>& cusp, RealType r,
-        OneMolecularOrbitalT<T>& phiMO);
+  static RealType phiBar(const CuspCorrectionT<T>& cusp, RealType r, OneMolecularOrbitalT<T>& phiMO);
 
-    /**  Compute effective local energy at vector of points
+  /** Compute effective local energy at a vector of points
      * @param pos input vector of radial distances
      * @param Zeff effective charge from getZeff
      * @param Rc cutoff radius
@@ -207,12 +204,15 @@ class CuspCorrectionConstructionT
      * @param phiMO uncorrected orbital (S-orbitals on this center only)
      * @param ELcurr output local energy at each distance in pos
      */
-    static void
-    getCurrentLocalEnergy(const ValueVector& pos, RealType Zeff, RealType Rc,
-        RealType originalELatRc, CuspCorrectionT<T>& cusp,
-        OneMolecularOrbitalT<T>& phiMO, ValueVector& ELcurr);
-
-    /** Local energy from uncorrected orbital
+  static void getCurrentLocalEnergy(const ValueVector& pos,
+                                    RealType Zeff,
+                                    RealType Rc,
+                                    RealType originalELatRc,
+                                    CuspCorrectionT<T>& cusp,
+                                    OneMolecularOrbitalT<T>& phiMO,
+                                    ValueVector& ELcurr);
+
+  /** Local energy from uncorrected orbital
      * @param pos input vector of radial distances
      * @param Zeff nuclear charge
      * @param Rc cutoff radius
@@ -223,19 +223,20 @@ class CuspCorrectionConstructionT
      * subsequent computations. The routine can be called with an empty vector
      * of positions to get just this value.
      */
-    static RealType
-    getOriginalLocalEnergy(const ValueVector& pos, RealType Zeff, RealType Rc,
-        OneMolecularOrbitalT<T>& phiMO, ValueVector& Elorig);
+  static RealType getOriginalLocalEnergy(const ValueVector& pos,
+                                         RealType Zeff,
+                                         RealType Rc,
+                                         OneMolecularOrbitalT<T>& phiMO,
+                                         ValueVector& Elorig);
 
-    /** Sum of squares difference between the current and ideal local energies
+  /** Sum of squares difference between the current and ideal local energies
      * This is the objective function to be minimized.
      * @param Elcurr  current local energy
      * @param Elideal  ideal local energy
      */
-    static RealType
-    getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal);
+  static RealType getELchi2(const ValueVector& ELcurr, const ValueVector& ELideal);
 
-    /** Minimize chi2 with respect to phi at zero for a fixed Rc
+  /** Minimize chi2 with respect to phi at zero for a fixed Rc
      * @param cusp correction parameters
      * @param phiMO uncorrected orbital (S-orbitals on this center only)
      * @param Z nuclear charge
@@ -246,13 +247,16 @@ class CuspCorrectionConstructionT
      * @param Elcurr storage for current local energy
      * @param Elideal storage for ideal local energy
      */
-    static RealType
-    minimizeForPhiAtZero(CuspCorrectionT<T>& cusp,
-        OneMolecularOrbitalT<T>& phiMO, RealType Z, RealType eta0,
-        ValueVector& pos, ValueVector& ELcurr, ValueVector& ELideal,
-        RealType start_phi0);
-
-    /** Minimize chi2 with respect to Rc and phi at zero.
+  static RealType minimizeForPhiAtZero(CuspCorrectionT<T>& cusp,
+                                       OneMolecularOrbitalT<T>& phiMO,
+                                       RealType Z,
+                                       RealType eta0,
+                                       ValueVector& pos,
+                                       ValueVector& ELcurr,
+                                       ValueVector& ELideal,
+                                       RealType start_phi0);
+
+  /** Minimize chi2 with respect to Rc and phi at zero.
      * @param cusp correction parameters
      * @param phiMO uncorrected orbital (S-orbitals on this center only)
      * @param Z nuclear charge
@@ -267,46 +271,56 @@ class CuspCorrectionConstructionT
      *
      * Output is parameter values in cusp.cparam
      */
-    static void
-    minimizeForRc(CuspCorrectionT<T>& cusp, OneMolecularOrbitalT<T>& phiMO,
-        RealType Z, RealType Rc_init, RealType Rc_max, RealType eta0,
-        ValueVector& pos, ValueVector& ELcurr, ValueVector& ELideal);
-
-    // Modifies orbital set lcwc
-    static void
-    applyCuspCorrection(const Matrix<CuspCorrectionParametersT<T>>& info,
-        ParticleSetT<T>& targetPtcl, ParticleSetT<T>& sourcePtcl,
-        LCAOrbitalSetT<T>& lcao, SoaCuspCorrectionT<T>& cusp,
-        const std::string& id);
-
-    static void
-    generateCuspInfo(Matrix<CuspCorrectionParametersT<T>>& info,
-        const ParticleSetT<T>& targetPtcl, const ParticleSetT<T>& sourcePtcl,
-        const LCAOrbitalSetT<T>& lcao, const std::string& id,
-        Communicate& Comm);
-
-    /// Broadcast cusp correction parameters
-    static void
-    broadcastCuspInfo(
-        CuspCorrectionParametersT<T>& param, Communicate& Comm, int root);
-
-    /// Read cusp correction parameters from XML file
-    static bool
-    readCuspInfo(const std::string& cuspInfoFile, const std::string& objectName,
-        int OrbitalSetSize, Matrix<CuspCorrectionParametersT<T>>& info);
-
-    /// save cusp correction info to a file.
-    static void
-    saveCusp(const std::string& filename,
-        const Matrix<CuspCorrectionParametersT<T>>& info,
-        const std::string& id);
+  static void minimizeForRc(CuspCorrectionT<T>& cusp,
+                            OneMolecularOrbitalT<T>& phiMO,
+                            RealType Z,
+                            RealType Rc_init,
+                            RealType Rc_max,
+                            RealType eta0,
+                            ValueVector& pos,
+                            ValueVector& ELcurr,
+                            ValueVector& ELideal);
+
+  // Modifies orbital set lcao
+  static void applyCuspCorrection(const Matrix<CuspCorrectionParametersT<T>>& info,
+                                  ParticleSetT<T>& targetPtcl,
+                                  ParticleSetT<T>& sourcePtcl,
+                                  LCAOrbitalSetT<T>& lcao,
+                                  SoaCuspCorrectionT<T>& cusp,
+                                  const std::string& id);
+
+  static void generateCuspInfo(Matrix<CuspCorrectionParametersT<T>>& info,
+                               const ParticleSetT<T>& targetPtcl,
+                               const ParticleSetT<T>& sourcePtcl,
+                               const LCAOrbitalSetT<T>& lcao,
+                               const std::string& id,
+                               Communicate& Comm);
+
+  /// Broadcast cusp correction parameters
+  static void broadcastCuspInfo(CuspCorrectionParametersT<T>& param, Communicate& Comm, int root);
+
+  /// Read cusp correction parameters from XML file
+  static bool readCuspInfo(const std::string& cuspInfoFile,
+                           const std::string& objectName,
+                           int OrbitalSetSize,
+                           Matrix<CuspCorrectionParametersT<T>>& info);
+
+  /// Save cusp correction info to a file.
+  static void saveCusp(const std::string& filename,
+                       const Matrix<CuspCorrectionParametersT<T>>& info,
+                       const std::string& id);
 
 private:
-    static RealType
-    evaluateForPhi0Body(RealType phi0, ValueVector& pos, ValueVector& ELcurr,
-        ValueVector& ELideal, CuspCorrectionT<T>& cusp,
-        OneMolecularOrbitalT<T>& phiMO, ValGradLap phiAtRc, RealType etaAtZero,
-        RealType ELorigAtRc, RealType Z);
+  static RealType evaluateForPhi0Body(RealType phi0,
+                                      ValueVector& pos,
+                                      ValueVector& ELcurr,
+                                      ValueVector& ELideal,
+                                      CuspCorrectionT<T>& cusp,
+                                      OneMolecularOrbitalT<T>& phiMO,
+                                      ValGradLap phiAtRc,
+                                      RealType etaAtZero,
+                                      RealType ELorigAtRc,
+                                      RealType Z);
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h
index 18fa1ed5315..2c88e4abc0d 100644
--- a/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h
+++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionT.h
@@ -4,15 +4,12 @@
 //
 // Copyright (c) 2018 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-// National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory Mark
-//                    Dewing, mdewing@anl.gov, Argonne National Laboratory
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
 //
-// File created by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-// National Laboratory
+// File created by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
 //////////////////////////////////////////////////////////////////////////////////////
 
 /** @file CuspCorrection.h
@@ -40,74 +37,61 @@ namespace qmcplusplus
  * parameters in those equations.
  */
 
-template <typename T>
+template<typename T>
 struct CuspCorrectionParametersT
 {
-    using ValueType = typename OrbitalSetTraits<T>::ValueType;
-    using RealType = typename OrbitalSetTraits<T>::RealType;
+  using ValueType = typename OrbitalSetTraits<T>::ValueType;
+  using RealType  = typename OrbitalSetTraits<T>::RealType;
 
-    /// The cutoff radius
-    RealType Rc;
+  /// The cutoff radius
+  RealType Rc;
 
-    /// A shift to keep correction to a single sign
-    RealType C;
+  /// A shift to keep correction to a single sign
+  RealType C;
 
-    /// The sign of the wavefunction at the nucleus
-    RealType sg;
+  /// The sign of the wavefunction at the nucleus
+  RealType sg;
 
-    /// The coefficients of the polynomial \f$p(r)\f$ in Eq 8
-    TinyVector<ValueType, 5> alpha;
+  /// The coefficients of the polynomial \f$p(r)\f$ in Eq 8
+  TinyVector<ValueType, 5> alpha;
 
-    /// Flag to indicate the correction should be recalculated
-    int redo;
+  /// Flag to indicate the correction should be recalculated
+  int redo;
 
-    CuspCorrectionParametersT() : Rc(0.0), C(0.0), sg(1.0), alpha(0.0), redo(0)
-    {
-    }
+  CuspCorrectionParametersT() : Rc(0.0), C(0.0), sg(1.0), alpha(0.0), redo(0) {}
 };
 
 /// Formulas for applying the cusp correction
 
-template <typename T>
+template<typename T>
 class CuspCorrectionT
 {
-    using RealType = typename OrbitalSetTraits<T>::RealType;
+  using RealType = typename OrbitalSetTraits<T>::RealType;
 
 public:
-    inline RealType
-    Rr(RealType r) const
-    {
-        return cparam.sg * std::exp(pr(r));
-    }
-
-    inline RealType
-    pr(RealType r) const
-    {
-        auto& alpha = cparam.alpha;
-        return alpha[0] + alpha[1] * r + alpha[2] * r * r +
-            alpha[3] * r * r * r + alpha[4] * r * r * r * r;
-    }
-
-    inline RealType
-    dpr(RealType r) const
-    {
-        auto& alpha = cparam.alpha;
-        return alpha[1] + 2.0 * alpha[2] * r + 3.0 * alpha[3] * r * r +
-            4.0 * alpha[4] * r * r * r;
-    }
-
-    inline RealType
-    d2pr(RealType r) const
-    {
-        auto& alpha = cparam.alpha;
-        return 2.0 * alpha[2] + 6.0 * alpha[3] * r + 12.0 * alpha[4] * r * r;
-    }
-
-    CuspCorrectionT(const CuspCorrectionParametersT<T>& param) : cparam(param)
-    {
-    }
-
-    CuspCorrectionParametersT<T> cparam;
+  inline RealType Rr(RealType r) const { return cparam.sg * std::exp(pr(r)); }
+
+  inline RealType pr(RealType r) const
+  {
+    auto& alpha = cparam.alpha;
+    return alpha[0] + alpha[1] * r + alpha[2] * r * r + alpha[3] * r * r * r + alpha[4] * r * r * r * r;
+  }
+
+  inline RealType dpr(RealType r) const
+  {
+    auto& alpha = cparam.alpha;
+    return alpha[1] + 2.0 * alpha[2] * r + 3.0 * alpha[3] * r * r + 4.0 * alpha[4] * r * r * r;
+  }
+
+  inline RealType d2pr(RealType r) const
+  {
+    auto& alpha = cparam.alpha;
+    return 2.0 * alpha[2] + 6.0 * alpha[3] * r + 12.0 * alpha[4] * r * r;
+  }
+
+  CuspCorrectionT(const CuspCorrectionParametersT<T>& param) : cparam(param) {}
+
+  CuspCorrectionParametersT<T> cparam;
 };
 } // namespace qmcplusplus
 
diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp
index 14de5a549cf..f3dab3c13db 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.cpp
@@ -4,11 +4,9 @@
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National
-// Laboratories
+// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
 //
-// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National
-// Laboratories
+// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "LCAOSpinorBuilderT.h"
@@ -21,190 +19,193 @@
 
 namespace qmcplusplus
 {
-template <class T>
+template<class T>
 LCAOSpinorBuilderT<T>::LCAOSpinorBuilderT(ParticleSetT<T>& els,
-    ParticleSetT<T>& ions, Communicate* comm, xmlNodePtr cur) :
-    LCAOrbitalBuilderT<T>(els, ions, comm, cur)
+                                          ParticleSetT<T>& ions,
+                                          Communicate* comm,
+                                          xmlNodePtr cur)
+    : LCAOrbitalBuilderT<T>(els, ions, comm, cur)
 {
-    this->ClassName = "LCAOSpinorBuilder";
+  this->ClassName = "LCAOSpinorBuilder";
 
-    if (this->h5_path == "")
-        this->myComm->barrier_and_abort(
-            "LCAOSpinorBuilder only works with href");
+  if (this->h5_path == "")
+    this->myComm->barrier_and_abort("LCAOSpinorBuilder only works with href");
 }
 
-template <class T>
-std::unique_ptr<SPOSetT<T>>
-LCAOSpinorBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
+template<class T>
+std::unique_ptr<SPOSetT<T>> LCAOSpinorBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 {
-    ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)");
-    std::string spo_name(""), optimize("no");
-    std::string basisset_name("LCAOBSet");
-    OhmmsAttributeSet spoAttrib;
-    spoAttrib.add(spo_name, "name");
-    spoAttrib.add(optimize, "optimize");
-    spoAttrib.add(basisset_name, "basisset");
-    spoAttrib.put(cur);
-
-    BasisSet_t* myBasisSet = nullptr;
-    if (this->basisset_map_.find(basisset_name) == this->basisset_map_.end())
-        this->myComm->barrier_and_abort(
-            "basisset \"" + basisset_name + "\" cannot be found\n");
-    else
-        myBasisSet = this->basisset_map_[basisset_name].get();
-
-    if (optimize == "yes")
-        app_log() << "  SPOSet " << spo_name << " is optimizable\n";
-
-    auto upspo = std::make_unique<LCAOrbitalSetT<T>>(
-        spo_name + "_up", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
-    auto dnspo = std::make_unique<LCAOrbitalSetT<T>>(
-        spo_name + "_dn", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
-
-    loadMO(*upspo, *dnspo, cur);
-
-    // create spinor and register up/dn
-    auto spinor_set = std::make_unique<SpinorSetT<T>>(spo_name);
-    spinor_set->set_spos(std::move(upspo), std::move(dnspo));
-    return spinor_set;
+  ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)");
+  std::string spo_name(""), optimize("no");
+  std::string basisset_name("LCAOBSet");
+  OhmmsAttributeSet spoAttrib;
+  spoAttrib.add(spo_name, "name");
+  spoAttrib.add(optimize, "optimize");
+  spoAttrib.add(basisset_name, "basisset");
+  spoAttrib.put(cur);
+
+  BasisSet_t* myBasisSet = nullptr;
+  if (this->basisset_map_.find(basisset_name) == this->basisset_map_.end())
+    this->myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n");
+  else
+    myBasisSet = this->basisset_map_[basisset_name].get();
+
+  if (optimize == "yes")
+    app_log() << "  SPOSet " << spo_name << " is optimizable\n";
+
+  auto upspo =
+      std::make_unique<LCAOrbitalSetT<T>>(spo_name + "_up", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
+  auto dnspo =
+      std::make_unique<LCAOrbitalSetT<T>>(spo_name + "_dn", std::unique_ptr<BasisSet_t>(myBasisSet->makeClone()));
+
+  loadMO(*upspo, *dnspo, cur);
+
+  // create spinor and register up/dn
+  auto spinor_set = std::make_unique<SpinorSetT<T>>(spo_name);
+  spinor_set->set_spos(std::move(upspo), std::move(dnspo));
+  return spinor_set;
 }
 
-template <class T>
-bool
-LCAOSpinorBuilderT<T>::loadMO(
-    LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur)
+template<class T>
+bool LCAOSpinorBuilderT<T>::loadMO(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur)
 {
-    bool PBC = false;
-    int norb = up.getBasisSetSize();
-    std::string debugc("no");
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(norb, "size");
-    aAttrib.add(debugc, "debug");
-    aAttrib.put(cur);
-
-    up.setOrbitalSetSize(norb);
-    dn.setOrbitalSetSize(norb);
-
-    xmlNodePtr occ_ptr = nullptr;
-    cur = cur->xmlChildrenNode;
-    while (cur != nullptr) {
-        std::string cname((const char*)(cur->name));
-        if (cname == "occupation") {
-            occ_ptr = cur;
-        }
-        cur = cur->next;
+  bool PBC = false;
+  int norb = up.getBasisSetSize();
+  std::string debugc("no");
+  OhmmsAttributeSet aAttrib;
+  aAttrib.add(norb, "size");
+  aAttrib.add(debugc, "debug");
+  aAttrib.put(cur);
+
+  up.setOrbitalSetSize(norb);
+  dn.setOrbitalSetSize(norb);
+
+  xmlNodePtr occ_ptr = nullptr;
+  cur                = cur->xmlChildrenNode;
+  while (cur != nullptr)
+  {
+    std::string cname((const char*)(cur->name));
+    if (cname == "occupation")
+    {
+      occ_ptr = cur;
     }
-
-    hdf_archive hin(this->myComm);
-    if (this->myComm->rank() == 0) {
-        if (!hin.open(this->h5_path, H5F_ACC_RDONLY))
-            this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing "
-                                            "or incorrect path to H5 file.");
-        hin.push("PBC");
-        PBC = false;
-        hin.read(PBC, "PBC");
-        hin.close();
-    }
-    this->myComm->bcast(PBC);
-    if (PBC)
-        this->myComm->barrier_and_abort(
-            "LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC");
-
-    bool success = putFromH5(up, dn, occ_ptr);
-
-    if (debugc == "yes") {
-        app_log() << "UP:  Single-particle orbital coefficients dims="
-                  << up.C->rows() << " x " << up.C->cols() << std::endl;
-        app_log() << *up.C << std::endl;
-        app_log() << "DN:  Single-particle orbital coefficients dims="
-                  << dn.C->rows() << " x " << dn.C->cols() << std::endl;
-        app_log() << *dn.C << std::endl;
-    }
-    return success;
+    cur = cur->next;
+  }
+
+  hdf_archive hin(this->myComm);
+  if (this->myComm->rank() == 0)
+  {
+    if (!hin.open(this->h5_path, H5F_ACC_RDONLY))
+      this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO missing "
+                                      "or incorrect path to H5 file.");
+    hin.push("PBC");
+    PBC = false;
+    hin.read(PBC, "PBC");
+    hin.close();
+  }
+  this->myComm->bcast(PBC);
+  if (PBC)
+    this->myComm->barrier_and_abort("LCAOSpinorBuilder::loadMO lcao spinors not implemented in PBC");
+
+  bool success = putFromH5(up, dn, occ_ptr);
+
+  if (debugc == "yes")
+  {
+    app_log() << "UP:  Single-particle orbital coefficients dims=" << up.C->rows() << " x " << up.C->cols()
+              << std::endl;
+    app_log() << *up.C << std::endl;
+    app_log() << "DN:  Single-particle orbital coefficients dims=" << dn.C->rows() << " x " << dn.C->cols()
+              << std::endl;
+    app_log() << *dn.C << std::endl;
+  }
+  return success;
 }
 
-template <class T>
-bool
-LCAOSpinorBuilderT<T>::putFromH5(
-    LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr occ_ptr)
+template<class T>
+bool LCAOSpinorBuilderT<T>::putFromH5(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr occ_ptr)
 {
-    if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) {
-        this->myComm->barrier_and_abort(
-            "LCASpinorBuilder::loadMO  detected ZERO BasisSetSize");
-        return false;
+  if (up.getBasisSetSize() == 0 || dn.getBasisSetSize() == 0) // either spin channel has an empty basis
+  {
+    this->myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5  detected ZERO BasisSetSize");
+    return false; // reached only if barrier_and_abort returns
+  }
+
+  bool success = true;
+  hdf_archive hin(this->myComm);
+  if (this->myComm->rank() == 0)
+  {
+    if (!hin.open(this->h5_path, H5F_ACC_RDONLY))
+      this->myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or "
+                                      "incorrect path to H5 file");
+
+    Matrix<RealType> upReal;
+    Matrix<RealType> upImag;
+    std::string setname = "/Super_Twist/eigenset_0";
+    this->readRealMatrixFromH5(hin, setname, upReal);
+    setname += "_imag";
+    this->readRealMatrixFromH5(hin, setname, upImag);
+
+
+    assert(upReal.rows() == upImag.rows());
+    assert(upReal.cols() == upImag.cols());
+
+    Matrix<ValueType> upTemp(upReal.rows(), upReal.cols());
+    for (int i = 0; i < upTemp.rows(); i++)
+    {
+      for (int j = 0; j < upTemp.cols(); j++)
+      {
+        upTemp[i][j] = ValueType{upReal[i][j], upImag[i][j]};
+      }
     }
 
-    bool success = true;
-    hdf_archive hin(this->myComm);
-    if (this->myComm->rank() == 0) {
-      if (!hin.open(this->h5_path, H5F_ACC_RDONLY))
-        this->myComm->barrier_and_abort("LCAOSpinorBuilder::putFromH5 missing or "
-                                        "incorrect path to H5 file");
+    Matrix<RealType> dnReal;
+    Matrix<RealType> dnImag;
+    setname = "/Super_Twist/eigenset_1";
+    this->readRealMatrixFromH5(hin, setname, dnReal);
+    setname += "_imag";
+    this->readRealMatrixFromH5(hin, setname, dnImag);
 
-      Matrix<RealType> upReal;
-      Matrix<RealType> upImag;
-      std::string setname = "/Super_Twist/eigenset_0";
-      this->readRealMatrixFromH5(hin, setname, upReal);
-      setname += "_imag";
-      this->readRealMatrixFromH5(hin, setname, upImag);
+    assert(dnReal.rows() == dnImag.rows());
+    assert(dnReal.cols() == dnImag.cols());
 
+    Matrix<ValueType> dnTemp(dnReal.rows(), dnReal.cols());
+    for (int i = 0; i < dnTemp.rows(); i++)
+    {
+      for (int j = 0; j < dnTemp.cols(); j++)
+      {
+        dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]);
+      }
+    }
+
+    assert(upReal.rows() == dnReal.rows());
+    assert(upReal.cols() == dnReal.cols());
 
-      assert(upReal.rows() == upImag.rows());
-      assert(upReal.cols() == upImag.cols());
+    this->Occ.resize(upReal.rows());
+    success = this->putOccupation(up, occ_ptr);
 
-      Matrix<ValueType> upTemp(upReal.rows(), upReal.cols());
-      for (int i = 0; i < upTemp.rows(); i++)
+    int norbs = up.getOrbitalSetSize();
+
+    int n = 0, i = 0;
+    while (i < norbs)
+    {
+      if (this->Occ[n] > 0.0)
       {
-        for (int j = 0; j < upTemp.cols(); j++)
-        {
-          upTemp[i][j] = ValueType{upReal[i][j], upImag[i][j]};
-        }
-        }
-
-        Matrix<RealType> dnReal;
-        Matrix<RealType> dnImag;
-        setname = "/Super_Twist/eigenset_1";
-        this->readRealMatrixFromH5(hin, setname, dnReal);
-        setname += "_imag";
-        this->readRealMatrixFromH5(hin, setname, dnImag);
-
-        assert(dnReal.rows() == dnImag.rows());
-        assert(dnReal.cols() == dnImag.cols());
-
-        Matrix<ValueType> dnTemp(dnReal.rows(), dnReal.cols());
-        for (int i = 0; i < dnTemp.rows(); i++) {
-            for (int j = 0; j < dnTemp.cols(); j++) {
-                dnTemp[i][j] = ValueType(dnReal[i][j], dnImag[i][j]);
-            }
-        }
-
-        assert(upReal.rows() == dnReal.rows());
-        assert(upReal.cols() == dnReal.cols());
-
-        this->Occ.resize(upReal.rows());
-        success = this->putOccupation(up, occ_ptr);
-
-        int norbs = up.getOrbitalSetSize();
-
-        int n = 0, i = 0;
-        while (i < norbs) {
-            if (this->Occ[n] > 0.0) {
-                std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]);
-                std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]);
-                i++;
-            }
-            n++;
-        }
-
-        hin.close();
+        std::copy(upTemp[n], upTemp[n + 1], (*up.C)[i]);
+        std::copy(dnTemp[n], dnTemp[n + 1], (*dn.C)[i]);
+        i++;
+      }
+      n++;
     }
 
+    hin.close();
+  }
+
 #ifdef HAVE_MPI
-    this->myComm->comm.broadcast_n(up.C->data(), up.C->size());
-    this->myComm->comm.broadcast_n(dn.C->data(), dn.C->size());
+  this->myComm->comm.broadcast_n(up.C->data(), up.C->size());
+  this->myComm->comm.broadcast_n(dn.C->data(), dn.C->size());
 #endif
 
-    return success;
+  return success;
 }
 
 #ifdef QMC_COMPLEX
diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h
index e23014f44dc..a1330d123e4 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilderT.h
@@ -4,11 +4,9 @@
 //
 // Copyright (c) 2020 QMCPACK developers.
 //
-// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National
-// Laboratories
+// File developed by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
 //
-// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National
-// Laboratories
+// File created by: Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_SOA_LCAO_SPINOR_BUILDERT_H
@@ -24,42 +22,39 @@ namespace qmcplusplus
  * read up and down channel from HDF5 and construct SpinorSet
  *
  */
-template <class T>
+template<class T>
 class LCAOSpinorBuilderT : public LCAOrbitalBuilderT<T>
 {
 public:
-    using BasisSet_t = typename LCAOrbitalBuilderT<T>::BasisSet_t;
-    using RealType = typename LCAOrbitalBuilderT<T>::RealType;
-    using ValueType = typename LCAOrbitalBuilderT<T>::ValueType;
+  using BasisSet_t = typename LCAOrbitalBuilderT<T>::BasisSet_t;
+  using RealType   = typename LCAOrbitalBuilderT<T>::RealType;
+  using ValueType  = typename LCAOrbitalBuilderT<T>::ValueType;
 
-    /** constructor
+  /** constructor
      * \param els reference to the electrons
      * \param ions reference to the ions
      *
      * Derives from LCAOrbitalBuilder, but will require an h5_path to be set
      */
-    LCAOSpinorBuilderT(ParticleSetT<T>& els, ParticleSetT<T>& ions,
-        Communicate* comm, xmlNodePtr cur);
+  LCAOSpinorBuilderT(ParticleSetT<T>& els, ParticleSetT<T>& ions, Communicate* comm, xmlNodePtr cur);
 
-    /** creates and returns SpinorSet
+  /** creates and returns SpinorSet
      *
      * Creates an up and down LCAOrbitalSet
      * calls LCAOSpinorBuilder::loadMO to build up and down from the H5 file
      * registers up and down into a SpinorSet and returns
      */
-    std::unique_ptr<SPOSetT<T>>
-    createSPOSetFromXML(xmlNodePtr cur) override;
+  std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) override;
 
 private:
-    /** load the up and down MO sets
+  /** load the up and down MO sets
      *
      * checks to make sure not PBC and initialize the Occ vector.
      * call putFromH5 to parse the up and down MO coefficients
      */
-    bool
-    loadMO(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur);
+  bool loadMO(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr cur);
 
-    /** parse h5 file for spinor info
+  /** parse h5 file for spinor info
      *
      * assumes the h5 file has KPTS_0/eigenset_0(_imag) for the real/imag part
      * of up component of spinor assumes the h5 file as KPTS_0/eigenset_1(_imag)
@@ -67,8 +62,7 @@ class LCAOSpinorBuilderT : public LCAOrbitalBuilderT<T>
      * coefficient matricies and broadcast after this, we have up/dn
      * LCAOrbitalSet that can be registered to the SpinorSet
      */
-    bool
-    putFromH5(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr);
+  bool putFromH5(LCAOrbitalSetT<T>& up, LCAOrbitalSetT<T>& dn, xmlNodePtr);
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
index 5abad9e9500..92f3590661c 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
@@ -4,15 +4,12 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-// National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jaron T. Krogel,
-//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Jeongnim
-//                    Kim, jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Ye Luo, yeluo@anl.gov, Argonne National
-//                    Laboratory Mark A. Berrill, berrillma@ornl.gov, Oak Ridge
-//                    National Laboratory
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //////////////////////////////////////////////////////////////////////////////////////
@@ -48,1139 +45,1137 @@ namespace qmcplusplus
  * SH {0=cartesian, 1=spherical}
  * If too confusing, inroduce enumeration.
  */
-template <typename T, typename ORBT, int ROT, int SH>
+template<typename T, typename ORBT, int ROT, int SH>
 struct ao_traits
-{
-};
+{};
 
 /** specialization for numerical-cartesian AO */
-template <typename T, typename ORBT>
+template<typename T, typename ORBT>
 struct ao_traits<T, ORBT, 0, 0>
 {
-    using radial_type = MultiQuinticSpline1D<T>;
-    using angular_type = SoaCartesianTensor<T>;
-    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
-    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
+  using radial_type  = MultiQuinticSpline1D<T>;
+  using angular_type = SoaCartesianTensor<T>;
+  using ao_type      = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+  using basis_type   = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for numerical-spherical AO */
-template <typename T, typename ORBT>
+template<typename T, typename ORBT>
 struct ao_traits<T, ORBT, 0, 1>
 {
-    using radial_type = MultiQuinticSpline1D<T>;
-    using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
-    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
+  using radial_type  = MultiQuinticSpline1D<T>;
+  using angular_type = SoaSphericalTensor<T>;
+  using ao_type      = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+  using basis_type   = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for GTO-cartesian AO */
-template <typename T, typename ORBT>
+template<typename T, typename ORBT>
 struct ao_traits<T, ORBT, 1, 0>
 {
-    using radial_type = MultiFunctorAdapter<GaussianCombo<T>>;
-    using angular_type = SoaCartesianTensor<T>;
-    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
-    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
+  using radial_type  = MultiFunctorAdapter<GaussianCombo<T>>;
+  using angular_type = SoaCartesianTensor<T>;
+  using ao_type      = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+  using basis_type   = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for GTO-cartesian AO */
-template <typename T, typename ORBT>
+template<typename T, typename ORBT>
 struct ao_traits<T, ORBT, 1, 1>
 {
-    using radial_type = MultiFunctorAdapter<GaussianCombo<T>>;
-    using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
-    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
+  using radial_type  = MultiFunctorAdapter<GaussianCombo<T>>;
+  using angular_type = SoaSphericalTensor<T>;
+  using ao_type      = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+  using basis_type   = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for STO-spherical AO */
-template <typename T, typename ORBT>
+template<typename T, typename ORBT>
 struct ao_traits<T, ORBT, 2, 1>
 {
-    using radial_type = MultiFunctorAdapter<SlaterCombo<T>>;
-    using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
-    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
+  using radial_type  = MultiFunctorAdapter<SlaterCombo<T>>;
+  using angular_type = SoaSphericalTensor<T>;
+  using ao_type      = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+  using basis_type   = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
-inline bool
-is_same(const xmlChar* a, const char* b)
-{
-    return !strcmp((const char*)a, b);
-}
+inline bool is_same(const xmlChar* a, const char* b) { return !strcmp((const char*)a, b); }
 
-template <typename T>
+template<typename T>
 LCAOrbitalBuilderT<T>::LCAOrbitalBuilderT(ParticleSetT<T>& els,
-    ParticleSetT<T>& ions, Communicate* comm, xmlNodePtr cur) :
-    SPOSetBuilderT<T>("LCAO", comm),
-    targetPtcl(els),
-    sourcePtcl(ions),
-    h5_path(""),
-    SuperTwist(0.0),
-    doCuspCorrection(false)
+                                          ParticleSetT<T>& ions,
+                                          Communicate* comm,
+                                          xmlNodePtr cur)
+    : SPOSetBuilderT<T>("LCAO", comm),
+      targetPtcl(els),
+      sourcePtcl(ions),
+      h5_path(""),
+      SuperTwist(0.0),
+      doCuspCorrection(false)
 {
-    this->ClassName = "LCAOrbitalBuilder";
-    ReportEngine PRE(this->ClassName, "createBasisSet");
-
-    std::string cuspC("no"); // cusp correction
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(cuspC, "cuspCorrection");
-    aAttrib.add(h5_path, "href");
-    aAttrib.add(PBCImages, "PBCimages");
-    aAttrib.add(SuperTwist, "twist");
-    aAttrib.put(cur);
-
-    if (cuspC == "yes")
-        doCuspCorrection = true;
-    // Evaluate the Phase factor. Equals 1 for OBC.
-    EvalPeriodicImagePhaseFactors(SuperTwist, PeriodicImagePhaseFactors);
-
-    // no need to wait but load the basis set
-    processChildren(
-        cur, [&](const std::string& cname, const xmlNodePtr element) {
-            if (cname == "basisset") {
-                std::string basisset_name_input(
-                    getXMLAttributeValue(element, "name"));
-                std::string basisset_name(basisset_name_input.empty() ?
-                        "LCAOBSet" :
-                        basisset_name_input);
-                if (basisset_map_.find(basisset_name) != basisset_map_.end()) {
-                    std::ostringstream err_msg;
-                    err_msg << "Cannot create basisset " << basisset_name
-                            << " which already exists." << std::endl;
-                    throw std::runtime_error(err_msg.str());
-                }
-                if (h5_path != "")
-                    basisset_map_[basisset_name] = loadBasisSetFromH5(element);
-                else
-                    basisset_map_[basisset_name] =
-                        loadBasisSetFromXML(element, cur);
-            }
-        });
-
-    // deprecated h5 basis set handling when basisset element is missing
-    if (basisset_map_.size() == 0 && h5_path != "") {
-        app_warning()
-            << "!!!!!!! Deprecated input style: missing basisset element. "
-            << "LCAO needs an explicit basisset XML element. "
-            << "Fallback on loading an implicit one." << std::endl;
-        basisset_map_["LCAOBSet"] = loadBasisSetFromH5(cur);
+  this->ClassName = "LCAOrbitalBuilder";
+  ReportEngine PRE(this->ClassName, "createBasisSet");
+
+  std::string cuspC("no"); // cusp correction
+  OhmmsAttributeSet aAttrib;
+  aAttrib.add(cuspC, "cuspCorrection");
+  aAttrib.add(h5_path, "href");
+  aAttrib.add(PBCImages, "PBCimages");
+  aAttrib.add(SuperTwist, "twist");
+  aAttrib.put(cur);
+
+  if (cuspC == "yes")
+    doCuspCorrection = true;
+  // Evaluate the Phase factor. Equals 1 for OBC.
+  EvalPeriodicImagePhaseFactors(SuperTwist, PeriodicImagePhaseFactors);
+
+  // no need to wait but load the basis set
+  processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) {
+    if (cname == "basisset")
+    {
+      std::string basisset_name_input(getXMLAttributeValue(element, "name"));
+      std::string basisset_name(basisset_name_input.empty() ? "LCAOBSet" : basisset_name_input);
+      if (basisset_map_.find(basisset_name) != basisset_map_.end())
+      {
+        std::ostringstream err_msg;
+        err_msg << "Cannot create basisset " << basisset_name << " which already exists." << std::endl;
+        throw std::runtime_error(err_msg.str());
+      }
+      if (h5_path != "")
+        basisset_map_[basisset_name] = loadBasisSetFromH5(element);
+      else
+        basisset_map_[basisset_name] = loadBasisSetFromXML(element, cur);
     }
-
-    if (basisset_map_.size() == 0)
-        throw std::runtime_error("No basisset found in the XML input!");
+  });
+
+  // deprecated h5 basis set handling when basisset element is missing
+  if (basisset_map_.size() == 0 && h5_path != "")
+  {
+    app_warning() << "!!!!!!! Deprecated input style: missing basisset element. "
+                  << "LCAO needs an explicit basisset XML element. "
+                  << "Fallback on loading an implicit one." << std::endl;
+    basisset_map_["LCAOBSet"] = loadBasisSetFromH5(cur);
+  }
+
+  if (basisset_map_.size() == 0)
+    throw std::runtime_error("No basisset found in the XML input!");
 }
 
-template <typename T>
+template<typename T>
 LCAOrbitalBuilderT<T>::~LCAOrbitalBuilderT()
 {
-    // properly cleanup
+  // empty body: no manual cleanup is performed here
 }
 
-template <typename T>
-int
-LCAOrbitalBuilderT<T>::determineRadialOrbType(xmlNodePtr cur) const
+template<typename T>
+int LCAOrbitalBuilderT<T>::determineRadialOrbType(xmlNodePtr cur) const
 {
-    std::string keyOpt;
-    std::string transformOpt;
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(keyOpt, "keyword");
-    aAttrib.add(keyOpt, "key");
-    aAttrib.add(transformOpt, "transform");
-    aAttrib.put(cur);
-
-    int radialOrbType = -1;
-    if (transformOpt == "yes" || keyOpt == "NMO")
-        radialOrbType = 0;
-    else {
-        if (keyOpt == "GTO")
-            radialOrbType = 1;
-        if (keyOpt == "STO")
-            radialOrbType = 2;
-    }
-    return radialOrbType;
+  std::string keyOpt;
+  std::string transformOpt;
+  OhmmsAttributeSet aAttrib;
+  aAttrib.add(keyOpt, "keyword");
+  aAttrib.add(keyOpt, "key");
+  aAttrib.add(transformOpt, "transform");
+  aAttrib.put(cur);
+
+  int radialOrbType = -1;
+  if (transformOpt == "yes" || keyOpt == "NMO")
+    radialOrbType = 0;
+  else
+  {
+    if (keyOpt == "GTO")
+      radialOrbType = 1;
+    if (keyOpt == "STO")
+      radialOrbType = 2;
+  }
+  return radialOrbType;
 }
 
-template <typename T>
-std::unique_ptr<typename LCAOrbitalBuilderT<T>::BasisSet_t>
-LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
+template<typename T>
+std::unique_ptr<typename LCAOrbitalBuilderT<T>::BasisSet_t> LCAOrbitalBuilderT<T>::loadBasisSetFromXML(
+    xmlNodePtr cur,
+    xmlNodePtr parent)
 {
-    ReportEngine PRE(this->ClassName, "loadBasisSetFromXML(xmlNodePtr)");
-    int ylm = -1;
+  ReportEngine PRE(this->ClassName, "loadBasisSetFromXML(xmlNodePtr)");
+  int ylm = -1;
+  {
+    xmlNodePtr cur1 = cur->xmlChildrenNode;
+    while (cur1 != NULL && ylm < 0)
     {
-        xmlNodePtr cur1 = cur->xmlChildrenNode;
-        while (cur1 != NULL && ylm < 0) {
-            if (is_same(cur1->name, "atomicBasisSet")) {
-                std::string sph;
-                OhmmsAttributeSet att;
-                att.add(sph, "angular");
-                att.put(cur1);
-                ylm = (sph == "cartesian") ? 0 : 1;
-            }
-            cur1 = cur1->next;
-        }
-    }
-
-    if (ylm < 0)
-        PRE.error("Missing angular attribute of atomicBasisSet.", true);
-
-    int radialOrbType = determineRadialOrbType(cur);
-    if (radialOrbType < 0) {
-        app_warning() << "Radial orbital type cannot be determined based on "
-                         "the attributes of basisset line. "
-                      << "Trying the parent element." << std::endl;
-        radialOrbType = determineRadialOrbType(parent);
-    }
-
-    if (radialOrbType < 0)
-        PRE.error("Unknown radial function for LCAO orbitals. Specify "
-                  "keyword=\"NMO/GTO/STO\" .",
-            true);
-
-    BasisSet_t* myBasisSet = nullptr;
-    /** process atomicBasisSet per ion species */
-    switch (radialOrbType) {
-    case (0): // numerical
-        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">"
-                  << std::endl;
-        if (ylm)
-            myBasisSet = createBasisSet<0, 1>(cur);
-        else
-            myBasisSet = createBasisSet<0, 0>(cur);
-        break;
-    case (1): // gto
-        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">"
-                  << std::endl;
-        if (ylm)
-            myBasisSet = createBasisSet<1, 1>(cur);
-        else
-            myBasisSet = createBasisSet<1, 0>(cur);
-        break;
-    case (2): // sto
-        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">"
-                  << std::endl;
-        myBasisSet = createBasisSet<2, 1>(cur);
-        break;
-    default:
-        PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
-        break;
+      if (is_same(cur1->name, "atomicBasisSet"))
+      {
+        std::string sph;
+        OhmmsAttributeSet att;
+        att.add(sph, "angular");
+        att.put(cur1);
+        ylm = (sph == "cartesian") ? 0 : 1;
+      }
+      cur1 = cur1->next;
     }
-
-    return std::unique_ptr<BasisSet_t>(myBasisSet);
+  }
+
+  if (ylm < 0)
+    PRE.error("Missing angular attribute of atomicBasisSet.", true);
+
+  int radialOrbType = determineRadialOrbType(cur);
+  if (radialOrbType < 0)
+  {
+    app_warning() << "Radial orbital type cannot be determined based on "
+                     "the attributes of basisset line. "
+                  << "Trying the parent element." << std::endl;
+    radialOrbType = determineRadialOrbType(parent);
+  }
+
+  if (radialOrbType < 0)
+    PRE.error("Unknown radial function for LCAO orbitals. Specify "
+              "keyword=\"NMO/GTO/STO\" .",
+              true);
+
+  BasisSet_t* myBasisSet = nullptr;
+  /** process atomicBasisSet per ion species */
+  switch (radialOrbType)
+  {
+  case (0): // numerical
+    app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">" << std::endl;
+    if (ylm)
+      myBasisSet = createBasisSet<0, 1>(cur);
+    else
+      myBasisSet = createBasisSet<0, 0>(cur);
+    break;
+  case (1): // gto
+    app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">" << std::endl;
+    if (ylm)
+      myBasisSet = createBasisSet<1, 1>(cur);
+    else
+      myBasisSet = createBasisSet<1, 0>(cur);
+    break;
+  case (2): // sto
+    app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">" << std::endl;
+    myBasisSet = createBasisSet<2, 1>(cur);
+    break;
+  default:
+    PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
+    break;
+  }
+
+  return std::unique_ptr<BasisSet_t>(myBasisSet);
 }
 
-template <typename T>
-std::unique_ptr<typename LCAOrbitalBuilderT<T>::BasisSet_t>
-LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
+template<typename T>
+std::unique_ptr<typename LCAOrbitalBuilderT<T>::BasisSet_t> LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
 {
-    ReportEngine PRE(this->ClassName, "loadBasisSetFromH5()");
+  ReportEngine PRE(this->ClassName, "loadBasisSetFromH5()");
 
-    hdf_archive hin(this->myComm);
-    int ylm = -1;
-    if (this->myComm->rank() == 0) {
-        if (!hin.open(h5_path, H5F_ACC_RDONLY))
-            PRE.error("Could not open H5 file", true);
+  hdf_archive hin(this->myComm);
+  int ylm = -1;
+  if (this->myComm->rank() == 0)
+  {
+    if (!hin.open(h5_path, H5F_ACC_RDONLY))
+      PRE.error("Could not open H5 file", true);
 
-        hin.push("basisset", false);
+    hin.push("basisset", false);
 
-        std::string sph;
-        std::string ElemID0 = "atomicBasisSet0";
+    std::string sph;
+    std::string ElemID0 = "atomicBasisSet0";
 
-        hin.push(ElemID0.c_str(), false);
+    hin.push(ElemID0.c_str(), false);
 
-        if (!hin.readEntry(sph, "angular"))
-            PRE.error("Could not find name of  basisset group in H5; Probably "
-                      "Corrupt H5 file",
+    if (!hin.readEntry(sph, "angular"))
+      PRE.error("Could not find name of  basisset group in H5; Probably "
+                "Corrupt H5 file",
                 true);
-        ylm = (sph == "cartesian") ? 0 : 1;
-        hin.close();
-    }
-
-    this->myComm->bcast(ylm);
-    if (ylm < 0)
-        PRE.error("Missing angular attribute of atomicBasisSet.", true);
-
-    int radialOrbType = determineRadialOrbType(parent);
-    if (radialOrbType < 0)
-        PRE.error("Unknown radial function for LCAO orbitals. Specify "
-                  "keyword=\"NMO/GTO/STO\" .",
-            true);
-
-    BasisSet_t* myBasisSet = nullptr;
-    /** process atomicBasisSet per ion species */
-    switch (radialOrbType) {
-    case (0): // numerical
-        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">"
-                  << std::endl;
-        if (ylm)
-            myBasisSet = createBasisSetH5<0, 1>();
-        else
-            myBasisSet = createBasisSetH5<0, 0>();
-        break;
-    case (1): // gto
-        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">"
-                  << std::endl;
-        if (ylm)
-            myBasisSet = createBasisSetH5<1, 1>();
-        else
-            myBasisSet = createBasisSetH5<1, 0>();
-        break;
-    case (2): // sto
-        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">"
-                  << std::endl;
-        myBasisSet = createBasisSetH5<2, 1>();
-        break;
-    default:
-        PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
-        break;
-    }
-    return std::unique_ptr<BasisSet_t>(myBasisSet);
+    ylm = (sph == "cartesian") ? 0 : 1;
+    hin.close();
+  }
+
+  this->myComm->bcast(ylm);
+  if (ylm < 0)
+    PRE.error("Missing angular attribute of atomicBasisSet.", true);
+
+  int radialOrbType = determineRadialOrbType(parent);
+  if (radialOrbType < 0)
+    PRE.error("Unknown radial function for LCAO orbitals. Specify "
+              "keyword=\"NMO/GTO/STO\" .",
+              true);
+
+  BasisSet_t* myBasisSet = nullptr;
+  /** process atomicBasisSet per ion species */
+  switch (radialOrbType)
+  {
+  case (0): // numerical
+    app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">" << std::endl;
+    if (ylm)
+      myBasisSet = createBasisSetH5<0, 1>();
+    else
+      myBasisSet = createBasisSetH5<0, 0>();
+    break;
+  case (1): // gto
+    app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">" << std::endl;
+    if (ylm)
+      myBasisSet = createBasisSetH5<1, 1>();
+    else
+      myBasisSet = createBasisSetH5<1, 0>();
+    break;
+  case (2): // sto
+    app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">" << std::endl;
+    myBasisSet = createBasisSetH5<2, 1>();
+    break;
+  default:
+    PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
+    break;
+  }
+  return std::unique_ptr<BasisSet_t>(myBasisSet);
 }
 
-template <typename T>
-template <int I, int J>
-typename LCAOrbitalBuilderT<T>::BasisSet_t*
-LCAOrbitalBuilderT<T>::createBasisSet(xmlNodePtr cur)
+template<typename T>
+template<int I, int J>
+typename LCAOrbitalBuilderT<T>::BasisSet_t* LCAOrbitalBuilderT<T>::createBasisSet(xmlNodePtr cur)
 {
-    ReportEngine PRE(this->ClassName, "createBasisSet(xmlNodePtr)");
+  ReportEngine PRE(this->ClassName, "createBasisSet(xmlNodePtr)");
+
+  using ao_type    = typename ao_traits<RealType, T, I, J>::ao_type;
+  using basis_type = typename ao_traits<RealType, T, I, J>::basis_type;
 
-    using ao_type = typename ao_traits<RealType, T, I, J>::ao_type;
-    using basis_type = typename ao_traits<RealType, T, I, J>::basis_type;
+  basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl);
 
-    basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl);
+  // list of built centers
+  std::vector<std::string> ao_built_centers;
 
-    // list of built centers
-    std::vector<std::string> ao_built_centers;
+  /** process atomicBasisSet per ion species */
+  cur = cur->xmlChildrenNode;
+  while (cur != NULL) // loop over unique ions
+  {
+    std::string cname((const char*)(cur->name));
 
-    /** process atomicBasisSet per ion species */
-    cur = cur->xmlChildrenNode;
-    while (cur != NULL) // loop over unique ioons
+    if (cname == "atomicBasisSet")
     {
-        std::string cname((const char*)(cur->name));
-
-        if (cname == "atomicBasisSet") {
-            std::string elementType;
-            std::string sph;
-            OhmmsAttributeSet att;
-            att.add(elementType, "elementType");
-            att.put(cur);
-
-            if (elementType.empty())
-                PRE.error(
-                    "Missing elementType attribute of atomicBasisSet.", true);
-
-            auto it = std::find(
-                ao_built_centers.begin(), ao_built_centers.end(), elementType);
-            if (it == ao_built_centers.end()) {
-                AOBasisBuilderT<ao_type> any(elementType, this->myComm);
-                any.put(cur);
-                auto aoBasis = any.createAOSet(cur);
-                if (aoBasis) {
-                    // add the new atomic basis to the basis set
-                    int activeCenter =
-                        sourcePtcl.getSpeciesSet().findSpecies(elementType);
-                    mBasisSet->add(activeCenter, std::move(aoBasis));
-                }
-                ao_built_centers.push_back(elementType);
-            }
+      std::string elementType;
+      std::string sph;
+      OhmmsAttributeSet att;
+      att.add(elementType, "elementType");
+      att.put(cur);
+
+      if (elementType.empty())
+        PRE.error("Missing elementType attribute of atomicBasisSet.", true);
+
+      auto it = std::find(ao_built_centers.begin(), ao_built_centers.end(), elementType);
+      if (it == ao_built_centers.end())
+      {
+        AOBasisBuilderT<ao_type> any(elementType, this->myComm);
+        any.put(cur);
+        auto aoBasis = any.createAOSet(cur);
+        if (aoBasis)
+        {
+          // add the new atomic basis to the basis set
+          int activeCenter = sourcePtcl.getSpeciesSet().findSpecies(elementType);
+          mBasisSet->add(activeCenter, std::move(aoBasis));
         }
-        cur = cur->next;
-    } // done with basis set
-    mBasisSet->setBasisSetSize(-1);
-    mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors);
-    return mBasisSet;
+        ao_built_centers.push_back(elementType);
+      }
+    }
+    cur = cur->next;
+  } // done with basis set
+  mBasisSet->setBasisSetSize(-1);
+  mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors);
+  return mBasisSet;
 }
 
-template <typename T>
-template <int I, int J>
-typename LCAOrbitalBuilderT<T>::BasisSet_t*
-LCAOrbitalBuilderT<T>::createBasisSetH5()
+template<typename T>
+template<int I, int J>
+typename LCAOrbitalBuilderT<T>::BasisSet_t* LCAOrbitalBuilderT<T>::createBasisSetH5()
 {
-    ReportEngine PRE(this->ClassName, "createBasisSetH5(xmlNodePtr)");
+  ReportEngine PRE(this->ClassName, "createBasisSetH5(xmlNodePtr)");
 
-    using ao_type = typename ao_traits<RealType, T, I, J>::ao_type;
-    using basis_type = typename ao_traits<RealType, T, I, J>::basis_type;
+  using ao_type    = typename ao_traits<RealType, T, I, J>::ao_type;
+  using basis_type = typename ao_traits<RealType, T, I, J>::basis_type;
 
-    basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl);
+  basis_type* mBasisSet = new basis_type(sourcePtcl, targetPtcl);
 
-    // list of built centers
-    std::vector<std::string> ao_built_centers;
+  // list of built centers
+  std::vector<std::string> ao_built_centers;
 
-    int Nb_Elements(0);
-    std::string basiset_name;
+  int Nb_Elements(0);
+  std::string basiset_name;
 
-    /** process atomicBasisSet per ion species */
-    app_log() << "Reading BasisSet from HDF5 file:" << h5_path << std::endl;
+  /** process atomicBasisSet per ion species */
+  app_log() << "Reading BasisSet from HDF5 file:" << h5_path << std::endl;
 
-    hdf_archive hin(this->myComm);
-    if (this->myComm->rank() == 0) {
-        if (!hin.open(h5_path, H5F_ACC_RDONLY))
-            PRE.error("Could not open H5 file", true);
+  hdf_archive hin(this->myComm);
+  if (this->myComm->rank() == 0)
+  {
+    if (!hin.open(h5_path, H5F_ACC_RDONLY))
+      PRE.error("Could not open H5 file", true);
 
-        hin.push("basisset", false);
+    hin.push("basisset", false);
 
-        hin.read(Nb_Elements, "NbElements");
-    }
+    hin.read(Nb_Elements, "NbElements");
+  }
 
-    this->myComm->bcast(Nb_Elements);
-    if (Nb_Elements < 1)
-        PRE.error("Missing elementType attribute of atomicBasisSet.", true);
+  this->myComm->bcast(Nb_Elements);
+  if (Nb_Elements < 1)
+    PRE.error("Missing elementType attribute of atomicBasisSet.", true);
 
-    for (int i = 0; i < Nb_Elements; i++) {
-        std::string elementType, dataset;
-        std::stringstream tempElem;
-        std::string ElemID0 = "atomicBasisSet", ElemType;
-        tempElem << ElemID0 << i;
-        ElemType = tempElem.str();
-
-        if (this->myComm->rank() == 0) {
-            hin.push(ElemType.c_str(), false);
-
-            if (!hin.readEntry(basiset_name, "name"))
-                PRE.error("Could not find name of  basisset group in H5; "
-                          "Probably Corrupt H5 file",
-                    true);
-            if (!hin.readEntry(elementType, "elementType"))
-                PRE.error("Could not read elementType in H5; Probably Corrupt "
-                          "H5 file",
-                    true);
-        }
-        this->myComm->bcast(basiset_name);
-        this->myComm->bcast(elementType);
-
-        auto it = std::find(
-            ao_built_centers.begin(), ao_built_centers.end(), elementType);
-        if (it == ao_built_centers.end()) {
-            AOBasisBuilderT<ao_type> any(elementType, this->myComm);
-            any.putH5(hin);
-            auto aoBasis = any.createAOSetH5(hin);
-            if (aoBasis) {
-                // add the new atomic basis to the basis set
-                int activeCenter =
-                    sourcePtcl.getSpeciesSet().findSpecies(elementType);
-                mBasisSet->add(activeCenter, std::move(aoBasis));
-            }
-            ao_built_centers.push_back(elementType);
-        }
+  for (int i = 0; i < Nb_Elements; i++)
+  {
+    std::string elementType, dataset;
+    std::stringstream tempElem;
+    std::string ElemID0 = "atomicBasisSet", ElemType;
+    tempElem << ElemID0 << i;
+    ElemType = tempElem.str();
 
-        if (this->myComm->rank() == 0)
-            hin.pop();
+    if (this->myComm->rank() == 0)
+    {
+      hin.push(ElemType.c_str(), false);
+
+      if (!hin.readEntry(basiset_name, "name"))
+        PRE.error("Could not find name of  basisset group in H5; "
+                  "Probably Corrupt H5 file",
+                  true);
+      if (!hin.readEntry(elementType, "elementType"))
+        PRE.error("Could not read elementType in H5; Probably Corrupt "
+                  "H5 file",
+                  true);
     }
+    this->myComm->bcast(basiset_name);
+    this->myComm->bcast(elementType);
 
-    if (this->myComm->rank() == 0) {
-        hin.pop();
-        hin.close();
+    auto it = std::find(ao_built_centers.begin(), ao_built_centers.end(), elementType);
+    if (it == ao_built_centers.end())
+    {
+      AOBasisBuilderT<ao_type> any(elementType, this->myComm);
+      any.putH5(hin);
+      auto aoBasis = any.createAOSetH5(hin);
+      if (aoBasis)
+      {
+        // add the new atomic basis to the basis set
+        int activeCenter = sourcePtcl.getSpeciesSet().findSpecies(elementType);
+        mBasisSet->add(activeCenter, std::move(aoBasis));
+      }
+      ao_built_centers.push_back(elementType);
     }
-    mBasisSet->setBasisSetSize(-1);
-    mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors);
-    return mBasisSet;
+
+    if (this->myComm->rank() == 0)
+      hin.pop();
+  }
+
+  if (this->myComm->rank() == 0)
+  {
+    hin.pop();
+    hin.close();
+  }
+  mBasisSet->setBasisSetSize(-1);
+  mBasisSet->setPBCParams(PBCImages, SuperTwist, PeriodicImagePhaseFactors);
+  return mBasisSet;
 }
 #ifndef QMC_COMPLEX
-template <>
-std::unique_ptr<SPOSetT<double>>
-LCAOrbitalBuilderT<double>::createWithCuspCorrection(xmlNodePtr cur,
-    const std::string& spo_name, std::string cusp_file,
+template<>
+std::unique_ptr<SPOSetT<double>> LCAOrbitalBuilderT<double>::createWithCuspCorrection(
+    xmlNodePtr cur,
+    const std::string& spo_name,
+    std::string cusp_file,
     std::unique_ptr<BasisSet_t>&& myBasisSet)
 {
-    app_summary() << "        Using cusp correction." << std::endl;
-    std::unique_ptr<SPOSetT<double>> sposet;
-    {
-        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<double>>(
-            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
-        loadMO(lcwc->lcao, cur);
-        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
-        sposet = std::move(lcwc);
-    }
-
-    // Create a temporary particle set to use for cusp initialization.
-    // The particle coordinates left at the end are unsuitable for further
-    // computations. The coordinates get set to nuclear positions, which
-    // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
-    // This problem only appears when the electron positions are specified
-    // in the input. The random particle placement step executes after this
-    // part of the code, overwriting the leftover positions from the cusp
-    // initialization.
-    ParticleSetT<double> tmp_targetPtcl(targetPtcl);
-
-    const int num_centers = sourcePtcl.getTotalNum();
-    auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<double>&>(*sposet);
-
-    const int orbital_set_size = lcwc.getOrbitalSetSize();
-    Matrix<CuspCorrectionParametersT<double>> info(
-        num_centers, orbital_set_size);
-
-    // set a default file name if not given
-    if (cusp_file.empty())
-        cusp_file = spo_name + ".cuspInfo.xml";
-
-    bool file_exists(
-        this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
-    this->myComm->bcast(file_exists);
-    app_log() << "  Cusp correction file " << cusp_file
-              << (file_exists ? " exits." : " doesn't exist.") << std::endl;
-
-    // validate file if it exists
-    if (file_exists) {
-        bool valid = 0;
-        if (this->myComm->rank() == 0)
-            valid = CuspCorrectionConstructionT<double>::readCuspInfo(
-                cusp_file, spo_name, orbital_set_size, info);
-        this->myComm->bcast(valid);
-        if (!valid)
-            this->myComm->barrier_and_abort(
-                "Invalid cusp correction file " + cusp_file);
+  app_summary() << "        Using cusp correction." << std::endl;
+  std::unique_ptr<SPOSetT<double>> sposet;
+  {
+    auto lcwc =
+        std::make_unique<LCAOrbitalSetWithCorrectionT<double>>(spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
+    loadMO(lcwc->lcao, cur);
+    lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
+    sposet = std::move(lcwc);
+  }
+
+  // Create a temporary particle set to use for cusp initialization.
+  // The particle coordinates left at the end are unsuitable for further
+  // computations. The coordinates get set to nuclear positions, which
+  // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h.
+  // This problem only appears when the electron positions are specified
+  // in the input. The random particle placement step executes after this
+  // part of the code, overwriting the leftover positions from the cusp
+  // initialization.
+  ParticleSetT<double> tmp_targetPtcl(targetPtcl);
+
+  const int num_centers = sourcePtcl.getTotalNum();
+  auto& lcwc            = dynamic_cast<LCAOrbitalSetWithCorrectionT<double>&>(*sposet);
+
+  const int orbital_set_size = lcwc.getOrbitalSetSize();
+  Matrix<CuspCorrectionParametersT<double>> info(num_centers, orbital_set_size);
+
+  // set a default file name if not given
+  if (cusp_file.empty())
+    cusp_file = spo_name + ".cuspInfo.xml";
+
+  bool file_exists(this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
+  this->myComm->bcast(file_exists);
+  app_log() << "  Cusp correction file " << cusp_file << (file_exists ? " exits." : " doesn't exist.") << std::endl;
+
+  // validate file if it exists
+  if (file_exists)
+  {
+    bool valid = 0;
+    if (this->myComm->rank() == 0)
+      valid = CuspCorrectionConstructionT<double>::readCuspInfo(cusp_file, spo_name, orbital_set_size, info);
+    this->myComm->bcast(valid);
+    if (!valid)
+      this->myComm->barrier_and_abort("Invalid cusp correction file " + cusp_file);
 #ifdef HAVE_MPI
-        for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
-            for (int center_idx = 0; center_idx < num_centers; center_idx++)
-                CuspCorrectionConstructionT<double>::broadcastCuspInfo(
-                    info(center_idx, orb_idx), *this->myComm, 0);
+    for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
+      for (int center_idx = 0; center_idx < num_centers; center_idx++)
+        CuspCorrectionConstructionT<double>::broadcastCuspInfo(info(center_idx, orb_idx), *this->myComm, 0);
 #endif
-    }
-    else {
-        CuspCorrectionConstructionT<double>::generateCuspInfo(info,
-            tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
-        if (this->myComm->rank() == 0)
-            CuspCorrectionConstructionT<double>::saveCusp(
-                cusp_file, info, spo_name);
-    }
-
-    CuspCorrectionConstructionT<double>::applyCuspCorrection(
-        info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
-
-    return sposet;
+  }
+  else
+  {
+    CuspCorrectionConstructionT<double>::generateCuspInfo(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name,
+                                                          *this->myComm);
+    if (this->myComm->rank() == 0)
+      CuspCorrectionConstructionT<double>::saveCusp(cusp_file, info, spo_name);
+  }
+
+  CuspCorrectionConstructionT<double>::applyCuspCorrection(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp,
+                                                           spo_name);
+
+  return sposet;
 }
 
-template <>
-std::unique_ptr<SPOSetT<float>>
-LCAOrbitalBuilderT<float>::createWithCuspCorrection(xmlNodePtr cur,
-    const std::string& spo_name, std::string cusp_file,
+template<>
+std::unique_ptr<SPOSetT<float>> LCAOrbitalBuilderT<float>::createWithCuspCorrection(
+    xmlNodePtr cur,
+    const std::string& spo_name,
+    std::string cusp_file,
     std::unique_ptr<BasisSet_t>&& myBasisSet)
 {
-    app_summary() << "        Using cusp correction." << std::endl;
-    std::unique_ptr<SPOSetT<float>> sposet;
-    {
-        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<float>>(
-            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
-        loadMO(lcwc->lcao, cur);
-        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
-        sposet = std::move(lcwc);
-    }
-
-    // Create a temporary particle set to use for cusp initialization.
-    // The particle coordinates left at the end are unsuitable for further
-    // computations. The coordinates get set to nuclear positions, which
-    // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
-    // This problem only appears when the electron positions are specified
-    // in the input. The random particle placement step executes after this
-    // part of the code, overwriting the leftover positions from the cusp
-    // initialization.
-    ParticleSetT<float> tmp_targetPtcl(targetPtcl);
-
-    const int num_centers = sourcePtcl.getTotalNum();
-    auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<float>&>(*sposet);
-
-    const int orbital_set_size = lcwc.getOrbitalSetSize();
-    Matrix<CuspCorrectionParametersT<float>> info(
-        num_centers, orbital_set_size);
-
-    // set a default file name if not given
-    if (cusp_file.empty())
-        cusp_file = spo_name + ".cuspInfo.xml";
-
-    bool file_exists(
-        this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
-    this->myComm->bcast(file_exists);
-    app_log() << "  Cusp correction file " << cusp_file
-              << (file_exists ? " exits." : " doesn't exist.") << std::endl;
-
-    // validate file if it exists
-    if (file_exists) {
-        bool valid = 0;
-        if (this->myComm->rank() == 0)
-            valid = CuspCorrectionConstructionT<float>::readCuspInfo(
-                cusp_file, spo_name, orbital_set_size, info);
-        this->myComm->bcast(valid);
-        if (!valid)
-            this->myComm->barrier_and_abort(
-                "Invalid cusp correction file " + cusp_file);
+  app_summary() << "        Using cusp correction." << std::endl;
+  std::unique_ptr<SPOSetT<float>> sposet;
+  {
+    auto lcwc =
+        std::make_unique<LCAOrbitalSetWithCorrectionT<float>>(spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
+    loadMO(lcwc->lcao, cur);
+    lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
+    sposet = std::move(lcwc);
+  }
+
+  // Create a temporary particle set to use for cusp initialization.
+  // The particle coordinates left at the end are unsuitable for further
+  // computations. The coordinates get set to nuclear positions, which
+  // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h.
+  // This problem only appears when the electron positions are specified
+  // in the input. The random particle placement step executes after this
+  // part of the code, overwriting the leftover positions from the cusp
+  // initialization.
+  ParticleSetT<float> tmp_targetPtcl(targetPtcl);
+
+  const int num_centers = sourcePtcl.getTotalNum();
+  auto& lcwc            = dynamic_cast<LCAOrbitalSetWithCorrectionT<float>&>(*sposet);
+
+  const int orbital_set_size = lcwc.getOrbitalSetSize();
+  Matrix<CuspCorrectionParametersT<float>> info(num_centers, orbital_set_size);
+
+  // set a default file name if not given
+  if (cusp_file.empty())
+    cusp_file = spo_name + ".cuspInfo.xml";
+
+  bool file_exists(this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
+  this->myComm->bcast(file_exists);
+  app_log() << "  Cusp correction file " << cusp_file << (file_exists ? " exits." : " doesn't exist.") << std::endl;
+
+  // validate file if it exists
+  if (file_exists)
+  {
+    bool valid = 0;
+    if (this->myComm->rank() == 0)
+      valid = CuspCorrectionConstructionT<float>::readCuspInfo(cusp_file, spo_name, orbital_set_size, info);
+    this->myComm->bcast(valid);
+    if (!valid)
+      this->myComm->barrier_and_abort("Invalid cusp correction file " + cusp_file);
 #ifdef HAVE_MPI
-        for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
-            for (int center_idx = 0; center_idx < num_centers; center_idx++)
-                CuspCorrectionConstructionT<float>::broadcastCuspInfo(
-                    info(center_idx, orb_idx), *this->myComm, 0);
+    for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
+      for (int center_idx = 0; center_idx < num_centers; center_idx++)
+        CuspCorrectionConstructionT<float>::broadcastCuspInfo(info(center_idx, orb_idx), *this->myComm, 0);
 #endif
-    }
-    else {
-        CuspCorrectionConstructionT<float>::generateCuspInfo(info,
-            tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
-        if (this->myComm->rank() == 0)
-            CuspCorrectionConstructionT<float>::saveCusp(
-                cusp_file, info, spo_name);
-    }
-
-    CuspCorrectionConstructionT<float>::applyCuspCorrection(
-        info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
-
-    return sposet;
+  }
+  else
+  {
+    CuspCorrectionConstructionT<float>::generateCuspInfo(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name,
+                                                         *this->myComm);
+    if (this->myComm->rank() == 0)
+      CuspCorrectionConstructionT<float>::saveCusp(cusp_file, info, spo_name);
+  }
+
+  CuspCorrectionConstructionT<float>::applyCuspCorrection(info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp,
+                                                          spo_name);
+
+  return sposet;
 }
 
 #else
 
-template <>
-std::unique_ptr<SPOSetT<std::complex<double>>>
-LCAOrbitalBuilderT<std::complex<double>>::createWithCuspCorrection(
-    xmlNodePtr, const std::string&, std::string, std::unique_ptr<BasisSet_t>&&)
+template<>
+std::unique_ptr<SPOSetT<std::complex<double>>> LCAOrbitalBuilderT<std::complex<double>>::createWithCuspCorrection(
+    xmlNodePtr,
+    const std::string&,
+    std::string,
+    std::unique_ptr<BasisSet_t>&&)
 {
-    this->myComm->barrier_and_abort(
-        "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
-        "supported on complex LCAO.");
-    return std::unique_ptr<SPOSetT<std::complex<double>>>{};
+  this->myComm->barrier_and_abort("LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
+                                  "supported on complex LCAO.");
+  return std::unique_ptr<SPOSetT<std::complex<double>>>{};
 }
 
-template <>
-std::unique_ptr<SPOSetT<std::complex<float>>>
-LCAOrbitalBuilderT<std::complex<float>>::createWithCuspCorrection(
-    xmlNodePtr, const std::string&, std::string, std::unique_ptr<BasisSet_t>&&)
+template<>
+std::unique_ptr<SPOSetT<std::complex<float>>> LCAOrbitalBuilderT<std::complex<float>>::createWithCuspCorrection(
+    xmlNodePtr,
+    const std::string&,
+    std::string,
+    std::unique_ptr<BasisSet_t>&&)
 {
-    this->myComm->barrier_and_abort(
-        "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
-        "supported on complex LCAO.");
-    return std::unique_ptr<SPOSetT<std::complex<float>>>{};
+  this->myComm->barrier_and_abort("LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
+                                  "supported on complex LCAO.");
+  return std::unique_ptr<SPOSetT<std::complex<float>>>{};
 }
 
 #endif
 
-template <typename T>
-std::unique_ptr<SPOSetT<T>>
-LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
+template<typename T>
+std::unique_ptr<SPOSetT<T>> LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 {
-    ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)");
-    std::string spo_name(""), cusp_file(""), optimize("no");
-    std::string basisset_name("LCAOBSet");
-    OhmmsAttributeSet spoAttrib;
-    spoAttrib.add(spo_name, "name");
-    spoAttrib.add(spo_name, "id");
-    spoAttrib.add(cusp_file, "cuspInfo");
-    spoAttrib.add(basisset_name, "basisset");
-    spoAttrib.put(cur);
-
-    std::unique_ptr<BasisSet_t> myBasisSet;
-    if (basisset_map_.find(basisset_name) == basisset_map_.end())
-        this->myComm->barrier_and_abort(
-            "basisset \"" + basisset_name + "\" cannot be found\n");
-    else
-        myBasisSet.reset(basisset_map_[basisset_name]->makeClone());
-
-    std::unique_ptr<SPOSetT<T>> sposet;
-    if (doCuspCorrection) {
-      sposet = createWithCuspCorrection(cur, spo_name, cusp_file, std::move(myBasisSet));
-    }
-    else {
-        auto lcos = std::make_unique<LCAOrbitalSetT<T>>(
-            spo_name, std::move(myBasisSet));
-        loadMO(*lcos, cur);
-        sposet = std::move(lcos);
-    }
-
-    return sposet;
+  ReportEngine PRE(this->ClassName, "createSPO(xmlNodePtr)");
+  std::string spo_name(""), cusp_file(""), optimize("no");
+  std::string basisset_name("LCAOBSet");
+  OhmmsAttributeSet spoAttrib;
+  spoAttrib.add(spo_name, "name");
+  spoAttrib.add(spo_name, "id");
+  spoAttrib.add(cusp_file, "cuspInfo");
+  spoAttrib.add(basisset_name, "basisset");
+  spoAttrib.put(cur);
+
+  std::unique_ptr<BasisSet_t> myBasisSet;
+  if (basisset_map_.find(basisset_name) == basisset_map_.end())
+    this->myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n");
+  else
+    myBasisSet.reset(basisset_map_[basisset_name]->makeClone());
+
+  std::unique_ptr<SPOSetT<T>> sposet;
+  if (doCuspCorrection)
+  {
+    sposet = createWithCuspCorrection(cur, spo_name, cusp_file, std::move(myBasisSet));
+  }
+  else
+  {
+    auto lcos = std::make_unique<LCAOrbitalSetT<T>>(spo_name, std::move(myBasisSet));
+    loadMO(*lcos, cur);
+    sposet = std::move(lcos);
+  }
+
+  return sposet;
 }
 
 /** Parse the xml file for information on the Dirac determinants.
  *@param cur the current xmlNode
  */
-template <typename T>
-bool
-LCAOrbitalBuilderT<T>::loadMO(LCAOrbitalSetT<T>& spo, xmlNodePtr cur)
+template<typename T>
+bool LCAOrbitalBuilderT<T>::loadMO(LCAOrbitalSetT<T>& spo, xmlNodePtr cur)
 {
 #undef FunctionName
-#define FunctionName \
-    printf("Calling FunctionName from %s\n", __FUNCTION__); \
-    FunctionNameReal
-    // Check if HDF5 present
-    ReportEngine PRE("LCAOrbitalBuilder", "put(xmlNodePtr)");
-
-    // initialize the number of orbital by the basis set size
-    int norb = spo.getBasisSetSize();
-    std::string debugc("no");
-    double orbital_mix_magnitude = 0.0;
-    bool PBC = false;
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(norb, "orbitals");
-    aAttrib.add(norb, "size");
-    aAttrib.add(debugc, "debug");
-    aAttrib.add(orbital_mix_magnitude, "orbital_mix_magnitude");
-    aAttrib.put(cur);
-    xmlNodePtr occ_ptr = NULL;
-    xmlNodePtr coeff_ptr = NULL;
-    cur = cur->xmlChildrenNode;
-    while (cur != NULL) {
-        std::string cname((const char*)(cur->name));
-        if (cname == "occupation") {
-            occ_ptr = cur;
-        }
-        else if (cname.find("coeff") < cname.size() || cname == "parameter" ||
-            cname == "Var") {
-            coeff_ptr = cur;
-        }
-        cur = cur->next;
-    }
-    if (coeff_ptr == NULL) {
-        app_log() << "   Using Identity for the LCOrbitalSet " << std::endl;
-        return true;
+#define FunctionName                                      \
+  printf("Calling FunctionName from %s\n", __FUNCTION__); \
+  FunctionNameReal
+  // Check if HDF5 present
+  ReportEngine PRE("LCAOrbitalBuilder", "put(xmlNodePtr)");
+
+  // initialize the number of orbital by the basis set size
+  int norb = spo.getBasisSetSize();
+  std::string debugc("no");
+  double orbital_mix_magnitude = 0.0;
+  bool PBC                     = false;
+  OhmmsAttributeSet aAttrib;
+  aAttrib.add(norb, "orbitals");
+  aAttrib.add(norb, "size");
+  aAttrib.add(debugc, "debug");
+  aAttrib.add(orbital_mix_magnitude, "orbital_mix_magnitude");
+  aAttrib.put(cur);
+  xmlNodePtr occ_ptr   = NULL;
+  xmlNodePtr coeff_ptr = NULL;
+  cur                  = cur->xmlChildrenNode;
+  while (cur != NULL)
+  {
+    std::string cname((const char*)(cur->name));
+    if (cname == "occupation")
+    {
+      occ_ptr = cur;
     }
-    spo.setOrbitalSetSize(norb);
-    bool success = putOccupation(spo, occ_ptr);
-    if (h5_path == "")
-        success = putFromXML(spo, coeff_ptr);
-    else {
-        hdf_archive hin(this->myComm);
-
-        if (this->myComm->rank() == 0) {
-            if (!hin.open(h5_path, H5F_ACC_RDONLY))
-                APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect "
-                          "path to H5 file.");
-
-            try {
-                hin.push("PBC", false);
-                PBC = true;
-            }
-            catch (const std::exception& e) {
-                app_debug() << e.what() << std::endl;
-                PBC = false;
-            }
-
-            if (PBC)
-                hin.read(PBC, "PBC");
-
-            hin.close();
-        }
-        this->myComm->bcast(PBC);
-        if (PBC)
-            success = putPBCFromH5(spo, coeff_ptr);
-        else
-            success = putFromH5(spo, coeff_ptr);
+    else if (cname.find("coeff") < cname.size() || cname == "parameter" || cname == "Var")
+    {
+      coeff_ptr = cur;
     }
+    cur = cur->next;
+  }
+  if (coeff_ptr == NULL)
+  {
+    app_log() << "   Using Identity for the LCOrbitalSet " << std::endl;
+    return true;
+  }
+  spo.setOrbitalSetSize(norb);
+  bool success = putOccupation(spo, occ_ptr);
+  if (h5_path == "")
+    success = putFromXML(spo, coeff_ptr);
+  else
+  {
+    hdf_archive hin(this->myComm);
 
-    // Ye: used to construct cusp correction
-    // bool success2 = transformSPOSet();
-    if (debugc == "yes") {
-        app_log() << "   Single-particle orbital coefficients dims="
-                  << spo.C->rows() << " x " << spo.C->cols() << std::endl;
-        app_log() << *spo.C << std::endl;
+    if (this->myComm->rank() == 0)
+    {
+      if (!hin.open(h5_path, H5F_ACC_RDONLY))
+        APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect "
+                  "path to H5 file.");
+
+      try
+      {
+        hin.push("PBC", false);
+        PBC = true;
+      }
+      catch (const std::exception& e)
+      {
+        app_debug() << e.what() << std::endl;
+        PBC = false;
+      }
+
+      if (PBC)
+        hin.read(PBC, "PBC");
+
+      hin.close();
     }
-
-    return success;
+    this->myComm->bcast(PBC);
+    if (PBC)
+      success = putPBCFromH5(spo, coeff_ptr);
+    else
+      success = putFromH5(spo, coeff_ptr);
+  }
+
+  // Ye: used to construct cusp correction
+  // bool success2 = transformSPOSet();
+  if (debugc == "yes")
+  {
+    app_log() << "   Single-particle orbital coefficients dims=" << spo.C->rows() << " x " << spo.C->cols()
+              << std::endl;
+    app_log() << *spo.C << std::endl;
+  }
+
+  return success;
 }
 
-template <typename T>
-bool
-LCAOrbitalBuilderT<T>::putFromXML(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr)
+template<typename T>
+bool LCAOrbitalBuilderT<T>::putFromXML(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr)
 {
-    int norbs = 0;
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(norbs, "size");
-    aAttrib.add(norbs, "orbitals");
-    aAttrib.put(coeff_ptr);
-    if (norbs < spo.getOrbitalSetSize()) {
-        return false;
-        APP_ABORT("LCAOrbitalBuilder::putFromXML missing or incorrect size");
-    }
-    if (norbs) {
-        std::vector<T> Ctemp;
-        int BasisSetSize = spo.getBasisSetSize();
-        Ctemp.resize(norbs * BasisSetSize);
-        putContent(Ctemp, coeff_ptr);
-        int n = 0, i = 0;
-        typename std::vector<T>::iterator cit(Ctemp.begin());
-        while (i < spo.getOrbitalSetSize()) {
-            if (Occ[n] > std::numeric_limits<RealType>::epsilon()) {
-                std::copy(cit, cit + BasisSetSize, (*spo.C)[i]);
-                i++;
-            }
-            n++;
-            cit += BasisSetSize;
-        }
+  int norbs = 0;
+  OhmmsAttributeSet aAttrib;
+  aAttrib.add(norbs, "size");
+  aAttrib.add(norbs, "orbitals");
+  aAttrib.put(coeff_ptr);
+  if (norbs < spo.getOrbitalSetSize())
+  {
+    return false;
+    APP_ABORT("LCAOrbitalBuilder::putFromXML missing or incorrect size");
+  }
+  if (norbs)
+  {
+    std::vector<T> Ctemp;
+    int BasisSetSize = spo.getBasisSetSize();
+    Ctemp.resize(norbs * BasisSetSize);
+    putContent(Ctemp, coeff_ptr);
+    int n = 0, i = 0;
+    typename std::vector<T>::iterator cit(Ctemp.begin());
+    while (i < spo.getOrbitalSetSize())
+    {
+      if (Occ[n] > std::numeric_limits<RealType>::epsilon())
+      {
+        std::copy(cit, cit + BasisSetSize, (*spo.C)[i]);
+        i++;
+      }
+      n++;
+      cit += BasisSetSize;
     }
-    return true;
+  }
+  return true;
 }
 
 /** read data from a hdf5 file
  * @param norb number of orbitals to be initialized
  * @param coeff_ptr xmlnode for coefficients
  */
-template <typename T>
-bool
-LCAOrbitalBuilderT<T>::putFromH5(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr)
+template<typename T>
+bool LCAOrbitalBuilderT<T>::putFromH5(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr)
 {
-    int neigs = spo.getBasisSetSize();
-    int setVal = -1;
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(setVal, "spindataset");
-    aAttrib.add(neigs, "size");
-    aAttrib.add(neigs, "orbitals");
-    aAttrib.put(coeff_ptr);
-    hdf_archive hin(this->myComm);
-    if (this->myComm->rank() == 0) {
-        if (!hin.open(h5_path, H5F_ACC_RDONLY))
-            APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path "
-                      "to H5 file.");
-
-        Matrix<RealType> Ctemp;
-        std::array<char, 72> name;
-
-        // This is to make sure of Backward compatibility with previous tags.
-        int name_len = std::snprintf(
-            name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal);
-        if (name_len < 0)
-            throw std::runtime_error("Error generating name");
-        std::string setname(name.data(), name_len);
-        if (!hin.readEntry(Ctemp, setname)) {
-            name_len = std::snprintf(
-                name.data(), name.size(), "%s%d", "/KPTS_0/eigenset_", setVal);
-            if (name_len < 0)
-                throw std::runtime_error("Error generating name");
-            setname = std::string(name.data(), name_len);
-            hin.read(Ctemp, setname);
-        }
-        hin.close();
-
-        if (Ctemp.cols() != spo.getBasisSetSize()) {
-            std::ostringstream err_msg;
-            err_msg << "Basis set size " << spo.getBasisSetSize()
-                    << " mismatched the number of MO coefficients columns "
-                    << Ctemp.cols() << " from h5." << std::endl;
-            this->myComm->barrier_and_abort(err_msg.str());
-        }
+  int neigs  = spo.getBasisSetSize();
+  int setVal = -1;
+  OhmmsAttributeSet aAttrib;
+  aAttrib.add(setVal, "spindataset");
+  aAttrib.add(neigs, "size");
+  aAttrib.add(neigs, "orbitals");
+  aAttrib.put(coeff_ptr);
+  hdf_archive hin(this->myComm);
+  if (this->myComm->rank() == 0)
+  {
+    if (!hin.open(h5_path, H5F_ACC_RDONLY))
+      APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path "
+                "to H5 file.");
+
+    Matrix<RealType> Ctemp;
+    std::array<char, 72> name;
 
-        int norbs = spo.getOrbitalSetSize();
-        if (Ctemp.rows() < norbs) {
-            std::ostringstream err_msg;
-            err_msg << "Need " << norbs
-                    << " orbitals. Insufficient rows of MO coefficients "
-                    << Ctemp.rows() << " from h5." << std::endl;
-            this->myComm->barrier_and_abort(err_msg.str());
-        }
+    // This is to make sure of Backward compatibility with previous tags.
+    int name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal);
+    if (name_len < 0)
+      throw std::runtime_error("Error generating name");
+    std::string setname(name.data(), name_len);
+    if (!hin.readEntry(Ctemp, setname))
+    {
+      name_len = std::snprintf(name.data(), name.size(), "%s%d", "/KPTS_0/eigenset_", setVal);
+      if (name_len < 0)
+        throw std::runtime_error("Error generating name");
+      setname = std::string(name.data(), name_len);
+      hin.read(Ctemp, setname);
+    }
+    hin.close();
 
-        int n = 0, i = 0;
-        while (i < norbs) {
-            if (Occ[n] > 0.0) {
-                std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]);
-                i++;
-            }
-            n++;
-        }
+    if (Ctemp.cols() != spo.getBasisSetSize())
+    {
+      std::ostringstream err_msg;
+      err_msg << "Basis set size " << spo.getBasisSetSize() << " mismatched the number of MO coefficients columns "
+              << Ctemp.cols() << " from h5." << std::endl;
+      this->myComm->barrier_and_abort(err_msg.str());
     }
-    this->myComm->bcast(spo.C->data(), spo.C->size());
-    return true;
+
+    int norbs = spo.getOrbitalSetSize();
+    if (Ctemp.rows() < norbs)
+    {
+      std::ostringstream err_msg;
+      err_msg << "Need " << norbs << " orbitals. Insufficient rows of MO coefficients " << Ctemp.rows() << " from h5."
+              << std::endl;
+      this->myComm->barrier_and_abort(err_msg.str());
+    }
+
+    int n = 0, i = 0;
+    while (i < norbs)
+    {
+      if (Occ[n] > 0.0)
+      {
+        std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]);
+        i++;
+      }
+      n++;
+    }
+  }
+  this->myComm->bcast(spo.C->data(), spo.C->size());
+  return true;
 }
 
 /** read data from a hdf5 file
  * @param norb number of orbitals to be initialized
  * @param coeff_ptr xmlnode for coefficients
  */
-template <typename T>
-bool
-LCAOrbitalBuilderT<T>::putPBCFromH5(
-    LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr)
+template<typename T>
+bool LCAOrbitalBuilderT<T>::putPBCFromH5(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr)
 {
-    ReportEngine PRE("LCAOrbitalBuilder", "LCAOrbitalBuilder::putPBCFromH5");
-    int norbs = spo.getOrbitalSetSize();
-    int neigs = spo.getBasisSetSize();
-    int setVal = -1;
-    bool IsComplex = false;
-    bool MultiDet = false;
-    PosType SuperTwist(0.0);
-    PosType SuperTwistH5(0.0);
-    OhmmsAttributeSet aAttrib;
-    aAttrib.add(setVal, "spindataset");
-    aAttrib.add(neigs, "size");
-    aAttrib.add(neigs, "orbitals");
-    aAttrib.put(coeff_ptr);
-    hdf_archive hin(this->myComm);
-
-    xmlNodePtr curtemp = coeff_ptr;
-
-    std::string xmlTag("determinantset");
-    std::string MSDTag("sposet");
-    std::string SDTag("determinant");
-    std::string EndTag("qmcsystem");
-    std::string curname;
-
-    do {
-        std::stringstream ss;
-        curtemp = curtemp->parent;
-        ss << curtemp->name;
-        ss >> curname;
-        if (curname == MSDTag)
-            MultiDet = true; /// Used to know if running an MSD calculation -
-                             /// needed for order of Orbitals.
-        if (curname == SDTag)
-            MultiDet = false;
-
-    } while ((xmlTag != curname) && (curname != EndTag));
-    if (curname == EndTag) {
-        APP_ABORT("Could not find in wf file the \"sposet\" or \"determinant\" "
-                  "tags. Please verify input or contact developers");
+  ReportEngine PRE("LCAOrbitalBuilder", "LCAOrbitalBuilder::putPBCFromH5");
+  int norbs      = spo.getOrbitalSetSize();
+  int neigs      = spo.getBasisSetSize();
+  int setVal     = -1;
+  bool IsComplex = false;
+  bool MultiDet  = false;
+  PosType SuperTwist(0.0);
+  PosType SuperTwistH5(0.0);
+  OhmmsAttributeSet aAttrib;
+  aAttrib.add(setVal, "spindataset");
+  aAttrib.add(neigs, "size");
+  aAttrib.add(neigs, "orbitals");
+  aAttrib.put(coeff_ptr);
+  hdf_archive hin(this->myComm);
+
+  xmlNodePtr curtemp = coeff_ptr;
+
+  std::string xmlTag("determinantset");
+  std::string MSDTag("sposet");
+  std::string SDTag("determinant");
+  std::string EndTag("qmcsystem");
+  std::string curname;
+
+  do
+  {
+    std::stringstream ss;
+    curtemp = curtemp->parent;
+    ss << curtemp->name;
+    ss >> curname;
+    if (curname == MSDTag)
+      MultiDet = true; /// Used to know if running an MSD calculation -
+                       /// needed for order of Orbitals.
+    if (curname == SDTag)
+      MultiDet = false;
+
+  } while ((xmlTag != curname) && (curname != EndTag));
+  if (curname == EndTag)
+  {
+    APP_ABORT("Could not find in wf file the \"sposet\" or \"determinant\" "
+              "tags. Please verify input or contact developers");
+  }
+
+  aAttrib.add(SuperTwist, "twist");
+  aAttrib.put(curtemp);
+
+  if (this->myComm->rank() == 0)
+  {
+    if (!hin.open(h5_path, H5F_ACC_RDONLY))
+      APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path "
+                "to H5 file.");
+    hin.push("parameters");
+    hin.read(IsComplex, "IsComplex");
+    hin.pop();
+
+    std::string setname("/Super_Twist/Coord");
+    hin.read(SuperTwistH5, setname);
+    if (std::abs(SuperTwistH5[0] - SuperTwist[0]) >= 1e-6 || std::abs(SuperTwistH5[1] - SuperTwist[1]) >= 1e-6 ||
+        std::abs(SuperTwistH5[2] - SuperTwist[2]) >= 1e-6)
+    {
+      app_log() << "Super Twist in XML : " << SuperTwist[0] << "    In H5:" << SuperTwistH5[0] << std::endl;
+      app_log() << "                     " << SuperTwist[1] << "          " << SuperTwistH5[1] << std::endl;
+      app_log() << "                     " << SuperTwist[2] << "          " << SuperTwistH5[2] << std::endl;
+      app_log() << "Diff in Coord     x :" << std::abs(SuperTwistH5[0] - SuperTwist[0]) << std::endl;
+      app_log() << "                  y :" << std::abs(SuperTwistH5[1] - SuperTwist[1]) << std::endl;
+      app_log() << "                  z :" << std::abs(SuperTwistH5[2] - SuperTwist[2]) << std::endl;
+      APP_ABORT("Requested Super Twist in XML and Super Twist in HDF5 do "
+                "not Match!!! Aborting.");
     }
+    // SuperTwist=SuperTwistH5;
+    Matrix<T> Ctemp;
+    LoadFullCoefsFromH5(hin, setVal, SuperTwist, Ctemp, MultiDet);
 
-    aAttrib.add(SuperTwist, "twist");
-    aAttrib.put(curtemp);
-
-    if (this->myComm->rank() == 0) {
-        if (!hin.open(h5_path, H5F_ACC_RDONLY))
-            APP_ABORT("LCAOrbitalBuilder::putFromH5 missing or incorrect path "
-                      "to H5 file.");
-        hin.push("parameters");
-        hin.read(IsComplex, "IsComplex");
-        hin.pop();
-
-        std::string setname("/Super_Twist/Coord");
-        hin.read(SuperTwistH5, setname);
-        if (std::abs(SuperTwistH5[0] - SuperTwist[0]) >= 1e-6 ||
-            std::abs(SuperTwistH5[1] - SuperTwist[1]) >= 1e-6 ||
-            std::abs(SuperTwistH5[2] - SuperTwist[2]) >= 1e-6) {
-            app_log() << "Super Twist in XML : " << SuperTwist[0]
-                      << "    In H5:" << SuperTwistH5[0] << std::endl;
-            app_log() << "                     " << SuperTwist[1]
-                      << "          " << SuperTwistH5[1] << std::endl;
-            app_log() << "                     " << SuperTwist[2]
-                      << "          " << SuperTwistH5[2] << std::endl;
-            app_log() << "Diff in Coord     x :"
-                      << std::abs(SuperTwistH5[0] - SuperTwist[0]) << std::endl;
-            app_log() << "                  y :"
-                      << std::abs(SuperTwistH5[1] - SuperTwist[1]) << std::endl;
-            app_log() << "                  z :"
-                      << std::abs(SuperTwistH5[2] - SuperTwist[2]) << std::endl;
-            APP_ABORT("Requested Super Twist in XML and Super Twist in HDF5 do "
-                      "not Match!!! Aborting.");
-        }
-        // SuperTwist=SuperTwistH5;
-        Matrix<T> Ctemp;
-        LoadFullCoefsFromH5(hin, setVal, SuperTwist, Ctemp, MultiDet);
-
-        int n = 0, i = 0;
-        while (i < norbs) {
-            if (Occ[n] > 0.0) {
-                std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]);
-                i++;
-            }
-            n++;
-        }
-
-        hin.close();
+    int n = 0, i = 0;
+    while (i < norbs)
+    {
+      if (Occ[n] > 0.0)
+      {
+        std::copy(Ctemp[n], Ctemp[n + 1], (*spo.C)[i]);
+        i++;
+      }
+      n++;
     }
+
+    hin.close();
+  }
 #ifdef HAVE_MPI
-    this->myComm->comm.broadcast_n(spo.C->data(), spo.C->size());
+  this->myComm->comm.broadcast_n(spo.C->data(), spo.C->size());
 #endif
-    return true;
+  return true;
 }
 
-template <typename T>
-bool
-LCAOrbitalBuilderT<T>::putOccupation(LCAOrbitalSetT<T>& spo, xmlNodePtr occ_ptr)
+template<typename T>
+bool LCAOrbitalBuilderT<T>::putOccupation(LCAOrbitalSetT<T>& spo, xmlNodePtr occ_ptr)
 {
-    // die??
-    if (spo.getBasisSetSize() == 0) {
-        APP_ABORT(
-            "LCAOrbitalBuilder::putOccupation detected ZERO BasisSetSize");
-        return false;
-    }
-    Occ.resize(std::max(spo.getBasisSetSize(), spo.getOrbitalSetSize()));
-    Occ = 0.0;
-    for (int i = 0; i < spo.getOrbitalSetSize(); i++)
-        Occ[i] = 1.0;
-    std::vector<int> occ_in;
-    std::string occ_mode("table");
-    if (occ_ptr == NULL) {
-        occ_mode = "ground";
-    }
-    else {
-        const std::string o(getXMLAttributeValue(occ_ptr, "mode"));
-        if (!o.empty())
-            occ_mode = o;
-    }
-    // Do nothing if mode == ground
-    if (occ_mode == "excited") {
-        putContent(occ_in, occ_ptr);
-        for (int k = 0; k < occ_in.size(); k++) {
-            if (occ_in[k] < 0) // remove this, -1 is to adjust the base
-                Occ[-occ_in[k] - 1] = 0.0;
-            else
-                Occ[occ_in[k] - 1] = 1.0;
-        }
-    }
-    else if (occ_mode == "table") {
-        putContent(Occ, occ_ptr);
+  // die??
+  if (spo.getBasisSetSize() == 0)
+  {
+    APP_ABORT("LCAOrbitalBuilder::putOccupation detected ZERO BasisSetSize");
+    return false;
+  }
+  Occ.resize(std::max(spo.getBasisSetSize(), spo.getOrbitalSetSize()));
+  Occ = 0.0;
+  for (int i = 0; i < spo.getOrbitalSetSize(); i++)
+    Occ[i] = 1.0;
+  std::vector<int> occ_in;
+  std::string occ_mode("table");
+  if (occ_ptr == NULL)
+  {
+    occ_mode = "ground";
+  }
+  else
+  {
+    const std::string o(getXMLAttributeValue(occ_ptr, "mode"));
+    if (!o.empty())
+      occ_mode = o;
+  }
+  // Do nothing if mode == ground
+  if (occ_mode == "excited")
+  {
+    putContent(occ_in, occ_ptr);
+    for (int k = 0; k < occ_in.size(); k++)
+    {
+      if (occ_in[k] < 0) // remove this, -1 is to adjust the base
+        Occ[-occ_in[k] - 1] = 0.0;
+      else
+        Occ[occ_in[k] - 1] = 1.0;
     }
-    return true;
+  }
+  else if (occ_mode == "table")
+  {
+    putContent(Occ, occ_ptr);
+  }
+  return true;
 }
 
-template <typename T>
-void
-LCAOrbitalBuilderT<T>::readRealMatrixFromH5(
-    hdf_archive& hin, const std::string& setname, Matrix<RealType>& Creal) const
+template<typename T>
+void LCAOrbitalBuilderT<T>::readRealMatrixFromH5(hdf_archive& hin,
+                                                 const std::string& setname,
+                                                 Matrix<RealType>& Creal) const
 {
-    hin.read(Creal, setname);
+  hin.read(Creal, setname);
 }
 
-template <typename T>
-void
-LCAOrbitalBuilderT<T>::LoadFullCoefsFromH5(hdf_archive& hin, int setVal,
-    PosType& SuperTwist, Matrix<std::complex<RealType>>& Ctemp, bool MultiDet)
+template<typename T>
+void LCAOrbitalBuilderT<T>::LoadFullCoefsFromH5(hdf_archive& hin,
+                                                int setVal,
+                                                PosType& SuperTwist,
+                                                Matrix<std::complex<RealType>>& Ctemp,
+                                                bool MultiDet)
 {
-    Matrix<RealType> Creal;
-    Matrix<RealType> Ccmplx;
-
-    std::array<char, 72> name;
-    int name_len{0};
-    /// When running Single Determinant calculations, MO coeff loaded based on
-    /// occupation and lowest eingenvalue. However, for solids with
-    /// multideterminants, orbitals are order by kpoints; first all MOs for
-    /// kpoint 1, then 2 etc
-    ///  The multideterminants occupation is specified in the input/HDF5 and
-    ///  theefore as long as there is consistency between the order in which we
-    ///  read the orbitals and the occupation, we are safe. In the case of
-    ///  Multideterminants generated by pyscf and Quantum Package, They are
-    ///  stored in the same order as generated for quantum package and one
-    ///  should use the orbitals labelled eigenset_unsorted.
-
-    if (MultiDet == false)
-        name_len = std::snprintf(
-            name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal);
-    else
-        name_len = std::snprintf(name.data(), name.size(), "%s%d",
-            "/Super_Twist/eigenset_unsorted_", setVal);
-    if (name_len < 0)
-        throw std::runtime_error("Error generating name");
-
-    std::string setname(name.data(), name_len);
-    readRealMatrixFromH5(hin, setname, Creal);
-
-    bool IsComplex = true;
-    hin.read(IsComplex, "/parameters/IsComplex");
-    if (IsComplex == false) {
-        Ccmplx.resize(Creal.rows(), Creal.cols());
-        Ccmplx = 0.0;
-    }
-    else {
-        setname += "_imag";
-        readRealMatrixFromH5(hin, setname, Ccmplx);
-    }
-
-    Ctemp.resize(Creal.rows(), Creal.cols());
-    for (int i = 0; i < Ctemp.rows(); i++)
-        for (int j = 0; j < Ctemp.cols(); j++)
-            Ctemp[i][j] = std::complex<RealType>(Creal[i][j], Ccmplx[i][j]);
+  Matrix<RealType> Creal;
+  Matrix<RealType> Ccmplx;
+
+  std::array<char, 72> name;
+  int name_len{0};
+  /// When running Single Determinant calculations, MO coeff loaded based on
+  /// occupation and lowest eingenvalue. However, for solids with
+  /// multideterminants, orbitals are order by kpoints; first all MOs for
+  /// kpoint 1, then 2 etc
+  ///  The multideterminants occupation is specified in the input/HDF5 and
+  ///  theefore as long as there is consistency between the order in which we
+  ///  read the orbitals and the occupation, we are safe. In the case of
+  ///  Multideterminants generated by pyscf and Quantum Package, They are
+  ///  stored in the same order as generated for quantum package and one
+  ///  should use the orbitals labelled eigenset_unsorted.
+
+  if (MultiDet == false)
+    name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal);
+  else
+    name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_unsorted_", setVal);
+  if (name_len < 0)
+    throw std::runtime_error("Error generating name");
+
+  std::string setname(name.data(), name_len);
+  readRealMatrixFromH5(hin, setname, Creal);
+
+  bool IsComplex = true;
+  hin.read(IsComplex, "/parameters/IsComplex");
+  if (IsComplex == false)
+  {
+    Ccmplx.resize(Creal.rows(), Creal.cols());
+    Ccmplx = 0.0;
+  }
+  else
+  {
+    setname += "_imag";
+    readRealMatrixFromH5(hin, setname, Ccmplx);
+  }
+
+  Ctemp.resize(Creal.rows(), Creal.cols());
+  for (int i = 0; i < Ctemp.rows(); i++)
+    for (int j = 0; j < Ctemp.cols(); j++)
+      Ctemp[i][j] = std::complex<RealType>(Creal[i][j], Ccmplx[i][j]);
 }
 
-template <typename T>
-void
-LCAOrbitalBuilderT<T>::LoadFullCoefsFromH5(hdf_archive& hin, int setVal,
-    PosType& SuperTwist, Matrix<RealType>& Creal, bool MultiDet)
+template<typename T>
+void LCAOrbitalBuilderT<T>::LoadFullCoefsFromH5(hdf_archive& hin,
+                                                int setVal,
+                                                PosType& SuperTwist,
+                                                Matrix<RealType>& Creal,
+                                                bool MultiDet)
 {
-    bool IsComplex = false;
-    hin.read(IsComplex, "/parameters/IsComplex");
-    if (IsComplex &&
-        (std::abs(SuperTwist[0]) >= 1e-6 || std::abs(SuperTwist[1]) >= 1e-6 ||
-            std::abs(SuperTwist[2]) >= 1e-6)) {
-        std::string setname(
-            "This Wavefunction is Complex and you are using the real version "
-            "of QMCPACK. "
-            "Please re-run this job with the Complex build of QMCPACK.");
-        APP_ABORT(setname.c_str());
-    }
-
-    std::array<char, 72> name;
-    int name_len{0};
-    bool PBC = false;
-    hin.read(PBC, "/PBC/PBC");
-    if (MultiDet && PBC)
-        name_len = std::snprintf(name.data(), name.size(), "%s%d",
-            "/Super_Twist/eigenset_unsorted_", setVal);
-    else
-        name_len = std::snprintf(
-            name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal);
-    if (name_len < 0)
-        throw std::runtime_error("Error generating name");
-
-    readRealMatrixFromH5(hin, std::string(name.data(), name_len), Creal);
+  bool IsComplex = false;
+  hin.read(IsComplex, "/parameters/IsComplex");
+  if (IsComplex &&
+      (std::abs(SuperTwist[0]) >= 1e-6 || std::abs(SuperTwist[1]) >= 1e-6 || std::abs(SuperTwist[2]) >= 1e-6))
+  {
+    std::string setname("This Wavefunction is Complex and you are using the real version "
+                        "of QMCPACK. "
+                        "Please re-run this job with the Complex build of QMCPACK.");
+    APP_ABORT(setname.c_str());
+  }
+
+  std::array<char, 72> name;
+  int name_len{0};
+  bool PBC = false;
+  hin.read(PBC, "/PBC/PBC");
+  if (MultiDet && PBC)
+    name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_unsorted_", setVal);
+  else
+    name_len = std::snprintf(name.data(), name.size(), "%s%d", "/Super_Twist/eigenset_", setVal);
+  if (name_len < 0)
+    throw std::runtime_error("Error generating name");
+
+  readRealMatrixFromH5(hin, std::string(name.data(), name_len), Creal);
 }
 
 /// Periodic Image Phase Factors computation to be determined
-template <typename T>
-void
-LCAOrbitalBuilderT<T>::EvalPeriodicImagePhaseFactors(
-    PosType SuperTwist, std::vector<RealType>& LocPeriodicImagePhaseFactors)
+template<typename T>
+void LCAOrbitalBuilderT<T>::EvalPeriodicImagePhaseFactors(PosType SuperTwist,
+                                                          std::vector<RealType>& LocPeriodicImagePhaseFactors)
 {
-    const int NbImages =
-        (PBCImages[0] + 1) * (PBCImages[1] + 1) * (PBCImages[2] + 1);
-    LocPeriodicImagePhaseFactors.resize(NbImages);
-    for (size_t i = 0; i < NbImages; i++)
-        LocPeriodicImagePhaseFactors[i] = 1.0;
+  const int NbImages = (PBCImages[0] + 1) * (PBCImages[1] + 1) * (PBCImages[2] + 1);
+  LocPeriodicImagePhaseFactors.resize(NbImages);
+  for (size_t i = 0; i < NbImages; i++)
+    LocPeriodicImagePhaseFactors[i] = 1.0;
 }
 
-template <typename T>
-void
-LCAOrbitalBuilderT<T>::EvalPeriodicImagePhaseFactors(PosType SuperTwist,
+template<typename T>
+void LCAOrbitalBuilderT<T>::EvalPeriodicImagePhaseFactors(
+    PosType SuperTwist,
     std::vector<std::complex<RealType>>& LocPeriodicImagePhaseFactors)
 {
-    // Allow computation to continue with no HDF file if the system has open
-    // boundary conditions. The complex build is usually only used with open BC
-    // for testing.
-    bool usesOpenBC =
-        PBCImages[0] == 0 && PBCImages[1] == 0 && PBCImages[2] == 0;
-
-    /// Exp(ik.g) where i is imaginary, k is the supertwist and g is the
-    /// translation vector PBCImage.
-    if (h5_path != "" && !usesOpenBC) {
-        hdf_archive hin(this->myComm);
-        if (this->myComm->rank() == 0) {
-            if (!hin.open(h5_path, H5F_ACC_RDONLY))
-                APP_ABORT("Could not open H5 file");
-
-            hin.push("Cell", false);
-
-            hin.read(Lattice, "LatticeVectors");
-            hin.close();
-        }
-        for (int i = 0; i < 3; i++)
-            for (int j = 0; j < 3; j++)
-                this->myComm->bcast(Lattice(i, j));
-    }
-    else if (!usesOpenBC) {
-        APP_ABORT("Attempting to run PBC LCAO with no HDF5 support. Behaviour "
-                  "is unknown. Safer to exit");
-    }
+  // Allow computation to continue with no HDF file if the system has open
+  // boundary conditions. The complex build is usually only used with open BC
+  // for testing.
+  bool usesOpenBC = PBCImages[0] == 0 && PBCImages[1] == 0 && PBCImages[2] == 0;
+
+  /// Exp(ik.g) where i is imaginary, k is the supertwist and g is the
+  /// translation vector PBCImage.
+  if (h5_path != "" && !usesOpenBC)
+  {
+    hdf_archive hin(this->myComm);
+    if (this->myComm->rank() == 0)
+    {
+      if (!hin.open(h5_path, H5F_ACC_RDONLY))
+        APP_ABORT("Could not open H5 file");
 
-    int phase_idx = 0;
-    int TransX, TransY, TransZ;
-    RealType phase;
+      hin.push("Cell", false);
 
-    for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+      hin.read(Lattice, "LatticeVectors");
+      hin.close();
+    }
+    for (int i = 0; i < 3; i++)
+      for (int j = 0; j < 3; j++)
+        this->myComm->bcast(Lattice(i, j));
+  }
+  else if (!usesOpenBC)
+  {
+    APP_ABORT("Attempting to run PBC LCAO with no HDF5 support. Behaviour "
+              "is unknown. Safer to exit");
+  }
+
+  int phase_idx = 0;
+  int TransX, TransY, TransZ;
+  RealType phase;
+
+  for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+  {
+    TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+    for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
     {
-        TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
-        for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
-        {
-            TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
-            for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z
-            {
-                TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
-                RealType s, c;
-                PosType Val;
-                Val[0] = TransX * Lattice(0, 0) + TransY * Lattice(1, 0) +
-                    TransZ * Lattice(2, 0);
-                Val[1] = TransX * Lattice(0, 1) + TransY * Lattice(1, 1) +
-                    TransZ * Lattice(2, 1);
-                Val[2] = TransX * Lattice(0, 2) + TransY * Lattice(1, 2) +
-                    TransZ * Lattice(2, 2);
-
-                phase = dot(SuperTwist, Val);
-                qmcplusplus::sincos(phase, &s, &c);
-
-                LocPeriodicImagePhaseFactors.emplace_back(c, s);
-            }
-        }
+      TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+      for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z
+      {
+        TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+        RealType s, c;
+        PosType Val;
+        Val[0] = TransX * Lattice(0, 0) + TransY * Lattice(1, 0) + TransZ * Lattice(2, 0);
+        Val[1] = TransX * Lattice(0, 1) + TransY * Lattice(1, 1) + TransZ * Lattice(2, 1);
+        Val[2] = TransX * Lattice(0, 2) + TransY * Lattice(1, 2) + TransZ * Lattice(2, 2);
+
+        phase = dot(SuperTwist, Val);
+        qmcplusplus::sincos(phase, &s, &c);
+
+        LocPeriodicImagePhaseFactors.emplace_back(c, s);
+      }
     }
+  }
 }
 
 #ifndef QMC_COMPLEX
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
index 5cff3a5612a..afe2541c34a 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
@@ -4,15 +4,12 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-// National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Jaron T. Krogel,
-//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Jeongnim
-//                    Kim, jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Ye Luo, yeluo@anl.gov, Argonne National
-//                    Laboratory Mark A. Berrill, berrillma@ornl.gov, Oak Ridge
-//                    National Laboratory
+// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
 //////////////////////////////////////////////////////////////////////////////////////
@@ -32,113 +29,97 @@ namespace qmcplusplus
  * Reimplement MolecularSPOSetBuilder
  * - support both CartesianTensor and SphericalTensor
  */
-template <typename T>
+template<typename T>
 class LCAOrbitalBuilderT : public SPOSetBuilderT<T>
 {
 public:
-    using BasisSet_t = typename LCAOrbitalSetT<T>::basis_type;
-    using RealType = typename LCAOrbitalSetT<T>::RealType;
-    using ValueType = typename LCAOrbitalSetT<T>::ValueType;
-    using PosType = typename LCAOrbitalSetT<T>::PosType;
+  using BasisSet_t = typename LCAOrbitalSetT<T>::basis_type;
+  using RealType   = typename LCAOrbitalSetT<T>::RealType;
+  using ValueType  = typename LCAOrbitalSetT<T>::ValueType;
+  using PosType    = typename LCAOrbitalSetT<T>::PosType;
 
-    /** constructor
+  /** constructor
      * \param els reference to the electrons
      * \param ions reference to the ions
      */
-    LCAOrbitalBuilderT(ParticleSetT<T>& els, ParticleSetT<T>& ions,
-        Communicate* comm, xmlNodePtr cur);
-    ~LCAOrbitalBuilderT() override;
-    std::unique_ptr<SPOSetT<T>>
-    createSPOSetFromXML(xmlNodePtr cur) override;
+  LCAOrbitalBuilderT(ParticleSetT<T>& els, ParticleSetT<T>& ions, Communicate* comm, xmlNodePtr cur);
+  ~LCAOrbitalBuilderT() override;
+  std::unique_ptr<SPOSetT<T>> createSPOSetFromXML(xmlNodePtr cur) override;
 
 protected:
-    /// target ParticleSet
-    ParticleSetT<T>& targetPtcl;
-    /// source ParticleSet
-    ParticleSetT<T>& sourcePtcl;
-    /// localized basis set map
-    std::map<std::string, std::unique_ptr<BasisSet_t>> basisset_map_;
-    /// if true, add cusp correction to orbitals
-    bool cuspCorr;
-    /// Path to HDF5 Wavefunction
-    std::string h5_path;
-    /// Number of periodic Images for Orbital evaluation
-    TinyVector<int, 3> PBCImages;
-    /// Coordinates Super Twist
-    PosType SuperTwist;
-    /// Periodic Image Phase Factors. Correspond to the phase from the
-    /// PBCImages. Computed only once.
-    std::vector<T> PeriodicImagePhaseFactors;
-    /// Store Lattice parameters from HDF5 to use in PeriodicImagePhaseFactors
-    Tensor<double, 3> Lattice;
+  /// target ParticleSet
+  ParticleSetT<T>& targetPtcl;
+  /// source ParticleSet
+  ParticleSetT<T>& sourcePtcl;
+  /// localized basis set map
+  std::map<std::string, std::unique_ptr<BasisSet_t>> basisset_map_;
+  /// if true, add cusp correction to orbitals
+  bool cuspCorr;
+  /// Path to HDF5 Wavefunction
+  std::string h5_path;
+  /// Number of periodic Images for Orbital evaluation
+  TinyVector<int, 3> PBCImages;
+  /// Coordinates Super Twist
+  PosType SuperTwist;
+  /// Periodic Image Phase Factors. Correspond to the phase from the
+  /// PBCImages. Computed only once.
+  std::vector<T> PeriodicImagePhaseFactors;
+  /// Store Lattice parameters from HDF5 to use in PeriodicImagePhaseFactors
+  Tensor<double, 3> Lattice;
 
-    /// Enable cusp correction
-    bool doCuspCorrection;
+  /// Enable cusp correction. NOTE(review): cuspCorr above is documented similarly; confirm how the two differ
+  bool doCuspCorrection;
 
-    /** create basis set
+  /** create basis set
      *
      * Use ao_traits<T,I,J> to match (ROT)x(SH) combo
      */
-    template <int I, int J>
-    BasisSet_t*
-    createBasisSet(xmlNodePtr cur);
-    template <int I, int J>
-    BasisSet_t*
-    createBasisSetH5();
+  template<int I, int J>
+  BasisSet_t* createBasisSet(xmlNodePtr cur);
+  template<int I, int J>
+  BasisSet_t* createBasisSetH5();
 
-    // The following items were previously in SPOSet
-    /// occupation number
-    Vector<RealType> Occ;
-    bool
-    loadMO(LCAOrbitalSetT<T>& spo, xmlNodePtr cur);
-    bool
-    putOccupation(LCAOrbitalSetT<T>& spo, xmlNodePtr occ_ptr);
-    bool
-    putFromXML(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr);
-    bool
-    putFromH5(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr);
-    bool
-    putPBCFromH5(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr);
-    // the dimensions of Ctemp are determined by the dataset on file
-    void
-    LoadFullCoefsFromH5(hdf_archive& hin, int setVal, PosType& SuperTwist,
-        Matrix<std::complex<RealType>>& Ctemp, bool MultiDet);
-    // the dimensions of Creal are determined by the dataset on file
-    void
-    LoadFullCoefsFromH5(hdf_archive& hin, int setVal, PosType& SuperTwist,
-        Matrix<RealType>& Creal, bool Multidet);
-    void
-    EvalPeriodicImagePhaseFactors(PosType SuperTwist,
-        std::vector<RealType>& LocPeriodicImagePhaseFactors);
-    void
-    EvalPeriodicImagePhaseFactors(PosType SuperTwist,
-        std::vector<std::complex<RealType>>& LocPeriodicImagePhaseFactors);
-    /** read matrix from h5 file
+  // The following items were previously in SPOSet
+  /// occupation number
+  Vector<RealType> Occ;
+  bool loadMO(LCAOrbitalSetT<T>& spo, xmlNodePtr cur);
+  bool putOccupation(LCAOrbitalSetT<T>& spo, xmlNodePtr occ_ptr);
+  bool putFromXML(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr);
+  bool putFromH5(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr);
+  bool putPBCFromH5(LCAOrbitalSetT<T>& spo, xmlNodePtr coeff_ptr);
+  // the dimensions of Ctemp are determined by the dataset on file
+  void LoadFullCoefsFromH5(hdf_archive& hin,
+                           int setVal,
+                           PosType& SuperTwist,
+                           Matrix<std::complex<RealType>>& Ctemp,
+                           bool MultiDet);
+  // the dimensions of Creal are determined by the dataset on file
+  void LoadFullCoefsFromH5(hdf_archive& hin, int setVal, PosType& SuperTwist, Matrix<RealType>& Creal, bool Multidet);
+  void EvalPeriodicImagePhaseFactors(PosType SuperTwist, std::vector<RealType>& LocPeriodicImagePhaseFactors);
+  void EvalPeriodicImagePhaseFactors(PosType SuperTwist,
+                                     std::vector<std::complex<RealType>>& LocPeriodicImagePhaseFactors);
+  /** read matrix from h5 file
      * \param[in] hin: hdf5 arhive to be read from
      * \param setname: where to read from in hdf5 archive
      * \param[out] Creal: matrix read from h5
      *
      * added in header to allow use from derived class LCAOSpinorBuilder as well
      */
-    void
-    readRealMatrixFromH5(hdf_archive& hin, const std::string& setname,
-        Matrix<RealType>& Creal) const;
+  void readRealMatrixFromH5(hdf_archive& hin, const std::string& setname, Matrix<RealType>& Creal) const;
 
 private:
-    /// enable cusp correction
-    std::unique_ptr<SPOSetT<T>>
-    createWithCuspCorrection(xmlNodePtr cur, const std::string& spo_name,
-        std::string cusp_file, std::unique_ptr<BasisSet_t>&& myBasisSet);
-    /// load a basis set from XML input
-    std::unique_ptr<BasisSet_t>
-    loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent);
-    /// load a basis set from h5 file
-    std::unique_ptr<BasisSet_t>
-    loadBasisSetFromH5(xmlNodePtr parent);
-    /// determine radial orbital type based on "keyword" and "transform"
-    /// attributes
-    int
-    determineRadialOrbType(xmlNodePtr cur) const;
+  /// create an SPOSet with cusp correction; myBasisSet is received as an rvalue unique_ptr
+  std::unique_ptr<SPOSetT<T>> createWithCuspCorrection(xmlNodePtr cur,
+                                                       const std::string& spo_name,
+                                                       std::string cusp_file,
+                                                       std::unique_ptr<BasisSet_t>&& myBasisSet);
+  /// load a basis set from XML input; the result is returned as a unique_ptr
+  std::unique_ptr<BasisSet_t> loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent);
+  /// load a basis set from h5 file; the result is returned as a unique_ptr
+  std::unique_ptr<BasisSet_t> loadBasisSetFromH5(xmlNodePtr parent);
+  /// determine radial orbital type based on "keyword" and "transform"
+  /// attributes of the given XML node; the type is returned as an int
+  int determineRadialOrbType(xmlNodePtr cur) const;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
index 81f6b64da41..4da67b60332 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.cpp
@@ -18,941 +18,972 @@
 namespace qmcplusplus
 {
 
-template <class T>
+template<class T>
 struct LCAOrbitalSetT<T>::LCAOMultiWalkerMem : public Resource
 {
-    LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT")
-    {
-    }
-    LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem()
-    {
-    }
+  LCAOMultiWalkerMem() : Resource("LCAOrbitalSetT") {} // registers under the resource name "LCAOrbitalSetT"
+  LCAOMultiWalkerMem(const LCAOMultiWalkerMem&) : LCAOMultiWalkerMem() {} // delegates to the default ctor; members are not copied
 
-    std::unique_ptr<Resource>
-    makeClone() const override
-    {
-        return std::make_unique<LCAOMultiWalkerMem>(*this);
-    }
+  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<LCAOMultiWalkerMem>(*this); }
 
-    OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO]
-    OffloadMWVGLArray basis_mw; // [5][NW][NumAO]
-    OffloadMWVArray phi_v; // [NW][NumMO]
-    OffloadMWVArray basis_v_mw; // [NW][NumMO]
+  OffloadMWVGLArray phi_vgl_v; // [5][NW][NumMO]
+  OffloadMWVGLArray basis_mw;  // [5][NW][NumAO]
+  OffloadMWVArray phi_v;       // [NW][NumMO]
+  OffloadMWVArray basis_v_mw;  // [NW][NumMO] NOTE(review): basis_mw is [..][NumAO]; confirm NumMO is intended here
 };
 
-template <class T>
-LCAOrbitalSetT<T>::LCAOrbitalSetT(
-    const std::string& my_name, std::unique_ptr<basis_type>&& bs) :
-    SPOSetT<T>(my_name),
-    BasisSetSize(bs ? bs->getBasisSetSize() : 0),
-    Identity(true),
-    basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)),
-    mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine))
-{
-    if (!bs)
-        throw std::runtime_error(
-            "LCAOrbitalSetT cannot take nullptr as its  basis set!");
-    myBasisSet = std::move(bs);
-    Temp.resize(BasisSetSize);
-    Temph.resize(BasisSetSize);
-    Tempgh.resize(BasisSetSize);
-    this->OrbitalSetSize = BasisSetSize;
-    LCAOrbitalSetT<T>::checkObject();
-}
-
-template <class T>
-LCAOrbitalSetT<T>::LCAOrbitalSetT(const LCAOrbitalSetT<T>& in) :
-    SPOSetT<T>(in),
-    myBasisSet(in.myBasisSet->makeClone()),
-    C(in.C),
-    BasisSetSize(in.BasisSetSize),
-    C_copy(in.C_copy),
-    Identity(in.Identity),
-    basis_timer_(in.basis_timer_),
-    mo_timer_(in.mo_timer_)
-{
-    Temp.resize(BasisSetSize);
-    Temph.resize(BasisSetSize);
-    Tempgh.resize(BasisSetSize);
-    if (!in.Identity) {
-        Tempv.resize(this->OrbitalSetSize);
-        Temphv.resize(this->OrbitalSetSize);
-        Tempghv.resize(this->OrbitalSetSize);
-    }
-    LCAOrbitalSetT<T>::checkObject();
+template<class T>
+LCAOrbitalSetT<T>::LCAOrbitalSetT(const std::string& my_name, std::unique_ptr<basis_type>&& bs)
+    : SPOSetT<T>(my_name),
+      BasisSetSize(bs ? bs->getBasisSetSize() : 0), // null-safe here; a null bs is rejected in the body below
+      Identity(true), // starts without a coefficient matrix C
+      basis_timer_(createGlobalTimer("LCAOrbitalSetT::Basis", timer_level_fine)),
+      mo_timer_(createGlobalTimer("LCAOrbitalSetT::MO", timer_level_fine))
+{ // Builds an Identity orbital set (OrbitalSetSize == BasisSetSize) from a non-null basis set.
+  if (!bs)
+    throw std::runtime_error("LCAOrbitalSetT cannot take nullptr as its  basis set!");
+  myBasisSet = std::move(bs); // take over the basis set handed in by the caller
+  Temp.resize(BasisSetSize); // work buffers sized by BasisSetSize
+  Temph.resize(BasisSetSize);
+  Tempgh.resize(BasisSetSize);
+  this->OrbitalSetSize = BasisSetSize; // checkObject() requires equal sizes while Identity is true
+  LCAOrbitalSetT<T>::checkObject(); // throws std::runtime_error if the invariants do not hold
+}
+
+template<class T>
+LCAOrbitalSetT<T>::LCAOrbitalSetT(const LCAOrbitalSetT<T>& in)
+    : SPOSetT<T>(in),
+      myBasisSet(in.myBasisSet->makeClone()), // the basis set is cloned rather than shared
+      C(in.C), // NOTE(review): C is assigned via std::make_shared elsewhere, so this presumably shares the matrix
+      BasisSetSize(in.BasisSetSize),
+      C_copy(in.C_copy),
+      Identity(in.Identity),
+      basis_timer_(in.basis_timer_),
+      mo_timer_(in.mo_timer_)
+{ // Copy constructor; makeClone() goes through this.
+  Temp.resize(BasisSetSize); // work buffers are re-sized here, not copied from `in`
+  Temph.resize(BasisSetSize);
+  Tempgh.resize(BasisSetSize);
+  if (!in.Identity)
+  { // orbital-sized buffers are only set up when `in` is not an Identity set
+    Tempv.resize(this->OrbitalSetSize);
+    Temphv.resize(this->OrbitalSetSize);
+    Tempghv.resize(this->OrbitalSetSize);
+  }
+  LCAOrbitalSetT<T>::checkObject(); // validate the copied state
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::setOrbitalSetSize(int norbs)
+template<class T>
+void LCAOrbitalSetT<T>::setOrbitalSetSize(int norbs)
 {
-    if (C)
-        throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot "
-                                 "reset existing MO coefficients");
+  if (C)
+    throw std::runtime_error("LCAOrbitalSetT::setOrbitalSetSize cannot "
+                             "reset existing MO coefficients");
 
-    Identity = false;
-    this->OrbitalSetSize = norbs;
-    C = std::make_shared<ValueMatrix>(this->OrbitalSetSize, BasisSetSize);
-    Tempv.resize(this->OrbitalSetSize);
-    Temphv.resize(this->OrbitalSetSize);
-    Tempghv.resize(this->OrbitalSetSize);
-    LCAOrbitalSetT<T>::checkObject();
+  Identity             = false; // from here on checkObject() requires a non-null C
+  this->OrbitalSetSize = norbs;
+  C                    = std::make_shared<ValueMatrix>(this->OrbitalSetSize, BasisSetSize); // (rows, cols)
+  Tempv.resize(this->OrbitalSetSize); // orbital-sized work buffers
+  Temphv.resize(this->OrbitalSetSize);
+  Tempghv.resize(this->OrbitalSetSize);
+  LCAOrbitalSetT<T>::checkObject(); // re-validate the Identity / C / size invariants
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::checkObject() const
+template<class T>
+void LCAOrbitalSetT<T>::checkObject() const
 {
-    if (Identity) {
-        if (this->OrbitalSetSize != BasisSetSize)
-            throw std::runtime_error(
-                "LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize "
-                "must be equal if Identity = true!");
-        if (C)
-            throw std::runtime_error("LCAOrbitalSetT::checkObject C should be "
-                                     "nullptr if Identity = true!");
-    }
-    else {
-        if (!C)
-            throw std::runtime_error("LCAOrbitalSetT::checkObject C should not "
-                                     "be nullptr if Identity = false!");
-        if (this->OrbitalSetSize != C->rows())
-            throw std::runtime_error("LCAOrbitalSetT::checkObject C rows "
-                                     "doesn't match OrbitalSetSize.");
-        if (BasisSetSize != C->cols())
-            throw std::runtime_error("LCAOrbitalSetT::checkObject C columns "
-                                     "doesn't match BasisSetSize.");
-    }
+  if (Identity)
+  { // Identity: the two sizes must agree and no coefficient matrix may be present
+    if (this->OrbitalSetSize != BasisSetSize)
+      throw std::runtime_error("LCAOrbitalSetT::checkObject OrbitalSetSize and BasisSetSize "
+                               "must be equal if Identity = true!");
+    if (C)
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C should be "
+                               "nullptr if Identity = true!");
+  }
+  else
+  { // non-Identity: C must exist with OrbitalSetSize rows and BasisSetSize columns
+    if (!C)
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C should not "
+                               "be nullptr if Identity = false!");
+    if (this->OrbitalSetSize != C->rows())
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C rows "
+                               "doesn't match OrbitalSetSize.");
+    if (BasisSetSize != C->cols())
+      throw std::runtime_error("LCAOrbitalSetT::checkObject C columns "
+                               "doesn't match BasisSetSize.");
+  }
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::createResource(ResourceCollection& collection) const
+template<class T>
+void LCAOrbitalSetT<T>::createResource(ResourceCollection& collection) const
 {
-    myBasisSet->createResource(collection);
-    auto resource_index =
-        collection.addResource(std::make_unique<LCAOMultiWalkerMem>());
+  myBasisSet->createResource(collection); // the basis set adds its own resources first
+  auto resource_index = collection.addResource(std::make_unique<LCAOMultiWalkerMem>()); // NOTE(review): never read
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::acquireResource(ResourceCollection& collection,
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template<class T>
+void LCAOrbitalSetT<T>::acquireResource(ResourceCollection& collection,
+                                        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-    assert(this == &spo_list.getLeader());
-    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-    spo_leader.myBasisSet->acquireResource(collection, extractBasisRefList(spo_list));
-    spo_leader.mw_mem_handle_ = collection.lendResource<LCAOMultiWalkerMem>();
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  spo_leader.myBasisSet->acquireResource(collection, extractBasisRefList(spo_list)); // basis set first
+  spo_leader.mw_mem_handle_ = collection.lendResource<LCAOMultiWalkerMem>(); // handle is kept on the leader
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::releaseResource(ResourceCollection& collection,
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template<class T>
+void LCAOrbitalSetT<T>::releaseResource(ResourceCollection& collection,
+                                        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-    assert(this == &spo_list.getLeader());
-    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-    spo_leader.myBasisSet->releaseResource(collection, extractBasisRefList(spo_list));
-    collection.takebackResource(spo_leader.mw_mem_handle_);
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  spo_leader.myBasisSet->releaseResource(collection, extractBasisRefList(spo_list)); // basis set first
+  collection.takebackResource(spo_leader.mw_mem_handle_); // hand the leader's LCAOMultiWalkerMem handle back
 }
 
-template <class T>
+template<class T>
 RefVectorWithLeader<typename LCAOrbitalSetT<T>::basis_type> LCAOrbitalSetT<T>::extractBasisRefList(
     const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-    RefVectorWithLeader<basis_type> basis_list(*spo_list.template getCastedLeader<LCAOrbitalSetT<T>>().myBasisSet);
-    basis_list.reserve(spo_list.size());
-    for (size_t iw = 0; iw < spo_list.size(); iw++)
-      basis_list.push_back(*spo_list.template getCastedElement<LCAOrbitalSetT<T>>(iw).myBasisSet);
-    return basis_list;
+  RefVectorWithLeader<basis_type> basis_list(*spo_list.template getCastedLeader<LCAOrbitalSetT<T>>().myBasisSet);
+  basis_list.reserve(spo_list.size()); // one basis reference per entry of spo_list
+  for (size_t iw = 0; iw < spo_list.size(); iw++)
+    basis_list.push_back(*spo_list.template getCastedElement<LCAOrbitalSetT<T>>(iw).myBasisSet);
+  return basis_list; // same order as spo_list
 }
 
-template <class T>
-std::unique_ptr<SPOSetT<T>>
-LCAOrbitalSetT<T>::makeClone() const
+template<class T>
+std::unique_ptr<SPOSetT<T>> LCAOrbitalSetT<T>::makeClone() const
 {
-    return std::make_unique<LCAOrbitalSetT<T>>(*this);
+  return std::make_unique<LCAOrbitalSetT<T>>(*this); // clone is built by the copy constructor
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluateValue(
-    const ParticleSetT<T>& P, int iat, ValueVector& psi)
+template<class T>
+void LCAOrbitalSetT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-    if (Identity) { // PAY ATTENTION TO COMPLEX
-        myBasisSet->evaluateV(P, iat, psi.data());
-    }
-    else {
-        Vector<T> vTemp(Temp.data(0), BasisSetSize);
-        this->myBasisSet->evaluateV(P, iat, vTemp.data());
-        assert(psi.size() <= this->OrbitalSetSize);
-        ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
-        MatrixOperators::product(C_partial_view, vTemp, psi);
-    }
+  if (Identity)
+  { // PAY ATTENTION TO COMPLEX
+    myBasisSet->evaluateV(P, iat, psi.data()); // the basis set writes directly into psi's storage
+  }
+  else
+  {
+    Vector<T> vTemp(Temp.data(0), BasisSetSize); // built on Temp's component 0; presumably a non-owning view
+    this->myBasisSet->evaluateV(P, iat, vTemp.data());
+    assert(psi.size() <= this->OrbitalSetSize); // psi may ask for fewer orbitals than C holds
+    ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); // psi.size() x BasisSetSize over C's data
+    MatrixOperators::product(C_partial_view, vTemp, psi); // presumably psi = C_partial_view * vTemp; confirm
+  }
 }
 
 /** Find a better place for other user classes, Matrix should be padded as well
  */
-template <typename T, unsigned D>
-static void
-Product_ABt(const VectorSoaContainer<T, D>& A, const Matrix<T>& B,
-    VectorSoaContainer<T, D>& C)
-{
-    constexpr char transa = 't';
-    constexpr char transb = 'n';
-    constexpr T zone(1);
-    constexpr T zero(0);
-    BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(),
-        A.data(), A.capacity(), zero, C.data(), C.capacity());
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi,
-    GradVector& dpsi, ValueVector& d2psi) const
-{
-    const size_t output_size = psi.size();
-    std::copy_n(temp.data(0), output_size, psi.data());
-    const T* restrict gx = temp.data(1);
-    const T* restrict gy = temp.data(2);
-    const T* restrict gz = temp.data(3);
-    for (size_t j = 0; j < output_size; j++) {
-        dpsi[j][0] = gx[j];
-        dpsi[j][1] = gy[j];
-        dpsi[j][2] = gz[j];
-    }
-    std::copy_n(temp.data(4), output_size, d2psi.data());
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi,
-    GradVector& dpsi, HessVector& d2psi) const
-{
-    const size_t output_size = psi.size();
-    std::copy_n(temp.data(0), output_size, psi.data());
-    const T* restrict gx = temp.data(1);
-    const T* restrict gy = temp.data(2);
-    const T* restrict gz = temp.data(3);
-    const T* restrict hxx = temp.data(4);
-    const T* restrict hxy = temp.data(5);
-    const T* restrict hxz = temp.data(6);
-    const T* restrict hyy = temp.data(7);
-    const T* restrict hyz = temp.data(8);
-    const T* restrict hzz = temp.data(9);
-
-    for (size_t j = 0; j < output_size; j++) {
-        dpsi[j][0] = gx[j];
-        dpsi[j][1] = gy[j];
-        dpsi[j][2] = gz[j];
-
-        d2psi[j](0, 0) = hxx[j];
-        d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
-        d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
-        d2psi[j](1, 1) = hyy[j];
-        d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
-        d2psi[j](2, 2) = hzz[j];
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp, int i,
-    ValueMatrix& psi, GradMatrix& dpsi, HessMatrix& d2psi,
-    GGGMatrix& dghpsi) const
-{
-    const size_t output_size = psi.cols();
-    std::copy_n(temp.data(0), output_size, psi[i]);
-    const T* restrict gx = temp.data(1);
-    const T* restrict gy = temp.data(2);
-    const T* restrict gz = temp.data(3);
-    const T* restrict hxx = temp.data(4);
-    const T* restrict hxy = temp.data(5);
-    const T* restrict hxz = temp.data(6);
-    const T* restrict hyy = temp.data(7);
-    const T* restrict hyz = temp.data(8);
-    const T* restrict hzz = temp.data(9);
-    const T* restrict gh_xxx = temp.data(10);
-    const T* restrict gh_xxy = temp.data(11);
-    const T* restrict gh_xxz = temp.data(12);
-    const T* restrict gh_xyy = temp.data(13);
-    const T* restrict gh_xyz = temp.data(14);
-    const T* restrict gh_xzz = temp.data(15);
-    const T* restrict gh_yyy = temp.data(16);
-    const T* restrict gh_yyz = temp.data(17);
-    const T* restrict gh_yzz = temp.data(18);
-    const T* restrict gh_zzz = temp.data(19);
-
-    for (size_t j = 0; j < output_size; j++) {
-        dpsi[i][j][0] = gx[j];
-        dpsi[i][j][1] = gy[j];
-        dpsi[i][j][2] = gz[j];
-
-        d2psi[i][j](0, 0) = hxx[j];
-        d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
-        d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
-        d2psi[i][j](1, 1) = hyy[j];
-        d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
-        d2psi[i][j](2, 2) = hzz[j];
-
-        dghpsi[i][j][0](0, 0) = gh_xxx[j]; // x|xx
-        dghpsi[i][j][0](0, 1) = gh_xxy[j]; // x|xy
-        dghpsi[i][j][0](0, 2) = gh_xxz[j]; // x|xz
-        dghpsi[i][j][0](1, 0) = gh_xxy[j]; // x|yx = xxy
-        dghpsi[i][j][0](1, 1) = gh_xyy[j]; // x|yy
-        dghpsi[i][j][0](1, 2) = gh_xyz[j]; // x|yz
-        dghpsi[i][j][0](2, 0) = gh_xxz[j]; // x|zx = xxz
-        dghpsi[i][j][0](2, 1) = gh_xyz[j]; // x|zy = xyz
-        dghpsi[i][j][0](2, 2) = gh_xzz[j]; // x|zz
-
-        dghpsi[i][j][1](0, 0) = gh_xxy[j]; // y|xx = xxy
-        dghpsi[i][j][1](0, 1) = gh_xyy[j]; // y|xy = xyy
-        dghpsi[i][j][1](0, 2) = gh_xyz[j]; // y|xz = xyz
-        dghpsi[i][j][1](1, 0) = gh_xyy[j]; // y|yx = xyy
-        dghpsi[i][j][1](1, 1) = gh_yyy[j]; // y|yy
-        dghpsi[i][j][1](1, 2) = gh_yyz[j]; // y|yz
-        dghpsi[i][j][1](2, 0) = gh_xyz[j]; // y|zx = xyz
-        dghpsi[i][j][1](2, 1) = gh_yyz[j]; // y|zy = yyz
-        dghpsi[i][j][1](2, 2) = gh_yzz[j]; // y|zz
-
-        dghpsi[i][j][2](0, 0) = gh_xxz[j]; // z|xx = xxz
-        dghpsi[i][j][2](0, 1) = gh_xyz[j]; // z|xy = xyz
-        dghpsi[i][j][2](0, 2) = gh_xzz[j]; // z|xz = xzz
-        dghpsi[i][j][2](1, 0) = gh_xyz[j]; // z|yx = xyz
-        dghpsi[i][j][2](1, 1) = gh_yyz[j]; // z|yy = yyz
-        dghpsi[i][j][2](1, 2) = gh_yzz[j]; // z|yz = yzz
-        dghpsi[i][j][2](2, 0) = gh_xzz[j]; // z|zx = xzz
-        dghpsi[i][j][2](2, 1) = gh_yzz[j]; // z|zy = yzz
-        dghpsi[i][j][2](2, 2) = gh_zzz[j]; // z|zz
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp, ValueVector& psi,
-    GradVector& dpsi, HessVector& d2psi, GGGVector& dghpsi) const
-{
-    const size_t output_size = psi.size();
-    std::copy_n(temp.data(0), output_size, psi.data());
-    const T* restrict gx = temp.data(1);
-    const T* restrict gy = temp.data(2);
-    const T* restrict gz = temp.data(3);
-    const T* restrict hxx = temp.data(4);
-    const T* restrict hxy = temp.data(5);
-    const T* restrict hxz = temp.data(6);
-    const T* restrict hyy = temp.data(7);
-    const T* restrict hyz = temp.data(8);
-    const T* restrict hzz = temp.data(9);
-    const T* restrict gh_xxx = temp.data(10);
-    const T* restrict gh_xxy = temp.data(11);
-    const T* restrict gh_xxz = temp.data(12);
-    const T* restrict gh_xyy = temp.data(13);
-    const T* restrict gh_xyz = temp.data(14);
-    const T* restrict gh_xzz = temp.data(15);
-    const T* restrict gh_yyy = temp.data(16);
-    const T* restrict gh_yyz = temp.data(17);
-    const T* restrict gh_yzz = temp.data(18);
-    const T* restrict gh_zzz = temp.data(19);
-
-    for (size_t j = 0; j < output_size; j++) {
-        dpsi[j][0] = gx[j];
-        dpsi[j][1] = gy[j];
-        dpsi[j][2] = gz[j];
-
-        d2psi[j](0, 0) = hxx[j];
-        d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j];
-        d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
-        d2psi[j](1, 1) = hyy[j];
-        d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
-        d2psi[j](2, 2) = hzz[j];
-
-        dghpsi[j][0](0, 0) = gh_xxx[j]; // x|xx
-        dghpsi[j][0](0, 1) = gh_xxy[j]; // x|xy
-        dghpsi[j][0](0, 2) = gh_xxz[j]; // x|xz
-        dghpsi[j][0](1, 0) = gh_xxy[j]; // x|yx = xxy
-        dghpsi[j][0](1, 1) = gh_xyy[j]; // x|yy
-        dghpsi[j][0](1, 2) = gh_xyz[j]; // x|yz
-        dghpsi[j][0](2, 0) = gh_xxz[j]; // x|zx = xxz
-        dghpsi[j][0](2, 1) = gh_xyz[j]; // x|zy = xyz
-        dghpsi[j][0](2, 2) = gh_xzz[j]; // x|zz
-
-        dghpsi[j][1](0, 0) = gh_xxy[j]; // y|xx = xxy
-        dghpsi[j][1](0, 1) = gh_xyy[j]; // y|xy = xyy
-        dghpsi[j][1](0, 2) = gh_xyz[j]; // y|xz = xyz
-        dghpsi[j][1](1, 0) = gh_xyy[j]; // y|yx = xyy
-        dghpsi[j][1](1, 1) = gh_yyy[j]; // y|yy
-        dghpsi[j][1](1, 2) = gh_yyz[j]; // y|yz
-        dghpsi[j][1](2, 0) = gh_xyz[j]; // y|zx = xyz
-        dghpsi[j][1](2, 1) = gh_xyy[j]; // y|xy = xyy
-        dghpsi[j][1](2, 2) = gh_yzz[j]; // y|zz
-
-        dghpsi[j][2](0, 0) = gh_xzz[j]; // z|xx = xzz
-        dghpsi[j][2](0, 1) = gh_xyz[j]; // z|xy = xyz
-        dghpsi[j][2](0, 2) = gh_xzz[j]; // z|xz = xzz
-        dghpsi[j][2](1, 0) = gh_xyz[j]; // z|yx = xyz
-        dghpsi[j][2](1, 1) = gh_yyz[j]; // z|yy = yyz
-        dghpsi[j][2](1, 2) = gh_yzz[j]; // z|yz = yzz
-        dghpsi[j][2](2, 0) = gh_xzz[j]; // z|zx = xzz
-        dghpsi[j][2](2, 1) = gh_yzz[j]; // z|zy = yzz
-        dghpsi[j][2](2, 2) = gh_zzz[j]; // z|zz
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_ionderiv_v_row_impl(
-    const vgl_type& temp, GradVector& dpsi) const
-{
-    const size_t output_size = dpsi.size();
-    const T* restrict gx = temp.data(1);
-    const T* restrict gy = temp.data(2);
-    const T* restrict gz = temp.data(3);
-
-    for (size_t j = 0; j < output_size; j++) {
-        // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property
-        // that
-        //  for an atomic center, the ion gradient is the negative of the
-        //  elecron gradient. Hence minus signs for each of these.
-        dpsi[j][0] = -gx[j];
-        dpsi[j][1] = -gy[j];
-        dpsi[j][2] = -gz[j];
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
-    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
-{
-    // TAKE CARE OF IDENTITY
+template<typename T, unsigned D>
+static void Product_ABt(const VectorSoaContainer<T, D>& A, const Matrix<T>& B, VectorSoaContainer<T, D>& C)
+{ // One gemm call covering all D components of A; the result lands in C. Assumes standard GEMM argument order.
+  constexpr char transa = 't'; // op flag passed for the first gemm operand (B.data())
+  constexpr char transb = 'n'; // op flag passed for the second gemm operand (A.data())
+  constexpr T zone(1); // scale factor passed ahead of the operands
+  constexpr T zero(0); // scale factor passed ahead of C
+  BLAS::gemm(transa, transb, B.rows(), D, B.cols(), zone, B.data(), B.cols(), A.data(), A.capacity(), zero, C.data(),
+             C.capacity()); // capacity() follows A.data() and C.data(), i.e. in the leading-dimension slots
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp,
+                                          ValueVector& psi,
+                                          GradVector& dpsi,
+                                          ValueVector& d2psi) const
+{ // Unpacks components 0..4 of temp into psi, dpsi and d2psi.
+  const size_t output_size = psi.size(); // psi's length decides how many entries are unpacked
+  std::copy_n(temp.data(0), output_size, psi.data()); // component 0 -> psi
+  const T* restrict gx = temp.data(1); // components 1..3 -> dpsi[j][0..2]
+  const T* restrict gy = temp.data(2);
+  const T* restrict gz = temp.data(3);
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[j][0] = gx[j];
+    dpsi[j][1] = gy[j];
+    dpsi[j][2] = gz[j];
+  }
+  std::copy_n(temp.data(4), output_size, d2psi.data()); // component 4 -> d2psi
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp,
+                                          ValueVector& psi,
+                                          GradVector& dpsi,
+                                          HessVector& d2psi) const
+{ // Unpacks components 0..9 of temp into psi, dpsi and the 3x3 entries of d2psi.
+  const size_t output_size = psi.size(); // psi's length decides how many entries are unpacked
+  std::copy_n(temp.data(0), output_size, psi.data()); // component 0 -> psi
+  const T* restrict gx  = temp.data(1); // components 1..3 -> dpsi[j][0..2]
+  const T* restrict gy  = temp.data(2);
+  const T* restrict gz  = temp.data(3);
+  const T* restrict hxx = temp.data(4); // components 4..9 -> six distinct entries of d2psi[j]
+  const T* restrict hxy = temp.data(5);
+  const T* restrict hxz = temp.data(6);
+  const T* restrict hyy = temp.data(7);
+  const T* restrict hyz = temp.data(8);
+  const T* restrict hzz = temp.data(9);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[j][0] = gx[j];
+    dpsi[j][1] = gy[j];
+    dpsi[j][2] = gz[j];
+
+    d2psi[j](0, 0) = hxx[j];
+    d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; // off-diagonal pairs are written symmetrically
+    d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
+    d2psi[j](1, 1)                  = hyy[j];
+    d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
+    d2psi[j](2, 2)                  = hzz[j];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp,
+                                            int i,
+                                            ValueMatrix& psi,
+                                            GradMatrix& dpsi,
+                                            HessMatrix& d2psi,
+                                            GGGMatrix& dghpsi) const
+{ // Unpacks components 0..19 of temp into row i of psi, dpsi, d2psi and dghpsi.
+  const size_t output_size = psi.cols(); // psi's column count decides how many entries are unpacked
+  std::copy_n(temp.data(0), output_size, psi[i]); // component 0 -> psi row i
+  const T* restrict gx     = temp.data(1); // components 1..3 -> dpsi[i][j][0..2]
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+  const T* restrict hxx    = temp.data(4); // components 4..9 -> six distinct entries of d2psi[i][j]
+  const T* restrict hxy    = temp.data(5);
+  const T* restrict hxz    = temp.data(6);
+  const T* restrict hyy    = temp.data(7);
+  const T* restrict hyz    = temp.data(8);
+  const T* restrict hzz    = temp.data(9);
+  const T* restrict gh_xxx = temp.data(10); // components 10..19 -> ten distinct entries of dghpsi[i][j]
+  const T* restrict gh_xxy = temp.data(11);
+  const T* restrict gh_xxz = temp.data(12);
+  const T* restrict gh_xyy = temp.data(13);
+  const T* restrict gh_xyz = temp.data(14);
+  const T* restrict gh_xzz = temp.data(15);
+  const T* restrict gh_yyy = temp.data(16);
+  const T* restrict gh_yyz = temp.data(17);
+  const T* restrict gh_yzz = temp.data(18);
+  const T* restrict gh_zzz = temp.data(19);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[i][j][0] = gx[j];
+    dpsi[i][j][1] = gy[j];
+    dpsi[i][j][2] = gz[j];
+
+    d2psi[i][j](0, 0) = hxx[j];
+    d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j]; // off-diagonal pairs are written symmetrically
+    d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
+    d2psi[i][j](1, 1)                     = hyy[j];
+    d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
+    d2psi[i][j](2, 2)                     = hzz[j];
+
+    dghpsi[i][j][0](0, 0) = gh_xxx[j]; // x|xx
+    dghpsi[i][j][0](0, 1) = gh_xxy[j]; // x|xy
+    dghpsi[i][j][0](0, 2) = gh_xxz[j]; // x|xz
+    dghpsi[i][j][0](1, 0) = gh_xxy[j]; // x|yx = xxy
+    dghpsi[i][j][0](1, 1) = gh_xyy[j]; // x|yy
+    dghpsi[i][j][0](1, 2) = gh_xyz[j]; // x|yz
+    dghpsi[i][j][0](2, 0) = gh_xxz[j]; // x|zx = xxz
+    dghpsi[i][j][0](2, 1) = gh_xyz[j]; // x|zy = xyz
+    dghpsi[i][j][0](2, 2) = gh_xzz[j]; // x|zz
+
+    dghpsi[i][j][1](0, 0) = gh_xxy[j]; // y|xx = xxy
+    dghpsi[i][j][1](0, 1) = gh_xyy[j]; // y|xy = xyy
+    dghpsi[i][j][1](0, 2) = gh_xyz[j]; // y|xz = xyz
+    dghpsi[i][j][1](1, 0) = gh_xyy[j]; // y|yx = xyy
+    dghpsi[i][j][1](1, 1) = gh_yyy[j]; // y|yy
+    dghpsi[i][j][1](1, 2) = gh_yyz[j]; // y|yz
+    dghpsi[i][j][1](2, 0) = gh_xyz[j]; // y|zx = xyz
+    dghpsi[i][j][1](2, 1) = gh_yyz[j]; // y|zy = yyz
+    dghpsi[i][j][1](2, 2) = gh_yzz[j]; // y|zz
+
+    dghpsi[i][j][2](0, 0) = gh_xxz[j]; // z|xx = xxz
+    dghpsi[i][j][2](0, 1) = gh_xyz[j]; // z|xy = xyz
+    dghpsi[i][j][2](0, 2) = gh_xzz[j]; // z|xz = xzz
+    dghpsi[i][j][2](1, 0) = gh_xyz[j]; // z|yx = xyz
+    dghpsi[i][j][2](1, 1) = gh_yyz[j]; // z|yy = yyz
+    dghpsi[i][j][2](1, 2) = gh_yzz[j]; // z|yz = yzz
+    dghpsi[i][j][2](2, 0) = gh_xzz[j]; // z|zx = xzz
+    dghpsi[i][j][2](2, 1) = gh_yzz[j]; // z|zy = yzz
+    dghpsi[i][j][2](2, 2) = gh_zzz[j]; // z|zz
+  }
+}
+
+template<class T> // Unpacks one 20-component buffer into the vectors psi, dpsi, d2psi and dghpsi.
+void LCAOrbitalSetT<T>::evaluate_vghgh_impl(const vghgh_type& temp,
+                                            ValueVector& psi,
+                                            GradVector& dpsi,
+                                            HessVector& d2psi,
+                                            GGGVector& dghpsi) const
+{
+  const size_t output_size = psi.size(); // entries written to every output vector
+  std::copy_n(temp.data(0), output_size, psi.data()); // component 0: values
+  const T* restrict gx     = temp.data(1); // components 1-3: first derivatives
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+  const T* restrict hxx    = temp.data(4); // components 4-9: second derivatives (xx, xy, xz, yy, yz, zz)
+  const T* restrict hxy    = temp.data(5);
+  const T* restrict hxz    = temp.data(6);
+  const T* restrict hyy    = temp.data(7);
+  const T* restrict hyz    = temp.data(8);
+  const T* restrict hzz    = temp.data(9);
+  const T* restrict gh_xxx = temp.data(10); // components 10-19: third derivatives, one per unordered index triple
+  const T* restrict gh_xxy = temp.data(11);
+  const T* restrict gh_xxz = temp.data(12);
+  const T* restrict gh_xyy = temp.data(13);
+  const T* restrict gh_xyz = temp.data(14);
+  const T* restrict gh_xzz = temp.data(15);
+  const T* restrict gh_yyy = temp.data(16);
+  const T* restrict gh_yyz = temp.data(17);
+  const T* restrict gh_yzz = temp.data(18);
+  const T* restrict gh_zzz = temp.data(19);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[j][0] = gx[j];
+    dpsi[j][1] = gy[j];
+    dpsi[j][2] = gz[j];
+
+    d2psi[j](0, 0) = hxx[j];
+    d2psi[j](0, 1) = d2psi[j](1, 0) = hxy[j]; // each off-diagonal value is stored at (r, c) and (c, r)
+    d2psi[j](0, 2) = d2psi[j](2, 0) = hxz[j];
+    d2psi[j](1, 1)                  = hyy[j];
+    d2psi[j](2, 1) = d2psi[j](1, 2) = hyz[j];
+    d2psi[j](2, 2)                  = hzz[j];
+
+    dghpsi[j][0](0, 0) = gh_xxx[j]; // x|xx
+    dghpsi[j][0](0, 1) = gh_xxy[j]; // x|xy
+    dghpsi[j][0](0, 2) = gh_xxz[j]; // x|xz
+    dghpsi[j][0](1, 0) = gh_xxy[j]; // x|yx = xxy
+    dghpsi[j][0](1, 1) = gh_xyy[j]; // x|yy
+    dghpsi[j][0](1, 2) = gh_xyz[j]; // x|yz
+    dghpsi[j][0](2, 0) = gh_xxz[j]; // x|zx = xxz
+    dghpsi[j][0](2, 1) = gh_xyz[j]; // x|zy = xyz
+    dghpsi[j][0](2, 2) = gh_xzz[j]; // x|zz
+
+    dghpsi[j][1](0, 0) = gh_xxy[j]; // y|xx = xxy
+    dghpsi[j][1](0, 1) = gh_xyy[j]; // y|xy = xyy
+    dghpsi[j][1](0, 2) = gh_xyz[j]; // y|xz = xyz
+    dghpsi[j][1](1, 0) = gh_xyy[j]; // y|yx = xyy
+    dghpsi[j][1](1, 1) = gh_yyy[j]; // y|yy
+    dghpsi[j][1](1, 2) = gh_yyz[j]; // y|yz
+    dghpsi[j][1](2, 0) = gh_xyz[j]; // y|zx = xyz
+    dghpsi[j][1](2, 1) = gh_yyz[j]; // y|zy = yyz (was gh_xyy; must equal the (1, 2) entry above)
+    dghpsi[j][1](2, 2) = gh_yzz[j]; // y|zz
+
+    dghpsi[j][2](0, 0) = gh_xxz[j]; // z|xx = xxz (was gh_xzz; must equal dghpsi[j][0](0, 2))
+    dghpsi[j][2](0, 1) = gh_xyz[j]; // z|xy = xyz
+    dghpsi[j][2](0, 2) = gh_xzz[j]; // z|xz = xzz
+    dghpsi[j][2](1, 0) = gh_xyz[j]; // z|yx = xyz
+    dghpsi[j][2](1, 1) = gh_yyz[j]; // z|yy = yyz
+    dghpsi[j][2](1, 2) = gh_yzz[j]; // z|yz = yzz
+    dghpsi[j][2](2, 0) = gh_xzz[j]; // z|zx = xzz
+    dghpsi[j][2](2, 1) = gh_yzz[j]; // z|zy = yzz
+    dghpsi[j][2](2, 2) = gh_zzz[j]; // z|zz
+  }
+}
+
+template<class T> // Writes the negated first-derivative components of temp into dpsi.
+void LCAOrbitalSetT<T>::evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dpsi) const
+{
+  const size_t output_size = dpsi.size(); // one gradient is written per entry of dpsi
+  const T* restrict gx     = temp.data(1); // components 1-3 of temp: x, y, z first derivatives
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    // As mentioned in SoaLocalizedBasisSet, LCAOs have a nice property:
+    // for an atomic center, the ion gradient is the negative of the
+    // electron gradient. Hence the minus sign on each component below.
+    // Component 0 of temp is not read by this function.
+    dpsi[j][0] = -gx[j];
+    dpsi[j][1] = -gy[j];
+    dpsi[j][2] = -gz[j];
+  }
+}
+
+template<class T> // Fills psi, dpsi and d2psi for particle iat of P.
+void LCAOrbitalSetT<T>::evaluateVGL(const ParticleSetT<T>& P,
+                                    int iat,
+                                    ValueVector& psi,
+                                    GradVector& dpsi,
+                                    ValueVector& d2psi)
+{
+  // The basis set is always evaluated into Temp; Identity only decides whether C is applied afterwards.
+  {
+    ScopedTimer local(basis_timer_); // the enclosing braces end this timer object's lifetime after the call below
+    myBasisSet->evaluateVGL(P, iat, Temp);
+  }
+
+  if (Identity)
+    evaluate_vgl_impl(Temp, psi, dpsi, d2psi); // basis output is unpacked directly
+  else
+  {
+    assert(psi.size() <= this->OrbitalSetSize); // cannot request more orbitals than the set holds
     {
-        ScopedTimer local(basis_timer_);
-        myBasisSet->evaluateVGL(P, iat, Temp);
-    }
-
-    if (Identity)
-        evaluate_vgl_impl(Temp, psi, dpsi, d2psi);
-    else {
-        assert(psi.size() <= this->OrbitalSetSize);
-        {
-            ScopedTimer local(mo_timer_);
-            ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
-            Product_ABt(Temp, C_partial_view, Tempv);
-        }
-        evaluate_vgl_impl(Tempv, psi, dpsi, d2psi);
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::mw_evaluateVGL(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const RefVector<ValueVector>& psi_v_list,
-    const RefVector<GradVector>& dpsi_v_list,
-    const RefVector<ValueVector>& d2psi_v_list) const
-{
-    assert(this == &spo_list.getLeader());
-    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-    auto& phi_vgl_v = spo_leader.mw_mem_handle_.getResource().phi_vgl_v;
-
-    phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize);
-    mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
-
-    const size_t nw = phi_vgl_v.size(1);
-
-    // TODO: make this cleaner?
-    for (int iw = 0; iw < nw; iw++) {
-        const size_t output_size = psi_v_list[iw].get().size();
-        std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size,
-            psi_v_list[iw].get().data());
-        std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size,
-            d2psi_v_list[iw].get().data());
-        // grads are [dim, walker, orb] in phi_vgl_v
-        //           [walker][orb, dim] in dpsi_v_list
-        for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
-            BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1,
-                &dpsi_v_list[iw].get().data()[0][idim], QMCTraits::DIM);
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::mw_evaluateVGLImplGEMM(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    OffloadMWVGLArray& phi_vgl_v) const
-{
-    assert(this == &spo_list.getLeader());
-    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-    auto& basis_mw = spo_leader.mw_mem_handle_.getResource().basis_mw;
-    basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize);
-
+      ScopedTimer local(mo_timer_);
+      ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); // psi.size() x BasisSetSize, built on C's data
+      Product_ABt(Temp, C_partial_view, Tempv); // output lands in Tempv (presumably Temp * C^T; confirm in Product_ABt)
+    }
+    evaluate_vgl_impl(Tempv, psi, dpsi, d2psi);
+  }
+}
+
+template<class T> // Multi-walker variant: evaluates into phi_vgl_v, then scatters it into the per-walker output vectors.
+void LCAOrbitalSetT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                       const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                       int iat,
+                                       const RefVector<ValueVector>& psi_v_list,
+                                       const RefVector<GradVector>& dpsi_v_list,
+                                       const RefVector<ValueVector>& d2psi_v_list) const
+{
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  auto& phi_vgl_v  = spo_leader.mw_mem_handle_.getResource().phi_vgl_v; // scratch array held by the leader's resource
+
+  phi_vgl_v.resize(QMCTraits::DIM_VGL, spo_list.size(), this->OrbitalSetSize);
+  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
+
+  const size_t nw = phi_vgl_v.size(1); // second extent, set to spo_list.size() by the resize above
+
+  // TODO: make this cleaner?
+  for (int iw = 0; iw < nw; iw++)
+  {
+    const size_t output_size = psi_v_list[iw].get().size(); // per-walker count, taken from the caller's psi vector
+    std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data()); // component 0 -> psi
+    std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data()); // component 4 -> d2psi
+    // grads are [dim, walker, orb] in phi_vgl_v
+    //           [walker][orb, dim] in dpsi_v_list
+    for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
+      BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim],
+                 QMCTraits::DIM); // source increment 1, destination increment DIM
+  }
+}
+
+template<class T> // Fills phi_vgl_v from a multi-walker basis evaluation; C is applied via GEMM unless Identity is set.
+void LCAOrbitalSetT<T>::mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                               const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                               int iat,
+                                               OffloadMWVGLArray& phi_vgl_v) const
+{
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  auto& basis_mw   = spo_leader.mw_mem_handle_.getResource().basis_mw; // scratch array held by the leader's resource
+  basis_mw.resize(QMCTraits::DIM_VGL, spo_list.size(), BasisSetSize);
+
+  {
+    ScopedTimer local(basis_timer_); // the enclosing braces end this timer object's lifetime after the call below
+    myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw);
+  }
+
+  if (Identity)
+  {
+    // output_size can be smaller than BasisSetSize
+    const size_t output_size = phi_vgl_v.size(2);
+    const size_t nw          = phi_vgl_v.size(1);
+
+    for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++)
+      for (int iw = 0; iw < nw; iw++)
+        std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0)); // first output_size only
+  }
+  else
+  {
+    const size_t requested_orb_size = phi_vgl_v.size(2); // orbital count is taken from the caller-sized output array
+    assert(requested_orb_size <= this->OrbitalSetSize);
     {
-        ScopedTimer local(basis_timer_);
-        myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw);
-    }
-
-    if (Identity) {
-        // output_size can be smaller than BasisSetSize
-        const size_t output_size = phi_vgl_v.size(2);
-        const size_t nw = phi_vgl_v.size(1);
-
-        for (size_t idim = 0; idim < QMCTraits::DIM_VGL; idim++)
-            for (int iw = 0; iw < nw; iw++)
-                std::copy_n(basis_mw.data_at(idim, iw, 0), output_size,
-                    phi_vgl_v.data_at(idim, iw, 0));
-    }
-    else {
-        const size_t requested_orb_size = phi_vgl_v.size(2);
-        assert(requested_orb_size <= this->OrbitalSetSize);
-        {
-            ScopedTimer local(mo_timer_);
-            ValueMatrix C_partial_view(
-                C->data(), requested_orb_size, BasisSetSize);
-            // TODO: make class for general blas interface in Platforms
-            // have instance of that class as member of LCAOrbitalSetT, call
-            // gemm through that
-            BLAS::gemm('T', 'N',
-                requested_orb_size, // MOs
-                spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL
-                BasisSetSize, // AOs
-                1, C_partial_view.data(), BasisSetSize, basis_mw.data(),
-                BasisSetSize, 0, phi_vgl_v.data(), requested_orb_size);
-        }
+      ScopedTimer local(mo_timer_);
+      ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
+      // TODO: make class for general blas interface in Platforms
+      // have instance of that class as member of LCAOrbitalSetT, call
+      // gemm through that
+      BLAS::gemm('T', 'N',
+                 requested_orb_size,                   // MOs
+                 spo_list.size() * QMCTraits::DIM_VGL, // walkers * DIM_VGL
+                 BasisSetSize,                         // AOs
+                 1, C_partial_view.data(), BasisSetSize, basis_mw.data(), BasisSetSize, 0, phi_vgl_v.data(),
+                 requested_orb_size); // assuming standard GEMM argument order: alpha = 1, beta = 0
+    }
+  }
+}
+
+template<class T> // Multi-walker value evaluation: fills phi_v, then copies one row of it into each walker's psi vector.
+void LCAOrbitalSetT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                         const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                         int iat,
+                                         const RefVector<ValueVector>& psi_v_list) const
+{
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  auto& phi_v      = spo_leader.mw_mem_handle_.getResource().phi_v; // scratch array held by the leader's resource
+  phi_v.resize(spo_list.size(), this->OrbitalSetSize);
+  mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v);
+
+  const size_t output_size = phi_v.size(1); // equals OrbitalSetSize after the resize above
+  const size_t nw          = phi_v.size(0);
+
+  for (int iw = 0; iw < nw; iw++)
+    std::copy_n(phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data()); // NOTE(review): psi size not checked
+}
+
+template<class T> // Fills phi_v from a multi-walker basis evaluation; C is applied via GEMM unless Identity is set.
+void LCAOrbitalSetT<T>::mw_evaluateValueImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                 const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                                 int iat,
+                                                 OffloadMWVArray& phi_v) const
+{
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
+  const size_t nw  = spo_list.size();
+  auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw; // scratch array held by the leader's resource
+  basis_v_mw.resize(nw, BasisSetSize);
+
+  myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw); // not wrapped in a ScopedTimer, unlike the VGL path
+
+  if (Identity)
+  {
+    std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw, phi_v.data_at(0, 0)); // NOTE(review): one flat copy
+  } // NOTE(review): the flat copy only lines rows up if both arrays have the same row length; confirm for Identity
+  else
+  {
+    const size_t requested_orb_size = phi_v.size(1); // orbital count is taken from the caller-sized output array
+    assert(requested_orb_size <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
+    BLAS::gemm('T', 'N',
+               requested_orb_size, // MOs
+               spo_list.size(),    // walkers
+               BasisSetSize,       // AOs
+               1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(), BasisSetSize, 0, phi_v.data(),
+               requested_orb_size); // assuming standard GEMM argument order: alpha = 1, beta = 0
+  }
+}
+
+template<class T> // Per walker and per particle of its vp_list entry: evaluate values, then dot them with the inverse row.
+void LCAOrbitalSetT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                             const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
+                                             const RefVector<ValueVector>& psi_list,
+                                             const std::vector<const T*>& invRow_ptr_list,
+                                             std::vector<std::vector<T>>& ratios_list) const
+{
+  const size_t nw = spo_list.size();
+  for (size_t iw = 0; iw < nw; iw++)
+  {
+    for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++)
+    {
+      spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]); // the same psi_list[iw] is passed for every iat
+      ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(), invRow_ptr_list[iw], psi_list[iw].get().size());
     }
+  }
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::mw_evaluateValue(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const RefVector<ValueVector>& psi_v_list) const
-{
-    assert(this == &spo_list.getLeader());
-    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-    auto& phi_v = spo_leader.mw_mem_handle_.getResource().phi_v;
-    phi_v.resize(spo_list.size(), this->OrbitalSetSize);
-    mw_evaluateValueImplGEMM(spo_list, P_list, iat, phi_v);
-
-    const size_t output_size = phi_v.size(1);
-    const size_t nw = phi_v.size(0);
-
-    for (int iw = 0; iw < nw; iw++)
-        std::copy_n(
-            phi_v.data_at(iw, 0), output_size, psi_v_list[iw].get().data());
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::mw_evaluateValueImplGEMM(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    OffloadMWVArray& phi_v) const
+template<class T> // Writes ratios[j] for every particle j of VP; psi is not referenced in this body.
+void LCAOrbitalSetT<T>::evaluateDetRatios(const VirtualParticleSetT<T>& VP,
+                                          ValueVector& psi,
+                                          const ValueVector& psiinv,
+                                          std::vector<T>& ratios)
 {
-    assert(this == &spo_list.getLeader());
-    auto& spo_leader = spo_list.template getCastedLeader<LCAOrbitalSetT<T>>();
-    const size_t nw = spo_list.size();
-    auto& basis_v_mw = spo_leader.mw_mem_handle_.getResource().basis_v_mw;
-    basis_v_mw.resize(nw, BasisSetSize);
-
-    myBasisSet->mw_evaluateValue(P_list, iat, basis_v_mw);
-
-    if (Identity) {
-        std::copy_n(basis_v_mw.data_at(0, 0), this->OrbitalSetSize * nw,
-            phi_v.data_at(0, 0));
-    }
-    else {
-        const size_t requested_orb_size = phi_v.size(1);
-        assert(requested_orb_size <= this->OrbitalSetSize);
-        ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
-        BLAS::gemm('T', 'N',
-            requested_orb_size, // MOs
-            spo_list.size(), // walkers
-            BasisSetSize, // AOs
-            1, C_partial_view.data(), BasisSetSize, basis_v_mw.data(),
-            BasisSetSize, 0, phi_v.data(), requested_orb_size);
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::mw_evaluateDetRatios(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
-    const RefVector<ValueVector>& psi_list,
-    const std::vector<const T*>& invRow_ptr_list,
-    std::vector<std::vector<T>>& ratios_list) const
-{
-    const size_t nw = spo_list.size();
-    for (size_t iw = 0; iw < nw; iw++) {
-        for (size_t iat = 0; iat < vp_list[iw].getTotalNum(); iat++) {
-            spo_list[iw].evaluateValue(vp_list[iw], iat, psi_list[iw]);
-            ratios_list[iw][iat] = simd::dot(psi_list[iw].get().data(),
-                invRow_ptr_list[iw], psi_list[iw].get().size());
-        }
-    }
-}
+  Vector<T> vTemp(Temp.data(0), BasisSetSize); // built on component 0 of Temp
+  Vector<T> invTemp(Temp.data(1), BasisSetSize); // built on component 1 of Temp
 
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluateDetRatios(const VirtualParticleSetT<T>& VP,
-    ValueVector& psi, const ValueVector& psiinv, std::vector<T>& ratios)
-{
-    Vector<T> vTemp(Temp.data(0), BasisSetSize);
-    Vector<T> invTemp(Temp.data(1), BasisSetSize);
+  {
+    ScopedTimer local(mo_timer_);
+    // when only a subset of orbitals is used, extract limited rows of C.
+    Matrix<T> C_occupied(C->data(), psiinv.size(), BasisSetSize); // NOTE(review): no Identity branch here; confirm C
+    MatrixOperators::product_Atx(C_occupied, psiinv, invTemp); // output lands in invTemp, computed once before the loop
+  }
 
+  for (size_t j = 0; j < VP.getTotalNum(); j++)
+  {
     {
-        ScopedTimer local(mo_timer_);
-        // when only a subset of orbitals is used, extract limited rows of C.
-        Matrix<T> C_occupied(C->data(), psiinv.size(), BasisSetSize);
-        MatrixOperators::product_Atx(C_occupied, psiinv, invTemp);
-    }
-
-    for (size_t j = 0; j < VP.getTotalNum(); j++) {
-        {
-            ScopedTimer local(basis_timer_);
-            myBasisSet->evaluateV(VP, j, vTemp.data());
-        }
-        ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize);
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::mw_evaluateVGLandDetRatioGrads(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const std::vector<const T*>& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v,
-    std::vector<T>& ratios, std::vector<GradType>& grads) const
-{
-    assert(this == &spo_list.getLeader());
-    assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
-    assert(phi_vgl_v.size(1) == spo_list.size());
-
-    mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
-    // Device data of phi_vgl_v must be up-to-date upon return
-    phi_vgl_v.updateTo();
-
-    const size_t nw = spo_list.size();
-    const size_t norb_requested = phi_vgl_v.size(2);
-    for (int iw = 0; iw < nw; iw++) {
-        ratios[iw] = simd::dot(
-            invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested);
-        GradType dphi;
-        for (size_t idim = 0; idim < QMCTraits::DIM; idim++)
-            dphi[idim] =
-                simd::dot(invRow_ptr_list[iw],
-                    phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) /
-                ratios[iw];
-        grads[iw] = dphi;
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluateVGH(const ParticleSetT<T>& P, int iat,
-    ValueVector& psi, GradVector& dpsi, HessVector& dhpsi)
-{
-    // TAKE CARE OF IDENTITY
-    myBasisSet->evaluateVGH(P, iat, Temph);
-    if (Identity)
-        evaluate_vgh_impl(Temph, psi, dpsi, dhpsi);
-    else {
-        assert(psi.size() <= this->OrbitalSetSize);
-        ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
-        Product_ABt(Temph, C_partial_view, Temphv);
-        evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi);
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluateVGHGH(const ParticleSetT<T>& P, int iat,
-    ValueVector& psi, GradVector& dpsi, HessVector& dhpsi, GGGVector& dghpsi)
-{
-    // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not
-    // implemented\n");
-
-    // TAKE CARE OF IDENTITY
-    myBasisSet->evaluateVGHGH(P, iat, Tempgh);
-    if (Identity)
-        evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi);
-    else {
-        assert(psi.size() <= this->OrbitalSetSize);
-        ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize);
-        Product_ABt(Tempgh, C_partial_view, Tempghv);
-        evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi);
-    }
+      ScopedTimer local(basis_timer_);
+      myBasisSet->evaluateV(VP, j, vTemp.data()); // basis values for particle j are written through vTemp's pointer
+    }
+    ratios[j] = simd::dot(vTemp.data(), invTemp.data(), BasisSetSize); // dot over all BasisSetSize entries
+  }
+}
+
+template<class T> // Fills phi_vgl_v, then derives one ratio and one gradient per walker from the inverse rows.
+void LCAOrbitalSetT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                       const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                                       int iat,
+                                                       const std::vector<const T*>& invRow_ptr_list,
+                                                       OffloadMWVGLArray& phi_vgl_v,
+                                                       std::vector<T>& ratios,
+                                                       std::vector<GradType>& grads) const
+{
+  assert(this == &spo_list.getLeader()); // must be invoked on the leader of spo_list
+  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL); // the caller must have sized phi_vgl_v; it is not resized here
+  assert(phi_vgl_v.size(1) == spo_list.size());
+
+  mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
+  // Device data of phi_vgl_v must be up-to-date upon return
+  phi_vgl_v.updateTo();
+
+  const size_t nw             = spo_list.size();
+  const size_t norb_requested = phi_vgl_v.size(2); // orbital count is taken from the caller-sized array
+  for (int iw = 0; iw < nw; iw++)
+  {
+    ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested); // uses component 0
+    GradType dphi;
+    for (size_t idim = 0; idim < QMCTraits::DIM; idim++) // NOTE(review): divides by ratios[iw] with no zero check
+      dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw];
+    grads[iw] = dphi;
+  }
+}
+
+template<class T> // Fills psi, dpsi and dhpsi for particle iat of P.
+void LCAOrbitalSetT<T>::evaluateVGH(const ParticleSetT<T>& P,
+                                    int iat,
+                                    ValueVector& psi,
+                                    GradVector& dpsi,
+                                    HessVector& dhpsi)
+{
+  // The basis set is always evaluated into Temph; Identity only decides whether C is applied afterwards.
+  myBasisSet->evaluateVGH(P, iat, Temph); // no ScopedTimer on this path, unlike evaluateVGL
+  if (Identity)
+    evaluate_vgh_impl(Temph, psi, dpsi, dhpsi); // basis output is unpacked directly
+  else
+  {
+    assert(psi.size() <= this->OrbitalSetSize); // cannot request more orbitals than the set holds
+    ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); // psi.size() x BasisSetSize, built on C's data
+    Product_ABt(Temph, C_partial_view, Temphv); // output lands in Temphv (presumably Temph * C^T; confirm)
+    evaluate_vgh_impl(Temphv, psi, dpsi, dhpsi);
+  }
+}
+
+template<class T> // Fills psi, dpsi, dhpsi and dghpsi for particle iat of P.
+void LCAOrbitalSetT<T>::evaluateVGHGH(const ParticleSetT<T>& P,
+                                      int iat,
+                                      ValueVector& psi,
+                                      GradVector& dpsi,
+                                      HessVector& dhpsi,
+                                      GGGVector& dghpsi)
+{
+  // APP_ABORT("LCAORbitalSet::evaluate(psi,gpsi,hpsi,ghpsi) not
+  // implemented\n");
+
+  // The basis set is always evaluated into Tempgh; Identity only decides whether C is applied afterwards.
+  myBasisSet->evaluateVGHGH(P, iat, Tempgh); // no ScopedTimer on this path, unlike evaluateVGL
+  if (Identity)
+    evaluate_vghgh_impl(Tempgh, psi, dpsi, dhpsi, dghpsi); // basis output is unpacked directly
+  else
+  {
+    assert(psi.size() <= this->OrbitalSetSize); // cannot request more orbitals than the set holds
+    ValueMatrix C_partial_view(C->data(), psi.size(), BasisSetSize); // psi.size() x BasisSetSize, built on C's data
+    Product_ABt(Tempgh, C_partial_view, Tempghv); // output lands in Tempghv (presumably Tempgh * C^T; confirm)
+    evaluate_vghgh_impl(Tempghv, psi, dpsi, dhpsi, dghpsi);
+  }
 }
 
 /* implement using gemm algorithm */
-template <class T>
-inline void
-LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp, int i,
-    ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) const
-{
-    const size_t output_size = logdet.cols();
-    std::copy_n(temp.data(0), output_size, logdet[i]);
-    const T* restrict gx = temp.data(1);
-    const T* restrict gy = temp.data(2);
-    const T* restrict gz = temp.data(3);
-    for (size_t j = 0; j < output_size; j++) {
-        dlogdet[i][j][0] = gx[j];
-        dlogdet[i][j][1] = gy[j];
-        dlogdet[i][j][2] = gz[j];
-    }
-    std::copy_n(temp.data(4), output_size, d2logdet[i]);
-}
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp, int i,
-    ValueMatrix& psi, GradMatrix& dpsi, HessMatrix& d2psi) const
-{
-    const size_t output_size = psi.cols();
-    std::copy_n(temp.data(0), output_size, psi[i]);
-    const T* restrict gx = temp.data(1);
-    const T* restrict gy = temp.data(2);
-    const T* restrict gz = temp.data(3);
-    const T* restrict hxx = temp.data(4);
-    const T* restrict hxy = temp.data(5);
-    const T* restrict hxz = temp.data(6);
-    const T* restrict hyy = temp.data(7);
-    const T* restrict hyz = temp.data(8);
-    const T* restrict hzz = temp.data(9);
-
-    for (size_t j = 0; j < output_size; j++) {
-        dpsi[i][j][0] = gx[j];
-        dpsi[i][j][1] = gy[j];
-        dpsi[i][j][2] = gz[j];
-
-        d2psi[i][j](0, 0) = hxx[j];
-        d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
-        d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
-        d2psi[i][j](1, 1) = hyy[j];
-        d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
-        d2psi[i][j](2, 2) = hzz[j];
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_ionderiv_v_impl(
-    const vgl_type& temp, int i, GradMatrix& dpsi) const
-{
-    const size_t output_size = dpsi.cols();
-    const T* restrict gx = temp.data(1);
-    const T* restrict gy = temp.data(2);
-    const T* restrict gz = temp.data(3);
-
-    for (size_t j = 0; j < output_size; j++) {
-        // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property
-        // that
-        //  for an atomic center, the ion gradient is the negative of the
-        //  elecron gradient. Hence minus signs for each of these.
-        dpsi[i][j][0] = -gx[j];
-        dpsi[i][j][1] = -gy[j];
-        dpsi[i][j][2] = -gz[j];
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_ionderiv_vgl_impl(const vghgh_type& temp, int i,
-    GradMatrix& dpsi, HessMatrix& dgpsi, GradMatrix& dlpsi) const
-{
-    const size_t output_size = dpsi.cols();
-    const T* restrict gx = temp.data(1);
-    const T* restrict gy = temp.data(2);
-    const T* restrict gz = temp.data(3);
-    const T* restrict hxx = temp.data(4);
-    const T* restrict hxy = temp.data(5);
-    const T* restrict hxz = temp.data(6);
-    const T* restrict hyy = temp.data(7);
-    const T* restrict hyz = temp.data(8);
-    const T* restrict hzz = temp.data(9);
-    const T* restrict gh_xxx = temp.data(10);
-    const T* restrict gh_xxy = temp.data(11);
-    const T* restrict gh_xxz = temp.data(12);
-    const T* restrict gh_xyy = temp.data(13);
-    const T* restrict gh_xzz = temp.data(15);
-    const T* restrict gh_yyy = temp.data(16);
-    const T* restrict gh_yyz = temp.data(17);
-    const T* restrict gh_yzz = temp.data(18);
-    const T* restrict gh_zzz = temp.data(19);
-
-    for (size_t j = 0; j < output_size; j++) {
-        // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property
-        // that
-        //  for an atomic center, the ion gradient is the negative of the
-        //  elecron gradient. Hence minus signs for each of these.
-        dpsi[i][j][0] = -gx[j];
-        dpsi[i][j][1] = -gy[j];
-        dpsi[i][j][2] = -gz[j];
-
-        dgpsi[i][j](0, 0) = -hxx[j];
-        dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j];
-        dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j];
-        dgpsi[i][j](1, 1) = -hyy[j];
-        dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j];
-        dgpsi[i][j](2, 2) = -hzz[j];
-
-        // Since this returns the ion gradient of the laplacian, we have to
-        // trace the grad hessian vector.
-        dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]);
-        dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]);
-        dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]);
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
-{
-    if (Identity) {
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateVGL(P, iat, Temp);
-            evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet);
-        }
-    }
-    else {
-        assert(logdet.cols() <= this->OrbitalSetSize);
-        ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateVGL(P, iat, Temp);
-            Product_ABt(Temp, C_partial_view, Tempv);
-            evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet);
-        }
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet,
-    HessMatrix& grad_grad_logdet)
-{
-    if (Identity) {
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateVGH(P, iat, Temph);
-            evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet);
-        }
-    }
-    else {
-        assert(logdet.cols() <= this->OrbitalSetSize);
-        ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateVGH(P, iat, Temph);
-            Product_ABt(Temph, C_partial_view, Temphv);
-            evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet);
-        }
-    }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet,
-    HessMatrix& grad_grad_logdet, GGGMatrix& grad_grad_grad_logdet)
-{
-    if (Identity) {
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateVGHGH(P, iat, Tempgh);
-            evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet,
-                grad_grad_grad_logdet);
-        }
-    }
-    else {
-        assert(logdet.cols() <= this->OrbitalSetSize);
-        ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateVGHGH(P, iat, this->Tempgh);
-            Product_ABt(this->Tempgh, C_partial_view, this->Tempghv);
-            evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet,
-                grad_grad_logdet, grad_grad_grad_logdet);
-        }
+template<class T>
+inline void LCAOrbitalSetT<T>::evaluate_vgl_impl(const vgl_type& temp,
+                                                 int i,
+                                                 ValueMatrix& logdet,
+                                                 GradMatrix& dlogdet,
+                                                 ValueMatrix& d2logdet) const
+{
+  const size_t output_size = logdet.cols();
+  std::copy_n(temp.data(0), output_size, logdet[i]);
+  const T* restrict gx = temp.data(1);
+  const T* restrict gy = temp.data(2);
+  const T* restrict gz = temp.data(3);
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dlogdet[i][j][0] = gx[j];
+    dlogdet[i][j][1] = gy[j];
+    dlogdet[i][j][2] = gz[j];
+  }
+  std::copy_n(temp.data(4), output_size, d2logdet[i]);
+}
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_vgh_impl(const vgh_type& temp,
+                                          int i,
+                                          ValueMatrix& psi,
+                                          GradMatrix& dpsi,
+                                          HessMatrix& d2psi) const
+{
+  const size_t output_size = psi.cols();
+  std::copy_n(temp.data(0), output_size, psi[i]);
+  const T* restrict gx  = temp.data(1);
+  const T* restrict gy  = temp.data(2);
+  const T* restrict gz  = temp.data(3);
+  const T* restrict hxx = temp.data(4);
+  const T* restrict hxy = temp.data(5);
+  const T* restrict hxz = temp.data(6);
+  const T* restrict hyy = temp.data(7);
+  const T* restrict hyz = temp.data(8);
+  const T* restrict hzz = temp.data(9);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    dpsi[i][j][0] = gx[j];
+    dpsi[i][j][1] = gy[j];
+    dpsi[i][j][2] = gz[j];
+
+    d2psi[i][j](0, 0) = hxx[j];
+    d2psi[i][j](0, 1) = d2psi[i][j](1, 0) = hxy[j];
+    d2psi[i][j](0, 2) = d2psi[i][j](2, 0) = hxz[j];
+    d2psi[i][j](1, 1)                     = hyy[j];
+    d2psi[i][j](2, 1) = d2psi[i][j](1, 2) = hyz[j];
+    d2psi[i][j](2, 2)                     = hzz[j];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dpsi) const
+{
+  const size_t output_size = dpsi.cols();
+  const T* restrict gx     = temp.data(1);
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property:
+    // for an atomic center, the ion gradient is the negative of the
+    // electron gradient.
+    // Hence minus signs for each of these.
+    dpsi[i][j][0] = -gx[j];
+    dpsi[i][j][1] = -gy[j];
+    dpsi[i][j][2] = -gz[j];
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_ionderiv_vgl_impl(const vghgh_type& temp,
+                                                   int i,
+                                                   GradMatrix& dpsi,
+                                                   HessMatrix& dgpsi,
+                                                   GradMatrix& dlpsi) const
+{
+  const size_t output_size = dpsi.cols();
+  const T* restrict gx     = temp.data(1);
+  const T* restrict gy     = temp.data(2);
+  const T* restrict gz     = temp.data(3);
+  const T* restrict hxx    = temp.data(4);
+  const T* restrict hxy    = temp.data(5);
+  const T* restrict hxz    = temp.data(6);
+  const T* restrict hyy    = temp.data(7);
+  const T* restrict hyz    = temp.data(8);
+  const T* restrict hzz    = temp.data(9);
+  const T* restrict gh_xxx = temp.data(10);
+  const T* restrict gh_xxy = temp.data(11);
+  const T* restrict gh_xxz = temp.data(12);
+  const T* restrict gh_xyy = temp.data(13);
+  const T* restrict gh_xzz = temp.data(15);
+  const T* restrict gh_yyy = temp.data(16);
+  const T* restrict gh_yyz = temp.data(17);
+  const T* restrict gh_yzz = temp.data(18);
+  const T* restrict gh_zzz = temp.data(19);
+
+  for (size_t j = 0; j < output_size; j++)
+  {
+    // As mentioned in SoaLocalizedBasisSet, LCAO's have a nice property:
+    // for an atomic center, the ion gradient is the negative of the
+    // electron gradient.
+    // Hence minus signs for each of these.
+    dpsi[i][j][0] = -gx[j];
+    dpsi[i][j][1] = -gy[j];
+    dpsi[i][j][2] = -gz[j];
+
+    dgpsi[i][j](0, 0) = -hxx[j];
+    dgpsi[i][j](0, 1) = dgpsi[i][j](1, 0) = -hxy[j];
+    dgpsi[i][j](0, 2) = dgpsi[i][j](2, 0) = -hxz[j];
+    dgpsi[i][j](1, 1)                     = -hyy[j];
+    dgpsi[i][j](2, 1) = dgpsi[i][j](1, 2) = -hyz[j];
+    dgpsi[i][j](2, 2)                     = -hzz[j];
+
+    // Since this returns the ion gradient of the laplacian, we have to
+    // trace the grad hessian vector.
+    dlpsi[i][j][0] = -(gh_xxx[j] + gh_xyy[j] + gh_xzz[j]);
+    dlpsi[i][j][1] = -(gh_xxy[j] + gh_yyy[j] + gh_yzz[j]);
+    dlpsi[i][j][2] = -(gh_xxz[j] + gh_yyz[j] + gh_zzz[j]);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                             int first,
+                                             int last,
+                                             ValueMatrix& logdet,
+                                             GradMatrix& dlogdet,
+                                             ValueMatrix& d2logdet)
+{
+  if (Identity)
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGL(P, iat, Temp);
+      evaluate_vgl_impl(Temp, i, logdet, dlogdet, d2logdet);
+    }
+  }
+  else
+  {
+    assert(logdet.cols() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGL(P, iat, Temp);
+      Product_ABt(Temp, C_partial_view, Tempv);
+      evaluate_vgl_impl(Tempv, i, logdet, dlogdet, d2logdet);
     }
+  }
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first,
-    int last, const ParticleSetT<T>& source, int iat_src, GradMatrix& gradphi)
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                             int first,
+                                             int last,
+                                             ValueMatrix& logdet,
+                                             GradMatrix& dlogdet,
+                                             HessMatrix& grad_grad_logdet)
 {
-    if (Identity) {
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateGradSourceV(
-                P, iat, source, iat_src, this->Temp);
-            evaluate_ionderiv_v_impl(Temp, i, gradphi);
-        }
-    }
-    else {
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateGradSourceV(
-                P, iat, source, iat_src, this->Temp);
-            Product_ABt(this->Temp, *C, this->Tempv);
-            evaluate_ionderiv_v_impl(this->Tempv, i, gradphi);
-        }
+  if (Identity)
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGH(P, iat, Temph);
+      evaluate_vgh_impl(Temph, i, logdet, dlogdet, grad_grad_logdet);
+    }
+  }
+  else
+  {
+    assert(logdet.cols() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGH(P, iat, Temph);
+      Product_ABt(Temph, C_partial_view, Temphv);
+      evaluate_vgh_impl(Temphv, i, logdet, dlogdet, grad_grad_logdet);
     }
+  }
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first,
-    int last, const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
-    HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi)
+template<class T>
+void LCAOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                             int first,
+                                             int last,
+                                             ValueMatrix& logdet,
+                                             GradMatrix& dlogdet,
+                                             HessMatrix& grad_grad_logdet,
+                                             GGGMatrix& grad_grad_grad_logdet)
 {
-    if (Identity) {
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateGradSourceVGL(
-                P, iat, source, iat_src, this->Tempgh);
-            evaluate_ionderiv_vgl_impl(
-                this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi);
-        }
-    }
-    else {
-        for (size_t i = 0, iat = first; iat < last; i++, iat++) {
-            myBasisSet->evaluateGradSourceVGL(
-                P, iat, source, iat_src, this->Tempgh);
-            Product_ABt(this->Tempgh, *C, this->Tempghv);
-            evaluate_ionderiv_vgl_impl(
-                this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi);
-        }
+  if (Identity)
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGHGH(P, iat, Tempgh);
+      evaluate_vghgh_impl(Tempgh, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
+    }
+  }
+  else
+  {
+    assert(logdet.cols() <= this->OrbitalSetSize);
+    ValueMatrix C_partial_view(C->data(), logdet.cols(), BasisSetSize);
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateVGHGH(P, iat, this->Tempgh);
+      Product_ABt(this->Tempgh, C_partial_view, this->Tempghv);
+      evaluate_vghgh_impl(this->Tempghv, i, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
     }
+  }
 }
 
-template <class T>
-void
-LCAOrbitalSetT<T>::evaluateGradSourceRow(const ParticleSetT<T>& P, int iel,
-    const ParticleSetT<T>& source, int iat_src, GradVector& gradphi)
+template<class T>
+void LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSetT<T>& P,
+                                           int first,
+                                           int last,
+                                           const ParticleSetT<T>& source,
+                                           int iat_src,
+                                           GradMatrix& gradphi)
 {
-    if (Identity) {
-        myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
-        evaluate_ionderiv_v_row_impl(this->Temp, gradphi);
+  if (Identity)
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp);
+      evaluate_ionderiv_v_impl(Temp, i, gradphi);
     }
-    else {
-        myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
-        Product_ABt(Temp, *C, this->Tempv);
-        evaluate_ionderiv_v_row_impl(this->Tempv, gradphi);
+  }
+  else
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateGradSourceV(P, iat, source, iat_src, this->Temp);
+      Product_ABt(this->Temp, *C, this->Tempv);
+      evaluate_ionderiv_v_impl(this->Tempv, i, gradphi);
+    }
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateGradSource(const ParticleSetT<T>& P,
+                                           int first,
+                                           int last,
+                                           const ParticleSetT<T>& source,
+                                           int iat_src,
+                                           GradMatrix& grad_phi,
+                                           HessMatrix& grad_grad_phi,
+                                           GradMatrix& grad_lapl_phi)
+{
+  if (Identity)
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh);
+      evaluate_ionderiv_vgl_impl(this->Tempgh, i, grad_phi, grad_grad_phi, grad_lapl_phi);
     }
-}
-
-template <class T>
-void
-LCAOrbitalSetT<T>::applyRotation(
-    const ValueMatrix& rot_mat, bool use_stored_copy)
-{
-    if (!use_stored_copy)
-        *C_copy = *C;
-    // gemm is out-of-place
-    BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize,
-        this->OrbitalSetSize, RealType(1.0), C_copy->data(), BasisSetSize,
-        rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(),
-        BasisSetSize);
-
-    /* debugging code
+  }
+  else
+  {
+    for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    {
+      myBasisSet->evaluateGradSourceVGL(P, iat, source, iat_src, this->Tempgh);
+      Product_ABt(this->Tempgh, *C, this->Tempghv);
+      evaluate_ionderiv_vgl_impl(this->Tempghv, i, grad_phi, grad_grad_phi, grad_lapl_phi);
+    }
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::evaluateGradSourceRow(const ParticleSetT<T>& P,
+                                              int iel,
+                                              const ParticleSetT<T>& source,
+                                              int iat_src,
+                                              GradVector& gradphi)
+{
+  if (Identity)
+  {
+    myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
+    evaluate_ionderiv_v_row_impl(this->Temp, gradphi);
+  }
+  else
+  {
+    myBasisSet->evaluateGradSourceV(P, iel, source, iat_src, this->Temp);
+    Product_ABt(Temp, *C, this->Tempv);
+    evaluate_ionderiv_v_row_impl(this->Tempv, gradphi);
+  }
+}
+
+template<class T>
+void LCAOrbitalSetT<T>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy)
+{
+  if (!use_stored_copy)
+    *C_copy = *C;
+  // gemm is out-of-place
+  BLAS::gemm('N', 'T', BasisSetSize, this->OrbitalSetSize, this->OrbitalSetSize, RealType(1.0), C_copy->data(),
+             BasisSetSize, rot_mat.data(), this->OrbitalSetSize, RealType(0.0), C->data(), BasisSetSize);
+
+  /* debugging code
     app_log() << "PRINTING MO COEFFICIENTS AFTER ROTATION " << objectName <<
     std::endl; for (int j = 0; j < OrbitalSetSize; j++) for (int i = 0; i <
     BasisSetSize; i++)
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
index 29f8c897d22..a356bdd6f52 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetT.h
@@ -27,210 +27,190 @@ namespace qmcplusplus
  * SoA verson of LCOrtbitalSet
  * Localized basis set is always real
  */
-template <class T>
+template<class T>
 class LCAOrbitalSetT : public SPOSetT<T>
 {
 public:
-    using basis_type = SoaBasisSetBaseT<T>;
-    using vgl_type = typename basis_type::vgl_type;
-    using vgh_type = typename basis_type::vgh_type;
-    using vghgh_type = typename basis_type::vghgh_type;
-
-    using IndexType = typename SPOSetT<T>::IndexType;
-    using RealType = typename SPOSetT<T>::RealType;
-    using ComplexType = typename SPOSetT<T>::ComplexType;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using HessMatrix = typename SPOSetT<T>::HessMatrix;
-    using PosType = typename SPOSetT<T>::PosType;
-    using HessVector = typename SPOSetT<T>::HessVector;
-    using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
-    using GGGVector = typename SPOSetT<T>::GGGVector;
-    using GradType = typename SPOSetT<T>::GradType;
-    using OffloadMWVGLArray = typename basis_type::OffloadMWVGLArray;
-    using OffloadMWVArray = typename basis_type::OffloadMWVArray;
-
-    /// pointer to the basis set
-    std::unique_ptr<basis_type> myBasisSet;
-    /// pointer to matrix containing the coefficients
-    std::shared_ptr<ValueMatrix> C;
-
-    /** constructor
+  using basis_type = SoaBasisSetBaseT<T>;
+  using vgl_type   = typename basis_type::vgl_type;
+  using vgh_type   = typename basis_type::vgh_type;
+  using vghgh_type = typename basis_type::vghgh_type;
+
+  using IndexType         = typename SPOSetT<T>::IndexType;
+  using RealType          = typename SPOSetT<T>::RealType;
+  using ComplexType       = typename SPOSetT<T>::ComplexType;
+  using ValueVector       = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix       = typename SPOSetT<T>::ValueMatrix;
+  using GradVector        = typename SPOSetT<T>::GradVector;
+  using GradMatrix        = typename SPOSetT<T>::GradMatrix;
+  using HessMatrix        = typename SPOSetT<T>::HessMatrix;
+  using PosType           = typename SPOSetT<T>::PosType;
+  using HessVector        = typename SPOSetT<T>::HessVector;
+  using GGGMatrix         = typename SPOSetT<T>::GGGMatrix;
+  using GGGVector         = typename SPOSetT<T>::GGGVector;
+  using GradType          = typename SPOSetT<T>::GradType;
+  using OffloadMWVGLArray = typename basis_type::OffloadMWVGLArray;
+  using OffloadMWVArray   = typename basis_type::OffloadMWVArray;
+
+  /// pointer to the basis set
+  std::unique_ptr<basis_type> myBasisSet;
+  /// pointer to matrix containing the coefficients
+  std::shared_ptr<ValueMatrix> C;
+
+  /** constructor
      * @param bs pointer to the BasisSet
      */
-    LCAOrbitalSetT(
-        const std::string& my_name, std::unique_ptr<basis_type>&& bs);
-
-    LCAOrbitalSetT(const LCAOrbitalSetT& in);
-
-    std::string
-    getClassName() const final
-    {
-        return "LCAOrbitalSetT";
-    }
-
-    bool
-    isRotationSupported() const final
-    {
-        return true;
-    }
-
-    bool
-    hasIonDerivs() const final
-    {
-        return true;
-    }
-
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const final;
-
-    void
-    storeParamsBeforeRotation() final
-    {
-        C_copy = std::make_shared<ValueMatrix>(*C);
-    }
-
-    void
-    applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final;
-
-    /** set the OrbitalSetSize and Identity=false and initialize internal
+  LCAOrbitalSetT(const std::string& my_name, std::unique_ptr<basis_type>&& bs);
+
+  LCAOrbitalSetT(const LCAOrbitalSetT& in);
+
+  std::string getClassName() const final { return "LCAOrbitalSetT"; }
+
+  bool isRotationSupported() const final { return true; }
+
+  bool hasIonDerivs() const final { return true; }
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const final;
+
+  void storeParamsBeforeRotation() final { C_copy = std::make_shared<ValueMatrix>(*C); }
+
+  void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy) final;
+
+  /** set the OrbitalSetSize and Identity=false and initialize internal
      * storages
      */
-    void
-    setOrbitalSetSize(int norbs) final;
+  void setOrbitalSetSize(int norbs) final;
 
-    /** return the size of the basis set
+  /** return the size of the basis set
      */
-    int
-    getBasisSetSize() const
-    {
-        return (myBasisSet == nullptr) ? 0 : myBasisSet->getBasisSetSize();
-    }
-
-    bool
-    isIdentity() const
-    {
-        return Identity;
-    };
-
-    /** check consistency between Identity and C
+  int getBasisSetSize() const { return (myBasisSet == nullptr) ? 0 : myBasisSet->getBasisSetSize(); }
+
+  bool isIdentity() const { return Identity; };
+
+  /** check consistency between Identity and C
      *
      */
-    void
-    checkObject() const final;
-
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) final;
-
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) final;
-
-    void
-    mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list) const final;
-
-    void
-    mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list) const final;
-
-    void
-    mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
-        const RefVector<ValueVector>& psi_list,
-        const std::vector<const T*>& invRow_ptr_list,
-        std::vector<std::vector<T>>& ratios_list) const final;
-
-    void
-    evaluateDetRatios(const VirtualParticleSetT<T>& VP, ValueVector& psi,
-        const ValueVector& psiinv, std::vector<T>& ratios) final;
-
-    void
-    mw_evaluateVGLandDetRatioGrads(
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const std::vector<const T*>& invRow_ptr_list,
-        OffloadMWVGLArray& phi_vgl_v, std::vector<T>& ratios,
-        std::vector<GradType>& grads) const final;
-
-    void
-    evaluateVGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi) final;
-
-    void
-    evaluateVGHGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi,
-        GGGVector& grad_grad_grad_psi) final;
-
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final;
-
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        HessMatrix& grad_grad_logdet) final;
-
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
-        GGGMatrix& grad_grad_grad_logdet) final;
-
-    // NOTE:  The data types get complicated here, so here's an overview of the
-    //        data types associated with ionic derivatives, and how to get their
-    //        data.
-    //
-    // NOTE:  These data structures hold the data for one particular ion, and so
-    // the ID is implicit.
-    //        It's up to the user to keep track of which ion these derivatives
-    //        refer to.
-    //
-    //  1.) GradMatrix grad_phi:  Holds the ionic derivatives of each SPO for
-    //  each electron.
-    //             Example:  grad_phi[iel][iorb][idim].  iel  -- electron index.
-    //                                                 iorb -- orbital index.
-    //                                                 idim  -- cartesian index
-    //                                                 of ionic derivative.
-    //                                                         X=0, Y=1, Z=2.
-    //
-    //  2.) HessMatrix grad_grad_phi:  Holds the ionic derivatives of the
-    //  electron gradient components
-    //                                    for each SPO and each electron.
-    //             Example:  grad_grad_phi[iel][iorb](idim,edim)  iel  --
-    //             electron index.
-    //                                                            iorb --
-    //                                                            orbital index.
-    //                                                            idim -- ionic
-    //                                                            derivative's
-    //                                                            cartesian
-    //                                                            index.
-    //                                                               X=0, Y=1,
-    //                                                               Z=2
-    //                                                            edim --
-    //                                                            electron
-    //                                                            derivative's
-    //                                                            cartesian
-    //                                                            index.
-    //                                                               x=0, y=1,
-    //                                                               z=2.
-    //
-    //  3.) GradMatrix grad_lapl_phi:  Holds the ionic derivatives of the
-    //  electron laplacian for each SPO and each electron.
-    //             Example:  grad_lapl_phi[iel][iorb][idim].  iel  -- electron
-    //             index.
-    //                                                        iorb -- orbital
-    //                                                        index. idim --
-    //                                                        cartesian index of
-    //                                                        ionic derivative.
-    //                                                            X=0, Y=1, Z=2.
-
-    /**
+  void checkObject() const final;
+
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) final;
+
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
+
+  void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                        const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                        int iat,
+                        const RefVector<ValueVector>& psi_v_list) const final;
+
+  void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                      const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                      int iat,
+                      const RefVector<ValueVector>& psi_v_list,
+                      const RefVector<GradVector>& dpsi_v_list,
+                      const RefVector<ValueVector>& d2psi_v_list) const final;
+
+  void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                            const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
+                            const RefVector<ValueVector>& psi_list,
+                            const std::vector<const T*>& invRow_ptr_list,
+                            std::vector<std::vector<T>>& ratios_list) const final;
+
+  void evaluateDetRatios(const VirtualParticleSetT<T>& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<T>& ratios) final;
+
+  void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                      int iat,
+                                      const std::vector<const T*>& invRow_ptr_list,
+                                      OffloadMWVGLArray& phi_vgl_v,
+                                      std::vector<T>& ratios,
+                                      std::vector<GradType>& grads) const final;
+
+  void evaluateVGH(const ParticleSetT<T>& P,
+                   int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) final;
+
+  void evaluateVGHGH(const ParticleSetT<T>& P,
+                     int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) final;
+
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) final;
+
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) final;
+
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet,
+                            GGGMatrix& grad_grad_grad_logdet) final;
+
+  // NOTE:  The data types get complicated here, so here's an overview of the
+  //        data types associated with ionic derivatives, and how to get their
+  //        data.
+  //
+  // NOTE:  These data structures hold the data for one particular ion, and so
+  // the ID is implicit.
+  //        It's up to the user to keep track of which ion these derivatives
+  //        refer to.
+  //
+  //  1.) GradMatrix grad_phi:  Holds the ionic derivatives of each SPO for
+  //      each electron.
+  //        Example:  grad_phi[iel][iorb][idim]
+  //          iel  -- electron index.
+  //          iorb -- orbital index.
+  //          idim -- cartesian index of ionic derivative.
+  //                  X=0, Y=1, Z=2.
+  //
+  //  2.) HessMatrix grad_grad_phi:  Holds the ionic derivatives of the
+  //  electron gradient components
+  //                                    for each SPO and each electron.
+  //             Example:  grad_grad_phi[iel][iorb](idim,edim)  iel  --
+  //             electron index.
+  //                                                            iorb --
+  //                                                            orbital index.
+  //                                                            idim -- ionic
+  //                                                            derivative's
+  //                                                            cartesian
+  //                                                            index.
+  //                                                               X=0, Y=1,
+  //                                                               Z=2
+  //                                                            edim --
+  //                                                            electron
+  //                                                            derivative's
+  //                                                            cartesian
+  //                                                            index.
+  //                                                               x=0, y=1,
+  //                                                               z=2.
+  //
+  //  3.) GradMatrix grad_lapl_phi:  Holds the ionic derivatives of the
+  //      electron laplacian for each SPO and each electron.
+  //
+  //        Example:  grad_lapl_phi[iel][iorb][idim]
+  //
+  //          iel  -- electron index.
+  //          iorb -- orbital index.
+  //          idim -- cartesian index of ionic derivative.
+  //                  X=0, Y=1, Z=2.
+
+  /**
      * \brief Calculate ion derivatives of SPO's.
      *
      *  @param P Electron particle set.
@@ -241,11 +221,14 @@ class LCAOrbitalSetT : public SPOSetT<T>
      *  @param gradphi Container storing ion gradients for all particles and all
      * orbitals.
      */
-    void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi) final;
-
-    /**
+  void evaluateGradSource(const ParticleSetT<T>& P,
+                          int first,
+                          int last,
+                          const ParticleSetT<T>& source,
+                          int iat_src,
+                          GradMatrix& grad_phi) final;
+
+  /**
      * \brief Calculate ion derivatives of SPO's, their gradients, and their
      * laplacians.
      *
@@ -261,125 +244,129 @@ class LCAOrbitalSetT : public SPOSetT<T>
      *  @param grad_lapl_phi Container storing ion gradients of SPO laplacians
      * for all particles and all orbitals.
      */
-    void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
-        HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) final;
-
-    void
-    evaluateGradSourceRow(const ParticleSetT<T>& P, int iel,
-        const ParticleSetT<T>& source, int iat_src, GradVector& grad_phi) final;
-
-    void
-    createResource(ResourceCollection& collection) const final;
-    void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const final;
-    void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const final;
+  void evaluateGradSource(const ParticleSetT<T>& P,
+                          int first,
+                          int last,
+                          const ParticleSetT<T>& source,
+                          int iat_src,
+                          GradMatrix& grad_phi,
+                          HessMatrix& grad_grad_phi,
+                          GradMatrix& grad_lapl_phi) final;
+
+  void evaluateGradSourceRow(const ParticleSetT<T>& P,
+                             int iel,
+                             const ParticleSetT<T>& source,
+                             int iat_src,
+                             GradVector& grad_phi) final;
+
+  void createResource(ResourceCollection& collection) const final;
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const final;
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const final;
 
 protected:
-    /// number of Single-particle orbitals
-    const IndexType BasisSetSize;
-    /// a copy of the original C before orbital rotation is applied;
-    std::shared_ptr<ValueMatrix> C_copy;
-
-    /// true if C is an identity matrix
-    bool Identity;
-    /// Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L
-    vgl_type Temp;
-    /// Tempv(OrbitalSetSize) Tempv=C*Temp
-    vgl_type Tempv;
-
-    /// These are temporary VectorSoAContainers to hold value, gradient, and
-    /// hessian for all basis or SPO functions evaluated at a given point.
-    /// Nbasis x [1(value)+3(gradient)+6(hessian)]
-    vgh_type Temph;
-    /// Norbitals x [1(value)+3(gradient)+6(hessian)]
-    vgh_type Temphv;
-
-    /// These are temporary VectorSoAContainers to hold value, gradient,
-    /// hessian, and
-    ///  gradient hessian for all basis or SPO functions evaluated at a given
-    ///  point.
-    /// Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)]
-    vghgh_type Tempgh;
-    /// Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)]
-    vghgh_type Tempghv;
+  /// number of basis functions (Nbasis), not the number of single-particle orbitals
+  const IndexType BasisSetSize;
+  /// a copy of the original C before orbital rotation is applied;
+  std::shared_ptr<ValueMatrix> C_copy;
+
+  /// true if C is an identity matrix
+  bool Identity;
+  /// Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L
+  vgl_type Temp;
+  /// Tempv(OrbitalSetSize) Tempv=C*Temp
+  vgl_type Tempv;
+
+  /// These are temporary VectorSoAContainers to hold value, gradient, and
+  /// hessian for all basis or SPO functions evaluated at a given point.
+  /// Nbasis x [1(value)+3(gradient)+6(hessian)]
+  vgh_type Temph;
+  /// Norbitals x [1(value)+3(gradient)+6(hessian)]
+  vgh_type Temphv;
+
+  /// These are temporary VectorSoAContainers to hold value, gradient,
+  /// hessian, and gradient hessian for all basis or SPO functions
+  /// evaluated at a given point.
+  ///
+  /// Nbasis x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)]
+  vghgh_type Tempgh;
+  /// Norbitals x [1(value)+3(gradient)+6(hessian)+10(grad_hessian)] (by analogy with Temphv; TODO confirm)
+  vghgh_type Tempghv;
 
 private:
-    /// helper functions to handle Identity
-    void
-    evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi,
-        ValueVector& d2psi) const;
-
-    void
-    evaluate_vgl_impl(const vgl_type& temp, int i, ValueMatrix& logdet,
-        GradMatrix& dlogdet, ValueMatrix& d2logdet) const;
-    /// These two functions unpack the data in vgh_type temp object into
-    /// wavefunction friendly data structures.
-
-    /// This unpacks temp into vectors psi, dpsi, and d2psi.
-    void
-    evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi,
-        HessVector& d2psi) const;
-
-    /// Unpacks temp into the ith row (or electron index) of logdet, dlogdet,
-    /// dhlogdet.
-    void
-    evaluate_vgh_impl(const vgh_type& temp, int i, ValueMatrix& logdet,
-        GradMatrix& dlogdet, HessMatrix& dhlogdet) const;
-    /// Unpacks data in vghgh_type temp object into wavefunction friendly data
-    /// structures for value, gradient, hessian and gradient hessian.
-    void
-    evaluate_vghgh_impl(const vghgh_type& temp, ValueVector& psi,
-        GradVector& dpsi, HessVector& d2psi, GGGVector& dghpsi) const;
-
-    void
-    evaluate_vghgh_impl(const vghgh_type& temp, int i, ValueMatrix& logdet,
-        GradMatrix& dlogdet, HessMatrix& dhlogdet, GGGMatrix& dghlogdet) const;
-
-    /// Unpacks data in vgl object and calculates/places ionic gradient result
-    /// into dlogdet.
-    void
-    evaluate_ionderiv_v_impl(
-        const vgl_type& temp, int i, GradMatrix& dlogdet) const;
-
-    /// Unpacks data in vgl object and calculates/places ionic gradient of
-    /// value,
-    ///   electron gradient, and electron laplacian result into dlogdet,
-    ///   dglogdet, and dllogdet respectively.
-    void
-    evaluate_ionderiv_vgl_impl(const vghgh_type& temp, int i,
-        GradMatrix& dlogdet, HessMatrix& dglogdet, GradMatrix& dllogdet) const;
-
-    /// Unpacks data in vgl object and calculates/places ionic gradient of a
-    /// single row (phi_j(r)) into dlogdet.
-    void
-    evaluate_ionderiv_v_row_impl(
-        const vgl_type& temp, GradVector& dlogdet) const;
-
-    void
-    mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        OffloadMWVGLArray& phi_vgl_v) const;
-
-    /// packed walker GEMM implementation
-    void
-    mw_evaluateValueImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        OffloadMWVArray& phi_v) const;
-
-    /// helper function for extracting a list of basis sets from a list of LCAOrbitalSet
-    RefVectorWithLeader<basis_type> extractBasisRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list) const;
-
-    struct LCAOMultiWalkerMem;
-    ResourceHandle<LCAOMultiWalkerMem> mw_mem_handle_;
-    /// timer for basis set
-    NewTimer& basis_timer_;
-    /// timer for MO
-    NewTimer& mo_timer_;
+  /// helper functions to handle Identity
+  void evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) const;
+
+  void evaluate_vgl_impl(const vgl_type& temp,
+                         int i,
+                         ValueMatrix& logdet,
+                         GradMatrix& dlogdet,
+                         ValueMatrix& d2logdet) const;
+  /// These two functions unpack the data in vgh_type temp object into
+  /// wavefunction friendly data structures.
+
+  /// This unpacks temp into vectors psi, dpsi, and d2psi.
+  void evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, HessVector& d2psi) const;
+
+  /// Unpacks temp into the ith row (or electron index) of logdet, dlogdet,
+  /// dhlogdet.
+  void evaluate_vgh_impl(const vgh_type& temp,
+                         int i,
+                         ValueMatrix& logdet,
+                         GradMatrix& dlogdet,
+                         HessMatrix& dhlogdet) const;
+  /// Unpacks data in vghgh_type temp object into wavefunction friendly data
+  /// structures for value, gradient, hessian and gradient hessian.
+  void evaluate_vghgh_impl(const vghgh_type& temp,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           HessVector& d2psi,
+                           GGGVector& dghpsi) const;
+
+  void evaluate_vghgh_impl(const vghgh_type& temp,
+                           int i,
+                           ValueMatrix& logdet,
+                           GradMatrix& dlogdet,
+                           HessMatrix& dhlogdet,
+                           GGGMatrix& dghlogdet) const;
+
+  /// Unpacks data in vgl object and calculates/places ionic gradient result
+  /// into dlogdet.
+  void evaluate_ionderiv_v_impl(const vgl_type& temp, int i, GradMatrix& dlogdet) const;
+
+  /// Unpacks data in vghgh_type temp object and calculates/places the ionic
+  /// gradient of the value, the electron gradient, and the electron
+  /// laplacian into dlogdet, dglogdet, and dllogdet
+  /// respectively.
+  void evaluate_ionderiv_vgl_impl(const vghgh_type& temp,
+                                  int i,
+                                  GradMatrix& dlogdet,
+                                  HessMatrix& dglogdet,
+                                  GradMatrix& dllogdet) const;
+
+  /// Unpacks data in vgl object and calculates/places ionic gradient of a
+  /// single row (phi_j(r)) into dlogdet.
+  void evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dlogdet) const;
+
+  void mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                              int iat,
+                              OffloadMWVGLArray& phi_vgl_v) const;
+
+  /// packed walker GEMM implementation
+  void mw_evaluateValueImplGEMM(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                int iat,
+                                OffloadMWVArray& phi_v) const;
+
+  /// helper function for extracting a list of basis sets from a list of LCAOrbitalSet
+  RefVectorWithLeader<basis_type> extractBasisRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list) const;
+
+  struct LCAOMultiWalkerMem;
+  ResourceHandle<LCAOMultiWalkerMem> mw_mem_handle_;
+  /// timer for basis set
+  NewTimer& basis_timer_;
+  /// timer for MO
+  NewTimer& mo_timer_;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
index 87b4e719d0b..17bd8c32b3f 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.cpp
@@ -13,60 +13,57 @@
 
 namespace qmcplusplus
 {
-template <typename T>
-LCAOrbitalSetWithCorrectionT<T>::LCAOrbitalSetWithCorrectionT(
-    const std::string& my_name, ParticleSetT<T>& ions, ParticleSetT<T>& els,
-    std::unique_ptr<basis_type>&& bs) :
-    SPOSetT<T>(my_name),
-    lcao(my_name + "_modified", std::move(bs)),
-    cusp(ions, els)
-{
-}
+template<typename T>
+LCAOrbitalSetWithCorrectionT<T>::LCAOrbitalSetWithCorrectionT(const std::string& my_name,
+                                                              ParticleSetT<T>& ions,
+                                                              ParticleSetT<T>& els,
+                                                              std::unique_ptr<basis_type>&& bs)
+    : SPOSetT<T>(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els)
+{}
 
-template <typename T>
-void
-LCAOrbitalSetWithCorrectionT<T>::setOrbitalSetSize(int norbs)
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::setOrbitalSetSize(int norbs)
 {
-    assert(
-        lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!");
-    this->OrbitalSetSize = norbs;
-    cusp.setOrbitalSetSize(norbs);
+  assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!");
+  this->OrbitalSetSize = norbs;
+  cusp.setOrbitalSetSize(norbs);
 }
 
-template <typename T>
-std::unique_ptr<SPOSetT<T>>
-LCAOrbitalSetWithCorrectionT<T>::makeClone() const
+template<typename T>
+std::unique_ptr<SPOSetT<T>> LCAOrbitalSetWithCorrectionT<T>::makeClone() const
 {
-    return std::make_unique<LCAOrbitalSetWithCorrectionT<T>>(*this);
+  return std::make_unique<LCAOrbitalSetWithCorrectionT<T>>(*this);
 }
 
-template <typename T>
-void
-LCAOrbitalSetWithCorrectionT<T>::evaluateValue(
-    const ParticleSetT<T>& P, int iat, ValueVector& psi)
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-    lcao.evaluateValue(P, iat, psi);
-    cusp.addV(P, iat, psi);
+  lcao.evaluateValue(P, iat, psi);
+  cusp.addV(P, iat, psi);
 }
 
-template <typename T>
-void
-LCAOrbitalSetWithCorrectionT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
-    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::evaluateVGL(const ParticleSetT<T>& P,
+                                                  int iat,
+                                                  ValueVector& psi,
+                                                  GradVector& dpsi,
+                                                  ValueVector& d2psi)
 {
-    lcao.evaluateVGL(P, iat, psi, dpsi, d2psi);
-    cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi);
+  lcao.evaluateVGL(P, iat, psi, dpsi, d2psi);
+  cusp.add_vector_vgl(P, iat, psi, dpsi, d2psi);
 }
 
-template <typename T>
-void
-LCAOrbitalSetWithCorrectionT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
-    int first, int last, ValueMatrix& logdet, GradMatrix& dlogdet,
-    ValueMatrix& d2logdet)
+template<typename T>
+void LCAOrbitalSetWithCorrectionT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                                           int first,
+                                                           int last,
+                                                           ValueMatrix& logdet,
+                                                           GradMatrix& dlogdet,
+                                                           ValueMatrix& d2logdet)
 {
-    lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
-    for (size_t i = 0, iat = first; iat < last; i++, iat++)
-        cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet);
+  lcao.evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
+  for (size_t i = 0, iat = first; iat < last; i++, iat++)
+    cusp.add_vgl(P, iat, i, logdet, dlogdet, d2logdet);
 }
 
 template class LCAOrbitalSetWithCorrectionT<double>;
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
index 8b0003d18fd..c6182a5d666 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrectionT.h
@@ -23,58 +23,52 @@ namespace qmcplusplus
  *
  */
 
-template <typename T>
+template<typename T>
 class LCAOrbitalSetWithCorrectionT : public SPOSetT<T>
 {
 public:
-    using basis_type = typename LCAOrbitalSetT<T>::basis_type;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    /** constructor
+  using basis_type  = typename LCAOrbitalSetT<T>::basis_type;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  /** constructor
      * @param ions
      * @param els
      * @param bs pointer to the BasisSet
      * @param rl report level
      */
-    LCAOrbitalSetWithCorrectionT(const std::string& my_name,
-        ParticleSetT<T>& ions, ParticleSetT<T>& els,
-        std::unique_ptr<basis_type>&& bs);
+  LCAOrbitalSetWithCorrectionT(const std::string& my_name,
+                               ParticleSetT<T>& ions,
+                               ParticleSetT<T>& els,
+                               std::unique_ptr<basis_type>&& bs);
 
-    LCAOrbitalSetWithCorrectionT(
-        const LCAOrbitalSetWithCorrectionT& in) = default;
+  LCAOrbitalSetWithCorrectionT(const LCAOrbitalSetWithCorrectionT& in) = default;
 
-    std::string
-    getClassName() const final
-    {
-        return "LCAOrbitalSetWithCorrectionT";
-    }
+  std::string getClassName() const final { return "LCAOrbitalSetWithCorrectionT"; }
 
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const final;
+  std::unique_ptr<SPOSetT<T>> makeClone() const final;
 
-    void
-    setOrbitalSetSize(int norbs) final;
+  void setOrbitalSetSize(int norbs) final;
 
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) final;
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) final;
 
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) final;
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
 
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final;
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) final;
 
-    template <typename>
-    friend class LCAOrbitalBuilderT;
+  template<typename>
+  friend class LCAOrbitalBuilderT;
 
 private:
-    LCAOrbitalSetT<T> lcao;
+  LCAOrbitalSetT<T> lcao;
 
-    SoaCuspCorrectionT<T> cusp;
+  SoaCuspCorrectionT<T> cusp;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
index 33aa7070964..0866c165548 100644
--- a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
+++ b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
@@ -19,34 +19,31 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 struct CorrectPhaseFunctor
 {
-    const TinyVector<double, 3>& superTwist;
+  const TinyVector<double, 3>& superTwist;
 
-    template <typename PosType>
-    T
-    operator()(PosType Tv) const
-    {
-        return 1.0;
-    }
+  template<typename PosType>
+  T operator()(PosType Tv) const
+  {
+    return 1.0;
+  }
 };
 
-template <typename T>
+template<typename T>
 struct CorrectPhaseFunctor<std::complex<T>>
 {
-    const TinyVector<double, 3>& superTwist;
-
-    template <typename PosType>
-    std::complex<T>
-    operator()(PosType Tv) const
-    {
-        T phasearg = superTwist[0] * Tv[0] + superTwist[1] * Tv[1] +
-            superTwist[2] * Tv[2];
-        T s, c;
-        qmcplusplus::sincos(-phasearg, &s, &c);
-        return {c, s};
-    };
+  const TinyVector<double, 3>& superTwist;
+
+  template<typename PosType>
+  std::complex<T> operator()(PosType Tv) const
+  {
+    T phasearg = superTwist[0] * Tv[0] + superTwist[1] * Tv[1] + superTwist[2] * Tv[2];
+    T s, c;
+    qmcplusplus::sincos(-phasearg, &s, &c);
+    return {c, s};
+  };
 };
 
 /* A basis set for a center type
@@ -56,768 +53,694 @@ struct CorrectPhaseFunctor<std::complex<T>>
  *
  * \f$ \phi_{n,l,m}({\bf r})=R_{n,l}(r) Y_{l,m}(\theta) \f$
  */
-template <typename ROT, typename SH, typename ORBT>
+template<typename ROT, typename SH, typename ORBT>
 struct SoaAtomicBasisSetT
 {
-    using RadialOrbital_t = ROT;
-    using RealType = typename ROT::RealType;
-    using GridType = typename ROT::GridType;
-    using ValueType = ORBT;
-    using OffloadArray4D  = Array<ValueType, 4, OffloadPinnedAllocator<ValueType>>;
-    using OffloadArray3D  = Array<ValueType, 3, OffloadPinnedAllocator<ValueType>>;
-    using OffloadMatrix   = Matrix<ValueType, OffloadPinnedAllocator<ValueType>>;
-    using OffloadVector   = Vector<ValueType, OffloadPinnedAllocator<ValueType>>;
-    
-    /// multi walker shared memory buffer
-    struct SoaAtomicBSetMultiWalkerMem;
-    /// multi walker resource handle
-    ResourceHandle<SoaAtomicBSetMultiWalkerMem> mw_mem_handle_;
-    /// size of the basis set
-    int BasisSetSize;
-    /// Number of Cell images for the evaluation of the orbital with PBC. If No
-    /// PBC, should be 0;
-    TinyVector<int, 3> PBCImages;
-    /// Coordinates of SuperTwist
-    TinyVector<double, 3> SuperTwist;
-    /// Phase Factor array
-    std::vector<ValueType> periodic_image_phase_factors;
-    /// maximum radius of this center
-    RealType Rmax;
-    /// spherical harmonics
-    SH Ylm;
-    /// radial orbitals
-    ROT MultiRnl;
-    /// index of the corresponding real Spherical Harmonic with quantum numbers
-    /// \f$ (l,m) \f$
-    aligned_vector<int> LM;
-    /**index of the corresponding radial orbital with quantum numbers \f$ (n,l)
+  using RadialOrbital_t = ROT;
+  using RealType        = typename ROT::RealType;
+  using GridType        = typename ROT::GridType;
+  using ValueType       = ORBT;
+  using OffloadArray4D  = Array<ValueType, 4, OffloadPinnedAllocator<ValueType>>;
+  using OffloadArray3D  = Array<ValueType, 3, OffloadPinnedAllocator<ValueType>>;
+  using OffloadMatrix   = Matrix<ValueType, OffloadPinnedAllocator<ValueType>>;
+  using OffloadVector   = Vector<ValueType, OffloadPinnedAllocator<ValueType>>;
+
+  /// multi walker shared memory buffer
+  struct SoaAtomicBSetMultiWalkerMem;
+  /// multi walker resource handle
+  ResourceHandle<SoaAtomicBSetMultiWalkerMem> mw_mem_handle_;
+  /// size of the basis set
+  int BasisSetSize;
+  /// Number of Cell images for the evaluation of the orbital with PBC. If No
+  /// PBC, should be 0;
+  TinyVector<int, 3> PBCImages;
+  /// Coordinates of SuperTwist
+  TinyVector<double, 3> SuperTwist;
+  /// Phase Factor array
+  std::vector<ValueType> periodic_image_phase_factors;
+  /// maximum radius of this center
+  RealType Rmax;
+  /// spherical harmonics
+  SH Ylm;
+  /// radial orbitals
+  ROT MultiRnl;
+  /// index of the corresponding real Spherical Harmonic with quantum numbers
+  /// \f$ (l,m) \f$
+  aligned_vector<int> LM;
+  /**index of the corresponding radial orbital with quantum numbers \f$ (n,l)
      * \f$ */
-    aligned_vector<int> NL;
-    /// container for the quantum-numbers
-    std::vector<QuantumNumberType> RnlID;
-    /// temporary storage
-    VectorSoaContainer<RealType, 4> tempS;
-
-    /// the constructor
-    explicit SoaAtomicBasisSetT(int lmax, bool addsignforM = false) :
-        Ylm(lmax, addsignforM)
-    {
-    }
-
-    void
-    checkInVariables(opt_variables_type& active)
-    {
-        // for(size_t nl=0; nl<Rnl.size(); nl++)
-        //   Rnl[nl]->checkInVariables(active);
-    }
-
-    void
-    checkOutVariables(const opt_variables_type& active)
-    {
-        // for(size_t nl=0; nl<Rnl.size(); nl++)
-        //   Rnl[nl]->checkOutVariables(active);
-    }
-
-    void
-    resetParameters(const opt_variables_type& active)
-    {
-        // for(size_t nl=0; nl<Rnl.size(); nl++)
-        //   Rnl[nl]->resetParameters(active);
-    }
-
-    /** return the number of basis functions
+  aligned_vector<int> NL;
+  /// container for the quantum-numbers
+  std::vector<QuantumNumberType> RnlID;
+  /// temporary storage
+  VectorSoaContainer<RealType, 4> tempS;
+
+  /// the constructor
+  explicit SoaAtomicBasisSetT(int lmax, bool addsignforM = false) : Ylm(lmax, addsignforM) {}
+
+  void checkInVariables(opt_variables_type& active)
+  {
+    // for(size_t nl=0; nl<Rnl.size(); nl++)
+    //   Rnl[nl]->checkInVariables(active);
+  }
+
+  void checkOutVariables(const opt_variables_type& active)
+  {
+    // for(size_t nl=0; nl<Rnl.size(); nl++)
+    //   Rnl[nl]->checkOutVariables(active);
+  }
+
+  void resetParameters(const opt_variables_type& active)
+  {
+    // for(size_t nl=0; nl<Rnl.size(); nl++)
+    //   Rnl[nl]->resetParameters(active);
+  }
+
+  /** return the number of basis functions
      */
-    inline int
-    getBasisSetSize() const
-    {
-        //=NL.size();
-        return BasisSetSize;
-    }
+  inline int getBasisSetSize() const
+  {
+    //=NL.size();
+    return BasisSetSize;
+  }
 
-    /** Set the number of periodic image for the evaluation of the orbitals and
+  /** Set the number of periodic image for the evaluation of the orbitals and
      * the phase factor. In the case of Non-PBC, PBCImages=(1,1,1),
      * SuperTwist(0,0,0) and the PhaseFactor=1.
      */
-    void
-    setPBCParams(const TinyVector<int, 3>& pbc_images,
-        const TinyVector<double, 3> supertwist,
-        const std::vector<ValueType>& PeriodicImagePhaseFactors)
-    {
-        PBCImages = pbc_images;
-        periodic_image_phase_factors = PeriodicImagePhaseFactors;
-        SuperTwist = supertwist;
-    }
-
-    /** implement a BasisSetBase virtual function
+  void setPBCParams(const TinyVector<int, 3>& pbc_images,
+                    const TinyVector<double, 3> supertwist,
+                    const std::vector<ValueType>& PeriodicImagePhaseFactors)
+  {
+    PBCImages                    = pbc_images;
+    periodic_image_phase_factors = PeriodicImagePhaseFactors;
+    SuperTwist                   = supertwist;
+  }
+
+  /** implement a BasisSetBase virtual function
      *
      * Set Rmax and BasisSetSize
      * @todo Should be able to overwrite Rmax to be much smaller than the
      * maximum grid
      */
-    inline void
-    setBasisSetSize(int n)
+  inline void setBasisSetSize(int n)
+  {
+    BasisSetSize = LM.size();
+    tempS.resize(std::max(Ylm.size(), RnlID.size()));
+  }
+
+  /** Set Rmax */
+  template<typename RealType>
+  inline void setRmax(RealType rmax)
+  {
+    Rmax = (rmax > 0) ? rmax : MultiRnl.rmax();
+  }
+
+  /// set the current offset
+  inline void setCenter(int c, int offset) {}
+
+  /// Sets a boolean vector for S-type orbitals.  Used for cusp correction.
+  void queryOrbitalsForSType(std::vector<bool>& s_orbitals) const
+  {
+    for (int i = 0; i < BasisSetSize; i++)
     {
-        BasisSetSize = LM.size();
-        tempS.resize(std::max(Ylm.size(), RnlID.size()));
+      s_orbitals[i] = (RnlID[NL[i]][1] == 0);
     }
+  }
 
-    /** Set Rmax */
-    template <typename RealType>
-    inline void
-    setRmax(RealType rmax)
+  /** evaluate VGL
+     */
+  template<typename LAT, typename PosType, typename VGL>
+  inline void evaluateVGL(const LAT& lattice,
+                          const RealType r,
+                          const PosType& dr,
+                          const size_t offset,
+                          VGL& vgl,
+                          PosType Tv)
+  {
+    int TransX, TransY, TransZ;
+
+    PosType dr_new;
+    RealType r_new;
+    // RealType psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new;
+
+    const ValueType correctphase = CorrectPhaseFunctor<ValueType>{SuperTwist}(Tv);
+
+    constexpr RealType cone(1);
+    constexpr RealType ctwo(2);
+
+    // one can assert the alignment
+    RealType* restrict phi   = tempS.data(0);
+    RealType* restrict dphi  = tempS.data(1);
+    RealType* restrict d2phi = tempS.data(2);
+
+    // V,Gx,Gy,Gz,L
+    auto* restrict psi             = vgl.data(0) + offset;
+    const RealType* restrict ylm_v = Ylm[0]; // value
+    auto* restrict dpsi_x          = vgl.data(1) + offset;
+    const RealType* restrict ylm_x = Ylm[1]; // gradX
+    auto* restrict dpsi_y          = vgl.data(2) + offset;
+    const RealType* restrict ylm_y = Ylm[2]; // gradY
+    auto* restrict dpsi_z          = vgl.data(3) + offset;
+    const RealType* restrict ylm_z = Ylm[3]; // gradZ
+    auto* restrict d2psi           = vgl.data(4) + offset;
+    const RealType* restrict ylm_l = Ylm[4]; // lap
+
+    for (size_t ib = 0; ib < BasisSetSize; ++ib)
     {
-        Rmax = (rmax > 0) ? rmax : MultiRnl.rmax();
+      psi[ib]    = 0;
+      dpsi_x[ib] = 0;
+      dpsi_y[ib] = 0;
+      dpsi_z[ib] = 0;
+      d2psi[ib]  = 0;
     }
-
-    /// set the current offset
-    inline void
-    setCenter(int c, int offset)
+    // Phase_idx (iter) needs to be initialized at -1 as it has to be
+    // incremented first to comply with the if statement (r_new >=Rmax)
+    int iter = -1;
+    for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
     {
+      // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+      TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+      for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+      {
+        // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+        TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+        for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z
+        {
+          // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+          TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+
+          dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0));
+          dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1));
+          dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2));
+
+          r_new = std::sqrt(dot(dr_new, dr_new));
+
+          iter++;
+          if (r_new >= Rmax)
+            continue;
+
+          // SIGN Change!!
+          const RealType x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
+          Ylm.evaluateVGL(x, y, z);
+
+          MultiRnl.evaluate(r_new, phi, dphi, d2phi);
+
+          const RealType rinv = cone / r_new;
+
+          /// Phase for PBC containing the phase for the nearest image
+          /// displacement and the correction due to the Distance
+          /// table.
+          const ValueType Phase = periodic_image_phase_factors[iter] * correctphase;
+
+          for (size_t ib = 0; ib < BasisSetSize; ++ib)
+          {
+            const int nl(NL[ib]);
+            const int lm(LM[ib]);
+            const RealType drnloverr = rinv * dphi[nl];
+            const RealType ang       = ylm_v[lm];
+            const RealType gr_x      = drnloverr * x;
+            const RealType gr_y      = drnloverr * y;
+            const RealType gr_z      = drnloverr * z;
+            const RealType ang_x     = ylm_x[lm];
+            const RealType ang_y     = ylm_y[lm];
+            const RealType ang_z     = ylm_z[lm];
+            const RealType vr        = phi[nl];
+
+            psi[ib] += ang * vr * Phase;
+            dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
+            dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
+            dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
+            d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) + ctwo * (gr_x * ang_x + gr_y * ang_y + gr_z * ang_z) +
+                          vr * ylm_l[lm]) *
+                Phase;
+          }
+        }
+      }
     }
-
-    /// Sets a boolean vector for S-type orbitals.  Used for cusp correction.
-    void
-    queryOrbitalsForSType(std::vector<bool>& s_orbitals) const
+  }
+
+  template<typename LAT, typename PosType, typename VGH>
+  inline void evaluateVGH(const LAT& lattice, const RealType r, const PosType& dr, const size_t offset, VGH& vgh)
+  {
+    int TransX, TransY, TransZ;
+
+    PosType dr_new;
+    RealType r_new;
+
+    constexpr RealType cone(1);
+
+    // one can assert the alignment
+    RealType* restrict phi   = tempS.data(0);
+    RealType* restrict dphi  = tempS.data(1);
+    RealType* restrict d2phi = tempS.data(2);
+
+    // V,Gx,Gy,Gz, then Hessian components xx,xy,xz,yy,yz,zz
+    auto* restrict psi             = vgh.data(0) + offset;
+    const RealType* restrict ylm_v = Ylm[0]; // value
+    auto* restrict dpsi_x          = vgh.data(1) + offset;
+    const RealType* restrict ylm_x = Ylm[1]; // gradX
+    auto* restrict dpsi_y          = vgh.data(2) + offset;
+    const RealType* restrict ylm_y = Ylm[2]; // gradY
+    auto* restrict dpsi_z          = vgh.data(3) + offset;
+    const RealType* restrict ylm_z = Ylm[3]; // gradZ
+
+    auto* restrict dhpsi_xx         = vgh.data(4) + offset;
+    const RealType* restrict ylm_xx = Ylm[4];
+    auto* restrict dhpsi_xy         = vgh.data(5) + offset;
+    const RealType* restrict ylm_xy = Ylm[5];
+    auto* restrict dhpsi_xz         = vgh.data(6) + offset;
+    const RealType* restrict ylm_xz = Ylm[6];
+    auto* restrict dhpsi_yy         = vgh.data(7) + offset;
+    const RealType* restrict ylm_yy = Ylm[7];
+    auto* restrict dhpsi_yz         = vgh.data(8) + offset;
+    const RealType* restrict ylm_yz = Ylm[8];
+    auto* restrict dhpsi_zz         = vgh.data(9) + offset;
+    const RealType* restrict ylm_zz = Ylm[9];
+
+    for (size_t ib = 0; ib < BasisSetSize; ++ib)
     {
-        for (int i = 0; i < BasisSetSize; i++) {
-            s_orbitals[i] = (RnlID[NL[i]][1] == 0);
-        }
+      psi[ib]      = 0;
+      dpsi_x[ib]   = 0;
+      dpsi_y[ib]   = 0;
+      dpsi_z[ib]   = 0;
+      dhpsi_xx[ib] = 0;
+      dhpsi_xy[ib] = 0;
+      dhpsi_xz[ib] = 0;
+      dhpsi_yy[ib] = 0;
+      dhpsi_yz[ib] = 0;
+      dhpsi_zz[ib] = 0;
+      //      d2psi[ib]  = 0;
     }
 
-    /** evaluate VGL
-     */
-    template <typename LAT, typename PosType, typename VGL>
-    inline void
-    evaluateVGL(const LAT& lattice, const RealType r, const PosType& dr,
-        const size_t offset, VGL& vgl, PosType Tv)
+    for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
     {
-        int TransX, TransY, TransZ;
-
-        PosType dr_new;
-        RealType r_new;
-        // RealType psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new;
-
-        const ValueType correctphase =
-            CorrectPhaseFunctor<ValueType>{SuperTwist}(Tv);
-
-        constexpr RealType cone(1);
-        constexpr RealType ctwo(2);
-
-        // one can assert the alignment
-        RealType* restrict phi = tempS.data(0);
-        RealType* restrict dphi = tempS.data(1);
-        RealType* restrict d2phi = tempS.data(2);
-
-        // V,Gx,Gy,Gz,L
-        auto* restrict psi = vgl.data(0) + offset;
-        const RealType* restrict ylm_v = Ylm[0]; // value
-        auto* restrict dpsi_x = vgl.data(1) + offset;
-        const RealType* restrict ylm_x = Ylm[1]; // gradX
-        auto* restrict dpsi_y = vgl.data(2) + offset;
-        const RealType* restrict ylm_y = Ylm[2]; // gradY
-        auto* restrict dpsi_z = vgl.data(3) + offset;
-        const RealType* restrict ylm_z = Ylm[3]; // gradZ
-        auto* restrict d2psi = vgl.data(4) + offset;
-        const RealType* restrict ylm_l = Ylm[4]; // lap
-
-        for (size_t ib = 0; ib < BasisSetSize; ++ib) {
-            psi[ib] = 0;
-            dpsi_x[ib] = 0;
-            dpsi_y[ib] = 0;
-            dpsi_z[ib] = 0;
-            d2psi[ib] = 0;
-        }
-        // Phase_idx (iter) needs to be initialized at -1 as it has to be
-        // incremented first to comply with the if statement (r_new >=Rmax)
-        int iter = -1;
-        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+      // Maps the loop index 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+      TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+      for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+      {
+        // Maps the loop index 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+        TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+        for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z
         {
-            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
-            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
-            {
-                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
-                for (int k = 0; k <= PBCImages[2];
-                     k++) // loop Translation over Z
-                {
-                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
-
-                    dr_new[0] = dr[0] +
-                        (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) +
-                            TransZ * lattice.R(2, 0));
-                    dr_new[1] = dr[1] +
-                        (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) +
-                            TransZ * lattice.R(2, 1));
-                    dr_new[2] = dr[2] +
-                        (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) +
-                            TransZ * lattice.R(2, 2));
-
-                    r_new = std::sqrt(dot(dr_new, dr_new));
-
-                    iter++;
-                    if (r_new >= Rmax)
-                        continue;
-
-                    // SIGN Change!!
-                    const RealType x = -dr_new[0], y = -dr_new[1],
-                                   z = -dr_new[2];
-                    Ylm.evaluateVGL(x, y, z);
-
-                    MultiRnl.evaluate(r_new, phi, dphi, d2phi);
-
-                    const RealType rinv = cone / r_new;
-
-                    /// Phase for PBC containing the phase for the nearest image
-                    /// displacement and the correction due to the Distance
-                    /// table.
-                    const ValueType Phase =
-                        periodic_image_phase_factors[iter] * correctphase;
-
-                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
-                        const int nl(NL[ib]);
-                        const int lm(LM[ib]);
-                        const RealType drnloverr = rinv * dphi[nl];
-                        const RealType ang = ylm_v[lm];
-                        const RealType gr_x = drnloverr * x;
-                        const RealType gr_y = drnloverr * y;
-                        const RealType gr_z = drnloverr * z;
-                        const RealType ang_x = ylm_x[lm];
-                        const RealType ang_y = ylm_y[lm];
-                        const RealType ang_z = ylm_z[lm];
-                        const RealType vr = phi[nl];
-
-                        psi[ib] += ang * vr * Phase;
-                        dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
-                        dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
-                        dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
-                        d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) +
-                                         ctwo *
-                                             (gr_x * ang_x + gr_y * ang_y +
-                                                 gr_z * ang_z) +
-                                         vr * ylm_l[lm]) *
-                            Phase;
-                    }
-                }
-            }
+          // Maps the loop index 0,1,2,3,4,... to cell offsets 0,1,-1,2,-2,...
+          TransZ    = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+          dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
+          dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
+          dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
+          r_new     = std::sqrt(dot(dr_new, dr_new));
+
+          // const size_t ib_max=NL.size();
+          if (r_new >= Rmax)
+            continue;
+
+          // SIGN Change!!
+          const RealType x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
+          Ylm.evaluateVGH(x, y, z);
+
+          MultiRnl.evaluate(r_new, phi, dphi, d2phi);
+
+          const RealType rinv = cone / r_new;
+
+          for (size_t ib = 0; ib < BasisSetSize; ++ib)
+          {
+            const int nl(NL[ib]);
+            const int lm(LM[ib]);
+            const RealType drnloverr = rinv * dphi[nl];
+            const RealType ang       = ylm_v[lm];
+            const RealType gr_x      = drnloverr * x;
+            const RealType gr_y      = drnloverr * y;
+            const RealType gr_z      = drnloverr * z;
+
+            // The non-strictly diagonal term in
+            // \partial_i \partial_j R_{nl} is
+            //   \frac{x_i x_j}{r^2} \left( \frac{\partial^2 R_{nl}}{\partial r^2}
+            //     - \frac{1}{r} \frac{\partial R_{nl}}{\partial r} \right).
+            // To save recomputation, everything except the x_i*x_j
+            // factor is evaluated once and stored in gr2_tmp; the
+            // full term is then x_i*x_j*gr2_tmp (the diagonal
+            // entries additionally add drnloverr).
+            const RealType gr2_tmp = rinv * rinv * (d2phi[nl] - drnloverr);
+            const RealType gr_xx   = x * x * gr2_tmp + drnloverr;
+            const RealType gr_xy   = x * y * gr2_tmp;
+            const RealType gr_xz   = x * z * gr2_tmp;
+            const RealType gr_yy   = y * y * gr2_tmp + drnloverr;
+            const RealType gr_yz   = y * z * gr2_tmp;
+            const RealType gr_zz   = z * z * gr2_tmp + drnloverr;
+
+            const RealType ang_x  = ylm_x[lm];
+            const RealType ang_y  = ylm_y[lm];
+            const RealType ang_z  = ylm_z[lm];
+            const RealType ang_xx = ylm_xx[lm];
+            const RealType ang_xy = ylm_xy[lm];
+            const RealType ang_xz = ylm_xz[lm];
+            const RealType ang_yy = ylm_yy[lm];
+            const RealType ang_yz = ylm_yz[lm];
+            const RealType ang_zz = ylm_zz[lm];
+
+            const RealType vr = phi[nl];
+
+            psi[ib] += ang * vr;
+            dpsi_x[ib] += ang * gr_x + vr * ang_x;
+            dpsi_y[ib] += ang * gr_y + vr * ang_y;
+            dpsi_z[ib] += ang * gr_z + vr * ang_z;
+
+            // Product rule for the Hessian of R*Y:
+            //   \partial_i \partial_j (R*Y)
+            //     = Y \partial_i \partial_j R
+            //     + R \partial_i \partial_j Y
+            //     + (\partial_i R)(\partial_j Y)
+            //     + (\partial_j R)(\partial_i Y)
+            dhpsi_xx[ib] += gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
+            dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x;
+            dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x;
+            dhpsi_yy[ib] += gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
+            dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y;
+            dhpsi_zz[ib] += gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
+          }
         }
+      }
     }
-
-    template <typename LAT, typename PosType, typename VGH>
-    inline void
-    evaluateVGH(const LAT& lattice, const RealType r, const PosType& dr,
-        const size_t offset, VGH& vgh)
+  }
+
+  template<typename LAT, typename PosType, typename VGHGH>
+  inline void evaluateVGHGH(const LAT& lattice, const RealType r, const PosType& dr, const size_t offset, VGHGH& vghgh)
+  {
+    int TransX, TransY, TransZ;
+
+    PosType dr_new;
+    RealType r_new;
+
+    constexpr RealType cone(1);
+
+    // one can assert the alignment
+    RealType* restrict phi   = tempS.data(0);
+    RealType* restrict dphi  = tempS.data(1);
+    RealType* restrict d2phi = tempS.data(2);
+    RealType* restrict d3phi = tempS.data(3);
+
+    // V,Gx,Gy,Gz,L
+    auto* restrict psi             = vghgh.data(0) + offset;
+    const RealType* restrict ylm_v = Ylm[0]; // value
+    auto* restrict dpsi_x          = vghgh.data(1) + offset;
+    const RealType* restrict ylm_x = Ylm[1]; // gradX
+    auto* restrict dpsi_y          = vghgh.data(2) + offset;
+    const RealType* restrict ylm_y = Ylm[2]; // gradY
+    auto* restrict dpsi_z          = vghgh.data(3) + offset;
+    const RealType* restrict ylm_z = Ylm[3]; // gradZ
+
+    auto* restrict dhpsi_xx         = vghgh.data(4) + offset;
+    const RealType* restrict ylm_xx = Ylm[4];
+    auto* restrict dhpsi_xy         = vghgh.data(5) + offset;
+    const RealType* restrict ylm_xy = Ylm[5];
+    auto* restrict dhpsi_xz         = vghgh.data(6) + offset;
+    const RealType* restrict ylm_xz = Ylm[6];
+    auto* restrict dhpsi_yy         = vghgh.data(7) + offset;
+    const RealType* restrict ylm_yy = Ylm[7];
+    auto* restrict dhpsi_yz         = vghgh.data(8) + offset;
+    const RealType* restrict ylm_yz = Ylm[8];
+    auto* restrict dhpsi_zz         = vghgh.data(9) + offset;
+    const RealType* restrict ylm_zz = Ylm[9];
+
+    auto* restrict dghpsi_xxx        = vghgh.data(10) + offset;
+    const RealType* restrict ylm_xxx = Ylm[10];
+    auto* restrict dghpsi_xxy        = vghgh.data(11) + offset;
+    const RealType* restrict ylm_xxy = Ylm[11];
+    auto* restrict dghpsi_xxz        = vghgh.data(12) + offset;
+    const RealType* restrict ylm_xxz = Ylm[12];
+    auto* restrict dghpsi_xyy        = vghgh.data(13) + offset;
+    const RealType* restrict ylm_xyy = Ylm[13];
+    auto* restrict dghpsi_xyz        = vghgh.data(14) + offset;
+    const RealType* restrict ylm_xyz = Ylm[14];
+    auto* restrict dghpsi_xzz        = vghgh.data(15) + offset;
+    const RealType* restrict ylm_xzz = Ylm[15];
+    auto* restrict dghpsi_yyy        = vghgh.data(16) + offset;
+    const RealType* restrict ylm_yyy = Ylm[16];
+    auto* restrict dghpsi_yyz        = vghgh.data(17) + offset;
+    const RealType* restrict ylm_yyz = Ylm[17];
+    auto* restrict dghpsi_yzz        = vghgh.data(18) + offset;
+    const RealType* restrict ylm_yzz = Ylm[18];
+    auto* restrict dghpsi_zzz        = vghgh.data(19) + offset;
+    const RealType* restrict ylm_zzz = Ylm[19];
+
+    for (size_t ib = 0; ib < BasisSetSize; ++ib)
     {
-        int TransX, TransY, TransZ;
-
-        PosType dr_new;
-        RealType r_new;
-
-        constexpr RealType cone(1);
-
-        // one can assert the alignment
-        RealType* restrict phi = tempS.data(0);
-        RealType* restrict dphi = tempS.data(1);
-        RealType* restrict d2phi = tempS.data(2);
-
-        // V,Gx,Gy,Gz,L
-        auto* restrict psi = vgh.data(0) + offset;
-        const RealType* restrict ylm_v = Ylm[0]; // value
-        auto* restrict dpsi_x = vgh.data(1) + offset;
-        const RealType* restrict ylm_x = Ylm[1]; // gradX
-        auto* restrict dpsi_y = vgh.data(2) + offset;
-        const RealType* restrict ylm_y = Ylm[2]; // gradY
-        auto* restrict dpsi_z = vgh.data(3) + offset;
-        const RealType* restrict ylm_z = Ylm[3]; // gradZ
-
-        auto* restrict dhpsi_xx = vgh.data(4) + offset;
-        const RealType* restrict ylm_xx = Ylm[4];
-        auto* restrict dhpsi_xy = vgh.data(5) + offset;
-        const RealType* restrict ylm_xy = Ylm[5];
-        auto* restrict dhpsi_xz = vgh.data(6) + offset;
-        const RealType* restrict ylm_xz = Ylm[6];
-        auto* restrict dhpsi_yy = vgh.data(7) + offset;
-        const RealType* restrict ylm_yy = Ylm[7];
-        auto* restrict dhpsi_yz = vgh.data(8) + offset;
-        const RealType* restrict ylm_yz = Ylm[8];
-        auto* restrict dhpsi_zz = vgh.data(9) + offset;
-        const RealType* restrict ylm_zz = Ylm[9];
-
-        for (size_t ib = 0; ib < BasisSetSize; ++ib) {
-            psi[ib] = 0;
-            dpsi_x[ib] = 0;
-            dpsi_y[ib] = 0;
-            dpsi_z[ib] = 0;
-            dhpsi_xx[ib] = 0;
-            dhpsi_xy[ib] = 0;
-            dhpsi_xz[ib] = 0;
-            dhpsi_yy[ib] = 0;
-            dhpsi_yz[ib] = 0;
-            dhpsi_zz[ib] = 0;
-            //      d2psi[ib]  = 0;
-        }
-
-        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
-        {
-            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
-            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
-            {
-                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
-                for (int k = 0; k <= PBCImages[2];
-                     k++) // loop Translation over Z
-                {
-                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
-                    dr_new[0] = dr[0] + TransX * lattice.R(0, 0) +
-                        TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
-                    dr_new[1] = dr[1] + TransX * lattice.R(0, 1) +
-                        TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
-                    dr_new[2] = dr[2] + TransX * lattice.R(0, 2) +
-                        TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
-                    r_new = std::sqrt(dot(dr_new, dr_new));
-
-                    // const size_t ib_max=NL.size();
-                    if (r_new >= Rmax)
-                        continue;
-
-                    // SIGN Change!!
-                    const RealType x = -dr_new[0], y = -dr_new[1],
-                                   z = -dr_new[2];
-                    Ylm.evaluateVGH(x, y, z);
-
-                    MultiRnl.evaluate(r_new, phi, dphi, d2phi);
-
-                    const RealType rinv = cone / r_new;
-
-                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
-                        const int nl(NL[ib]);
-                        const int lm(LM[ib]);
-                        const RealType drnloverr = rinv * dphi[nl];
-                        const RealType ang = ylm_v[lm];
-                        const RealType gr_x = drnloverr * x;
-                        const RealType gr_y = drnloverr * y;
-                        const RealType gr_z = drnloverr * z;
-
-                        // The non-strictly diagonal term in \partial_i
-                        // \partial_j R_{nl} is
-                        //  \frac{x_i x_j}{r^2}\left(\frac{\partial^2
-                        //  R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial
-                        //  R_{nl}}{\partial r}) To save recomputation, I
-                        //  evaluate everything except the x_i*x_j term once,
-                        //  and store it in gr2_tmp.  The full term is obtained
-                        //  by x_i*x_j*gr2_tmp.
-                        const RealType gr2_tmp =
-                            rinv * rinv * (d2phi[nl] - drnloverr);
-                        const RealType gr_xx = x * x * gr2_tmp + drnloverr;
-                        const RealType gr_xy = x * y * gr2_tmp;
-                        const RealType gr_xz = x * z * gr2_tmp;
-                        const RealType gr_yy = y * y * gr2_tmp + drnloverr;
-                        const RealType gr_yz = y * z * gr2_tmp;
-                        const RealType gr_zz = z * z * gr2_tmp + drnloverr;
-
-                        const RealType ang_x = ylm_x[lm];
-                        const RealType ang_y = ylm_y[lm];
-                        const RealType ang_z = ylm_z[lm];
-                        const RealType ang_xx = ylm_xx[lm];
-                        const RealType ang_xy = ylm_xy[lm];
-                        const RealType ang_xz = ylm_xz[lm];
-                        const RealType ang_yy = ylm_yy[lm];
-                        const RealType ang_yz = ylm_yz[lm];
-                        const RealType ang_zz = ylm_zz[lm];
-
-                        const RealType vr = phi[nl];
-
-                        psi[ib] += ang * vr;
-                        dpsi_x[ib] += ang * gr_x + vr * ang_x;
-                        dpsi_y[ib] += ang * gr_y + vr * ang_y;
-                        dpsi_z[ib] += ang * gr_z + vr * ang_z;
-
-                        // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j
-                        // R + R \partial_i \partial_j Y
-                        //                             + (\partial_i R)
-                        //                             (\partial_j Y) +
-                        //                             (\partial_j R)(\partial_i
-                        //                             Y)
-                        dhpsi_xx[ib] +=
-                            gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
-                        dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr +
-                            gr_x * ang_y + gr_y * ang_x;
-                        dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr +
-                            gr_x * ang_z + gr_z * ang_x;
-                        dhpsi_yy[ib] +=
-                            gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
-                        dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr +
-                            gr_y * ang_z + gr_z * ang_y;
-                        dhpsi_zz[ib] +=
-                            gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
-                    }
-                }
-            }
-        }
+      psi[ib] = 0;
+
+      dpsi_x[ib] = 0;
+      dpsi_y[ib] = 0;
+      dpsi_z[ib] = 0;
+
+      dhpsi_xx[ib] = 0;
+      dhpsi_xy[ib] = 0;
+      dhpsi_xz[ib] = 0;
+      dhpsi_yy[ib] = 0;
+      dhpsi_yz[ib] = 0;
+      dhpsi_zz[ib] = 0;
+
+      dghpsi_xxx[ib] = 0;
+      dghpsi_xxy[ib] = 0;
+      dghpsi_xxz[ib] = 0;
+      dghpsi_xyy[ib] = 0;
+      dghpsi_xyz[ib] = 0;
+      dghpsi_xzz[ib] = 0;
+      dghpsi_yyy[ib] = 0;
+      dghpsi_yyz[ib] = 0;
+      dghpsi_yzz[ib] = 0;
+      dghpsi_zzz[ib] = 0;
     }
 
-    template <typename LAT, typename PosType, typename VGHGH>
-    inline void
-    evaluateVGHGH(const LAT& lattice, const RealType r, const PosType& dr,
-        const size_t offset, VGHGH& vghgh)
+    for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
     {
-        int TransX, TransY, TransZ;
-
-        PosType dr_new;
-        RealType r_new;
-
-        constexpr RealType cone(1);
-
-        // one can assert the alignment
-        RealType* restrict phi = tempS.data(0);
-        RealType* restrict dphi = tempS.data(1);
-        RealType* restrict d2phi = tempS.data(2);
-        RealType* restrict d3phi = tempS.data(3);
-
-        // V,Gx,Gy,Gz,L
-        auto* restrict psi = vghgh.data(0) + offset;
-        const RealType* restrict ylm_v = Ylm[0]; // value
-        auto* restrict dpsi_x = vghgh.data(1) + offset;
-        const RealType* restrict ylm_x = Ylm[1]; // gradX
-        auto* restrict dpsi_y = vghgh.data(2) + offset;
-        const RealType* restrict ylm_y = Ylm[2]; // gradY
-        auto* restrict dpsi_z = vghgh.data(3) + offset;
-        const RealType* restrict ylm_z = Ylm[3]; // gradZ
-
-        auto* restrict dhpsi_xx = vghgh.data(4) + offset;
-        const RealType* restrict ylm_xx = Ylm[4];
-        auto* restrict dhpsi_xy = vghgh.data(5) + offset;
-        const RealType* restrict ylm_xy = Ylm[5];
-        auto* restrict dhpsi_xz = vghgh.data(6) + offset;
-        const RealType* restrict ylm_xz = Ylm[6];
-        auto* restrict dhpsi_yy = vghgh.data(7) + offset;
-        const RealType* restrict ylm_yy = Ylm[7];
-        auto* restrict dhpsi_yz = vghgh.data(8) + offset;
-        const RealType* restrict ylm_yz = Ylm[8];
-        auto* restrict dhpsi_zz = vghgh.data(9) + offset;
-        const RealType* restrict ylm_zz = Ylm[9];
-
-        auto* restrict dghpsi_xxx = vghgh.data(10) + offset;
-        const RealType* restrict ylm_xxx = Ylm[10];
-        auto* restrict dghpsi_xxy = vghgh.data(11) + offset;
-        const RealType* restrict ylm_xxy = Ylm[11];
-        auto* restrict dghpsi_xxz = vghgh.data(12) + offset;
-        const RealType* restrict ylm_xxz = Ylm[12];
-        auto* restrict dghpsi_xyy = vghgh.data(13) + offset;
-        const RealType* restrict ylm_xyy = Ylm[13];
-        auto* restrict dghpsi_xyz = vghgh.data(14) + offset;
-        const RealType* restrict ylm_xyz = Ylm[14];
-        auto* restrict dghpsi_xzz = vghgh.data(15) + offset;
-        const RealType* restrict ylm_xzz = Ylm[15];
-        auto* restrict dghpsi_yyy = vghgh.data(16) + offset;
-        const RealType* restrict ylm_yyy = Ylm[16];
-        auto* restrict dghpsi_yyz = vghgh.data(17) + offset;
-        const RealType* restrict ylm_yyz = Ylm[17];
-        auto* restrict dghpsi_yzz = vghgh.data(18) + offset;
-        const RealType* restrict ylm_yzz = Ylm[18];
-        auto* restrict dghpsi_zzz = vghgh.data(19) + offset;
-        const RealType* restrict ylm_zzz = Ylm[19];
-
-        for (size_t ib = 0; ib < BasisSetSize; ++ib) {
-            psi[ib] = 0;
-
-            dpsi_x[ib] = 0;
-            dpsi_y[ib] = 0;
-            dpsi_z[ib] = 0;
-
-            dhpsi_xx[ib] = 0;
-            dhpsi_xy[ib] = 0;
-            dhpsi_xz[ib] = 0;
-            dhpsi_yy[ib] = 0;
-            dhpsi_yz[ib] = 0;
-            dhpsi_zz[ib] = 0;
-
-            dghpsi_xxx[ib] = 0;
-            dghpsi_xxy[ib] = 0;
-            dghpsi_xxz[ib] = 0;
-            dghpsi_xyy[ib] = 0;
-            dghpsi_xyz[ib] = 0;
-            dghpsi_xzz[ib] = 0;
-            dghpsi_yyy[ib] = 0;
-            dghpsi_yyz[ib] = 0;
-            dghpsi_yzz[ib] = 0;
-            dghpsi_zzz[ib] = 0;
-        }
-
-        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+      // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+      TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+      for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+      {
+        // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+        TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+        for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z
         {
-            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
-            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
-            {
-                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
-                for (int k = 0; k <= PBCImages[2];
-                     k++) // loop Translation over Z
-                {
-                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
-                    dr_new[0] = dr[0] + TransX * lattice.R(0, 0) +
-                        TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
-                    dr_new[1] = dr[1] + TransX * lattice.R(0, 1) +
-                        TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
-                    dr_new[2] = dr[2] + TransX * lattice.R(0, 2) +
-                        TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
-                    r_new = std::sqrt(dot(dr_new, dr_new));
-
-                    // const size_t ib_max=NL.size();
-                    if (r_new >= Rmax)
-                        continue;
-
-                    // SIGN Change!!
-                    const RealType x = -dr_new[0], y = -dr_new[1],
-                                   z = -dr_new[2];
-                    Ylm.evaluateVGHGH(x, y, z);
-
-                    MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi);
-
-                    const RealType rinv = cone / r_new;
-                    const RealType xu = x * rinv, yu = y * rinv, zu = z * rinv;
-                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
-                        const int nl(NL[ib]);
-                        const int lm(LM[ib]);
-                        const RealType drnloverr = rinv * dphi[nl];
-                        const RealType ang = ylm_v[lm];
-                        const RealType gr_x = drnloverr * x;
-                        const RealType gr_y = drnloverr * y;
-                        const RealType gr_z = drnloverr * z;
-
-                        // The non-strictly diagonal term in \partial_i
-                        // \partial_j R_{nl} is
-                        //  \frac{x_i x_j}{r^2}\left(\frac{\partial^2
-                        //  R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial
-                        //  R_{nl}}{\partial r}) To save recomputation, I
-                        //  evaluate everything except the x_i*x_j term once,
-                        //  and store it in gr2_tmp.  The full term is obtained
-                        //  by x_i*x_j*gr2_tmp.  This is p(r) in the notes.
-                        const RealType gr2_tmp = rinv * (d2phi[nl] - drnloverr);
-
-                        const RealType gr_xx = x * xu * gr2_tmp + drnloverr;
-                        const RealType gr_xy = x * yu * gr2_tmp;
-                        const RealType gr_xz = x * zu * gr2_tmp;
-                        const RealType gr_yy = y * yu * gr2_tmp + drnloverr;
-                        const RealType gr_yz = y * zu * gr2_tmp;
-                        const RealType gr_zz = z * zu * gr2_tmp + drnloverr;
-
-                        // This is q(r) in the notes.
-                        const RealType gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp;
-
-                        const RealType gr_xxx =
-                            xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu);
-                        const RealType gr_xxy =
-                            xu * xu * yu * gr3_tmp + gr2_tmp * yu;
-                        const RealType gr_xxz =
-                            xu * xu * zu * gr3_tmp + gr2_tmp * zu;
-                        const RealType gr_xyy =
-                            xu * yu * yu * gr3_tmp + gr2_tmp * xu;
-                        const RealType gr_xyz = xu * yu * zu * gr3_tmp;
-                        const RealType gr_xzz =
-                            xu * zu * zu * gr3_tmp + gr2_tmp * xu;
-                        const RealType gr_yyy =
-                            yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu);
-                        const RealType gr_yyz =
-                            yu * yu * zu * gr3_tmp + gr2_tmp * zu;
-                        const RealType gr_yzz =
-                            yu * zu * zu * gr3_tmp + gr2_tmp * yu;
-                        const RealType gr_zzz =
-                            zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu);
-
-                        // Angular derivatives up to third
-                        const RealType ang_x = ylm_x[lm];
-                        const RealType ang_y = ylm_y[lm];
-                        const RealType ang_z = ylm_z[lm];
-
-                        const RealType ang_xx = ylm_xx[lm];
-                        const RealType ang_xy = ylm_xy[lm];
-                        const RealType ang_xz = ylm_xz[lm];
-                        const RealType ang_yy = ylm_yy[lm];
-                        const RealType ang_yz = ylm_yz[lm];
-                        const RealType ang_zz = ylm_zz[lm];
-
-                        const RealType ang_xxx = ylm_xxx[lm];
-                        const RealType ang_xxy = ylm_xxy[lm];
-                        const RealType ang_xxz = ylm_xxz[lm];
-                        const RealType ang_xyy = ylm_xyy[lm];
-                        const RealType ang_xyz = ylm_xyz[lm];
-                        const RealType ang_xzz = ylm_xzz[lm];
-                        const RealType ang_yyy = ylm_yyy[lm];
-                        const RealType ang_yyz = ylm_yyz[lm];
-                        const RealType ang_yzz = ylm_yzz[lm];
-                        const RealType ang_zzz = ylm_zzz[lm];
-
-                        const RealType vr = phi[nl];
-
-                        psi[ib] += ang * vr;
-                        dpsi_x[ib] += ang * gr_x + vr * ang_x;
-                        dpsi_y[ib] += ang * gr_y + vr * ang_y;
-                        dpsi_z[ib] += ang * gr_z + vr * ang_z;
-
-                        // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j
-                        // R + R \partial_i \partial_j Y
-                        //                             + (\partial_i R)
-                        //                             (\partial_j Y) +
-                        //                             (\partial_j R)(\partial_i
-                        //                             Y)
-                        dhpsi_xx[ib] +=
-                            gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
-                        dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr +
-                            gr_x * ang_y + gr_y * ang_x;
-                        dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr +
-                            gr_x * ang_z + gr_z * ang_x;
-                        dhpsi_yy[ib] +=
-                            gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
-                        dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr +
-                            gr_y * ang_z + gr_z * ang_y;
-                        dhpsi_zz[ib] +=
-                            gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
-
-                        dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx +
-                            3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx;
-                        dghpsi_xxy[ib] += gr_xxy * ang + vr * ang_xxy +
-                            gr_xx * ang_y + ang_xx * gr_y +
-                            2.0 * gr_xy * ang_x + 2.0 * ang_xy * gr_x;
-                        dghpsi_xxz[ib] += gr_xxz * ang + vr * ang_xxz +
-                            gr_xx * ang_z + ang_xx * gr_z +
-                            2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x;
-                        dghpsi_xyy[ib] += gr_xyy * ang + vr * ang_xyy +
-                            gr_yy * ang_x + ang_yy * gr_x +
-                            2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y;
-                        dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz +
-                            gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x +
-                            ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y;
-                        dghpsi_xzz[ib] += gr_xzz * ang + vr * ang_xzz +
-                            gr_zz * ang_x + ang_zz * gr_x +
-                            2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z;
-                        dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy +
-                            3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy;
-                        dghpsi_yyz[ib] += gr_yyz * ang + vr * ang_yyz +
-                            gr_yy * ang_z + ang_yy * gr_z +
-                            2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y;
-                        dghpsi_yzz[ib] += gr_yzz * ang + vr * ang_yzz +
-                            gr_zz * ang_y + ang_zz * gr_y +
-                            2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z;
-                        dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz +
-                            3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz;
-                    }
-                }
-            }
+          // Map the loop index k = 0,1,2,3,4,... to the signed cell offset 0,1,-1,2,-2,...
+          TransZ    = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+          dr_new[0] = dr[0] + TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
+          dr_new[1] = dr[1] + TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
+          dr_new[2] = dr[2] + TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
+          r_new     = std::sqrt(dot(dr_new, dr_new));
+
+          // const size_t ib_max=NL.size();
+          if (r_new >= Rmax)
+            continue;
+
+          // SIGN Change!!
+          const RealType x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
+          Ylm.evaluateVGHGH(x, y, z);
+
+          MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi);
+
+          const RealType rinv = cone / r_new;
+          const RealType xu = x * rinv, yu = y * rinv, zu = z * rinv;
+          for (size_t ib = 0; ib < BasisSetSize; ++ib)
+          {
+            const int nl(NL[ib]);
+            const int lm(LM[ib]);
+            const RealType drnloverr = rinv * dphi[nl];
+            const RealType ang       = ylm_v[lm];
+            const RealType gr_x      = drnloverr * x;
+            const RealType gr_y      = drnloverr * y;
+            const RealType gr_z      = drnloverr * z;
+
+            // The non-strictly diagonal term in \partial_i \partial_j R_{nl} is
+            //   \frac{x_i x_j}{r^2} \left( \frac{\partial^2 R_{nl}}{\partial r^2}
+            //                              - \frac{1}{r} \frac{\partial R_{nl}}{\partial r} \right).
+            // To save recomputation, everything except the x_i * (x_j / r)
+            // factor is evaluated once and stored in gr2_tmp, i.e.
+            //   gr2_tmp = (1/r) * (d2phi - dphi / r).
+            // The full term is obtained by x_i * (x_j / r) * gr2_tmp.
+            // This is p(r) in the notes.
+            const RealType gr2_tmp = rinv * (d2phi[nl] - drnloverr);
+
+            const RealType gr_xx = x * xu * gr2_tmp + drnloverr;
+            const RealType gr_xy = x * yu * gr2_tmp;
+            const RealType gr_xz = x * zu * gr2_tmp;
+            const RealType gr_yy = y * yu * gr2_tmp + drnloverr;
+            const RealType gr_yz = y * zu * gr2_tmp;
+            const RealType gr_zz = z * zu * gr2_tmp + drnloverr;
+
+            // This is q(r) in the notes.
+            const RealType gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp;
+
+            const RealType gr_xxx = xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu);
+            const RealType gr_xxy = xu * xu * yu * gr3_tmp + gr2_tmp * yu;
+            const RealType gr_xxz = xu * xu * zu * gr3_tmp + gr2_tmp * zu;
+            const RealType gr_xyy = xu * yu * yu * gr3_tmp + gr2_tmp * xu;
+            const RealType gr_xyz = xu * yu * zu * gr3_tmp;
+            const RealType gr_xzz = xu * zu * zu * gr3_tmp + gr2_tmp * xu;
+            const RealType gr_yyy = yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu);
+            const RealType gr_yyz = yu * yu * zu * gr3_tmp + gr2_tmp * zu;
+            const RealType gr_yzz = yu * zu * zu * gr3_tmp + gr2_tmp * yu;
+            const RealType gr_zzz = zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu);
+
+            // Angular derivatives up to third
+            const RealType ang_x = ylm_x[lm];
+            const RealType ang_y = ylm_y[lm];
+            const RealType ang_z = ylm_z[lm];
+
+            const RealType ang_xx = ylm_xx[lm];
+            const RealType ang_xy = ylm_xy[lm];
+            const RealType ang_xz = ylm_xz[lm];
+            const RealType ang_yy = ylm_yy[lm];
+            const RealType ang_yz = ylm_yz[lm];
+            const RealType ang_zz = ylm_zz[lm];
+
+            const RealType ang_xxx = ylm_xxx[lm];
+            const RealType ang_xxy = ylm_xxy[lm];
+            const RealType ang_xxz = ylm_xxz[lm];
+            const RealType ang_xyy = ylm_xyy[lm];
+            const RealType ang_xyz = ylm_xyz[lm];
+            const RealType ang_xzz = ylm_xzz[lm];
+            const RealType ang_yyy = ylm_yyy[lm];
+            const RealType ang_yyz = ylm_yyz[lm];
+            const RealType ang_yzz = ylm_yzz[lm];
+            const RealType ang_zzz = ylm_zzz[lm];
+
+            const RealType vr = phi[nl];
+
+            psi[ib] += ang * vr;
+            dpsi_x[ib] += ang * gr_x + vr * ang_x;
+            dpsi_y[ib] += ang * gr_y + vr * ang_y;
+            dpsi_z[ib] += ang * gr_z + vr * ang_z;
+
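+            // Product rule for the Hessian of R*Y:
+            //   \partial_i \partial_j (R*Y) = Y \partial_i \partial_j R
+            //                               + R \partial_i \partial_j Y
+            //                               + (\partial_i R)(\partial_j Y)
+            //                               + (\partial_j R)(\partial_i Y)
+            // (for i == j the two cross terms merge into 2 (\partial_i R)(\partial_i Y))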
+            dhpsi_xx[ib] += gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
+            dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr + gr_x * ang_y + gr_y * ang_x;
+            dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr + gr_x * ang_z + gr_z * ang_x;
+            dhpsi_yy[ib] += gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
+            dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr + gr_y * ang_z + gr_z * ang_y;
+            dhpsi_zz[ib] += gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
+
+            dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx + 3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx;
+            dghpsi_xxy[ib] +=
+                gr_xxy * ang + vr * ang_xxy + gr_xx * ang_y + ang_xx * gr_y + 2.0 * gr_xy * ang_x + 2.0 * ang_xy * gr_x;
+            dghpsi_xxz[ib] +=
+                gr_xxz * ang + vr * ang_xxz + gr_xx * ang_z + ang_xx * gr_z + 2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x;
+            dghpsi_xyy[ib] +=
+                gr_xyy * ang + vr * ang_xyy + gr_yy * ang_x + ang_yy * gr_x + 2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y;
+            dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz + gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x +
+                ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y;
+            dghpsi_xzz[ib] +=
+                gr_xzz * ang + vr * ang_xzz + gr_zz * ang_x + ang_zz * gr_x + 2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z;
+            dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy + 3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy;
+            dghpsi_yyz[ib] +=
+                gr_yyz * ang + vr * ang_yyz + gr_yy * ang_z + ang_yy * gr_z + 2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y;
+            dghpsi_yzz[ib] +=
+                gr_yzz * ang + vr * ang_yzz + gr_zz * ang_y + ang_zz * gr_y + 2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z;
+            dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz + 3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz;
+          }
         }
+      }
     }
+  }
 
-    /** evaluate V
+  /** evaluate V
      */
-    template <typename LAT, typename PosType, typename VT>
-    inline void
-    evaluateV(const LAT& lattice, const RealType r, const PosType& dr,
-        VT* restrict psi, PosType Tv)
-    {
-        int TransX, TransY, TransZ;
+  template<typename LAT, typename PosType, typename VT>
+  inline void evaluateV(const LAT& lattice, const RealType r, const PosType& dr, VT* restrict psi, PosType Tv)
+  {
+    int TransX, TransY, TransZ;
 
-        PosType dr_new;
-        RealType r_new;
+    PosType dr_new;
+    RealType r_new;
 
-        const ValueType correctphase =
-            CorrectPhaseFunctor<ValueType>{SuperTwist}(Tv);
+    const ValueType correctphase = CorrectPhaseFunctor<ValueType>{SuperTwist}(Tv);
 
-        RealType* restrict ylm_v = tempS.data(0);
-        RealType* restrict phi_r = tempS.data(1);
+    RealType* restrict ylm_v = tempS.data(0);
+    RealType* restrict phi_r = tempS.data(1);
 
-        for (size_t ib = 0; ib < BasisSetSize; ++ib)
-            psi[ib] = 0;
-        // Phase_idx (iter) needs to be initialized at -1 as it has to be
-        // incremented first to comply with the if statement (r_new >=Rmax)
-        int iter = -1;
-        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+    for (size_t ib = 0; ib < BasisSetSize; ++ib)
+      psi[ib] = 0;
+    // iter indexes periodic_image_phase_factors. It starts at -1 because it is
+    // incremented before the (r_new >= Rmax) test, so skipped images still advance it.
+    int iter = -1;
+    for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+    {
+      // Map the loop index i = 0,1,2,3,4,... to the signed cell offset 0,1,-1,2,-2,...
+      TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+      for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+      {
+        // Map the loop index j = 0,1,2,3,4,... to the signed cell offset 0,1,-1,2,-2,...
+        TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+        for (int k = 0; k <= PBCImages[2]; k++) // loop Translation over Z
         {
-            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
-            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
-            {
-                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
-                for (int k = 0; k <= PBCImages[2];
-                     k++) // loop Translation over Z
-                {
-                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
-                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
-
-                    dr_new[0] = dr[0] +
-                        (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) +
-                            TransZ * lattice.R(2, 0));
-                    dr_new[1] = dr[1] +
-                        (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) +
-                            TransZ * lattice.R(2, 1));
-                    dr_new[2] = dr[2] +
-                        (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) +
-                            TransZ * lattice.R(2, 2));
-
-                    r_new = std::sqrt(dot(dr_new, dr_new));
-                    iter++;
-                    if (r_new >= Rmax)
-                        continue;
-
-                    Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v);
-                    MultiRnl.evaluate(r_new, phi_r);
-                    /// Phase for PBC containing the phase for the nearest image
-                    /// displacement and the correction due to the Distance
-                    /// table.
-                    const ValueType Phase =
-                        periodic_image_phase_factors[iter] * correctphase;
-                    for (size_t ib = 0; ib < BasisSetSize; ++ib)
-                        psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase;
-                }
-            }
+          // Map the loop index k = 0,1,2,3,4,... to the signed cell offset 0,1,-1,2,-2,...
+          TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+
+          dr_new[0] = dr[0] + (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0));
+          dr_new[1] = dr[1] + (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1));
+          dr_new[2] = dr[2] + (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2));
+
+          r_new = std::sqrt(dot(dr_new, dr_new));
+          iter++;
+          if (r_new >= Rmax)
+            continue;
+
+          Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v);
+          MultiRnl.evaluate(r_new, phi_r);
+          /// Phase for PBC containing the phase for the nearest image
+          /// displacement and the correction due to the Distance
+          /// table.
+          const ValueType Phase = periodic_image_phase_factors[iter] * correctphase;
+          for (size_t ib = 0; ib < BasisSetSize; ++ib)
+            psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase;
         }
+      }
     }
-
-    void createResource(ResourceCollection& collection) const
-    {
-      collection.addResource(std::make_unique<SoaAtomicBSetMultiWalkerMem>());
-    }
-
-    void acquireResource(ResourceCollection& collection,
-                         const RefVectorWithLeader<SoaAtomicBasisSetT>& atom_basis_list) const
-    {
-      assert(this == &atom_basis_list.getLeader());
-      atom_basis_list.template getCastedLeader<SoaAtomicBasisSetT>().mw_mem_handle_ =
-          collection.lendResource<SoaAtomicBSetMultiWalkerMem>();
-    }
-
-    void releaseResource(ResourceCollection& collection,
-                         const RefVectorWithLeader<SoaAtomicBasisSetT>& atom_basis_list) const
+  }
+
+  void createResource(ResourceCollection& collection) const
+  {
+    collection.addResource(std::make_unique<SoaAtomicBSetMultiWalkerMem>());
+  }
+
+  void acquireResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<SoaAtomicBasisSetT>& atom_basis_list) const
+  {
+    assert(this == &atom_basis_list.getLeader());
+    atom_basis_list.template getCastedLeader<SoaAtomicBasisSetT>().mw_mem_handle_ =
+        collection.lendResource<SoaAtomicBSetMultiWalkerMem>();
+  }
+
+  void releaseResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<SoaAtomicBasisSetT>& atom_basis_list) const
+  {
+    assert(this == &atom_basis_list.getLeader());
+    collection.takebackResource(atom_basis_list.template getCastedLeader<SoaAtomicBasisSetT>().mw_mem_handle_);
+  }
+
+  struct SoaAtomicBSetMultiWalkerMem : public Resource
+  {
+    SoaAtomicBSetMultiWalkerMem() : Resource("SoaAtomicBasisSet") {}
+
+    SoaAtomicBSetMultiWalkerMem(const SoaAtomicBSetMultiWalkerMem&) : SoaAtomicBSetMultiWalkerMem() {}
+
+    std::unique_ptr<Resource> makeClone() const override
     {
-      assert(this == &atom_basis_list.getLeader());
-      collection.takebackResource(atom_basis_list.template getCastedLeader<SoaAtomicBasisSetT>().mw_mem_handle_);
+      return std::make_unique<SoaAtomicBSetMultiWalkerMem>(*this);
     }
 
-    struct SoaAtomicBSetMultiWalkerMem : public Resource
-    {
-      SoaAtomicBSetMultiWalkerMem() : Resource("SoaAtomicBasisSet") {}
-
-      SoaAtomicBSetMultiWalkerMem(const SoaAtomicBSetMultiWalkerMem&) : SoaAtomicBSetMultiWalkerMem() {}
-
-      std::unique_ptr<Resource> makeClone() const override
-      {
-        return std::make_unique<SoaAtomicBSetMultiWalkerMem>(*this);
-      }
-
-      OffloadArray4D ylm_vgl;     // [5][Nelec][PBC][NYlm]
-      OffloadArray4D rnl_vgl;     // [5][Nelec][PBC][NRnl]
-      OffloadArray3D ylm_v;       // [Nelec][PBC][NYlm]
-      OffloadArray3D rnl_v;       // [Nelec][PBC][NRnl]
-      OffloadMatrix dr_pbc;       // [PBC][xyz]        translation vector for each image
-      OffloadArray3D dr;          // [Nelec][PBC][xyz] ion->elec displacement for each image
-      OffloadMatrix r;            // [Nelec][PBC]      ion->elec distance for each image
-      OffloadVector correctphase; // [Nelec]           overall phase
-    };
+    OffloadArray4D ylm_vgl;     // [5][Nelec][PBC][NYlm]
+    OffloadArray4D rnl_vgl;     // [5][Nelec][PBC][NRnl]
+    OffloadArray3D ylm_v;       // [Nelec][PBC][NYlm]
+    OffloadArray3D rnl_v;       // [Nelec][PBC][NRnl]
+    OffloadMatrix dr_pbc;       // [PBC][xyz]        translation vector for each image
+    OffloadArray3D dr;          // [Nelec][PBC][xyz] ion->elec displacement for each image
+    OffloadMatrix r;            // [Nelec][PBC]      ion->elec distance for each image
+    OffloadVector correctphase; // [Nelec]           overall phase
+  };
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
index 85c17ef568b..c17fdddda80 100644
--- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.cpp
@@ -18,164 +18,153 @@
 
 namespace qmcplusplus
 {
-template <class T>
-SoaCuspCorrectionT<T>::SoaCuspCorrectionT(
-    ParticleSetT<T>& ions, ParticleSetT<T>& els) :
-    myTableIndex(els.addTable(ions))
+template<class T>
+SoaCuspCorrectionT<T>::SoaCuspCorrectionT(ParticleSetT<T>& ions, ParticleSetT<T>& els)
+    : myTableIndex(els.addTable(ions))
 {
-    NumCenters = ions.getTotalNum();
-    NumTargets = els.getTotalNum();
-    LOBasisSet.resize(NumCenters);
+  NumCenters = ions.getTotalNum();
+  NumTargets = els.getTotalNum();
+  LOBasisSet.resize(NumCenters);
 }
 
-template <class T>
-SoaCuspCorrectionT<T>::SoaCuspCorrectionT(
-    const SoaCuspCorrectionT<T>& a) = default;
+template<class T>
+SoaCuspCorrectionT<T>::SoaCuspCorrectionT(const SoaCuspCorrectionT<T>& a) = default;
 
-template <class T>
-void
-SoaCuspCorrectionT<T>::setOrbitalSetSize(int norbs)
+template<class T>
+void SoaCuspCorrectionT<T>::setOrbitalSetSize(int norbs)
 {
-    MaxOrbSize = norbs;
-    myVGL.resize(5, MaxOrbSize);
+  MaxOrbSize = norbs;
+  myVGL.resize(5, MaxOrbSize);
 }
 
-template <class T>
-inline void
-SoaCuspCorrectionT<T>::evaluateVGL(
-    const ParticleSetT<T>& P, int iat, VGLVector& vgl)
+template<class T>
+inline void SoaCuspCorrectionT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat, VGLVector& vgl)
 {
-    assert(MaxOrbSize >= vgl.size());
-    myVGL = 0.0;
-
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
-    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
-                                                     d_table.getDisplRow(iat);
-    for (int c = 0; c < NumCenters; c++)
-        if (LOBasisSet[c])
-            LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1],
-                myVGL[2], myVGL[3], myVGL[4]);
-
-    {
-        const auto v_in = myVGL[0];
-        const auto gx_in = myVGL[1];
-        const auto gy_in = myVGL[2];
-        const auto gz_in = myVGL[3];
-        const auto l_in = myVGL[4];
-        auto v_out = vgl.data(0);
-        auto gx_out = vgl.data(1);
-        auto gy_out = vgl.data(2);
-        auto gz_out = vgl.data(3);
-        auto l_out = vgl.data(4);
-        for (size_t i = 0; i < vgl.size(); ++i) {
-            v_out[i] += v_in[i];
-            gx_out[i] += gx_in[i];
-            gy_out[i] += gy_in[i];
-            gz_out[i] += gz_in[i];
-            l_out[i] += l_in[i];
-        }
-    }
-}
-
-template <class T>
-void
-SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSetT<T>& P, int iat,
-    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
-{
-    assert(MaxOrbSize >= psi.size());
-    myVGL = 0.0;
-
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
-    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
-                                                     d_table.getDisplRow(iat);
-    for (int c = 0; c < NumCenters; c++)
-        if (LOBasisSet[c])
-            LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1],
-                myVGL[2], myVGL[3], myVGL[4]);
-
-    const auto v_in = myVGL[0];
+  assert(MaxOrbSize >= vgl.size());
+  myVGL = 0.0;
+
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+  for (int c = 0; c < NumCenters; c++)
+    if (LOBasisSet[c])
+      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
+
+  {
+    const auto v_in  = myVGL[0];
     const auto gx_in = myVGL[1];
     const auto gy_in = myVGL[2];
     const auto gz_in = myVGL[3];
-    const auto l_in = myVGL[4];
-    for (size_t i = 0; i < psi.size(); ++i) {
-        psi[i] += v_in[i];
-        dpsi[i][0] += gx_in[i];
-        dpsi[i][1] += gy_in[i];
-        dpsi[i][2] += gz_in[i];
-        d2psi[i] += l_in[i];
+    const auto l_in  = myVGL[4];
+    auto v_out       = vgl.data(0);
+    auto gx_out      = vgl.data(1);
+    auto gy_out      = vgl.data(2);
+    auto gz_out      = vgl.data(3);
+    auto l_out       = vgl.data(4);
+    for (size_t i = 0; i < vgl.size(); ++i)
+    {
+      v_out[i] += v_in[i];
+      gx_out[i] += gx_in[i];
+      gy_out[i] += gy_in[i];
+      gz_out[i] += gz_in[i];
+      l_out[i] += l_in[i];
     }
+  }
 }
 
-template <class T>
-void
-SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSetT<T>& P, int iat, int idx,
-    ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi)
+template<class T>
+void SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSetT<T>& P,
+                                         int iat,
+                                         ValueVector& psi,
+                                         GradVector& dpsi,
+                                         ValueVector& d2psi)
 {
-    assert(MaxOrbSize >= psi.cols());
-    myVGL = 0.0;
-
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
-    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
-                                                     d_table.getDisplRow(iat);
-    for (int c = 0; c < NumCenters; c++)
-        if (LOBasisSet[c])
-            LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1],
-                myVGL[2], myVGL[3], myVGL[4]);
+  assert(MaxOrbSize >= psi.size());
+  myVGL = 0.0;
+
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+  for (int c = 0; c < NumCenters; c++)
+    if (LOBasisSet[c])
+      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
+
+  const auto v_in  = myVGL[0];
+  const auto gx_in = myVGL[1];
+  const auto gy_in = myVGL[2];
+  const auto gz_in = myVGL[3];
+  const auto l_in  = myVGL[4];
+  for (size_t i = 0; i < psi.size(); ++i)
+  {
+    psi[i] += v_in[i];
+    dpsi[i][0] += gx_in[i];
+    dpsi[i][1] += gy_in[i];
+    dpsi[i][2] += gz_in[i];
+    d2psi[i] += l_in[i];
+  }
+}
 
-    const auto v_in = myVGL[0];
-    const auto gx_in = myVGL[1];
-    const auto gy_in = myVGL[2];
-    const auto gz_in = myVGL[3];
-    const auto l_in = myVGL[4];
-    for (size_t i = 0; i < psi.cols(); ++i) {
-        psi[idx][i] += v_in[i];
-        dpsi[idx][i][0] += gx_in[i];
-        dpsi[idx][i][1] += gy_in[i];
-        dpsi[idx][i][2] += gz_in[i];
-        d2psi[idx][i] += l_in[i];
-    }
+template<class T>
+void SoaCuspCorrectionT<T>::evaluate_vgl(const ParticleSetT<T>& P,
+                                         int iat,
+                                         int idx,
+                                         ValueMatrix& psi,
+                                         GradMatrix& dpsi,
+                                         ValueMatrix& d2psi)
+{
+  assert(MaxOrbSize >= psi.cols());
+  myVGL = 0.0;
+
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+  for (int c = 0; c < NumCenters; c++)
+    if (LOBasisSet[c])
+      LOBasisSet[c]->evaluate_vgl(dist[c], displ[c], myVGL[0], myVGL[1], myVGL[2], myVGL[3], myVGL[4]);
+
+  const auto v_in  = myVGL[0];
+  const auto gx_in = myVGL[1];
+  const auto gy_in = myVGL[2];
+  const auto gz_in = myVGL[3];
+  const auto l_in  = myVGL[4];
+  for (size_t i = 0; i < psi.cols(); ++i)
+  {
+    psi[idx][i] += v_in[i];
+    dpsi[idx][i][0] += gx_in[i];
+    dpsi[idx][i][1] += gy_in[i];
+    dpsi[idx][i][2] += gz_in[i];
+    d2psi[idx][i] += l_in[i];
+  }
 }
 
-template <class T>
-void
-SoaCuspCorrectionT<T>::evaluateV(
-    const ParticleSetT<T>& P, int iat, ValueVector& psi)
+template<class T>
+void SoaCuspCorrectionT<T>::evaluateV(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-    assert(MaxOrbSize >= psi.size());
-    T* tmp_vals = myVGL[0];
+  assert(MaxOrbSize >= psi.size());
+  T* tmp_vals = myVGL[0];
 
-    std::fill_n(tmp_vals, myVGL.size(), 0.0);
+  std::fill_n(tmp_vals, myVGL.size(), 0.0);
 
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
 
-    // THIS IS SERIAL, only way to avoid this is to use myVGL
-    for (int c = 0; c < NumCenters; c++)
-        if (LOBasisSet[c])
-            LOBasisSet[c]->evaluate(dist[c], tmp_vals);
+  // THIS IS SERIAL, only way to avoid this is to use myVGL
+  for (int c = 0; c < NumCenters; c++)
+    if (LOBasisSet[c])
+      LOBasisSet[c]->evaluate(dist[c], tmp_vals);
 
-    { // collect
-        const auto v_in = myVGL[0];
-        for (size_t i = 0; i < psi.size(); ++i)
-            psi[i] += v_in[i];
-    }
+  { // collect
+    const auto v_in = myVGL[0];
+    for (size_t i = 0; i < psi.size(); ++i)
+      psi[i] += v_in[i];
+  }
 }
 
-template <class T>
-void
-SoaCuspCorrectionT<T>::add(int icenter, std::unique_ptr<COT> aos)
+template<class T>
+void SoaCuspCorrectionT<T>::add(int icenter, std::unique_ptr<COT> aos)
 {
-    assert(MaxOrbSize == aos->getNumOrbs() &&
-        "All the centers should support the same number of orbitals!");
-    LOBasisSet[icenter].reset(aos.release());
+  assert(MaxOrbSize == aos->getNumOrbs() && "All the centers should support the same number of orbitals!");
+  LOBasisSet[icenter].reset(aos.release());
 }
 
 template class SoaCuspCorrectionT<double>;
diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
index 0edf61af87e..9d11d883978 100644
--- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
+++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrectionT.h
@@ -18,7 +18,7 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class CuspCorrectionAtomicBasis;
 
 /** A localized basis set derived from BasisSetBase<typename COT::ValueType>
@@ -28,110 +28,90 @@ class CuspCorrectionAtomicBasis;
  * The template parameter COT denotes Centered-Orbital-Type which provides
  * a set of localized orbitals associated with a center.
  */
-template <class T>
+template<class T>
 class SoaCuspCorrectionT
 {
-    using RealType = typename SPOSetT<T>::RealType;
-    using VGLVector = VectorSoaContainer<T, 5>;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using PosType = typename SPOSetT<T>::PosType;
-
-    /// number of centers, e.g., ions
-    size_t NumCenters;
-    /// number of quantum particles
-    size_t NumTargets;
-    /// number of quantum particles
-    const int myTableIndex;
-    /** Maximal number of supported MOs
+  using RealType    = typename SPOSetT<T>::RealType;
+  using VGLVector   = VectorSoaContainer<T, 5>;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using PosType     = typename SPOSetT<T>::PosType;
+
+  /// number of centers, e.g., ions
+  size_t NumCenters;
+  /// number of quantum particles
+  size_t NumTargets;
+  /// index of the distance table passed to getDistTableAB()
+  const int myTableIndex;
+  /** Maximal number of supported MOs
      * this is not the AO basis because cusp correction is applied on the MO
      * directly.
      */
-    int MaxOrbSize = 0;
+  int MaxOrbSize = 0;
 
-    /// COMPLEX WON'T WORK
-    using COT = CuspCorrectionAtomicBasis<RealType>;
+  /// COMPLEX WON'T WORK
+  using COT = CuspCorrectionAtomicBasis<RealType>;
 
-    /** container of the unique pointers to the Atomic Orbitals
+  /** container of the unique pointers to the Atomic Orbitals
      *
      * size of LOBasisSet = number of centers (atoms)
      * should use unique_ptr once COT is fixed for better performance
      */
-    std::vector<std::shared_ptr<const COT>> LOBasisSet;
+  std::vector<std::shared_ptr<const COT>> LOBasisSet;
 
-    Matrix<RealType> myVGL;
+  Matrix<RealType> myVGL;
 
 public:
-    /** constructor
+  /** constructor
      * @param ions ionic system
      * @param els electronic system
      */
-    SoaCuspCorrectionT(ParticleSetT<T>& ions, ParticleSetT<T>& els);
+  SoaCuspCorrectionT(ParticleSetT<T>& ions, ParticleSetT<T>& els);
 
-    /** copy constructor */
-    SoaCuspCorrectionT(const SoaCuspCorrectionT& a);
+  /** copy constructor */
+  SoaCuspCorrectionT(const SoaCuspCorrectionT& a);
 
-    /** set the number of orbitals this cusp correction may serve. call this
+  /** set the number of orbitals this cusp correction may serve. call this
      * before adding any correction centers.
      */
-    void
-    setOrbitalSetSize(int norbs);
+  void setOrbitalSetSize(int norbs);
 
-    /** compute VGL
+  /** compute VGL
      * @param P quantum particleset
      * @param iat active particle
      * @param vgl Matrix(5,BasisSetSize)
      * @param trialMove if true, use getTempDists()/getTempDispls()
      */
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, VGLVector& vgl);
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, VGLVector& vgl);
 
-    void
-    evaluate_vgl(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi);
+  void evaluate_vgl(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi);
 
-    void
-    evaluate_vgl(const ParticleSetT<T>& P, int iat, int idx, ValueMatrix& psi,
-        GradMatrix& dpsi, ValueMatrix& d2psi);
+  void evaluate_vgl(const ParticleSetT<T>& P, int iat, int idx, ValueMatrix& psi, GradMatrix& dpsi, ValueMatrix& d2psi);
 
-    /** compute values for the iat-paricle move
+  /** compute values for the iat-particle move
      *
      * Always uses getTempDists() and getTempDispls()
      */
-    void
-    evaluateV(const ParticleSetT<T>& P, int iat, ValueVector& psi);
+  void evaluateV(const ParticleSetT<T>& P, int iat, ValueVector& psi);
 
-    /** add a new set of Centered Atomic Orbitals
+  /** add a new set of Centered Atomic Orbitals
      * @param icenter the index of the center
      * @param aos a set of Centered Atomic Orbitals
      */
-    void
-    add(int icenter, std::unique_ptr<COT> aos);
-
-    void
-    addVGL(const ParticleSetT<T>& P, int iat, VGLVector& vgl)
-    {
-        evaluateVGL(P, iat, vgl);
-    }
-    void
-    addV(const ParticleSetT<T>& P, int iat, ValueVector& psi)
-    {
-        evaluateV(P, iat, psi);
-    }
-    void
-    add_vgl(const ParticleSetT<T>& P, int iat, int idx, ValueMatrix& vals,
-        GradMatrix& dpsi, ValueMatrix& d2psi)
-    {
-        evaluate_vgl(P, iat, idx, vals, dpsi, d2psi);
-    }
-    void
-    add_vector_vgl(const ParticleSetT<T>& P, int iat, ValueVector& vals,
-        GradVector& dpsi, ValueVector& d2psi)
-    {
-        evaluate_vgl(P, iat, vals, dpsi, d2psi);
-    }
+  void add(int icenter, std::unique_ptr<COT> aos);
+
+  void addVGL(const ParticleSetT<T>& P, int iat, VGLVector& vgl) { evaluateVGL(P, iat, vgl); }
+  void addV(const ParticleSetT<T>& P, int iat, ValueVector& psi) { evaluateV(P, iat, psi); }
+  void add_vgl(const ParticleSetT<T>& P, int iat, int idx, ValueMatrix& vals, GradMatrix& dpsi, ValueMatrix& d2psi)
+  {
+    evaluate_vgl(P, iat, idx, vals, dpsi, d2psi);
+  }
+  void add_vector_vgl(const ParticleSetT<T>& P, int iat, ValueVector& vals, GradVector& dpsi, ValueVector& d2psi)
+  {
+    evaluate_vgl(P, iat, vals, dpsi, d2psi);
+  }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
index 8b8ab7c66c4..cd844d7bf6b 100644
--- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
+++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
@@ -74,466 +74,414 @@ RefVectorWithLeader<COT> SoaLocalizedBasisSetT<COT, ORBT>::extractOneSpeciesBasi
   return one_species_basis_list;
 }
 
-template <class COT, typename ORBT>
-SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(
-    ParticleSetT<ORBT>& ions, ParticleSetT<ORBT>& els) :
-    ions_(ions),
-    myTableIndex(els.addTable(ions,
-        DTModes::NEED_FULL_TABLE_ANYTIME |
-            DTModes::NEED_VP_FULL_TABLE_ON_HOST)),
-    SuperTwist(0.0)
+template<class COT, typename ORBT>
+SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(ParticleSetT<ORBT>& ions, ParticleSetT<ORBT>& els)
+    : ions_(ions),
+      myTableIndex(els.addTable(ions, DTModes::NEED_FULL_TABLE_ANYTIME | DTModes::NEED_VP_FULL_TABLE_ON_HOST)),
+      SuperTwist(0.0)
 {
-    NumCenters = ions.getTotalNum();
-    NumTargets = els.getTotalNum();
-    LOBasisSet.resize(ions.getSpeciesSet().getTotalNum());
-    BasisOffset.resize(NumCenters + 1);
-    BasisSetSize = 0;
+  NumCenters = ions.getTotalNum();
+  NumTargets = els.getTotalNum();
+  LOBasisSet.resize(ions.getSpeciesSet().getTotalNum());
+  BasisOffset.resize(NumCenters + 1);
+  BasisSetSize = 0;
 }
 
-template <class COT, typename ORBT>
-SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(
-    const SoaLocalizedBasisSetT& a) :
-    SoaBasisSetBaseT<ORBT>(a),
-    NumCenters(a.NumCenters),
-    NumTargets(a.NumTargets),
-    ions_(a.ions_),
-    myTableIndex(a.myTableIndex),
-    SuperTwist(a.SuperTwist),
-    BasisOffset(a.BasisOffset)
+template<class COT, typename ORBT>
+SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a)
+    : SoaBasisSetBaseT<ORBT>(a),
+      NumCenters(a.NumCenters),
+      NumTargets(a.NumTargets),
+      ions_(a.ions_),
+      myTableIndex(a.myTableIndex),
+      SuperTwist(a.SuperTwist),
+      BasisOffset(a.BasisOffset)
 {
-    LOBasisSet.reserve(a.LOBasisSet.size());
-    for (auto& elem : a.LOBasisSet)
-        LOBasisSet.push_back(std::make_unique<COT>(*elem));
+  LOBasisSet.reserve(a.LOBasisSet.size());
+  for (auto& elem : a.LOBasisSet)
+    LOBasisSet.push_back(std::make_unique<COT>(*elem));
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::setPBCParams(
-    const TinyVector<int, 3>& PBCImages, const TinyVector<double, 3> Sup_Twist,
-    const std::vector<ORBT>& phase_factor)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::setPBCParams(const TinyVector<int, 3>& PBCImages,
+                                                    const TinyVector<double, 3> Sup_Twist,
+                                                    const std::vector<ORBT>& phase_factor)
 {
-    for (int i = 0; i < LOBasisSet.size(); ++i)
-        LOBasisSet[i]->setPBCParams(PBCImages, Sup_Twist, phase_factor);
+  for (int i = 0; i < LOBasisSet.size(); ++i)
+    LOBasisSet[i]->setPBCParams(PBCImages, Sup_Twist, phase_factor);
 
-    SuperTwist = Sup_Twist;
+  SuperTwist = Sup_Twist;
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::setBasisSetSize(int nbs)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::setBasisSetSize(int nbs)
 {
-    const auto& IonID(ions_.GroupID);
-    if (BasisSetSize > 0 && nbs == BasisSetSize)
-        return;
-
-    if (auto& mapping = ions_.get_map_storage_to_input(); mapping.empty()) {
-        // evaluate the total basis dimension and offset for each center
-        BasisOffset[0] = 0;
-        for (int c = 0; c < NumCenters; c++)
-            BasisOffset[c + 1] =
-                BasisOffset[c] + LOBasisSet[IonID[c]]->getBasisSetSize();
-        BasisSetSize = BasisOffset[NumCenters];
-    }
-    else {
-        // when particles are reordered due to grouping, AOs need to restore the
-        // input order to match MOs.
-        std::vector<int> map_input_to_storage(mapping.size());
-        for (int c = 0; c < NumCenters; c++)
-            map_input_to_storage[mapping[c]] = c;
-
-        std::vector<size_t> basis_offset_input_order(BasisOffset.size(), 0);
-        for (int c = 0; c < NumCenters; c++)
-            basis_offset_input_order[c + 1] = basis_offset_input_order[c] +
-                LOBasisSet[IonID[map_input_to_storage[c]]]->getBasisSetSize();
-
-        for (int c = 0; c < NumCenters; c++)
-            BasisOffset[c] = basis_offset_input_order[mapping[c]];
-
-        BasisSetSize = basis_offset_input_order[NumCenters];
-    }
+  const auto& IonID(ions_.GroupID);
+  if (BasisSetSize > 0 && nbs == BasisSetSize)
+    return;
+
+  if (auto& mapping = ions_.get_map_storage_to_input(); mapping.empty())
+  {
+    // evaluate the total basis dimension and offset for each center
+    BasisOffset[0] = 0;
+    for (int c = 0; c < NumCenters; c++)
+      BasisOffset[c + 1] = BasisOffset[c] + LOBasisSet[IonID[c]]->getBasisSetSize();
+    BasisSetSize = BasisOffset[NumCenters];
+  }
+  else
+  {
+    // when particles are reordered due to grouping, AOs need to restore the
+    // input order to match MOs.
+    std::vector<int> map_input_to_storage(mapping.size());
+    for (int c = 0; c < NumCenters; c++)
+      map_input_to_storage[mapping[c]] = c;
+
+    std::vector<size_t> basis_offset_input_order(BasisOffset.size(), 0);
+    for (int c = 0; c < NumCenters; c++)
+      basis_offset_input_order[c + 1] =
+          basis_offset_input_order[c] + LOBasisSet[IonID[map_input_to_storage[c]]]->getBasisSetSize();
+
+    for (int c = 0; c < NumCenters; c++)
+      BasisOffset[c] = basis_offset_input_order[mapping[c]];
+
+    BasisSetSize = basis_offset_input_order[NumCenters];
+  }
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::queryOrbitalsForSType(
-    const std::vector<bool>& corrCenter, std::vector<bool>& is_s_orbital) const
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::queryOrbitalsForSType(const std::vector<bool>& corrCenter,
+                                                             std::vector<bool>& is_s_orbital) const
 {
-    const auto& IonID(ions_.GroupID);
-    for (int c = 0; c < NumCenters; c++) {
-        int idx = BasisOffset[c];
-        int bss = LOBasisSet[IonID[c]]->BasisSetSize;
-        std::vector<bool> local_is_s_orbital(bss);
-        LOBasisSet[IonID[c]]->queryOrbitalsForSType(local_is_s_orbital);
-        for (int k = 0; k < bss; k++) {
-            if (corrCenter[c]) {
-                is_s_orbital[idx++] = local_is_s_orbital[k];
-            }
-            else {
-                is_s_orbital[idx++] = false;
-            }
-        }
+  const auto& IonID(ions_.GroupID);
+  for (int c = 0; c < NumCenters; c++)
+  {
+    int idx = BasisOffset[c];
+    int bss = LOBasisSet[IonID[c]]->BasisSetSize;
+    std::vector<bool> local_is_s_orbital(bss);
+    LOBasisSet[IonID[c]]->queryOrbitalsForSType(local_is_s_orbital);
+    for (int k = 0; k < bss; k++)
+    {
+      if (corrCenter[c])
+      {
+        is_s_orbital[idx++] = local_is_s_orbital[k];
+      }
+      else
+      {
+        is_s_orbital[idx++] = false;
+      }
     }
+  }
+}
+
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGL(const ParticleSetT<ORBT>& P, int iat, vgl_type& vgl)
+{
+  const auto& IonID(ions_.GroupID);
+  const auto& coordR  = P.activeR(iat);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+
+  PosType Tv;
+  for (int c = 0; c < NumCenters; c++)
+  {
+    Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+    Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+    Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+    LOBasisSet[IonID[c]]->evaluateVGL(P.getLattice(), dist[c], displ[c], BasisOffset[c], vgl, Tv);
+  }
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGL(
-    const ParticleSetT<ORBT>& P, int iat, vgl_type& vgl)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateVGL(const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list,
+                                                      int iat,
+                                                      OffloadMWVGLArray& vgl_v)
 {
+  for (size_t iw = 0; iw < P_list.size(); iw++)
+  {
     const auto& IonID(ions_.GroupID);
-    const auto& coordR = P.activeR(iat);
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
-    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
-                                                     d_table.getDisplRow(iat);
+    const auto& coordR  = P_list[iw].activeR(iat);
+    const auto& d_table = P_list[iw].getDistTableAB(myTableIndex);
+    const auto& dist    = (P_list[iw].getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+    const auto& displ   = (P_list[iw].getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
 
     PosType Tv;
-    for (int c = 0; c < NumCenters; c++) {
-        Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
-        Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
-        Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
-        LOBasisSet[IonID[c]]->evaluateVGL(
-            P.getLattice(), dist[c], displ[c], BasisOffset[c], vgl, Tv);
-    }
-}
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateVGL(
-    const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list, int iat,
-    OffloadMWVGLArray& vgl_v)
-{
-    for (size_t iw = 0; iw < P_list.size(); iw++) {
-        const auto& IonID(ions_.GroupID);
-        const auto& coordR = P_list[iw].activeR(iat);
-        const auto& d_table = P_list[iw].getDistTableAB(myTableIndex);
-        const auto& dist = (P_list[iw].getActivePtcl() == iat) ?
-            d_table.getTempDists() :
-            d_table.getDistRow(iat);
-        const auto& displ = (P_list[iw].getActivePtcl() == iat) ?
-            d_table.getTempDispls() :
-            d_table.getDisplRow(iat);
-
-        PosType Tv;
-
-        // number of walkers * BasisSetSize
-        auto stride = vgl_v.size(1) * BasisSetSize;
-        assert(BasisSetSize == vgl_v.size(2));
-        vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride);
-
-        for (int c = 0; c < NumCenters; c++) {
-            Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
-            Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
-            Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
-            LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c],
-                displ[c], BasisOffset[c], vgl_iw, Tv);
-        }
+    // number of walkers * BasisSetSize
+    auto stride = vgl_v.size(1) * BasisSetSize;
+    assert(BasisSetSize == vgl_v.size(2));
+    vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride);
+
+    for (int c = 0; c < NumCenters; c++)
+    {
+      Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+      Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+      Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+      LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c], displ[c], BasisOffset[c], vgl_iw, Tv);
     }
+  }
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGH(
-    const ParticleSetT<ORBT>& P, int iat, vgh_type& vgh)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGH(const ParticleSetT<ORBT>& P, int iat, vgh_type& vgh)
 {
-    const auto& IonID(ions_.GroupID);
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
-    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
-                                                     d_table.getDisplRow(iat);
-    for (int c = 0; c < NumCenters; c++) {
-        LOBasisSet[IonID[c]]->evaluateVGH(
-            P.getLattice(), dist[c], displ[c], BasisOffset[c], vgh);
-    }
+  const auto& IonID(ions_.GroupID);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+  for (int c = 0; c < NumCenters; c++)
+  {
+    LOBasisSet[IonID[c]]->evaluateVGH(P.getLattice(), dist[c], displ[c], BasisOffset[c], vgh);
+  }
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGHGH(
-    const ParticleSetT<ORBT>& P, int iat, vghgh_type& vghgh)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGHGH(const ParticleSetT<ORBT>& P, int iat, vghgh_type& vghgh)
 {
-    // APP_ABORT("SoaLocalizedBasisSetT::evaluateVGH() not implemented\n");
+  // APP_ABORT("SoaLocalizedBasisSetT::evaluateVGH() not implemented\n");
 
-    const auto& IonID(ions_.GroupID);
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
-    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
-                                                     d_table.getDisplRow(iat);
-    for (int c = 0; c < NumCenters; c++) {
-        LOBasisSet[IonID[c]]->evaluateVGHGH(
-            P.getLattice(), dist[c], displ[c], BasisOffset[c], vghgh);
-    }
+  const auto& IonID(ions_.GroupID);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+  for (int c = 0; c < NumCenters; c++)
+  {
+    LOBasisSet[IonID[c]]->evaluateVGHGH(P.getLattice(), dist[c], displ[c], BasisOffset[c], vghgh);
+  }
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::evaluateV(
-    const ParticleSetT<ORBT>& P, int iat, ORBT* restrict vals)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::evaluateV(const ParticleSetT<ORBT>& P, int iat, ORBT* restrict vals)
 {
-    const auto& IonID(ions_.GroupID);
-    const auto& coordR = P.activeR(iat);
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
-    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
-                                                     d_table.getDisplRow(iat);
-
-    PosType Tv;
-    for (int c = 0; c < NumCenters; c++) {
-        Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
-        Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
-        Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
-        LOBasisSet[IonID[c]]->evaluateV(
-            P.getLattice(), dist[c], displ[c], vals + BasisOffset[c], Tv);
-    }
+  const auto& IonID(ions_.GroupID);
+  const auto& coordR  = P.activeR(iat);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+
+  PosType Tv;
+  for (int c = 0; c < NumCenters; c++)
+  {
+    Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+    Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+    Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+    LOBasisSet[IonID[c]]->evaluateV(P.getLattice(), dist[c], displ[c], vals + BasisOffset[c], Tv);
+  }
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateValue(
-    const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list, int iat,
-    OffloadMWVArray& v)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateValue(const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list,
+                                                        int iat,
+                                                        OffloadMWVArray& v)
 {
-    for (size_t iw = 0; iw < P_list.size(); iw++)
-        evaluateV(P_list[iw], iat, v.data_at(iw, 0));
+  for (size_t iw = 0; iw < P_list.size(); iw++)
+    evaluateV(P_list[iw], iat, v.data_at(iw, 0));
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceV(
-    const ParticleSetT<ORBT>& P, int iat, const ParticleSetT<ORBT>& ions,
-    int jion, vgl_type& vgl)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceV(const ParticleSetT<ORBT>& P,
+                                                           int iat,
+                                                           const ParticleSetT<ORBT>& ions,
+                                                           int jion,
+                                                           vgl_type& vgl)
 {
-    // We need to zero out the temporary array vgl.
-    auto* restrict gx = vgl.data(1);
-    auto* restrict gy = vgl.data(2);
-    auto* restrict gz = vgl.data(3);
-
-    for (int ib = 0; ib < BasisSetSize; ib++) {
-        gx[ib] = 0;
-        gy[ib] = 0;
-        gz[ib] = 0;
-    }
+  // We need to zero out the temporary array vgl.
+  auto* restrict gx = vgl.data(1);
+  auto* restrict gy = vgl.data(2);
+  auto* restrict gz = vgl.data(3);
 
-    const auto& IonID(ions_.GroupID);
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
-    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
-                                                     d_table.getDisplRow(iat);
+  for (int ib = 0; ib < BasisSetSize; ib++)
+  {
+    gx[ib] = 0;
+    gy[ib] = 0;
+    gz[ib] = 0;
+  }
 
-    PosType Tv;
-    Tv[0] = Tv[1] = Tv[2] = 0;
-    // Since LCAO's are written only in terms of (r-R), ionic derivatives only
-    // exist for the atomic center that we wish to take derivatives of.
-    // Moreover, we can obtain an ion derivative by multiplying an electron
-    // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
-    // now, just note this is the electron VGL function.
-    LOBasisSet[IonID[jion]]->evaluateVGL(
-        P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vgl, Tv);
+  const auto& IonID(ions_.GroupID);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
+
+  PosType Tv;
+  Tv[0] = Tv[1] = Tv[2] = 0;
+  // Since LCAO's are written only in terms of (r-R), ionic derivatives only
+  // exist for the atomic center that we wish to take derivatives of.
+  // Moreover, we can obtain an ion derivative by multiplying an electron
+  // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
+  // now, just note this is the electron VGL function.
+  LOBasisSet[IonID[jion]]->evaluateVGL(P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vgl, Tv);
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceVGL(
-    const ParticleSetT<ORBT>& P, int iat, const ParticleSetT<ORBT>& ions,
-    int jion, vghgh_type& vghgh)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceVGL(const ParticleSetT<ORBT>& P,
+                                                             int iat,
+                                                             const ParticleSetT<ORBT>& ions,
+                                                             int jion,
+                                                             vghgh_type& vghgh)
 {
-    // We need to zero out the temporary array vghgh.
-    auto* restrict gx = vghgh.data(1);
-    auto* restrict gy = vghgh.data(2);
-    auto* restrict gz = vghgh.data(3);
-
-    auto* restrict hxx = vghgh.data(4);
-    auto* restrict hxy = vghgh.data(5);
-    auto* restrict hxz = vghgh.data(6);
-    auto* restrict hyy = vghgh.data(7);
-    auto* restrict hyz = vghgh.data(8);
-    auto* restrict hzz = vghgh.data(9);
-
-    auto* restrict gxxx = vghgh.data(10);
-    auto* restrict gxxy = vghgh.data(11);
-    auto* restrict gxxz = vghgh.data(12);
-    auto* restrict gxyy = vghgh.data(13);
-    auto* restrict gxyz = vghgh.data(14);
-    auto* restrict gxzz = vghgh.data(15);
-    auto* restrict gyyy = vghgh.data(16);
-    auto* restrict gyyz = vghgh.data(17);
-    auto* restrict gyzz = vghgh.data(18);
-    auto* restrict gzzz = vghgh.data(19);
-
-    for (int ib = 0; ib < BasisSetSize; ib++) {
-        gx[ib] = 0;
-        gy[ib] = 0;
-        gz[ib] = 0;
-
-        hxx[ib] = 0;
-        hxy[ib] = 0;
-        hxz[ib] = 0;
-        hyy[ib] = 0;
-        hyz[ib] = 0;
-        hzz[ib] = 0;
-
-        gxxx[ib] = 0;
-        gxxy[ib] = 0;
-        gxxz[ib] = 0;
-        gxyy[ib] = 0;
-        gxyz[ib] = 0;
-        gxzz[ib] = 0;
-        gyyy[ib] = 0;
-        gyyz[ib] = 0;
-        gyzz[ib] = 0;
-        gzzz[ib] = 0;
-    }
+  // We need to zero out the temporary array vghgh.
+  auto* restrict gx = vghgh.data(1);
+  auto* restrict gy = vghgh.data(2);
+  auto* restrict gz = vghgh.data(3);
+
+  auto* restrict hxx = vghgh.data(4);
+  auto* restrict hxy = vghgh.data(5);
+  auto* restrict hxz = vghgh.data(6);
+  auto* restrict hyy = vghgh.data(7);
+  auto* restrict hyz = vghgh.data(8);
+  auto* restrict hzz = vghgh.data(9);
+
+  auto* restrict gxxx = vghgh.data(10);
+  auto* restrict gxxy = vghgh.data(11);
+  auto* restrict gxxz = vghgh.data(12);
+  auto* restrict gxyy = vghgh.data(13);
+  auto* restrict gxyz = vghgh.data(14);
+  auto* restrict gxzz = vghgh.data(15);
+  auto* restrict gyyy = vghgh.data(16);
+  auto* restrict gyyz = vghgh.data(17);
+  auto* restrict gyzz = vghgh.data(18);
+  auto* restrict gzzz = vghgh.data(19);
+
+  for (int ib = 0; ib < BasisSetSize; ib++)
+  {
+    gx[ib] = 0;
+    gy[ib] = 0;
+    gz[ib] = 0;
+
+    hxx[ib] = 0;
+    hxy[ib] = 0;
+    hxz[ib] = 0;
+    hyy[ib] = 0;
+    hyz[ib] = 0;
+    hzz[ib] = 0;
+
+    gxxx[ib] = 0;
+    gxxy[ib] = 0;
+    gxxz[ib] = 0;
+    gxyy[ib] = 0;
+    gxyz[ib] = 0;
+    gxzz[ib] = 0;
+    gyyy[ib] = 0;
+    gyyz[ib] = 0;
+    gyzz[ib] = 0;
+    gzzz[ib] = 0;
+  }
+
+  // Since jion is indexed on the source ions not the ions_ the distinction
+  // between ions_ and ions is extremely important.
+  const auto& IonID(ions.GroupID);
+  const auto& d_table = P.getDistTableAB(myTableIndex);
+  const auto& dist    = (P.getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
+  const auto& displ   = (P.getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);
 
-    // Since jion is indexed on the source ions not the ions_ the distinction
-    // between ions_ and ions is extremely important.
-    const auto& IonID(ions.GroupID);
-    const auto& d_table = P.getDistTableAB(myTableIndex);
-    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
-                                                    d_table.getDistRow(iat);
-    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
-                                                     d_table.getDisplRow(iat);
-
-    // Since LCAO's are written only in terms of (r-R), ionic derivatives only
-    // exist for the atomic center that we wish to take derivatives of.
-    // Moreover, we can obtain an ion derivative by multiplying an electron
-    // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
-    // now, just note this is the electron VGL function.
-
-    LOBasisSet[IonID[jion]]->evaluateVGHGH(
-        P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vghgh);
+  // Since LCAO's are written only in terms of (r-R), ionic derivatives only
+  // exist for the atomic center that we wish to take derivatives of.
+  // Moreover, we can obtain an ion derivative by multiplying an electron
+  // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
+  // now, just note this is the electron VGHGH function.
+
+  LOBasisSet[IonID[jion]]->evaluateVGHGH(P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vghgh);
 }
 
-template <class COT, typename ORBT>
-void
-SoaLocalizedBasisSetT<COT, ORBT>::add(int icenter, std::unique_ptr<COT> aos)
+template<class COT, typename ORBT>
+void SoaLocalizedBasisSetT<COT, ORBT>::add(int icenter, std::unique_ptr<COT> aos)
 {
-    LOBasisSet[icenter] = std::move(aos);
+  LOBasisSet[icenter] = std::move(aos);
 }
 
 // TODO: this should be redone with template template parameters
 
 #ifndef QMC_COMPLEX
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>,
-        double>,
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>, double>,
     double>;
-template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>,
-        float>,
-    float>;
+template class SoaLocalizedBasisSetT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>, float>,
+                                     float>;
 #else
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>,
-        std::complex<double>>,
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>, std::complex<double>>,
     std::complex<double>>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>,
-        std::complex<float>>,
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>, std::complex<float>>,
     std::complex<float>>;
 #endif
 
 #ifndef QMC_COMPLEX
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>,
-        double>,
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>, double>,
     double>;
-template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>,
-        float>,
-    float>;
+template class SoaLocalizedBasisSetT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>, float>,
+                                     float>;
 #else
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>,
-        std::complex<double>>,
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>, std::complex<double>>,
     std::complex<double>>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>,
-        std::complex<float>>,
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>, std::complex<float>>,
     std::complex<float>>;
 #endif
 
 #ifndef QMC_COMPLEX
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
-        SoaCartesianTensor<double>, double>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>, SoaCartesianTensor<double>, double>,
     double>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
-        SoaCartesianTensor<float>, float>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>, SoaCartesianTensor<float>, float>,
     float>;
 #else
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
-        SoaCartesianTensor<double>, std::complex<double>>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>, SoaCartesianTensor<double>, std::complex<double>>,
     std::complex<double>>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
-        SoaCartesianTensor<float>, std::complex<float>>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>, SoaCartesianTensor<float>, std::complex<float>>,
     std::complex<float>>;
 #endif
 
 #ifndef QMC_COMPLEX
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
-        SoaSphericalTensor<double>, double>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>, SoaSphericalTensor<double>, double>,
     double>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
-        SoaSphericalTensor<float>, float>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>, SoaSphericalTensor<float>, float>,
     float>;
 #else
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
-        SoaSphericalTensor<double>, std::complex<double>>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>, SoaSphericalTensor<double>, std::complex<double>>,
     std::complex<double>>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
-        SoaSphericalTensor<float>, std::complex<float>>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>, SoaSphericalTensor<float>, std::complex<float>>,
     std::complex<float>>;
 #endif
 
 #ifndef QMC_COMPLEX
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
-        SoaCartesianTensor<double>, double>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>, SoaCartesianTensor<double>, double>,
     double>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
-        SoaCartesianTensor<float>, float>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>, SoaCartesianTensor<float>, float>,
     float>;
 #else
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
-        SoaCartesianTensor<double>, std::complex<double>>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>, SoaCartesianTensor<double>, std::complex<double>>,
     std::complex<double>>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
-        SoaCartesianTensor<float>, std::complex<float>>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>, SoaCartesianTensor<float>, std::complex<float>>,
     std::complex<float>>;
 #endif
 
 #ifndef QMC_COMPLEX
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
-        SoaSphericalTensor<double>, double>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>, SoaSphericalTensor<double>, double>,
     double>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
-        SoaSphericalTensor<float>, float>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>, SoaSphericalTensor<float>, float>,
     float>;
 #else
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
-        SoaSphericalTensor<double>, std::complex<double>>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>, SoaSphericalTensor<double>, std::complex<double>>,
     std::complex<double>>;
 template class SoaLocalizedBasisSetT<
-    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
-        SoaSphericalTensor<float>, std::complex<float>>,
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>, SoaSphericalTensor<float>, std::complex<float>>,
     std::complex<float>>;
 #endif
 
diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
index 6839fef181f..5fd6276c419 100644
--- a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
+++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
@@ -33,130 +33,114 @@ namespace qmcplusplus
  * a set of localized orbitals associated with a center.
  * The template parameter ORBT denotes the orbital value return type
  */
-template <class COT, typename ORBT>
+template<class COT, typename ORBT>
 class SoaLocalizedBasisSetT : public SoaBasisSetBaseT<ORBT>
 {
 public:
-    using RealType = typename COT::RealType;
-    using BaseType = SoaBasisSetBaseT<ORBT>;
-    using ValueType = ORBT;
-
-    using vgl_type = typename BaseType::vgl_type;
-    using vgh_type = typename BaseType::vgh_type;
-    using vghgh_type = typename BaseType::vghgh_type;
-    using PosType = typename ParticleSetT<ORBT>::PosType;
-    using OffloadMWVGLArray = typename BaseType::OffloadMWVGLArray;
-    using OffloadMWVArray = typename BaseType::OffloadMWVArray;
-
-    using BaseType::BasisSetSize;
-
-    /// number of centers, e.g., ions
-    size_t NumCenters;
-    /// number of quantum particles
-    size_t NumTargets;
-    /// ion particle set
-    const ParticleSetT<ORBT>& ions_;
-    /// number of quantum particles
-    const int myTableIndex;
-    /// Global Coordinate of Supertwist read from HDF5
-    PosType SuperTwist;
-
-    /** container to store the offsets of the basis functions for each center
+  using RealType  = typename COT::RealType;
+  using BaseType  = SoaBasisSetBaseT<ORBT>;
+  using ValueType = ORBT;
+
+  using vgl_type          = typename BaseType::vgl_type;
+  using vgh_type          = typename BaseType::vgh_type;
+  using vghgh_type        = typename BaseType::vghgh_type;
+  using PosType           = typename ParticleSetT<ORBT>::PosType;
+  using OffloadMWVGLArray = typename BaseType::OffloadMWVGLArray;
+  using OffloadMWVArray   = typename BaseType::OffloadMWVArray;
+
+  using BaseType::BasisSetSize;
+
+  /// number of centers, e.g., ions
+  size_t NumCenters;
+  /// number of quantum particles
+  size_t NumTargets;
+  /// ion particle set
+  const ParticleSetT<ORBT>& ions_;
+  /// index of the distance table used by this basis set (presumably the ion-electron table; confirm)
+  const int myTableIndex;
+  /// Global Coordinate of Supertwist read from HDF5
+  PosType SuperTwist;
+
+  /** container to store the offsets of the basis functions for each center
      * Due to potential reordering of ions, offsets can be in any order.
      */
-    std::vector<size_t> BasisOffset;
+  std::vector<size_t> BasisOffset;
 
-    /** container of the unique pointers to the Atomic Orbitals
+  /** container of the unique pointers to the Atomic Orbitals
      *
      * size of LOBasisSet = number  of unique centers
      */
-    std::vector<std::unique_ptr<COT>> LOBasisSet;
+  std::vector<std::unique_ptr<COT>> LOBasisSet;
 
-    /** constructor
+  /** constructor
      * @param ions ionic system
      * @param els electronic system
      */
-    SoaLocalizedBasisSetT(ParticleSetT<ORBT>& ions, ParticleSetT<ORBT>& els);
+  SoaLocalizedBasisSetT(ParticleSetT<ORBT>& ions, ParticleSetT<ORBT>& els);
 
-    /** copy constructor */
-    SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a);
+  /** copy constructor */
+  SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a);
 
-    /** makeClone */
-    BaseType*
-    makeClone() const override
-    {
-        return new SoaLocalizedBasisSetT<COT, ORBT>(*this);
-    }
+  /** makeClone */
+  BaseType* makeClone() const override { return new SoaLocalizedBasisSetT<COT, ORBT>(*this); }
 
-    /** set Number of periodic Images to evaluate the orbitals.
+  /** set Number of periodic Images to evaluate the orbitals.
         Set to 0 for non-PBC, and set manually in the input.
         Passes the pre-computed phase factor for evaluation of complex
        wavefunction. If WF is real Phase_factor is real and equals 1 if gamma or
        -1 if non-Gamma.
     */
-    void
-    setPBCParams(const TinyVector<int, 3>& PBCImages,
-        const TinyVector<double, 3> Sup_Twist,
-        const std::vector<ORBT>& phase_factor);
+  void setPBCParams(const TinyVector<int, 3>& PBCImages,
+                    const TinyVector<double, 3> Sup_Twist,
+                    const std::vector<ORBT>& phase_factor);
 
-    /** set BasisSetSize and allocate mVGL container
+  /** set BasisSetSize and allocate mVGL container
      */
-    void
-    setBasisSetSize(int nbs) override;
+  void setBasisSetSize(int nbs) override;
 
-    /**  Determine which orbitals are S-type.  Used by cusp correction.
+  /**  Determine which orbitals are S-type.  Used by cusp correction.
      */
-    void
-    queryOrbitalsForSType(const std::vector<bool>& corrCenter,
-        std::vector<bool>& is_s_orbital) const override;
+  void queryOrbitalsForSType(const std::vector<bool>& corrCenter, std::vector<bool>& is_s_orbital) const override;
 
-    /** compute VGL
+  /** compute VGL
      * @param P quantum particleset
      * @param iat active particle
      * @param vgl Matrix(5,BasisSetSize)
      * @param trialMove if true, use getTempDists()/getTempDispls()
      */
-    void
-    evaluateVGL(const ParticleSetT<ORBT>& P, int iat, vgl_type& vgl) override;
+  void evaluateVGL(const ParticleSetT<ORBT>& P, int iat, vgl_type& vgl) override;
 
-    /** compute V using packed array with all walkers
+  /** compute V using packed array with all walkers
      * @param P_list list of quantum particleset (one for each walker)
      * @param iat active particle
      * @param v   Array(n_walkers, BasisSetSize)
      */
-    void
-    mw_evaluateValue(const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list,
-        int iat, OffloadMWVArray& v) override;
+  void mw_evaluateValue(const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list, int iat, OffloadMWVArray& v) override;
 
-    /** compute VGL using packed array with all walkers
+  /** compute VGL using packed array with all walkers
      * @param P_list list of quantum particleset (one for each walker)
      * @param iat active particle
      * @param vgl   Array(n_walkers, 5, BasisSetSize)
      */
-    void
-    mw_evaluateVGL(const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list,
-        int iat, OffloadMWVGLArray& vgl) override;
+  void mw_evaluateVGL(const RefVectorWithLeader<ParticleSetT<ORBT>>& P_list, int iat, OffloadMWVGLArray& vgl) override;
 
-    /** compute VGH
+  /** compute VGH
      * @param P quantum particleset
      * @param iat active particle
      * @param vgl Matrix(10,BasisSetSize)
      * @param trialMove if true, use getTempDists()/getTempDispls()
      */
-    void
-    evaluateVGH(const ParticleSetT<ORBT>& P, int iat, vgh_type& vgh) override;
+  void evaluateVGH(const ParticleSetT<ORBT>& P, int iat, vgh_type& vgh) override;
 
-    /** compute VGHGH
+  /** compute VGHGH
      * @param P quantum particleset
      * @param iat active particle
      * @param vghgh Matrix(20,BasisSetSize)
      * @param trialMove if true, use getTempDists()/getTempDispls()
      */
-    void
-    evaluateVGHGH(
-        const ParticleSetT<ORBT>& P, int iat, vghgh_type& vghgh) override;
+  void evaluateVGHGH(const ParticleSetT<ORBT>& P, int iat, vghgh_type& vghgh) override;
 
-    /** compute values for the iat-paricle move
+  /** compute values for the iat-particle move
      *
      * Always uses getTempDists() and getTempDispls()
      * Tv is a translation vector; In PBC, in order to reduce the number
@@ -167,46 +151,47 @@ class SoaLocalizedBasisSetT : public SoaBasisSetBaseT<ORBT>
      * displacement. We need to keep track of Tv because it must be add
      * as a phase factor, i.e., exp(i*k*Tv).
      */
-    void
-    evaluateV(
-        const ParticleSetT<ORBT>& P, int iat, ORBT* restrict vals) override;
+  void evaluateV(const ParticleSetT<ORBT>& P, int iat, ORBT* restrict vals) override;
 
-    void
-    evaluateGradSourceV(const ParticleSetT<ORBT>& P, int iat,
-        const ParticleSetT<ORBT>& ions, int jion, vgl_type& vgl) override;
+  void evaluateGradSourceV(const ParticleSetT<ORBT>& P,
+                           int iat,
+                           const ParticleSetT<ORBT>& ions,
+                           int jion,
+                           vgl_type& vgl) override;
 
-    void
-    evaluateGradSourceVGL(const ParticleSetT<ORBT>& P, int iat,
-        const ParticleSetT<ORBT>& ions, int jion, vghgh_type& vghgh) override;
+  void evaluateGradSourceVGL(const ParticleSetT<ORBT>& P,
+                             int iat,
+                             const ParticleSetT<ORBT>& ions,
+                             int jion,
+                             vghgh_type& vghgh) override;
 
-    /** add a new set of Centered Atomic Orbitals
+  /** add a new set of Centered Atomic Orbitals
      * @param icenter the index of the center
      * @param aos a set of Centered Atomic Orbitals
      */
-    void
-    add(int icenter, std::unique_ptr<COT> aos);
+  void add(int icenter, std::unique_ptr<COT> aos);
 
-    /** initialize a shared resource and hand it to collection
+  /** initialize a shared resource and hand it to collection
      */
-    void createResource(ResourceCollection& collection) const override;
+  void createResource(ResourceCollection& collection) const override;
 
-    /** acquire a shared resource from collection
+  /** acquire a shared resource from collection
      */
-    void acquireResource(ResourceCollection& collection,
-                         const RefVectorWithLeader<SoaBasisSetBaseT<ORBT>>& basisset_list) const override;
+  void acquireResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<SoaBasisSetBaseT<ORBT>>& basisset_list) const override;
 
-    /** return a shared resource to collection
+  /** return a shared resource to collection
      */
-    void releaseResource(ResourceCollection& collection,
-                         const RefVectorWithLeader<SoaBasisSetBaseT<ORBT>>& basisset_list) const override;
+  void releaseResource(ResourceCollection& collection,
+                       const RefVectorWithLeader<SoaBasisSetBaseT<ORBT>>& basisset_list) const override;
 
 
-    /** helper function for extracting a list of atomic basis sets for a single species (indexed by `id`)
+  /** helper function for extracting a list of atomic basis sets for a single species (indexed by `id`)
      *  from a list of basis sets
      */
-    static RefVectorWithLeader<COT> extractOneSpeciesBasisRefList(
-        const RefVectorWithLeader<SoaBasisSetBaseT<ORBT>>& basisset_list,
-        int id);
+  static RefVectorWithLeader<COT> extractOneSpeciesBasisRefList(
+      const RefVectorWithLeader<SoaBasisSetBaseT<ORBT>>& basisset_list,
+      int id);
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/OptimizableObjectT.h b/src/QMCWaveFunctions/OptimizableObjectT.h
index 1ab14979b7e..d435db04023 100644
--- a/src/QMCWaveFunctions/OptimizableObjectT.h
+++ b/src/QMCWaveFunctions/OptimizableObjectT.h
@@ -23,35 +23,25 @@ namespace qmcplusplus
 template<typename T>
 using OptVariablesTypeT = optimize::VariableSetT<T>;
 
-template <typename T>
+template<typename T>
 class OptimizableObjectT
 {
 public:
-    OptimizableObjectT(const std::string& name) : name_(name)
-    {
-    }
+  OptimizableObjectT(const std::string& name) : name_(name) {}
 
-    const std::string&
-    getName() const
-    {
-        return name_;
-    }
-    bool
-    isOptimized() const
-    {
-        return is_optimized_;
-    }
+  const std::string& getName() const { return name_; }
+  bool isOptimized() const { return is_optimized_; }
 
 private:
-    /** Name of the optimizable object
+  /** Name of the optimizable object
      */
-    const std::string name_;
-    /** If true, this object is actively modified during WFOpt
+  const std::string name_;
+  /** If true, this object is actively modified during WFOpt
      */
-    bool is_optimized_ = false;
+  bool is_optimized_ = false;
 
 public:
-    /** check in variational parameters to the global list of parameters used by
+  /** check in variational parameters to the global list of parameters used by
      * the optimizer.
      * @param active a super set of optimizable variables
      *
@@ -111,29 +101,22 @@ class OptimizableObjectT
   virtual void readVariationalParameters(hdf_archive& hin){};
 };
 
-template <typename T>
+template<typename T>
 class UniqueOptObjRefsT : public RefVector<OptimizableObjectT<T>>
 {
 public:
-    OptimizableObjectT<T>&
-    operator[](size_t i) const
-    {
-        return RefVector<OptimizableObjectT<T>>::operator[](i);
-    }
+  OptimizableObjectT<T>& operator[](size_t i) const { return RefVector<OptimizableObjectT<T>>::operator[](i); }
 
-    void
-    push_back(OptimizableObjectT<T>& obj)
-    {
-        if (obj.getName().empty())
-            throw std::logic_error("BUG!! Only named OptimizableObject object "
-                                   "can be added to UniqueOptObjRefs!");
-        auto result = std::find_if(
-            this->begin(), this->end(), [&](OptimizableObjectT<T>& element) {
-                return element.getName() == obj.getName();
-            });
-        if (result == this->end())
-            RefVector<OptimizableObjectT<T>>::push_back(obj);
-    }
+  void push_back(OptimizableObjectT<T>& obj)
+  {
+    if (obj.getName().empty())
+      throw std::logic_error("BUG!! Only named OptimizableObject object "
+                             "can be added to UniqueOptObjRefs!");
+    auto result = std::find_if(this->begin(), this->end(),
+                               [&](OptimizableObjectT<T>& element) { return element.getName() == obj.getName(); });
+    if (result == this->end())
+      RefVector<OptimizableObjectT<T>>::push_back(obj);
+  }
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp
index fe006553092..6641e457350 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp
+++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.cpp
@@ -22,11 +22,11 @@ namespace qmcplusplus
 {
 template<class T>
 int PWBasisT<T>::readbasis(hdf_archive& h5basisgroup,
-                       RealType ecutoff,
-                       const ParticleLayout& lat,
-                       const std::string& pwname,
-                       const std::string& pwmultname,
-                       bool resizeContainer)
+                           RealType ecutoff,
+                           const ParticleLayout& lat,
+                           const std::string& pwname,
+                           const std::string& pwmultname,
+                           bool resizeContainer)
 {
   ///make a local copy
   Lattice = lat;
diff --git a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
index e02706f7bfe..822f3b4418a 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
+++ b/src/QMCWaveFunctions/PlaneWave/PWBasisT.h
@@ -4,14 +4,11 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 /** @file PWBasis.h
@@ -40,66 +37,66 @@ namespace qmcplusplus
  * Rewrite of PlaneWaveBasis to utilize blas II or III
  * Support more general input tags
  */
-template <typename T>
+template<typename T>
 class PWBasisT : public QMCTraits
 {
 public:
-    using RealType = typename RealAlias_impl<T>::value_type;
-    using ComplexType = T;
-    using PosType = TinyVector<RealType, DIM>;
-    using IndexType = QMCTraits::IndexType;
-    using ParticleLayout = typename ParticleSetT<T>::ParticleLayout;
-    using GIndex_t = TinyVector<IndexType, 3>;
+  using RealType       = typename RealAlias_impl<T>::value_type;
+  using ComplexType    = T;
+  using PosType        = TinyVector<RealType, DIM>;
+  using IndexType      = QMCTraits::IndexType;
+  using ParticleLayout = typename ParticleSetT<T>::ParticleLayout;
+  using GIndex_t       = TinyVector<IndexType, 3>;
 
 private:
-    /// max of maxg[i]
-    int maxmaxg;
-    // Need to store the maximum translation in each dimension to use recursive
-    // PW generation.
-    GIndex_t maxg;
-    // The PlaneWave data - keep all of these strictly private to prevent
-    // inconsistencies.
-    RealType ecut;
-    /// twist angle in reduced
-    PosType twist;
-    /// twist angle in cartesian
-    PosType twist_cart; // Twist angle in reduced and Cartesian.
+  /// max of maxg[i]
+  int maxmaxg;
+  // Need to store the maximum translation in each dimension to use recursive
+  // PW generation.
+  GIndex_t maxg;
+  // The PlaneWave data - keep all of these strictly private to prevent
+  // inconsistencies.
+  RealType ecut;
+  /// twist angle in reduced
+  PosType twist;
+  /// twist angle in cartesian
+  PosType twist_cart; // Twist angle in reduced and Cartesian.
 
-    /// gvecs in reduced coordiates
-    std::vector<GIndex_t> gvecs;
-    /// Reduced coordinates with offset
-    /// gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim]
-    std::vector<GIndex_t> gvecs_shifted;
+  /// gvecs in reduced coordinates
+  std::vector<GIndex_t> gvecs;
+  /// Reduced coordinates with offset
+  /// gvecs_shifted[][idim]=gvecs[][idim]+maxg[idim]
+  std::vector<GIndex_t> gvecs_shifted;
 
-    std::vector<RealType> minusModKplusG2;
-    std::vector<PosType> kplusgvecs_cart; // Cartesian.
+  std::vector<RealType> minusModKplusG2;
+  std::vector<PosType> kplusgvecs_cart; // Cartesian.
 
-    Matrix<ComplexType> C;
-    // Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not
-    // exp(iGr) We need a way of switching between them for G -> -G, otherwise
-    // the determinant will have multiple rows that are equal (to within a
-    // constant factor) of others, giving a zero determinant. For this, we build
-    // a vector (negative) which stores whether a vector is "+" or "-" (with
-    // some criterion, to be defined). We the switch from cos() to sin() based
-    // on the value of this input.
-    std::vector<int> negative;
+  Matrix<ComplexType> C;
+  // Real wavefunctions here. Now the basis states are cos(Gr) or sin(Gr), not
+  // exp(iGr). We need a way of switching between them for G -> -G, otherwise
+  // the determinant will have multiple rows that are equal (to within a
+  // constant factor) of others, giving a zero determinant. For this, we build
+  // a vector (negative) which stores whether a vector is "+" or "-" (with
+  // some criterion, to be defined). We then switch from cos() to sin() based
+  // on the value of this input.
+  std::vector<int> negative;
 
 public:
-    // enumeration for the value, laplacian, gradients and size
-    enum
-    {
-        PW_VALUE,
-        PW_LAP,
-        PW_GRADX,
-        PW_GRADY,
-        PW_GRADZ,
-        PW_MAXINDEX
-    };
+  // enumeration for the value, laplacian, gradients and size
+  enum
+  {
+    PW_VALUE,
+    PW_LAP,
+    PW_GRADX,
+    PW_GRADY,
+    PW_GRADZ,
+    PW_MAXINDEX
+  };
 
-    Matrix<ComplexType> Z;
+  Matrix<ComplexType> Z;
 
-    Vector<ComplexType> Zv;
-    /* inputmap is used for a memory efficient way of
+  Vector<ComplexType> Zv;
+  /* inputmap is used for a memory efficient way of
      *
      * importing the basis-set and coefficients when the desired energy cutoff
      * may be lower than that represented by all data in the wavefunction input
@@ -115,70 +112,58 @@ class PWBasisT : public QMCTraits
      * twist-angle is used, the "sphere" of allowed planewaves is shifted.
      */
 
-    Vector<RealType> phi;
+  Vector<RealType> phi;
 
-    std::vector<int> inputmap;
+  std::vector<int> inputmap;
 
-    /// total number of basis functions
-    int NumPlaneWaves;
+  /// total number of basis functions
+  int NumPlaneWaves;
 
-    /// local copy of Lattice
-    ParticleLayout Lattice;
+  /// local copy of Lattice
+  ParticleLayout Lattice;
 
-    /// default constructor
-    PWBasisT() : maxmaxg(0), NumPlaneWaves(0)
-    {
-    }
+  /// default constructor
+  PWBasisT() : maxmaxg(0), NumPlaneWaves(0) {}
 
-    /// constructor
-    PWBasisT(const PosType& twistangle) :
-        maxmaxg(0),
-        twist(twistangle),
-        NumPlaneWaves(0)
-    {
-    }
+  /// constructor
+  PWBasisT(const PosType& twistangle) : maxmaxg(0), twist(twistangle), NumPlaneWaves(0) {}
 
-    ~PWBasisT()
-    {
-    }
+  ~PWBasisT() {}
 
-    /// set the twist angle
-    void
-    setTwistAngle(const PosType& tang);
+  /// set the twist angle
+  void setTwistAngle(const PosType& tang);
 
-    /// reset
-    void
-    reset();
+  /// reset
+  void reset();
 
-    /** Read basisset from hdf5 file. Apply ecut.
+  /** Read basisset from hdf5 file. Apply ecut.
      * @param h5basisgroup h5 node where basis is located
      * @param ecutoff cutoff energy
      * @param lat CrystalLattice
      * @param resizeContainer if true, resize internal storage.
      * @return the number of plane waves
      */
-    int
-    readbasis(hdf_archive& h5basisgroup, RealType ecutoff,
-        const ParticleLayout& lat, const std::string& pwname = "planewaves",
-        const std::string& pwmultname = "multipliers",
-        bool resizeContainer = true);
+  int readbasis(hdf_archive& h5basisgroup,
+                RealType ecutoff,
+                const ParticleLayout& lat,
+                const std::string& pwname     = "planewaves",
+                const std::string& pwmultname = "multipliers",
+                bool resizeContainer          = true);
 
-    /** Remove basis elements if kinetic energy > ecut.
+  /** Remove basis elements if kinetic energy > ecut.
      *
      * Keep and indexmap so we know how to match coefficients on read.
      */
-    void
-    trimforecut();
+  void trimforecut();
 
 #if defined(PWBASIS_USE_RECURSIVE)
-    /** Fill the recursion coefficients matrix.
+  /** Fill the recursion coefficients matrix.
      *
      * @todo Generalize to non-orthorohmbic cells
      */
-    inline void
-    BuildRecursionCoefs(const PosType& pos)
-    {
-        PosType tau_red(Lattice.toUnit(pos));
+  inline void BuildRecursionCoefs(const PosType& pos)
+  {
+    PosType tau_red(Lattice.toUnit(pos));
 //      RealType phi=TWOPI*tau_red[0];
 //      RealType nphi=maxg0*phi;
 //      ComplexType ct0(std::cos(phi),std::sin(phi));
@@ -200,78 +185,80 @@ class PWBasisT : public QMCTraits
 //      C2[0]=t;
 //      for(int n=1; n<=2*maxg2; n++) C2[n] = (t *= ct0);
 #pragma ivdep
-        for (int idim = 0; idim < 3; idim++) {
-            int ng = maxg[idim];
-            RealType phi = TWOPI * tau_red[idim];
-            RealType nphi = ng * phi;
-            ComplexType Ctemp(std::cos(phi), std::sin(phi));
-            ComplexType t(std::cos(nphi), -std::sin(nphi));
-            ComplexType* restrict cp_ptr = C[idim];
-            *cp_ptr++ = t;
-            for (int n = 1; n <= 2 * ng; n++) {
-                *cp_ptr++ = (t *= Ctemp);
-            }
-        }
-        // Base version
-        // #pragma ivdep
-        //       for(int idim=0; idim<3; idim++){
-        //         RealType phi=TWOPI*tau_red[idim];
-        //         ComplexType Ctemp(std::cos(phi),std::sin(phi));
-        //         int ng=maxg[idim];
-        //         ComplexType* restrict cp_ptr=C[idim]+ng;
-        //         ComplexType* restrict cn_ptr=C[idim]+ng-1;
-        //         *cp_ptr=1.0;
-        //         for(int n=1; n<=ng; n++,cn_ptr--){
-        //           ComplexType t(Ctemp*(*cp_ptr++));
-        //           *cp_ptr = t;
-        //           *cn_ptr = conj(t);
-        //         }
-        //       }
-        // Not valid for general supercell
-        //       // Cartesian of twist for 1,1,1 (reduced coordinates)
-        //       PosType G111(1.0,1.0,1.0);
-        //       G111 = Lattice.k_cart(G111);
-        //
-        //       //Precompute a small number of complex factors (PWs along
-        //       b1,b2,b3 lines)
-        //       //using a fast recursion algorithm
-        // #pragma ivdep
-        //       for(int idim=0; idim<3; idim++){
-        //         //start the recursion with the 111 vector.
-        //         RealType phi = pos[idim] * G111[idim];
-        //         register ComplexType Ctemp(std::cos(phi), std::sin(phi));
-        //         int ng=maxg[idim];
-        //         ComplexType* restrict cp_ptr=C[idim]+ng;
-        //         ComplexType* restrict cn_ptr=C[idim]+ng-1;
-        //         *cp_ptr=1.0;
-        //         for(int n=1; n<=ng; n++,cn_ptr--){
-        //           ComplexType t(Ctemp*(*cp_ptr++));
-        //           *cp_ptr = t;
-        //           *cn_ptr = conj(t);
-        //         }
-        //       }
+    for (int idim = 0; idim < 3; idim++)
+    {
+      int ng        = maxg[idim];
+      RealType phi  = TWOPI * tau_red[idim];
+      RealType nphi = ng * phi;
+      ComplexType Ctemp(std::cos(phi), std::sin(phi));
+      ComplexType t(std::cos(nphi), -std::sin(nphi));
+      ComplexType* restrict cp_ptr = C[idim];
+      *cp_ptr++                    = t;
+      for (int n = 1; n <= 2 * ng; n++)
+      {
+        *cp_ptr++ = (t *= Ctemp);
+      }
     }
+    // Base version
+    // #pragma ivdep
+    //       for(int idim=0; idim<3; idim++){
+    //         RealType phi=TWOPI*tau_red[idim];
+    //         ComplexType Ctemp(std::cos(phi),std::sin(phi));
+    //         int ng=maxg[idim];
+    //         ComplexType* restrict cp_ptr=C[idim]+ng;
+    //         ComplexType* restrict cn_ptr=C[idim]+ng-1;
+    //         *cp_ptr=1.0;
+    //         for(int n=1; n<=ng; n++,cn_ptr--){
+    //           ComplexType t(Ctemp*(*cp_ptr++));
+    //           *cp_ptr = t;
+    //           *cn_ptr = conj(t);
+    //         }
+    //       }
+    // Not valid for general supercell
+    //       // Cartesian of twist for 1,1,1 (reduced coordinates)
+    //       PosType G111(1.0,1.0,1.0);
+    //       G111 = Lattice.k_cart(G111);
+    //
+    //       //Precompute a small number of complex factors (PWs along
+    //       b1,b2,b3 lines)
+    //       //using a fast recursion algorithm
+    // #pragma ivdep
+    //       for(int idim=0; idim<3; idim++){
+    //         //start the recursion with the 111 vector.
+    //         RealType phi = pos[idim] * G111[idim];
+    //         register ComplexType Ctemp(std::cos(phi), std::sin(phi));
+    //         int ng=maxg[idim];
+    //         ComplexType* restrict cp_ptr=C[idim]+ng;
+    //         ComplexType* restrict cn_ptr=C[idim]+ng-1;
+    //         *cp_ptr=1.0;
+    //         for(int n=1; n<=ng; n++,cn_ptr--){
+    //           ComplexType t(Ctemp*(*cp_ptr++));
+    //           *cp_ptr = t;
+    //           *cn_ptr = conj(t);
+    //         }
+    //       }
+  }
 
-    inline void
-    evaluate(const PosType& pos)
+  inline void evaluate(const PosType& pos)
+  {
+    BuildRecursionCoefs(pos);
+    RealType twistdotr = dot(twist_cart, pos);
+    ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
+    // Evaluate the planewaves for particle iat.
+    for (int ig = 0; ig < NumPlaneWaves; ig++)
     {
-        BuildRecursionCoefs(pos);
-        RealType twistdotr = dot(twist_cart, pos);
-        ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
-        // Evaluate the planewaves for particle iat.
-        for (int ig = 0; ig < NumPlaneWaves; ig++) {
-            // PW is initialized as exp(i*twist.r) so that the final basis
-            // evaluations are for (twist+G).r
-            ComplexType pw(pw0); // std::cos(twistdotr),std::sin(twistdotr));
-            for (int idim = 0; idim < 3; idim++)
-                pw *= C(idim, gvecs_shifted[ig][idim]);
-            // pw *= C0[gvecs_shifted[ig][0]];
-            // pw *= C1[gvecs_shifted[ig][1]];
-            // pw *= C2[gvecs_shifted[ig][2]];
-            Zv[ig] = pw;
-        }
+      // PW is initialized as exp(i*twist.r) so that the final basis
+      // evaluations are for (twist+G).r
+      ComplexType pw(pw0); // std::cos(twistdotr),std::sin(twistdotr));
+      for (int idim = 0; idim < 3; idim++)
+        pw *= C(idim, gvecs_shifted[ig][idim]);
+      // pw *= C0[gvecs_shifted[ig][0]];
+      // pw *= C1[gvecs_shifted[ig][1]];
+      // pw *= C2[gvecs_shifted[ig][2]];
+      Zv[ig] = pw;
     }
-    /** Evaluate all planewaves and derivatives for the iat-th particle
+  }
+  /** Evaluate all planewaves and derivatives for the iat-th particle
      *
      * The basis functions are evaluated for particles iat: first <= iat < last
      * Evaluate the plane-waves at current particle coordinates using a fast
@@ -279,91 +266,84 @@ class PWBasisT : public QMCTraits
      * These can be "dotted" with coefficients later to complete orbital
      * evaluations.
      */
-    inline void
-    evaluateAll(const ParticleSetT<T>& P, int iat)
+  inline void evaluateAll(const ParticleSetT<T>& P, int iat)
+  {
+    const PosType& r(P.activeR(iat));
+    BuildRecursionCoefs(r);
+    RealType twistdotr = dot(twist_cart, r);
+    ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
+    // Evaluate the planewaves and derivatives.
+    ComplexType* restrict zptr = Z.data();
+    for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5)
     {
-        const PosType& r(P.activeR(iat));
-        BuildRecursionCoefs(r);
-        RealType twistdotr = dot(twist_cart, r);
-        ComplexType pw0(std::cos(twistdotr), std::sin(twistdotr));
-        // Evaluate the planewaves and derivatives.
-        ComplexType* restrict zptr = Z.data();
-        for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) {
-            // PW is initialized as exp(i*twist.r) so that the final basis
-            // evaluations are for (twist+G).r
-            ComplexType pw(pw0);
-            // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed
-            for (int idim = 0; idim < 3; idim++)
-                pw *= C(idim, gvecs_shifted[ig][idim]);
-            // pw *= C0[gvecs_shifted[ig][0]];
-            // pw *= C1[gvecs_shifted[ig][1]];
-            // pw *= C2[gvecs_shifted[ig][2]];
-            zptr[0] = pw;
-            zptr[1] = minusModKplusG2[ig] * pw;
-            zptr[2] =
-                kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
-            zptr[3] =
-                kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
-            zptr[4] =
-                kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
-        }
+      // PW is initialized as exp(i*twist.r) so that the final basis
+      // evaluations are for (twist+G).r
+      ComplexType pw(pw0);
+      // THE INDEX ORDER OF C DOESN'T LOOK TOO GOOD: this could be fixed
+      for (int idim = 0; idim < 3; idim++)
+        pw *= C(idim, gvecs_shifted[ig][idim]);
+      // pw *= C0[gvecs_shifted[ig][0]];
+      // pw *= C1[gvecs_shifted[ig][1]];
+      // pw *= C2[gvecs_shifted[ig][2]];
+      zptr[0] = pw;
+      zptr[1] = minusModKplusG2[ig] * pw;
+      zptr[2] = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
+      zptr[3] = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
+      zptr[4] = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
     }
+  }
 #else
-    inline void
-    evaluate(const PosType& pos)
-    {
-        // Evaluate the planewaves for particle iat.
-        for (int ig = 0; ig < NumPlaneWaves; ig++)
-            phi[ig] = dot(kplusgvecs_cart[ig], pos);
-        eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data());
-    }
-    inline void
-    evaluateAll(const ParticleSetT<T>& P, int iat)
+  inline void evaluate(const PosType& pos)
+  {
+    // Evaluate the planewaves for particle iat.
+    for (int ig = 0; ig < NumPlaneWaves; ig++)
+      phi[ig] = dot(kplusgvecs_cart[ig], pos);
+    eval_e2iphi(NumPlaneWaves, phi.data(), Zv.data());
+  }
+  inline void evaluateAll(const ParticleSetT<T>& P, int iat)
+  {
+    const PosType& r(P.activeR(iat));
+    evaluate(r);
+    ComplexType* restrict zptr = Z.data();
+    for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5)
     {
-        const PosType& r(P.activeR(iat));
-        evaluate(r);
-        ComplexType* restrict zptr = Z.data();
-        for (int ig = 0; ig < NumPlaneWaves; ig++, zptr += 5) {
-            // PW is initialized as exp(i*twist.r) so that the final basis
-            // evaluations are for (twist+G).r
-            ComplexType& pw = Zv[ig];
-            zptr[0] = pw;
-            zptr[1] = minusModKplusG2[ig] * pw;
-            zptr[2] =
-                kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
-            zptr[3] =
-                kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
-            zptr[4] =
-                kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
-        }
+      // PW is initialized as exp(i*twist.r) so that the final basis
+      // evaluations are for (twist+G).r
+      ComplexType& pw = Zv[ig];
+      zptr[0]         = pw;
+      zptr[1]         = minusModKplusG2[ig] * pw;
+      zptr[2]         = kplusgvecs_cart[ig][0] * ComplexType(-pw.imag(), pw.real());
+      zptr[3]         = kplusgvecs_cart[ig][1] * ComplexType(-pw.imag(), pw.real());
+      zptr[4]         = kplusgvecs_cart[ig][2] * ComplexType(-pw.imag(), pw.real());
     }
+  }
 #endif
-    //    /** Fill the recursion coefficients matrix.
-    //     *
-    //     * @todo Generalize to non-orthorohmbic cells
-    //     */
-    //    void BuildRecursionCoefsByAdd(const PosType& pos)
-    //    {
-    //      // Cartesian of twist for 1,1,1 (reduced coordinates)
-    //      PosType G111(1.0,1.0,1.0);
-    //      G111 = Lattice.k_cart(G111);
-    //      //PosType redP=P.Lattice.toUnit(P.R[iat]);
-    //      //Precompute a small number of complex factors (PWs along b1,b2,b3
-    //      lines) for(int idim=0; idim<3; idim++){
-    //        //start the recursion with the 111 vector.
-    //        RealType phi = pos[idim] * G111[idim];
-    //        int ng(maxg[idim]);
-    //        RealType* restrict cp_ptr=logC[idim]+ng;
-    //        RealType* restrict cn_ptr=logC[idim]+ng-1;
-    //        *cp_ptr=0.0;
-    //        //add INTEL vectorization
-    //        for(int n=1; n<=ng; n++,cn_ptr--){
-    //          RealType t(phi+*cp_ptr++);
-    //          *cp_ptr = t;
-    //          *cn_ptr = -t;
-    //        }
-    //      }
-    //    }
+  //    /** Fill the recursion coefficients matrix.
+  //     *
+  //     * @todo Generalize to non-orthorohmbic cells
+  //     */
+  //    void BuildRecursionCoefsByAdd(const PosType& pos)
+  //    {
+  //      // Cartesian of twist for 1,1,1 (reduced coordinates)
+  //      PosType G111(1.0,1.0,1.0);
+  //      G111 = Lattice.k_cart(G111);
+  //      //PosType redP=P.Lattice.toUnit(P.R[iat]);
+  //      //Precompute a small number of complex factors (PWs along b1,b2,b3
+  //      lines) for(int idim=0; idim<3; idim++){
+  //        //start the recursion with the 111 vector.
+  //        RealType phi = pos[idim] * G111[idim];
+  //        int ng(maxg[idim]);
+  //        RealType* restrict cp_ptr=logC[idim]+ng;
+  //        RealType* restrict cn_ptr=logC[idim]+ng-1;
+  //        *cp_ptr=0.0;
+  //        //add INTEL vectorization
+  //        for(int n=1; n<=ng; n++,cn_ptr--){
+  //          RealType t(phi+*cp_ptr++);
+  //          *cp_ptr = t;
+  //          *cn_ptr = -t;
+  //        }
+  //      }
+  //    }
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
index 6d82f8fdac1..899042244dd 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.cpp
@@ -1,19 +1,15 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source
-// License. See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory Mark
-//                    Dewing, markdewing@gmail.com, University of Illinois at
-//                    Urbana-Champaign
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "PWOrbitalSetT.h"
@@ -23,128 +19,126 @@
 
 namespace qmcplusplus
 {
-template <class T>
+template<class T>
 PWOrbitalSetT<T>::~PWOrbitalSetT()
 {
-    if (OwnBasisSet && myBasisSet)
-        delete myBasisSet;
-    if (!IsCloned && this->C != nullptr)
-        delete this->C;
+  if (OwnBasisSet && myBasisSet)
+    delete myBasisSet;
+  if (!IsCloned && this->C != nullptr)
+    delete this->C;
 }
 
-template <class T>
-std::unique_ptr<SPOSetT<T>>
-PWOrbitalSetT<T>::makeClone() const
+template<class T>
+std::unique_ptr<SPOSetT<T>> PWOrbitalSetT<T>::makeClone() const
 {
-    auto myclone = std::make_unique<PWOrbitalSetT<T>>(*this);
-    myclone->myBasisSet = new PWBasisT<T>(*myBasisSet);
-    myclone->IsCloned = true;
-    return myclone;
+  auto myclone        = std::make_unique<PWOrbitalSetT<T>>(*this);
+  myclone->myBasisSet = new PWBasisT<T>(*myBasisSet);
+  myclone->IsCloned   = true;
+  return myclone;
 }
 
-template <class T>
-void
-PWOrbitalSetT<T>::setOrbitalSetSize(int norbs)
+template<class T>
+void PWOrbitalSetT<T>::setOrbitalSetSize(int norbs)
+{}
+
+template<class T>
+void PWOrbitalSetT<T>::resize(PWBasisPtr bset, int nbands, bool cleanup)
 {
+  myBasisSet           = bset;
+  this->OrbitalSetSize = nbands;
+  OwnBasisSet          = cleanup;
+  BasisSetSize         = myBasisSet->NumPlaneWaves;
+  this->C              = new ValueMatrix(this->OrbitalSetSize, BasisSetSize);
+  this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX);
+  app_log() << "  PWOrbitalSetT<T>::resize OrbitalSetSize =" << this->OrbitalSetSize
+            << " BasisSetSize = " << BasisSetSize << std::endl;
 }
 
-template <class T>
-void
-PWOrbitalSetT<T>::resize(PWBasisPtr bset, int nbands, bool cleanup)
+template<class T>
+void PWOrbitalSetT<T>::addVector(const std::vector<ComplexType>& coefs, int jorb)
 {
-    myBasisSet = bset;
-    this->OrbitalSetSize = nbands;
-    OwnBasisSet = cleanup;
-    BasisSetSize = myBasisSet->NumPlaneWaves;
-    this->C = new ValueMatrix(this->OrbitalSetSize, BasisSetSize);
-    this->Temp.resize(this->OrbitalSetSize, PW_MAXINDEX);
-    app_log() << "  PWOrbitalSetT<T>::resize OrbitalSetSize ="
-              << this->OrbitalSetSize << " BasisSetSize = " << BasisSetSize
-              << std::endl;
+  int ng = myBasisSet->inputmap.size();
+  if (ng != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // drop G points for the given TwistAngle
+  const std::vector<int>& inputmap(myBasisSet->inputmap);
+  for (int ig = 0; ig < ng; ig++)
+  {
+    if (inputmap[ig] > -1)
+      (*(this->C))[jorb][inputmap[ig]] = coefs[ig];
+  }
 }
 
-template <class T>
-void
-PWOrbitalSetT<T>::addVector(const std::vector<ComplexType>& coefs, int jorb)
+template<class T>
+void PWOrbitalSetT<T>::addVector(const std::vector<RealType>& coefs, int jorb)
 {
-    int ng = myBasisSet->inputmap.size();
-    if (ng != coefs.size()) {
-        app_error()
-            << "  Input G map does not match the basis size of wave functions "
-            << std::endl;
-        OHMMS::Controller->abort();
-    }
-    // drop G points for the given TwistAngle
-    const std::vector<int>& inputmap(myBasisSet->inputmap);
-    for (int ig = 0; ig < ng; ig++) {
-        if (inputmap[ig] > -1)
-            (*(this->C))[jorb][inputmap[ig]] = coefs[ig];
-    }
+  int ng = myBasisSet->inputmap.size();
+  if (ng != coefs.size())
+  {
+    app_error() << "  Input G map does not match the basis size of wave functions " << std::endl;
+    OHMMS::Controller->abort();
+  }
+  // drop G points for the given TwistAngle
+  const std::vector<int>& inputmap(myBasisSet->inputmap);
+  for (int ig = 0; ig < ng; ig++)
+  {
+    if (inputmap[ig] > -1)
+      (*(this->C))[jorb][inputmap[ig]] = coefs[ig];
+  }
 }
 
-template <class T>
-void
-PWOrbitalSetT<T>::addVector(const std::vector<RealType>& coefs, int jorb)
+template<class T>
+void PWOrbitalSetT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-    int ng = myBasisSet->inputmap.size();
-    if (ng != coefs.size()) {
-        app_error()
-            << "  Input G map does not match the basis size of wave functions "
-            << std::endl;
-        OHMMS::Controller->abort();
-    }
-    // drop G points for the given TwistAngle
-    const std::vector<int>& inputmap(myBasisSet->inputmap);
-    for (int ig = 0; ig < ng; ig++) {
-        if (inputmap[ig] > -1)
-            (*(this->C))[jorb][inputmap[ig]] = coefs[ig];
-    }
+  // Evaluate every orbital for particle iat.
+  // Evaluate the basis-set at these coordinates:
+  // myBasisSet->evaluate(P,iat);
+  myBasisSet->evaluate(P.activeR(iat));
+  MatrixOperators::product<T>(*(this->C), myBasisSet->Zv, psi);
 }
 
-template <class T>
-void
-PWOrbitalSetT<T>::evaluateValue(
-    const ParticleSetT<T>& P, int iat, ValueVector& psi)
+template<class T>
+void PWOrbitalSetT<T>::evaluateVGL(const ParticleSetT<T>& P,
+                                   int iat,
+                                   ValueVector& psi,
+                                   GradVector& dpsi,
+                                   ValueVector& d2psi)
 {
-    // Evaluate every orbital for particle iat.
-    // Evaluate the basis-set at these coordinates:
-    // myBasisSet->evaluate(P,iat);
-    myBasisSet->evaluate(P.activeR(iat));
-    MatrixOperators::product<T>(*(this->C), myBasisSet->Zv, psi);
+  // Evaluate the orbitals and derivatives for particle iat only.
+  myBasisSet->evaluateAll(P, iat);
+  MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
+  const T* restrict tptr = this->Temp.data();
+  for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX)
+  {
+    psi[j]   = tptr[PW_VALUE];
+    d2psi[j] = tptr[PW_LAP];
+    dpsi[j]  = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]);
+  }
 }
 
-template <class T>
-void
-PWOrbitalSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
-    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<class T>
+void PWOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                            int first,
+                                            int last,
+                                            ValueMatrix& logdet,
+                                            GradMatrix& dlogdet,
+                                            ValueMatrix& d2logdet)
 {
-    // Evaluate the orbitals and derivatives for particle iat only.
+  for (int iat = first, i = 0; iat < last; iat++, i++)
+  {
     myBasisSet->evaluateAll(P, iat);
     MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
     const T* restrict tptr = this->Temp.data();
-    for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) {
-        psi[j] = tptr[PW_VALUE];
-        d2psi[j] = tptr[PW_LAP];
-        dpsi[j] = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]);
-    }
-}
-
-template <class T>
-void
-PWOrbitalSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
-{
-    for (int iat = first, i = 0; iat < last; iat++, i++) {
-        myBasisSet->evaluateAll(P, iat);
-        MatrixOperators::product<T>(*(this->C), myBasisSet->Z, this->Temp);
-        const T* restrict tptr = this->Temp.data();
-        for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX) {
-            logdet(i, j) = tptr[PW_VALUE];
-            d2logdet(i, j) = tptr[PW_LAP];
-            dlogdet(i, j) =
-                GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]);
-        }
+    for (int j = 0; j < this->OrbitalSetSize; j++, tptr += PW_MAXINDEX)
+    {
+      logdet(i, j)   = tptr[PW_VALUE];
+      d2logdet(i, j) = tptr[PW_LAP];
+      dlogdet(i, j)  = GradType(tptr[PW_GRADX], tptr[PW_GRADY], tptr[PW_GRADZ]);
     }
+  }
 }
 
 // Class concrete types from T
diff --git a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
index 9103a16ee2b..848a10b3eaa 100644
--- a/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
+++ b/src/QMCWaveFunctions/PlaneWave/PWOrbitalSetT.h
@@ -1,18 +1,14 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source
-// License. See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark Dewing,
-//                    markdewing@gmail.com, University of Illinois at
-//                    Urbana-Champaign
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 /** @file PWOrbitalSetT.h
@@ -29,118 +25,101 @@
 namespace qmcplusplus
 {
 
-template <class T>
+template<class T>
 class PWOrbitalSetT : public SPOSetT<T>
 {
 public:
-    using RealType = typename SPOSetT<T>::RealType;
-    using ComplexType = T;
-    using PosType = typename SPOSetT<T>::PosType;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using GradType = typename SPOSetT<T>::GradType;
-    using IndexType = typename SPOSetT<T>::IndexType;
-
-    using BasisSet_t = PWBasisT<T>;
-    using PWBasisPtr = PWBasisT<T>*;
-
-    /** inherit the enum of BasisSet_t */
-    enum
-    {
-        PW_VALUE = BasisSet_t::PW_VALUE,
-        PW_LAP = BasisSet_t::PW_LAP,
-        PW_GRADX = BasisSet_t::PW_GRADX,
-        PW_GRADY = BasisSet_t::PW_GRADY,
-        PW_GRADZ = BasisSet_t::PW_GRADZ,
-        PW_MAXINDEX = BasisSet_t::PW_MAXINDEX
-    };
-
-    /** default constructor
+  using RealType    = typename SPOSetT<T>::RealType;
+  using ComplexType = T;
+  using PosType     = typename SPOSetT<T>::PosType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using GradType    = typename SPOSetT<T>::GradType;
+  using IndexType   = typename SPOSetT<T>::IndexType;
+
+  using BasisSet_t = PWBasisT<T>;
+  using PWBasisPtr = PWBasisT<T>*;
+
+  /** inherit the enum of BasisSet_t */
+  enum
+  {
+    PW_VALUE    = BasisSet_t::PW_VALUE,
+    PW_LAP      = BasisSet_t::PW_LAP,
+    PW_GRADX    = BasisSet_t::PW_GRADX,
+    PW_GRADY    = BasisSet_t::PW_GRADY,
+    PW_GRADZ    = BasisSet_t::PW_GRADZ,
+    PW_MAXINDEX = BasisSet_t::PW_MAXINDEX
+  };
+
+  /** default constructor
      */
-    PWOrbitalSetT<T>(const std::string& my_name) :
-        SPOSetT<T>(my_name),
-        OwnBasisSet(false),
-        myBasisSet(nullptr),
-        BasisSetSize(0),
-        C(nullptr),
-        IsCloned(false)
-    {
-    }
-
-    std::string
-    getClassName() const override
-    {
-        return "PWOrbitalSetT";
-    }
-
-    /** delete BasisSet only it owns this
+  PWOrbitalSetT<T>(const std::string& my_name)
+      : SPOSetT<T>(my_name), OwnBasisSet(false), myBasisSet(nullptr), BasisSetSize(0), C(nullptr), IsCloned(false)
+  {}
+
+  std::string getClassName() const override { return "PWOrbitalSetT"; }
+
+  /** delete BasisSet only it owns this
      *
      * Builder takes care of who owns what
      */
-    ~PWOrbitalSetT<T>() override;
+  ~PWOrbitalSetT<T>() override;
 
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const override;
-    /** resize  the orbital base
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+  /** resize  the orbital base
      * @param bset PWBasis
      * @param nbands number of bands
      * @param cleaup if true, owns PWBasis. Will clean up.
      */
-    void
-    resize(PWBasisPtr bset, int nbands, bool cleanup = false);
+  void resize(PWBasisPtr bset, int nbands, bool cleanup = false);
 
-    /** Builder class takes care of the assertion
+  /** Builder class takes care of the assertion
      */
-    void
-    addVector(const std::vector<ComplexType>& coefs, int jorb);
-    void
-    addVector(const std::vector<RealType>& coefs, int jorb);
-
-    void
-    setOrbitalSetSize(int norbs) override;
-
-    inline T
-    evaluate(int ib, const PosType& pos)
-    {
-        myBasisSet->evaluate(pos);
-        return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data());
-    }
-
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
-
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
-
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override;
-
-    /** boolean
+  void addVector(const std::vector<ComplexType>& coefs, int jorb);
+  void addVector(const std::vector<RealType>& coefs, int jorb);
+
+  void setOrbitalSetSize(int norbs) override;
+
+  inline T evaluate(int ib, const PosType& pos)
+  {
+    myBasisSet->evaluate(pos);
+    return BLAS::dot(BasisSetSize, (*C)[ib], myBasisSet->Zv.data());
+  }
+
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
+
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
+
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+  /** boolean
      *
      * If true, this has to delete the BasisSet
      */
-    bool OwnBasisSet;
-    /// TwistAngle of this PWOrbitalSetT
-    PosType TwistAngle;
-    /// My basis set
-    PWBasisPtr myBasisSet;
-    /// number of basis
-    IndexType BasisSetSize;
-    /** pointer to matrix containing the coefficients
+  bool OwnBasisSet;
+  /// TwistAngle of this PWOrbitalSetT
+  PosType TwistAngle;
+  /// My basis set
+  PWBasisPtr myBasisSet;
+  /// number of basis
+  IndexType BasisSetSize;
+  /** pointer to matrix containing the coefficients
      *
      * makeClone makes a shallow copy and flag IsCloned
      */
-    ValueMatrix* C;
-    /// if true, do not clean up
-    bool IsCloned;
+  ValueMatrix* C;
+  /// if true, do not clean up
+  bool IsCloned;
 
-    /** temporary array to perform gemm operation */
-    Matrix<T> Temp;
+  /** temporary array to perform gemm operation */
+  Matrix<T> Temp;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.cpp b/src/QMCWaveFunctions/RotatedSPOsT.cpp
index 116cc3d1438..a0b557591e4 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOsT.cpp
@@ -1,17 +1,15 @@
 //////////////////////////////////////////////////////////////////////////////////////
-//// This file is distributed under the University of Illinois/NCSA Open Source
-/// License. / See LICENSE file in top directory for details.
-////
-//// Copyright (c) QMCPACK developers.
-////
-//// File developed by: Sergio D. Pineda Flores,
-/// sergio_pinedaflores@berkeley.edu, University of California, Berkeley / Eric
-/// Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley /
-/// Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-////
-//// File created by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu,
-/// University of California, Berkeley
-////////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers
+//
+// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley
+//                    Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+// File created by:  Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley
+//////////////////////////////////////////////////////////////////////////////////////
+
 #include "RotatedSPOsT.h"
 
 #include "CPU/BLAS.hpp"
@@ -21,292 +19,280 @@
 
 namespace qmcplusplus
 {
-template <typename T>
-RotatedSPOsT<T>::RotatedSPOsT(
-    const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos) :
-    SPOSetT<T>(my_name),
-    OptimizableObjectT<T>(my_name),
-    Phi(std::move(spos)),
-    nel_major_(0),
-    params_supplied(false)
+template<typename T>
+RotatedSPOsT<T>::RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos)
+    : SPOSetT<T>(my_name), OptimizableObjectT<T>(my_name), Phi(std::move(spos)), nel_major_(0), params_supplied(false)
 {
-    this->OrbitalSetSize = Phi->getOrbitalSetSize();
+  this->OrbitalSetSize = Phi->getOrbitalSetSize();
 }
 
-template <typename T>
+template<typename T>
 RotatedSPOsT<T>::~RotatedSPOsT()
-{
-}
+{}
 
-template <typename T>
-void
-RotatedSPOsT<T>::setRotationParameters(const std::vector<RealType>& param_list)
+template<typename T>
+void RotatedSPOsT<T>::setRotationParameters(const std::vector<RealType>& param_list)
 {
-    params = param_list;
-    params_supplied = true;
+  params          = param_list;
+  params_supplied = true;
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::createRotationIndices(
-    int nel, int nmo, RotationIndices& rot_indices)
+template<typename T>
+void RotatedSPOsT<T>::createRotationIndices(int nel, int nmo, RotationIndices& rot_indices)
 {
-    for (int i = 0; i < nel; i++)
-        for (int j = nel; j < nmo; j++)
-            rot_indices.emplace_back(i, j);
+  for (int i = 0; i < nel; i++)
+    for (int j = nel; j < nmo; j++)
+      rot_indices.emplace_back(i, j);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::createRotationIndicesFull(
-    int nel, int nmo, RotationIndices& rot_indices)
+template<typename T>
+void RotatedSPOsT<T>::createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices)
 {
-    rot_indices.reserve(nmo * (nmo - 1) / 2);
-
-    // start with core-active rotations - put them at the beginning of the list
-    // so it matches the other list of rotation indices
-    for (int i = 0; i < nel; i++)
-        for (int j = nel; j < nmo; j++)
-            rot_indices.emplace_back(i, j);
-
-    // Add core-core rotations - put them at the end of the list
-    for (int i = 0; i < nel; i++)
-        for (int j = i + 1; j < nel; j++)
-            rot_indices.emplace_back(i, j);
-
-    // Add active-active rotations - put them at the end of the list
-    for (int i = nel; i < nmo; i++)
-        for (int j = i + 1; j < nmo; j++)
-            rot_indices.emplace_back(i, j);
+  rot_indices.reserve(nmo * (nmo - 1) / 2);
+
+  // start with core-active rotations - put them at the beginning of the list
+  // so it matches the other list of rotation indices
+  for (int i = 0; i < nel; i++)
+    for (int j = nel; j < nmo; j++)
+      rot_indices.emplace_back(i, j);
+
+  // Add core-core rotations - put them at the end of the list
+  for (int i = 0; i < nel; i++)
+    for (int j = i + 1; j < nel; j++)
+      rot_indices.emplace_back(i, j);
+
+  // Add active-active rotations - put them at the end of the list
+  for (int i = nel; i < nmo; i++)
+    for (int j = i + 1; j < nmo; j++)
+      rot_indices.emplace_back(i, j);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::constructAntiSymmetricMatrix(
-    const RotationIndices& rot_indices, const std::vector<RealType>& param,
-    ValueMatrix& rot_mat)
+template<typename T>
+void RotatedSPOsT<T>::constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                                   const std::vector<RealType>& param,
+                                                   ValueMatrix& rot_mat)
 {
-    assert(rot_indices.size() == param.size());
-    // Assumes rot_mat is of the correct size
+  assert(rot_indices.size() == param.size());
+  // Assumes rot_mat is of the correct size
 
-    rot_mat = 0.0;
+  rot_mat = 0.0;
 
-    for (int i = 0; i < rot_indices.size(); i++) {
-        const int p = rot_indices[i].first;
-        const int q = rot_indices[i].second;
-        const RealType x = param[i];
+  for (int i = 0; i < rot_indices.size(); i++)
+  {
+    const int p      = rot_indices[i].first;
+    const int q      = rot_indices[i].second;
+    const RealType x = param[i];
 
-        rot_mat[q][p] = x;
-        rot_mat[p][q] = -x;
-    }
+    rot_mat[q][p] = x;
+    rot_mat[p][q] = -x;
+  }
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::extractParamsFromAntiSymmetricMatrix(
-    const RotationIndices& rot_indices, const ValueMatrix& rot_mat,
-    std::vector<RealType>& param)
+template<typename T>
+void RotatedSPOsT<T>::extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                                           const ValueMatrix& rot_mat,
+                                                           std::vector<RealType>& param)
 {
-    assert(rot_indices.size() == param.size());
-    // Assumes rot_mat is of the correct size
-
-    for (int i = 0; i < rot_indices.size(); i++) {
-        const int p = rot_indices[i].first;
-        const int q = rot_indices[i].second;
-        param[i] = rot_mat[q][p];
-    }
+  assert(rot_indices.size() == param.size());
+  // Assumes rot_mat is of the correct size
+
+  for (int i = 0; i < rot_indices.size(); i++)
+  {
+    const int p = rot_indices[i].first;
+    const int q = rot_indices[i].second;
+    param[i]    = rot_mat[q][p];
+  }
 }
 
 template<typename T>
 void RotatedSPOsT<T>::resetParametersExclusive(const OptVariablesTypeT<T>& active)
 {
-    std::vector<RealType> delta_param(m_act_rot_inds.size());
-
-    size_t psize = m_act_rot_inds.size();
-
-    if (use_global_rot_) {
-        psize = m_full_rot_inds.size();
-        assert(psize >= m_act_rot_inds.size());
-    }
-
-    std::vector<RealType> old_param(psize);
-    std::vector<RealType> new_param(psize);
-
-    for (int i = 0; i < m_act_rot_inds.size(); i++) {
-        int loc = this->myVars.where(i);
-        delta_param[i] = active[loc] - this->myVars[i];
-        this->myVars[i] = active[loc];
-    }
-
-    if (use_global_rot_) {
-        for (int i = 0; i < m_full_rot_inds.size(); i++)
-            old_param[i] = myVarsFull[i];
-
-        applyDeltaRotation(delta_param, old_param, new_param);
-
-        // Save the the params
-        for (int i = 0; i < m_full_rot_inds.size(); i++)
-            myVarsFull[i] = new_param[i];
-    }
-    else {
-        apply_rotation(delta_param, false);
-
-        // Save the parameters in the history list
-        history_params_.push_back(delta_param);
-    }
+  std::vector<RealType> delta_param(m_act_rot_inds.size());
+
+  size_t psize = m_act_rot_inds.size();
+
+  if (use_global_rot_)
+  {
+    psize = m_full_rot_inds.size();
+    assert(psize >= m_act_rot_inds.size());
+  }
+
+  std::vector<RealType> old_param(psize);
+  std::vector<RealType> new_param(psize);
+
+  for (int i = 0; i < m_act_rot_inds.size(); i++)
+  {
+    int loc         = this->myVars.where(i);
+    delta_param[i]  = active[loc] - this->myVars[i];
+    this->myVars[i] = active[loc];
+  }
+
+  if (use_global_rot_)
+  {
+    for (int i = 0; i < m_full_rot_inds.size(); i++)
+      old_param[i] = myVarsFull[i];
+
+    applyDeltaRotation(delta_param, old_param, new_param);
+
+    // Save the params
+    for (int i = 0; i < m_full_rot_inds.size(); i++)
+      myVarsFull[i] = new_param[i];
+  }
+  else
+  {
+    apply_rotation(delta_param, false);
+
+    // Save the parameters in the history list
+    history_params_.push_back(delta_param);
+  }
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::writeVariationalParameters(hdf_archive& hout)
+template<typename T>
+void RotatedSPOsT<T>::writeVariationalParameters(hdf_archive& hout)
 {
-    hout.push("RotatedSPOsT");
-    if (use_global_rot_) {
-        hout.push("rotation_global");
-        std::string rot_global_name =
-            std::string("rotation_global_") + SPOSetT<T>::getName();
-
-        int nparam_full = myVarsFull.size();
-        std::vector<RealType> full_params(nparam_full);
-        for (int i = 0; i < nparam_full; i++)
-            full_params[i] = myVarsFull[i];
-
-        hout.write(full_params, rot_global_name);
-        hout.pop();
-    }
-    else {
-        hout.push("rotation_history");
-        size_t rows = history_params_.size();
-        size_t cols = 0;
-        if (rows > 0)
-            cols = history_params_[0].size();
-
-        Matrix<RealType> tmp(rows, cols);
-        for (size_t i = 0; i < rows; i++)
-            for (size_t j = 0; j < cols; j++)
-                tmp(i, j) = history_params_[i][j];
-
-        std::string rot_hist_name =
-            std::string("rotation_history_") + SPOSetT<T>::getName();
-        hout.write(tmp, rot_hist_name);
-        hout.pop();
-    }
+  hout.push("RotatedSPOsT");
+  if (use_global_rot_)
+  {
+    hout.push("rotation_global");
+    std::string rot_global_name = std::string("rotation_global_") + SPOSetT<T>::getName();
+
+    int nparam_full = myVarsFull.size();
+    std::vector<RealType> full_params(nparam_full);
+    for (int i = 0; i < nparam_full; i++)
+      full_params[i] = myVarsFull[i];
+
+    hout.write(full_params, rot_global_name);
+    hout.pop();
+  }
+  else
+  {
+    hout.push("rotation_history");
+    size_t rows = history_params_.size();
+    size_t cols = 0;
+    if (rows > 0)
+      cols = history_params_[0].size();
+
+    Matrix<RealType> tmp(rows, cols);
+    for (size_t i = 0; i < rows; i++)
+      for (size_t j = 0; j < cols; j++)
+        tmp(i, j) = history_params_[i][j];
+
+    std::string rot_hist_name = std::string("rotation_history_") + SPOSetT<T>::getName();
+    hout.write(tmp, rot_hist_name);
+    hout.pop();
+  }
 
-    // Save myVars in order to restore object state exactly
-    //  The values aren't meaningful, but they need to match those saved in
-    //  VariableSet
-    hout.push("rotation_params");
-    std::string rot_params_name =
-        std::string("rotation_params_") + SPOSetT<T>::getName();
+  // Save myVars in order to restore object state exactly
+  //  The values aren't meaningful, but they need to match those saved in
+  //  VariableSet
+  hout.push("rotation_params");
+  std::string rot_params_name = std::string("rotation_params_") + SPOSetT<T>::getName();
 
-    int nparam = this->myVars.size();
-    std::vector<RealType> params(nparam);
-    for (int i = 0; i < nparam; i++)
-        params[i] = this->myVars[i];
+  int nparam = this->myVars.size();
+  std::vector<RealType> params(nparam);
+  for (int i = 0; i < nparam; i++)
+    params[i] = this->myVars[i];
 
-    hout.write(params, rot_params_name);
-    hout.pop();
+  hout.write(params, rot_params_name);
+  hout.pop();
 
-    hout.pop();
+  hout.pop();
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::readVariationalParameters(hdf_archive& hin)
+template<typename T>
+void RotatedSPOsT<T>::readVariationalParameters(hdf_archive& hin)
 {
-    hin.push("RotatedSPOsT", false);
-
-    bool grp_hist_exists = hin.is_group("rotation_history");
-    bool grp_global_exists = hin.is_group("rotation_global");
-    if (!grp_hist_exists && !grp_global_exists)
-        app_warning() << "Rotation parameters not found in VP file";
-
-    if (grp_global_exists) {
-        hin.push("rotation_global", false);
-        std::string rot_global_name =
-            std::string("rotation_global_") + SPOSetT<T>::getName();
-
-        std::vector<int> sizes(1);
-        if (!hin.getShape<RealType>(rot_global_name, sizes))
-            throw std::runtime_error(
-                "Failed to read rotation_global in VP file");
-
-        int nparam_full_actual = sizes[0];
-        int nparam_full = myVarsFull.size();
-
-        if (nparam_full != nparam_full_actual) {
-            std::ostringstream tmp_err;
-            tmp_err << "Expected number of full rotation parameters ("
-                    << nparam_full << ") does not match number in file ("
-                    << nparam_full_actual << ")";
-            throw std::runtime_error(tmp_err.str());
-        }
-        std::vector<RealType> full_params(nparam_full);
-        hin.read(full_params, rot_global_name);
-        for (int i = 0; i < nparam_full; i++)
-            myVarsFull[i] = full_params[i];
+  hin.push("RotatedSPOsT", false);
+
+  bool grp_hist_exists   = hin.is_group("rotation_history");
+  bool grp_global_exists = hin.is_group("rotation_global");
+  if (!grp_hist_exists && !grp_global_exists)
+    app_warning() << "Rotation parameters not found in VP file";
 
-        hin.pop();
+  if (grp_global_exists)
+  {
+    hin.push("rotation_global", false);
+    std::string rot_global_name = std::string("rotation_global_") + SPOSetT<T>::getName();
 
-        applyFullRotation(full_params, true);
+    std::vector<int> sizes(1);
+    if (!hin.getShape<RealType>(rot_global_name, sizes))
+      throw std::runtime_error("Failed to read rotation_global in VP file");
+
+    int nparam_full_actual = sizes[0];
+    int nparam_full        = myVarsFull.size();
+
+    if (nparam_full != nparam_full_actual)
+    {
+      std::ostringstream tmp_err;
+      tmp_err << "Expected number of full rotation parameters (" << nparam_full << ") does not match number in file ("
+              << nparam_full_actual << ")";
+      throw std::runtime_error(tmp_err.str());
     }
-    else if (grp_hist_exists) {
-        hin.push("rotation_history", false);
-        std::string rot_hist_name =
-            std::string("rotation_history_") + SPOSetT<T>::getName();
-        std::vector<int> sizes(2);
-        if (!hin.getShape<RealType>(rot_hist_name, sizes))
-            throw std::runtime_error(
-                "Failed to read rotation history in VP file");
-
-        int rows = sizes[0];
-        int cols = sizes[1];
-        history_params_.resize(rows);
-        Matrix<RealType> tmp(rows, cols);
-        hin.read(tmp, rot_hist_name);
-        for (size_t i = 0; i < rows; i++) {
-            history_params_[i].resize(cols);
-            for (size_t j = 0; j < cols; j++)
-                history_params_[i][j] = tmp(i, j);
-        }
+    std::vector<RealType> full_params(nparam_full);
+    hin.read(full_params, rot_global_name);
+    for (int i = 0; i < nparam_full; i++)
+      myVarsFull[i] = full_params[i];
 
-        hin.pop();
+    hin.pop();
 
-        applyRotationHistory();
+    applyFullRotation(full_params, true);
+  }
+  else if (grp_hist_exists)
+  {
+    hin.push("rotation_history", false);
+    std::string rot_hist_name = std::string("rotation_history_") + SPOSetT<T>::getName();
+    std::vector<int> sizes(2);
+    if (!hin.getShape<RealType>(rot_hist_name, sizes))
+      throw std::runtime_error("Failed to read rotation history in VP file");
+
+    int rows = sizes[0];
+    int cols = sizes[1];
+    history_params_.resize(rows);
+    Matrix<RealType> tmp(rows, cols);
+    hin.read(tmp, rot_hist_name);
+    for (size_t i = 0; i < rows; i++)
+    {
+      history_params_[i].resize(cols);
+      for (size_t j = 0; j < cols; j++)
+        history_params_[i][j] = tmp(i, j);
     }
 
-    hin.push("rotation_params", false);
-    std::string rot_param_name =
-        std::string("rotation_params_") + SPOSetT<T>::getName();
+    hin.pop();
 
-    std::vector<int> sizes(1);
-    if (!hin.getShape<RealType>(rot_param_name, sizes))
-        throw std::runtime_error("Failed to read rotation_params in VP file");
-
-    int nparam_actual = sizes[0];
-    int nparam = this->myVars.size();
-    if (nparam != nparam_actual) {
-        std::ostringstream tmp_err;
-        tmp_err << "Expected number of rotation parameters (" << nparam
-                << ") does not match number in file (" << nparam_actual << ")";
-        throw std::runtime_error(tmp_err.str());
-    }
+    applyRotationHistory();
+  }
 
-    std::vector<RealType> params(nparam);
-    hin.read(params, rot_param_name);
-    for (int i = 0; i < nparam; i++)
-        this->myVars[i] = params[i];
+  hin.push("rotation_params", false);
+  std::string rot_param_name = std::string("rotation_params_") + SPOSetT<T>::getName();
 
-    hin.pop();
+  std::vector<int> sizes(1);
+  if (!hin.getShape<RealType>(rot_param_name, sizes))
+    throw std::runtime_error("Failed to read rotation_params in VP file");
 
-    hin.pop();
+  int nparam_actual = sizes[0];
+  int nparam        = this->myVars.size();
+  if (nparam != nparam_actual)
+  {
+    std::ostringstream tmp_err;
+    tmp_err << "Expected number of rotation parameters (" << nparam << ") does not match number in file ("
+            << nparam_actual << ")";
+    throw std::runtime_error(tmp_err.str());
+  }
+
+  std::vector<RealType> params(nparam);
+  hin.read(params, rot_param_name);
+  for (int i = 0; i < nparam; i++)
+    this->myVars[i] = params[i];
+
+  hin.pop();
+
+  hin.pop();
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::buildOptVariables(const size_t nel)
+template<typename T>
+void RotatedSPOsT<T>::buildOptVariables(const size_t nel)
 {
-    /* Only rebuild optimized variables if more after-rotation orbitals are
+  /* Only rebuild optimized variables if more after-rotation orbitals are
      * needed Consider ROHF, there is only one set of SPO for both spin up and
      * down Nup > Ndown. nel_major_ will be set Nup.
      *
@@ -314,347 +300,338 @@ RotatedSPOsT<T>::buildOptVariables(const size_t nel)
      * parameters again when a clone is made (the DiracDeterminant constructor
      * calls buildOptVariables)
      */
-    if (nel > nel_major_ && this->myVars.size() == 0) {
-        nel_major_ = nel;
-
-        const size_t nmo = Phi->getOrbitalSetSize();
-
-        // create active rotation parameter indices
-        RotationIndices created_m_act_rot_inds;
+  if (nel > nel_major_ && this->myVars.size() == 0)
+  {
+    nel_major_ = nel;
 
-        RotationIndices created_full_rot_inds;
-        if (use_global_rot_)
-            createRotationIndicesFull(nel, nmo, created_full_rot_inds);
-
-        createRotationIndices(nel, nmo, created_m_act_rot_inds);
-
-        buildOptVariables(created_m_act_rot_inds, created_full_rot_inds);
-    }
-}
-
-template <typename T>
-void
-RotatedSPOsT<T>::buildOptVariables(
-    const RotationIndices& rotations, const RotationIndices& full_rotations)
-{
     const size_t nmo = Phi->getOrbitalSetSize();
 
-    // create active rotations
-    m_act_rot_inds = rotations;
+    // create active rotation parameter indices
+    RotationIndices created_m_act_rot_inds;
 
+    RotationIndices created_full_rot_inds;
     if (use_global_rot_)
-        m_full_rot_inds = full_rotations;
+      createRotationIndicesFull(nel, nmo, created_full_rot_inds);
 
-    if (use_global_rot_)
-        app_log() << "Orbital rotation using global rotation" << std::endl;
-    else
-        app_log() << "Orbital rotation using history" << std::endl;
+    createRotationIndices(nel, nmo, created_m_act_rot_inds);
 
-    // This will add the orbital rotation parameters to myVars
-    // and will also read in initial parameter values supplied in input file
-    int p, q;
-    int nparams_active = m_act_rot_inds.size();
+    buildOptVariables(created_m_act_rot_inds, created_full_rot_inds);
+  }
+}
 
-    app_log() << "nparams_active: " << nparams_active
-              << " params2.size(): " << params.size() << std::endl;
+template<typename T>
+void RotatedSPOsT<T>::buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations)
+{
+  const size_t nmo = Phi->getOrbitalSetSize();
+
+  // create active rotations
+  m_act_rot_inds = rotations;
+
+  if (use_global_rot_)
+    m_full_rot_inds = full_rotations;
+
+  if (use_global_rot_)
+    app_log() << "Orbital rotation using global rotation" << std::endl;
+  else
+    app_log() << "Orbital rotation using history" << std::endl;
+
+  // This will add the orbital rotation parameters to myVars
+  // and will also read in initial parameter values supplied in input file
+  int p, q;
+  int nparams_active = m_act_rot_inds.size();
+
+  app_log() << "nparams_active: " << nparams_active << " params2.size(): " << params.size() << std::endl;
+  if (params_supplied)
+    if (nparams_active != params.size())
+      throw std::runtime_error("The number of supplied orbital rotation parameters does not "
+                               "match number prdouced by the slater "
+                               "expansion. \n");
+
+  this->myVars.clear();
+  for (int i = 0; i < nparams_active; i++)
+  {
+    p = m_act_rot_inds[i].first;
+    q = m_act_rot_inds[i].second;
+    std::stringstream sstr;
+    sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "")
+         << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "") << q;
+
+    // If the user input parameters, use those. Otherwise, initialize the
+    // parameters to zero
     if (params_supplied)
-        if (nparams_active != params.size())
-            throw std::runtime_error(
-                "The number of supplied orbital rotation parameters does not "
-                "match number prdouced by the slater "
-                "expansion. \n");
-
-    this->myVars.clear();
-    for (int i = 0; i < nparams_active; i++) {
-        p = m_act_rot_inds[i].first;
-        q = m_act_rot_inds[i].second;
-        std::stringstream sstr;
-        sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "")
-             << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_"
-             << (q < 10 ? "0" : "") << (q < 100 ? "0" : "")
-             << (q < 1000 ? "0" : "") << q;
-
-        // If the user input parameters, use those. Otherwise, initialize the
-        // parameters to zero
-        if (params_supplied) {
-            this->myVars.insert(sstr.str(), params[i]);
-        }
-        else {
-            this->myVars.insert(sstr.str(), 0.0);
-        }
-    }
-
-    if (use_global_rot_) {
-        myVarsFull.clear();
-        for (int i = 0; i < m_full_rot_inds.size(); i++) {
-            p = m_full_rot_inds[i].first;
-            q = m_full_rot_inds[i].second;
-            std::stringstream sstr;
-            sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "")
-                 << (p < 100 ? "0" : "") << (p < 1000 ? "0" : "") << p << "_"
-                 << (q < 10 ? "0" : "") << (q < 100 ? "0" : "")
-                 << (q < 1000 ? "0" : "") << q;
-
-            if (params_supplied && i < m_act_rot_inds.size())
-                myVarsFull.insert(sstr.str(), params[i]);
-            else
-                myVarsFull.insert(sstr.str(), 0.0);
-        }
+    {
+      this->myVars.insert(sstr.str(), params[i]);
     }
-
-    // Printing the parameters
-    if (true) {
-        app_log() << std::string(16, ' ') << "Parameter name"
-                  << std::string(15, ' ') << "Value\n";
-        this->myVars.print(app_log());
+    else
+    {
+      this->myVars.insert(sstr.str(), 0.0);
     }
-
-    if (params_supplied) {
-        std::vector<RealType> param(m_act_rot_inds.size());
-        for (int i = 0; i < m_act_rot_inds.size(); i++)
-            param[i] = this->myVars[i];
-        apply_rotation(param, false);
+  }
+
+  if (use_global_rot_)
+  {
+    myVarsFull.clear();
+    for (int i = 0; i < m_full_rot_inds.size(); i++)
+    {
+      p = m_full_rot_inds[i].first;
+      q = m_full_rot_inds[i].second;
+      std::stringstream sstr;
+      sstr << SPOSetT<T>::getName() << "_orb_rot_" << (p < 10 ? "0" : "") << (p < 100 ? "0" : "")
+           << (p < 1000 ? "0" : "") << p << "_" << (q < 10 ? "0" : "") << (q < 100 ? "0" : "") << (q < 1000 ? "0" : "")
+           << q;
+
+      if (params_supplied && i < m_act_rot_inds.size())
+        myVarsFull.insert(sstr.str(), params[i]);
+      else
+        myVarsFull.insert(sstr.str(), 0.0);
     }
+  }
+
+  // Printing the parameters
+  if (true)
+  {
+    app_log() << std::string(16, ' ') << "Parameter name" << std::string(15, ' ') << "Value\n";
+    this->myVars.print(app_log());
+  }
+
+  if (params_supplied)
+  {
+    std::vector<RealType> param(m_act_rot_inds.size());
+    for (int i = 0; i < m_act_rot_inds.size(); i++)
+      param[i] = this->myVars[i];
+    apply_rotation(param, false);
+  }
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::apply_rotation(
-    const std::vector<RealType>& param, bool use_stored_copy)
+template<typename T>
+void RotatedSPOsT<T>::apply_rotation(const std::vector<RealType>& param, bool use_stored_copy)
 {
-    assert(param.size() == m_act_rot_inds.size());
+  assert(param.size() == m_act_rot_inds.size());
 
-    const size_t nmo = Phi->getOrbitalSetSize();
-    ValueMatrix rot_mat(nmo, nmo);
+  const size_t nmo = Phi->getOrbitalSetSize();
+  ValueMatrix rot_mat(nmo, nmo);
 
-    constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat);
+  constructAntiSymmetricMatrix(m_act_rot_inds, param, rot_mat);
 
-    /*
+  /*
         rot_mat is now an anti-hermitian matrix. Now we convert
         it into a unitary matrix via rot_mat = exp(-rot_mat).
         Finally, apply unitary matrix to orbs.
       */
-    exponentiate_antisym_matrix(rot_mat);
-    Phi->applyRotation(rot_mat, use_stored_copy);
+  exponentiate_antisym_matrix(rot_mat);
+  Phi->applyRotation(rot_mat, use_stored_copy);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::applyDeltaRotation(const std::vector<RealType>& delta_param,
-    const std::vector<RealType>& old_param, std::vector<RealType>& new_param)
+template<typename T>
+void RotatedSPOsT<T>::applyDeltaRotation(const std::vector<RealType>& delta_param,
+                                         const std::vector<RealType>& old_param,
+                                         std::vector<RealType>& new_param)
 {
-    const size_t nmo = Phi->getOrbitalSetSize();
-    ValueMatrix new_rot_mat(nmo, nmo);
-    constructDeltaRotation(delta_param, old_param, m_act_rot_inds,
-        m_full_rot_inds, new_param, new_rot_mat);
+  const size_t nmo = Phi->getOrbitalSetSize();
+  ValueMatrix new_rot_mat(nmo, nmo);
+  constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat);
 
-    Phi->applyRotation(new_rot_mat, true);
+  Phi->applyRotation(new_rot_mat, true);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::constructDeltaRotation(
-    const std::vector<RealType>& delta_param,
-    const std::vector<RealType>& old_param, const RotationIndices& act_rot_inds,
-    const RotationIndices& full_rot_inds, std::vector<RealType>& new_param,
-    ValueMatrix& new_rot_mat)
+template<typename T>
+void RotatedSPOsT<T>::constructDeltaRotation(const std::vector<RealType>& delta_param,
+                                             const std::vector<RealType>& old_param,
+                                             const RotationIndices& act_rot_inds,
+                                             const RotationIndices& full_rot_inds,
+                                             std::vector<RealType>& new_param,
+                                             ValueMatrix& new_rot_mat)
 {
-    assert(delta_param.size() == act_rot_inds.size());
-    assert(old_param.size() == full_rot_inds.size());
-    assert(new_param.size() == full_rot_inds.size());
+  assert(delta_param.size() == act_rot_inds.size());
+  assert(old_param.size() == full_rot_inds.size());
+  assert(new_param.size() == full_rot_inds.size());
 
-    const size_t nmo = new_rot_mat.rows();
-    assert(new_rot_mat.rows() == new_rot_mat.cols());
+  const size_t nmo = new_rot_mat.rows();
+  assert(new_rot_mat.rows() == new_rot_mat.cols());
 
-    ValueMatrix old_rot_mat(nmo, nmo);
+  ValueMatrix old_rot_mat(nmo, nmo);
 
-    constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat);
-    exponentiate_antisym_matrix(old_rot_mat);
+  constructAntiSymmetricMatrix(full_rot_inds, old_param, old_rot_mat);
+  exponentiate_antisym_matrix(old_rot_mat);
 
-    ValueMatrix delta_rot_mat(nmo, nmo);
+  ValueMatrix delta_rot_mat(nmo, nmo);
 
-    constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat);
-    exponentiate_antisym_matrix(delta_rot_mat);
+  constructAntiSymmetricMatrix(act_rot_inds, delta_param, delta_rot_mat);
+  exponentiate_antisym_matrix(delta_rot_mat);
 
-    // Apply delta rotation to old rotation.
-    BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo,
-        old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(), nmo);
+  // Apply delta rotation to old rotation.
+  BLAS::gemm('N', 'N', nmo, nmo, nmo, 1.0, delta_rot_mat.data(), nmo, old_rot_mat.data(), nmo, 0.0, new_rot_mat.data(),
+             nmo);
 
-    ValueMatrix log_rot_mat(nmo, nmo);
-    log_antisym_matrix(new_rot_mat, log_rot_mat);
-    extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param);
+  ValueMatrix log_rot_mat(nmo, nmo);
+  log_antisym_matrix(new_rot_mat, log_rot_mat);
+  extractParamsFromAntiSymmetricMatrix(full_rot_inds, log_rot_mat, new_param);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::applyFullRotation(
-    const std::vector<RealType>& full_param, bool use_stored_copy)
+template<typename T>
+void RotatedSPOsT<T>::applyFullRotation(const std::vector<RealType>& full_param, bool use_stored_copy)
 {
-    assert(full_param.size() == m_full_rot_inds.size());
+  assert(full_param.size() == m_full_rot_inds.size());
 
-    const size_t nmo = Phi->getOrbitalSetSize();
-    ValueMatrix rot_mat(nmo, nmo);
-    rot_mat = T(0);
+  const size_t nmo = Phi->getOrbitalSetSize();
+  ValueMatrix rot_mat(nmo, nmo);
+  rot_mat = T(0);
 
-    constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat);
+  constructAntiSymmetricMatrix(m_full_rot_inds, full_param, rot_mat);
 
-    /*
+  /*
         rot_mat is now an anti-hermitian matrix. Now we convert
         it into a unitary matrix via rot_mat = exp(-rot_mat).
         Finally, apply unitary matrix to orbs.
       */
-    exponentiate_antisym_matrix(rot_mat);
-    Phi->applyRotation(rot_mat, use_stored_copy);
+  exponentiate_antisym_matrix(rot_mat);
+  Phi->applyRotation(rot_mat, use_stored_copy);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::applyRotationHistory()
+template<typename T>
+void RotatedSPOsT<T>::applyRotationHistory()
 {
-    for (auto delta_param : history_params_) {
-        apply_rotation(delta_param, false);
-    }
+  for (auto delta_param : history_params_)
+  {
+    apply_rotation(delta_param, false);
+  }
 }
 
 // compute exponential of a real, antisymmetric matrix by diagonalizing and
 // exponentiating eigenvalues
-template <typename T>
-void
-RotatedSPOsT<T>::exponentiate_antisym_matrix(ValueMatrix& mat)
+template<typename T>
+void RotatedSPOsT<T>::exponentiate_antisym_matrix(ValueMatrix& mat)
 {
-    const int n = mat.rows();
-    std::vector<std::complex<RealType>> mat_h(n * n, 0);
-    std::vector<RealType> eval(n, 0);
-    std::vector<std::complex<RealType>> work(2 * n, 0);
-    std::vector<RealType> rwork(3 * n, 0);
-    std::vector<std::complex<RealType>> mat_d(n * n, 0);
-    std::vector<std::complex<RealType>> mat_t(n * n, 0);
-    // exponentiating e^X = e^iY (Y hermitian)
-    // i(-iX) = X, so -iX is hermitian
-    // diagonalize -iX = UDU^T, exponentiate e^iD, and return U e^iD U^T
-    // construct hermitian analogue of mat by multiplying by -i
-    for (int i = 0; i < n; ++i) {
-        for (int j = i; j < n; ++j) {
-            mat_h[i + n * j] = std::complex<RealType>(0, -1.0 * mat[j][i]);
-            mat_h[j + n * i] = std::complex<RealType>(0, 1.0 * mat[j][i]);
-        }
+  const int n = mat.rows();
+  std::vector<std::complex<RealType>> mat_h(n * n, 0);
+  std::vector<RealType> eval(n, 0);
+  std::vector<std::complex<RealType>> work(2 * n, 0);
+  std::vector<RealType> rwork(3 * n, 0);
+  std::vector<std::complex<RealType>> mat_d(n * n, 0);
+  std::vector<std::complex<RealType>> mat_t(n * n, 0);
+  // exponentiating e^X = e^iY (Y hermitian)
+  // i(-iX) = X, so -iX is hermitian
+  // diagonalize -iX = U D U^H, exponentiate e^iD, and return U e^iD U^H
+  // construct hermitian analogue of mat by multiplying by -i
+  for (int i = 0; i < n; ++i)
+  {
+    for (int j = i; j < n; ++j)
+    {
+      mat_h[i + n * j] = std::complex<RealType>(0, -1.0 * mat[j][i]);
+      mat_h[j + n * i] = std::complex<RealType>(0, 1.0 * mat[j][i]);
     }
-    // diagonalize the matrix
-    char JOBZ('V');
-    char UPLO('U');
-    int N(n);
-    int LDA(n);
-    int LWORK(2 * n);
-    int info = 0;
-    LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0),
-        LWORK, &rwork.at(0), info);
-    if (info != 0) {
-        std::ostringstream msg;
-        msg << "heev failed with info = " << info
-            << " in RotatedSPOsT::exponentiate_antisym_matrix";
-        throw std::runtime_error(msg.str());
+  }
+  // diagonalize the matrix
+  char JOBZ('V');
+  char UPLO('U');
+  int N(n);
+  int LDA(n);
+  int LWORK(2 * n);
+  int info = 0;
+  LAPACK::heev(JOBZ, UPLO, N, &mat_h.at(0), LDA, &eval.at(0), &work.at(0), LWORK, &rwork.at(0), info);
+  if (info != 0)
+  {
+    std::ostringstream msg;
+    msg << "heev failed with info = " << info << " in RotatedSPOsT::exponentiate_antisym_matrix";
+    throw std::runtime_error(msg.str());
+  }
+  // iterate through diagonal matrix, exponentiate terms
+  for (int i = 0; i < n; ++i)
+  {
+    for (int j = 0; j < n; ++j)
+    {
+      mat_d[i + j * n] = (i == j) ? std::exp(std::complex<RealType>(0.0, eval[i])) : std::complex<RealType>(0.0, 0.0);
     }
-    // iterate through diagonal matrix, exponentiate terms
-    for (int i = 0; i < n; ++i) {
-        for (int j = 0; j < n; ++j) {
-            mat_d[i + j * n] = (i == j) ?
-                std::exp(std::complex<RealType>(0.0, eval[i])) :
-                std::complex<RealType>(0.0, 0.0);
-        }
+  }
+  // perform matrix multiplication
+  // assume row major
+  BLAS::gemm('N', 'C', n, n, n, std::complex<RealType>(1.0, 0), &mat_d.at(0), n, &mat_h.at(0), n,
+             std::complex<RealType>(0.0, 0.0), &mat_t.at(0), n);
+  BLAS::gemm('N', 'N', n, n, n, std::complex<RealType>(1.0, 0), &mat_h.at(0), n, &mat_t.at(0), n,
+             std::complex<RealType>(0.0, 0.0), &mat_d.at(0), n);
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < n; ++j)
+    {
+      if (mat_d[i + n * j].imag() > 1e-12)
+      {
+        app_log() << "warning: large imaginary value in orbital "
+                     "rotation matrix: (i,j) = ("
+                  << i << "," << j << "), im = " << mat_d[i + n * j].imag() << std::endl;
+      }
+      mat[j][i] = mat_d[i + n * j].real();
     }
-    // perform matrix multiplication
-    // assume row major
-    BLAS::gemm('N', 'C', n, n, n, std::complex<RealType>(1.0, 0), &mat_d.at(0),
-        n, &mat_h.at(0), n, std::complex<RealType>(0.0, 0.0), &mat_t.at(0), n);
-    BLAS::gemm('N', 'N', n, n, n, std::complex<RealType>(1.0, 0), &mat_h.at(0),
-        n, &mat_t.at(0), n, std::complex<RealType>(0.0, 0.0), &mat_d.at(0), n);
-    for (int i = 0; i < n; ++i)
-        for (int j = 0; j < n; ++j) {
-            if (mat_d[i + n * j].imag() > 1e-12) {
-                app_log() << "warning: large imaginary value in orbital "
-                             "rotation matrix: (i,j) = ("
-                          << i << "," << j
-                          << "), im = " << mat_d[i + n * j].imag() << std::endl;
-            }
-            mat[j][i] = mat_d[i + n * j].real();
-        }
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output)
+template<typename T>
+void RotatedSPOsT<T>::log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output)
 {
-    const int n = mat.rows();
-    std::vector<RealType> mat_h(n * n, 0);
-    std::vector<RealType> eval_r(n, 0);
-    std::vector<RealType> eval_i(n, 0);
-    std::vector<RealType> mat_l(n * n, 0);
-    std::vector<RealType> work(4 * n, 0);
-
-    std::vector<std::complex<RealType>> mat_cd(n * n, 0);
-    std::vector<std::complex<RealType>> mat_cl(n * n, 0);
-    std::vector<std::complex<RealType>> mat_ch(n * n, 0);
-
-    for (int i = 0; i < n; ++i)
-        for (int j = 0; j < n; ++j)
-            mat_h[i + n * j] = mat[i][j];
-
-    // diagonalize the matrix
-    char JOBL('V');
-    char JOBR('N');
-    int N(n);
-    int LDA(n);
-    int LWORK(4 * n);
-    int info = 0;
-    LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0),
-        &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA, &work.at(0), &LWORK,
-        &info);
-    if (info != 0) {
-        std::ostringstream msg;
-        msg << "heev failed with info = " << info
-            << " in RotatedSPOsT::log_antisym_matrix";
-        throw std::runtime_error(msg.str());
+  const int n = mat.rows();
+  std::vector<RealType> mat_h(n * n, 0);
+  std::vector<RealType> eval_r(n, 0);
+  std::vector<RealType> eval_i(n, 0);
+  std::vector<RealType> mat_l(n * n, 0);
+  std::vector<RealType> work(4 * n, 0);
+
+  std::vector<std::complex<RealType>> mat_cd(n * n, 0);
+  std::vector<std::complex<RealType>> mat_cl(n * n, 0);
+  std::vector<std::complex<RealType>> mat_ch(n * n, 0);
+
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < n; ++j)
+      mat_h[i + n * j] = mat[i][j];
+
+  // diagonalize the matrix
+  char JOBL('V');
+  char JOBR('N');
+  int N(n);
+  int LDA(n);
+  int LWORK(4 * n);
+  int info = 0;
+  LAPACK::geev(&JOBL, &JOBR, &N, &mat_h.at(0), &LDA, &eval_r.at(0), &eval_i.at(0), &mat_l.at(0), &LDA, nullptr, &LDA,
+               &work.at(0), &LWORK, &info);
+  if (info != 0)
+  {
+    std::ostringstream msg;
+    msg << "heev failed with info = " << info << " in RotatedSPOsT::log_antisym_matrix";
+    throw std::runtime_error(msg.str());
+  }
+
+  // iterate through diagonal matrix, take log
+  for (int i = 0; i < n; ++i)
+  {
+    for (int j = 0; j < n; ++j)
+    {
+      auto tmp = (i == j) ? std::log(std::complex<RealType>(eval_r[i], eval_i[i])) : std::complex<RealType>(0.0, 0.0);
+      mat_cd[i + j * n] = tmp;
+
+      if (eval_i[j] > 0.0)
+      {
+        mat_cl[i + j * n]       = std::complex<RealType>(mat_l[i + j * n], mat_l[i + (j + 1) * n]);
+        mat_cl[i + (j + 1) * n] = std::complex<RealType>(mat_l[i + j * n], -mat_l[i + (j + 1) * n]);
+      }
+      else if (!(eval_i[j] < 0.0))
+      {
+        mat_cl[i + j * n] = std::complex<RealType>(mat_l[i + j * n], 0.0);
+      }
     }
-
-    // iterate through diagonal matrix, take log
-    for (int i = 0; i < n; ++i) {
-        for (int j = 0; j < n; ++j) {
-            auto tmp = (i == j) ?
-                std::log(std::complex<RealType>(eval_r[i], eval_i[i])) :
-                std::complex<RealType>(0.0, 0.0);
-            mat_cd[i + j * n] = tmp;
-
-            if (eval_i[j] > 0.0) {
-                mat_cl[i + j * n] = std::complex<RealType>(
-                    mat_l[i + j * n], mat_l[i + (j + 1) * n]);
-                mat_cl[i + (j + 1) * n] = std::complex<RealType>(
-                    mat_l[i + j * n], -mat_l[i + (j + 1) * n]);
-            }
-            else if (!(eval_i[j] < 0.0)) {
-                mat_cl[i + j * n] =
-                    std::complex<RealType>(mat_l[i + j * n], 0.0);
-            }
-        }
+  }
+
+  RealType one(1.0);
+  RealType zero(0.0);
+  BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero, &mat_ch.at(0), n);
+  BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero, &mat_cd.at(0), n);
+
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < n; ++j)
+    {
+      if (mat_cd[i + n * j].imag() > 1e-12)
+      {
+        app_log() << "warning: large imaginary value in antisymmetric "
+                     "matrix: (i,j) = ("
+                  << i << "," << j << "), im = " << mat_cd[i + n * j].imag() << std::endl;
+      }
+      output[i][j] = mat_cd[i + n * j].real();
     }
-
-    RealType one(1.0);
-    RealType zero(0.0);
-    BLAS::gemm('N', 'N', n, n, n, one, &mat_cl.at(0), n, &mat_cd.at(0), n, zero,
-        &mat_ch.at(0), n);
-    BLAS::gemm('N', 'C', n, n, n, one, &mat_ch.at(0), n, &mat_cl.at(0), n, zero,
-        &mat_cd.at(0), n);
-
-    for (int i = 0; i < n; ++i)
-        for (int j = 0; j < n; ++j) {
-            if (mat_cd[i + n * j].imag() > 1e-12) {
-                app_log() << "warning: large imaginary value in antisymmetric "
-                             "matrix: (i,j) = ("
-                          << i << "," << j
-                          << "), im = " << mat_cd[i + n * j].imag()
-                          << std::endl;
-            }
-            output[i][j] = mat_cd[i + n * j].real();
-        }
 }
 
 template<typename T>
@@ -667,79 +644,78 @@ void RotatedSPOsT<T>::evaluateDerivRatios(const VirtualParticleSetT<T>& VP,
                                           int FirstIndex,
                                           int LastIndex)
 {
-    Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
+  Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
 
-    const size_t nel = LastIndex - FirstIndex;
-    const size_t nmo = Phi->getOrbitalSetSize();
+  const size_t nel = LastIndex - FirstIndex;
+  const size_t nmo = Phi->getOrbitalSetSize();
 
-    psiM_inv.resize(nel, nel);
-    psiM_all.resize(nel, nmo);
-    dpsiM_all.resize(nel, nmo);
-    d2psiM_all.resize(nel, nmo);
+  psiM_inv.resize(nel, nel);
+  psiM_all.resize(nel, nmo);
+  dpsiM_all.resize(nel, nmo);
+  d2psiM_all.resize(nel, nmo);
 
-    psiM_inv = 0;
-    psiM_all = 0;
-    dpsiM_all = 0;
-    d2psiM_all = 0;
+  psiM_inv   = 0;
+  psiM_all   = 0;
+  dpsiM_all  = 0;
+  d2psiM_all = 0;
 
-    const ParticleSetT<T>& P = VP.getRefPS();
-    int iel = VP.refPtcl;
+  const ParticleSetT<T>& P = VP.getRefPS();
+  int iel                  = VP.refPtcl;
 
-    Phi->evaluate_notranspose(
-        P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
 
-    for (int i = 0; i < nel; i++)
-        for (int j = 0; j < nel; j++)
-            psiM_inv(i, j) = psiM_all(i, j);
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nel; j++)
+      psiM_inv(i, j) = psiM_all(i, j);
 
-    Invert(psiM_inv.data(), nel, nel);
+  Invert(psiM_inv.data(), nel, nel);
 
-    const T* const A(psiM_all.data());
-    const T* const Ainv(psiM_inv.data());
-    ValueMatrix T_orig;
-    T_orig.resize(nel, nmo);
+  const T* const A(psiM_all.data());
+  const T* const Ainv(psiM_inv.data());
+  ValueMatrix T_orig;
+  T_orig.resize(nel, nmo);
 
-    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
-        T_orig.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_orig.data(), nmo);
 
-    ValueMatrix T_mat;
-    T_mat.resize(nel, nmo);
+  ValueMatrix T_mat;
+  T_mat.resize(nel, nmo);
 
-    ValueVector tmp_psi;
-    tmp_psi.resize(nmo);
+  ValueVector tmp_psi;
+  tmp_psi.resize(nmo);
 
-    for (int iat = 0; iat < VP.getTotalNum(); iat++) {
-        Phi->evaluateValue(VP, iat, tmp_psi);
+  for (int iat = 0; iat < VP.getTotalNum(); iat++)
+  {
+    Phi->evaluateValue(VP, iat, tmp_psi);
 
-        for (int j = 0; j < nmo; j++)
-            psiM_all(iel - FirstIndex, j) = tmp_psi[j];
+    for (int j = 0; j < nmo; j++)
+      psiM_all(iel - FirstIndex, j) = tmp_psi[j];
 
-        for (int i = 0; i < nel; i++)
-            for (int j = 0; j < nel; j++)
-                psiM_inv(i, j) = psiM_all(i, j);
-
-        Invert(psiM_inv.data(), nel, nel);
+    for (int i = 0; i < nel; i++)
+      for (int j = 0; j < nel; j++)
+        psiM_inv(i, j) = psiM_all(i, j);
 
-        const T* const A(psiM_all.data());
-        const T* const Ainv(psiM_inv.data());
+    Invert(psiM_inv.data(), nel, nel);
 
-        // The matrix A is rectangular.  Ainv is the inverse of the square part
-        // of the matrix. The multiply of Ainv and the square part of A is just
-        // the identity. This multiply could be reduced to Ainv and the
-        // non-square part of A.
-        BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
-            T_mat.data(), nmo);
+    const T* const A(psiM_all.data());
+    const T* const Ainv(psiM_inv.data());
 
-        for (int i = 0; i < m_act_rot_inds.size(); i++) {
-            int kk = this->myVars.where(i);
-            if (kk >= 0) {
-                const int p = m_act_rot_inds.at(i).first;
-                const int q = m_act_rot_inds.at(i).second;
-                dratios(iat, kk) = T_mat(p, q) -
-                    T_orig(p, q); // dratio size is (nknot, num_vars)
-            }
-        }
+    // The matrix A is rectangular.  Ainv is the inverse of the square part
+    // of the matrix. The multiply of Ainv and the square part of A is just
+    // the identity. This multiply could be reduced to Ainv and the
+    // non-square part of A.
+    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
+
+    for (int i = 0; i < m_act_rot_inds.size(); i++)
+    {
+      int kk = this->myVars.where(i);
+      if (kk >= 0)
+      {
+        const int p      = m_act_rot_inds.at(i).first;
+        const int q      = m_act_rot_inds.at(i).second;
+        dratios(iat, kk) = T_mat(p, q) - T_orig(p, q); // dratio size is (nknot, num_vars)
+      }
     }
+  }
 }
 
 template<typename T>
@@ -749,47 +725,47 @@ void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSetT<T>& P,
                                             int FirstIndex,
                                             int LastIndex)
 {
-    const size_t nel = LastIndex - FirstIndex;
-    const size_t nmo = Phi->getOrbitalSetSize();
+  const size_t nel = LastIndex - FirstIndex;
+  const size_t nmo = Phi->getOrbitalSetSize();
 
-    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
 
-    psiM_inv.resize(nel, nel);
-    psiM_all.resize(nel, nmo);
-    dpsiM_all.resize(nel, nmo);
-    d2psiM_all.resize(nel, nmo);
+  psiM_inv.resize(nel, nel);
+  psiM_all.resize(nel, nmo);
+  dpsiM_all.resize(nel, nmo);
+  d2psiM_all.resize(nel, nmo);
 
-    psiM_inv = 0;
-    psiM_all = 0;
-    dpsiM_all = 0;
-    d2psiM_all = 0;
+  psiM_inv   = 0;
+  psiM_all   = 0;
+  dpsiM_all  = 0;
+  d2psiM_all = 0;
 
-    Phi->evaluate_notranspose(
-        P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
 
-    for (int i = 0; i < nel; i++)
-        for (int j = 0; j < nel; j++)
-            psiM_inv(i, j) = psiM_all(i, j);
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nel; j++)
+      psiM_inv(i, j) = psiM_all(i, j);
 
-    Invert(psiM_inv.data(), nel, nel);
+  Invert(psiM_inv.data(), nel, nel);
 
-    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
-    const T* const A(psiM_all.data());
-    const T* const Ainv(psiM_inv.data());
-    ValueMatrix T_mat;
-    T_mat.resize(nel, nmo);
-
-    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0),
-        T_mat.data(), nmo);
-
-    for (int i = 0; i < m_act_rot_inds.size(); i++) {
-        int kk = this->myVars.where(i);
-        if (kk >= 0) {
-            const int p = m_act_rot_inds.at(i).first;
-            const int q = m_act_rot_inds.at(i).second;
-            dlogpsi[kk] = T_mat(p, q);
-        }
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
+  const T* const A(psiM_all.data());
+  const T* const Ainv(psiM_inv.data());
+  ValueMatrix T_mat;
+  T_mat.resize(nel, nmo);
+
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, T(0.0), T_mat.data(), nmo);
+
+  for (int i = 0; i < m_act_rot_inds.size(); i++)
+  {
+    int kk = this->myVars.where(i);
+    if (kk >= 0)
+    {
+      const int p = m_act_rot_inds.at(i).first;
+      const int q = m_act_rot_inds.at(i).second;
+      dlogpsi[kk] = T_mat(p, q);
     }
+  }
 }
 
 template<typename T>
@@ -800,104 +776,102 @@ void RotatedSPOsT<T>::evaluateDerivatives(ParticleSetT<T>& P,
                                           const int& FirstIndex,
                                           const int& LastIndex)
 {
-    const size_t nel = LastIndex - FirstIndex;
-    const size_t nmo = Phi->getOrbitalSetSize();
-
-    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
-    myG_temp.resize(nel);
-    myG_J.resize(nel);
-    myL_temp.resize(nel);
-    myL_J.resize(nel);
-
-    myG_temp = 0;
-    myG_J = 0;
-    myL_temp = 0;
-    myL_J = 0;
-
-    Bbar.resize(nel, nmo);
-    psiM_inv.resize(nel, nel);
-    psiM_all.resize(nel, nmo);
-    dpsiM_all.resize(nel, nmo);
-    d2psiM_all.resize(nel, nmo);
-
-    Bbar = 0;
-    psiM_inv = 0;
-    psiM_all = 0;
-    dpsiM_all = 0;
-    d2psiM_all = 0;
-
-    Phi->evaluate_notranspose(
-        P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
-
+  const size_t nel = LastIndex - FirstIndex;
+  const size_t nmo = Phi->getOrbitalSetSize();
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART1
+  myG_temp.resize(nel);
+  myG_J.resize(nel);
+  myL_temp.resize(nel);
+  myL_J.resize(nel);
+
+  myG_temp = 0;
+  myG_J    = 0;
+  myL_temp = 0;
+  myL_J    = 0;
+
+  Bbar.resize(nel, nmo);
+  psiM_inv.resize(nel, nel);
+  psiM_all.resize(nel, nmo);
+  dpsiM_all.resize(nel, nmo);
+  d2psiM_all.resize(nel, nmo);
+
+  Bbar       = 0;
+  psiM_inv   = 0;
+  psiM_all   = 0;
+  dpsiM_all  = 0;
+  d2psiM_all = 0;
+
+  Phi->evaluate_notranspose(P, FirstIndex, LastIndex, psiM_all, dpsiM_all, d2psiM_all);
+
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nel; j++)
+      psiM_inv(i, j) = psiM_all(i, j);
+
+  Invert(psiM_inv.data(), nel, nel);
+
+  // current value of Gradient and Laplacian
+  //  gradient components
+  for (int a = 0; a < nel; a++)
     for (int i = 0; i < nel; i++)
-        for (int j = 0; j < nel; j++)
-            psiM_inv(i, j) = psiM_all(i, j);
-
-    Invert(psiM_inv.data(), nel, nel);
-
-    // current value of Gradient and Laplacian
-    //  gradient components
-    for (int a = 0; a < nel; a++)
-        for (int i = 0; i < nel; i++)
-            for (int k = 0; k < 3; k++)
-                myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k];
-    // laplacian components
-    for (int a = 0; a < nel; a++) {
-        for (int i = 0; i < nel; i++)
-            myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i);
-    }
-
-    // calculation of myG_J which will be used to represent
-    // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
-    // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The value of
-    // P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2 \psi}{\psi} and
-    // this is what myL_J will hold
-    for (int a = 0, iat = FirstIndex; a < nel; a++, iat++) {
-        myG_J[a] = (P.G[iat] - myG_temp[a]);
-        myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]);
-    }
-    // possibly replace wit BLAS calls
+      for (int k = 0; k < 3; k++)
+        myG_temp[a][k] += psiM_inv(i, a) * dpsiM_all(a, i)[k];
+  // laplacian components
+  for (int a = 0; a < nel; a++)
+  {
     for (int i = 0; i < nel; i++)
-        for (int j = 0; j < nmo; j++)
-            Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) +
-                myL_J[i] * psiM_all(i, j);
-
-    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
-    const ValueType* const A(psiM_all.data());
-    const ValueType* const Ainv(psiM_inv.data());
-    const ValueType* const B(Bbar.data());
-    ValueMatrix t;
-    ValueMatrix Y1;
-    ValueMatrix Y2;
-    ValueMatrix Y3;
-    ValueMatrix Y4;
-    t.resize(nel, nmo);
-    Y1.resize(nel, nel);
-    Y2.resize(nel, nmo);
-    Y3.resize(nel, nmo);
-    Y4.resize(nel, nmo);
-
-    BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel,
-        ValueType(0.0), t.data(), nmo);
-    BLAS::gemm('N', 'N', nel, nel, nel, ValueType(1.0), B, nmo, Ainv, nel,
-        ValueType(0.0), Y1.data(), nel);
-    BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), t.data(), nmo,
-        Y1.data(), nel, ValueType(0.0), Y2.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), B, nmo, Ainv, nel,
-        ValueType(0.0), Y3.data(), nmo);
-
-    // possibly replace with BLAS call
-    Y4 = Y3 - Y2;
-
-    for (int i = 0; i < m_act_rot_inds.size(); i++) {
-        int kk = this->myVars.where(i);
-        if (kk >= 0) {
-            const int p = m_act_rot_inds.at(i).first;
-            const int q = m_act_rot_inds.at(i).second;
-            dlogpsi[kk] += t(p, q);
-            dhpsioverpsi[kk] += ValueType(-0.5) * Y4(p, q);
-        }
+      myL_temp[a] += psiM_inv(i, a) * d2psiM_all(a, i);
+  }
+
+  // calculation of myG_J, which will be used to represent
+  // \frac{\nabla\psi_{J}}{\psi_{J}}; calculation of myL_J, which will be used
+  // to represent \frac{\nabla^2\psi_{J}}{\psi_{J}}. IMPORTANT NOTE: the value
+  // of P.L holds \nabla^2 ln[\psi] but we need \frac{\nabla^2 \psi}{\psi},
+  // and this is what myL_J will hold.
+  for (int a = 0, iat = FirstIndex; a < nel; a++, iat++)
+  {
+    myG_J[a] = (P.G[iat] - myG_temp[a]);
+    myL_J[a] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[a]);
+  }
+  // possibly replace with BLAS calls
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nmo; j++)
+      Bbar(i, j) = d2psiM_all(i, j) + 2 * dot(myG_J[i], dpsiM_all(i, j)) + myL_J[i] * psiM_all(i, j);
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~PART2
+  const ValueType* const A(psiM_all.data());
+  const ValueType* const Ainv(psiM_inv.data());
+  const ValueType* const B(Bbar.data());
+  ValueMatrix t;
+  ValueMatrix Y1;
+  ValueMatrix Y2;
+  ValueMatrix Y3;
+  ValueMatrix Y4;
+  t.resize(nel, nmo);
+  Y1.resize(nel, nel);
+  Y2.resize(nel, nmo);
+  Y3.resize(nel, nmo);
+  Y4.resize(nel, nmo);
+
+  BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), A, nmo, Ainv, nel, ValueType(0.0), t.data(), nmo);
+  BLAS::gemm('N', 'N', nel, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, ValueType(0.0), Y1.data(), nel);
+  BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), t.data(), nmo, Y1.data(), nel, ValueType(0.0), Y2.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nel, ValueType(1.0), B, nmo, Ainv, nel, ValueType(0.0), Y3.data(), nmo);
+
+  // possibly replace with BLAS call
+  Y4 = Y3 - Y2;
+
+  for (int i = 0; i < m_act_rot_inds.size(); i++)
+  {
+    int kk = this->myVars.where(i);
+    if (kk >= 0)
+    {
+      const int p = m_act_rot_inds.at(i).first;
+      const int q = m_act_rot_inds.at(i).second;
+      dlogpsi[kk] += t(p, q);
+      dhpsioverpsi[kk] += ValueType(-0.5) * Y4(p, q);
     }
+  }
 }
 
 template<typename T>
@@ -928,63 +902,68 @@ void RotatedSPOsT<T>::evaluateDerivatives(ParticleSetT<T>& P,
                                           const size_t NP2,
                                           const std::vector<std::vector<int>>& lookup_tbl)
 {
-    bool recalculate(false);
-    for (int k = 0; k < this->myVars.size(); ++k) {
-        int kk = this->myVars.where(k);
-        if (kk < 0)
-            continue;
-        if (optvars.recompute(kk))
-            recalculate = true;
+  bool recalculate(false);
+  for (int k = 0; k < this->myVars.size(); ++k)
+  {
+    int kk = this->myVars.where(k);
+    if (kk < 0)
+      continue;
+    if (optvars.recompute(kk))
+      recalculate = true;
+  }
+  if (recalculate)
+  {
+    typename ParticleSetT<T>::ParticleGradient myG_temp, myG_J;
+    typename ParticleSetT<T>::ParticleLaplacian myL_temp, myL_J;
+    const int NP = P.getTotalNum();
+    myG_temp.resize(NP);
+    myG_temp = 0.0;
+    myL_temp.resize(NP);
+    myL_temp = 0.0;
+    myG_J.resize(NP);
+    myG_J = 0.0;
+    myL_J.resize(NP);
+    myL_J            = 0.0;
+    const size_t nmo = Phi->getOrbitalSetSize();
+    const size_t nel = P.last(0) - P.first(0);
+
+    const T* restrict C_p = Coeff.data();
+    for (int i = 0; i < Coeff.size(); i++)
+    {
+      const size_t upC = C2node_up[i];
+      const size_t dnC = C2node_dn[i];
+      const T tmp1     = C_p[i] * detValues_dn[dnC];
+      const T tmp2     = C_p[i] * detValues_up[upC];
+      for (size_t k = 0, j = N1; k < NP1; k++, j++)
+      {
+        myG_temp[j] += tmp1 * grads_up(upC, k);
+        myL_temp[j] += tmp1 * lapls_up(upC, k);
+      }
+      for (size_t k = 0, j = N2; k < NP2; k++, j++)
+      {
+        myG_temp[j] += tmp2 * grads_dn(dnC, k);
+        myL_temp[j] += tmp2 * lapls_dn(dnC, k);
+      }
     }
-    if (recalculate) {
-        typename ParticleSetT<T>::ParticleGradient myG_temp, myG_J;
-        typename ParticleSetT<T>::ParticleLaplacian myL_temp, myL_J;
-        const int NP = P.getTotalNum();
-        myG_temp.resize(NP);
-        myG_temp = 0.0;
-        myL_temp.resize(NP);
-        myL_temp = 0.0;
-        myG_J.resize(NP);
-        myG_J = 0.0;
-        myL_J.resize(NP);
-        myL_J = 0.0;
-        const size_t nmo = Phi->getOrbitalSetSize();
-        const size_t nel = P.last(0) - P.first(0);
-
-        const T* restrict C_p = Coeff.data();
-        for (int i = 0; i < Coeff.size(); i++) {
-            const size_t upC = C2node_up[i];
-            const size_t dnC = C2node_dn[i];
-            const T tmp1 = C_p[i] * detValues_dn[dnC];
-            const T tmp2 = C_p[i] * detValues_up[upC];
-            for (size_t k = 0, j = N1; k < NP1; k++, j++) {
-                myG_temp[j] += tmp1 * grads_up(upC, k);
-                myL_temp[j] += tmp1 * lapls_up(upC, k);
-            }
-            for (size_t k = 0, j = N2; k < NP2; k++, j++) {
-                myG_temp[j] += tmp2 * grads_dn(dnC, k);
-                myL_temp[j] += tmp2 * lapls_dn(dnC, k);
-            }
-        }
 
-        myG_temp *= (1 / psiCurrent);
-        myL_temp *= (1 / psiCurrent);
-
-        // calculation of myG_J which will be used to represent
-        // \frac{\nabla\psi_{J}}{\psi_{J}} calculation of myL_J will be used to
-        // represent \frac{\nabla^2\psi_{J}}{\psi_{J}} IMPORTANT NOTE:  The
-        // value of P.L holds \nabla^2 ln[\psi] but we need  \frac{\nabla^2
-        // \psi}{\psi} and this is what myL_J will hold
-        for (int iat = 0; iat < (myL_temp.size()); iat++) {
-            myG_J[iat] = (P.G[iat] - myG_temp[iat]);
-            myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]);
-        }
+    myG_temp *= (1 / psiCurrent);
+    myL_temp *= (1 / psiCurrent);
 
-        table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo,
-            psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn,
-            grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up,
-            Minv_dn, B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl);
+    // calculation of myG_J, which will be used to represent
+    // \frac{\nabla\psi_{J}}{\psi_{J}}; calculation of myL_J, which will be
+    // used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}}. IMPORTANT NOTE: the
+    // value of P.L holds \nabla^2 ln[\psi] but we need
+    // \frac{\nabla^2 \psi}{\psi}, and this is what myL_J will hold.
+    for (int iat = 0; iat < (myL_temp.size()); iat++)
+    {
+      myG_J[iat] = (P.G[iat] - myG_temp[iat]);
+      myL_J[iat] = (P.L[iat] + dot(P.G[iat], P.G[iat]) - myL_temp[iat]);
     }
+
+    table_method_eval(dlogpsi, dhpsioverpsi, myL_J, myG_J, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn,
+                      detValues_up, detValues_dn, grads_up, grads_dn, lapls_up, lapls_dn, M_up, M_dn, Minv_up, Minv_dn,
+                      B_grad, B_lapl, detData_up, N1, N2, NP1, NP2, lookup_tbl);
+  }
 }
 
 template<typename T>
@@ -1004,40 +983,54 @@ void RotatedSPOsT<T>::evaluateDerivativesWF(ParticleSetT<T>& P,
                                             const std::vector<int>& detData_up,
                                             const std::vector<std::vector<int>>& lookup_tbl)
 {
-    bool recalculate(false);
-    for (int k = 0; k < this->myVars.size(); ++k) {
-        int kk = this->myVars.where(k);
-        if (kk < 0)
-            continue;
-        if (optvars.recompute(kk))
-            recalculate = true;
-    }
-    if (recalculate) {
-        const size_t nmo = Phi->getOrbitalSetSize();
-        const size_t nel = P.last(0) - P.first(0);
+  bool recalculate(false);
+  for (int k = 0; k < this->myVars.size(); ++k)
+  {
+    int kk = this->myVars.where(k);
+    if (kk < 0)
+      continue;
+    if (optvars.recompute(kk))
+      recalculate = true;
+  }
+  if (recalculate)
+  {
+    const size_t nmo = Phi->getOrbitalSetSize();
+    const size_t nel = P.last(0) - P.first(0);
 
-        table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up,
-            C2node_dn, detValues_up, detValues_dn, M_up, M_dn, Minv_up, Minv_dn,
-            detData_up, lookup_tbl);
-    }
+    table_method_evalWF(dlogpsi, nel, nmo, psiCurrent, Coeff, C2node_up, C2node_dn, detValues_up, detValues_dn, M_up,
+                        M_dn, Minv_up, Minv_dn, detData_up, lookup_tbl);
+  }
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::table_method_eval(Vector<T>& dlogpsi, Vector<T>& dhpsioverpsi,
-    const typename ParticleSetT<T>::ParticleLaplacian& myL_J,
-    const typename ParticleSetT<T>::ParticleGradient& myG_J, const size_t nel,
-    const size_t nmo, const T& psiCurrent, const std::vector<T>& Coeff,
-    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
-    const ValueVector& detValues_up, const ValueVector& detValues_dn,
-    const GradMatrix& grads_up, const GradMatrix& grads_dn,
-    const ValueMatrix& lapls_up, const ValueMatrix& lapls_dn,
-    const ValueMatrix& M_up, const ValueMatrix& M_dn,
-    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
-    const GradMatrix& B_grad, const ValueMatrix& B_lapl,
-    const std::vector<int>& detData_up, const size_t N1, const size_t N2,
-    const size_t NP1, const size_t NP2,
-    const std::vector<std::vector<int>>& lookup_tbl)
+template<typename T>
+void RotatedSPOsT<T>::table_method_eval(Vector<T>& dlogpsi,
+                                        Vector<T>& dhpsioverpsi,
+                                        const typename ParticleSetT<T>::ParticleLaplacian& myL_J,
+                                        const typename ParticleSetT<T>::ParticleGradient& myG_J,
+                                        const size_t nel,
+                                        const size_t nmo,
+                                        const T& psiCurrent,
+                                        const std::vector<T>& Coeff,
+                                        const std::vector<size_t>& C2node_up,
+                                        const std::vector<size_t>& C2node_dn,
+                                        const ValueVector& detValues_up,
+                                        const ValueVector& detValues_dn,
+                                        const GradMatrix& grads_up,
+                                        const GradMatrix& grads_dn,
+                                        const ValueMatrix& lapls_up,
+                                        const ValueMatrix& lapls_dn,
+                                        const ValueMatrix& M_up,
+                                        const ValueMatrix& M_dn,
+                                        const ValueMatrix& Minv_up,
+                                        const ValueMatrix& Minv_dn,
+                                        const GradMatrix& B_grad,
+                                        const ValueMatrix& B_lapl,
+                                        const std::vector<int>& detData_up,
+                                        const size_t N1,
+                                        const size_t N2,
+                                        const size_t NP1,
+                                        const size_t NP2,
+                                        const std::vector<std::vector<int>>& lookup_tbl)
 /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 GUIDE TO THE MATICES BEING BUILT
 ----------------------------------------------
@@ -1188,667 +1181,633 @@ to each element will be called B_bar
 $
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
 {
-    ValueMatrix Table;
-    ValueMatrix Bbar;
-    ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26;
-    ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T,
-        MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T;
-
-    Table.resize(nel, nmo);
-
-    Bbar.resize(nel, nmo);
-
-    Y1.resize(nel, nel);
-    Y2.resize(nel, nmo);
-    Y3.resize(nel, nmo);
-    Y4.resize(nel, nmo);
-
-    pK1.resize(nmo, nel);
-    K1T.resize(nmo, nmo);
-    TK1T.resize(nel, nmo);
-
-    pK2.resize(nmo, nel);
-    K2AiB.resize(nmo, nmo);
-    TK2AiB.resize(nel, nmo);
-    K2XA.resize(nmo, nmo);
-    TK2XA.resize(nel, nmo);
-    K2T.resize(nmo, nmo);
-    TK2T.resize(nel, nmo);
-    MK2T.resize(nel, nmo);
-
-    pK3.resize(nmo, nel);
-    K3T.resize(nmo, nmo);
-    TK3T.resize(nel, nmo);
-
-    pK5.resize(nmo, nel);
-    K5T.resize(nmo, nmo);
-    TK5T.resize(nel, nmo);
-
-    const int parameters_size(m_act_rot_inds.size());
-    const int parameter_start_index(0);
-
-    const size_t num_unique_up_dets(detValues_up.size());
-    const size_t num_unique_dn_dets(detValues_dn.size());
-
-    const T* restrict cptr = Coeff.data();
-    const size_t nc = Coeff.size();
-    const size_t* restrict upC(C2node_up.data());
-    const size_t* restrict dnC(C2node_dn.data());
-    // B_grad holds the gradient operator
-    // B_lapl holds the laplacian operator
-    // B_bar will hold our special O operator
-
-    const int offset1(N1);
-    const int offset2(N2);
-    const int NPother(NP2);
-
-    T* T_(Table.data());
-
-    // possibly replace wit BLAS calls
-    for (int i = 0; i < nel; i++)
-        for (int j = 0; j < nmo; j++)
-            Bbar(i, j) = B_lapl(i, j) +
-                2.0 * dot(myG_J[i + offset1], B_grad(i, j)) +
-                myL_J[i + offset1] * M_up(i, j);
-
-    const T* restrict B(Bbar.data());
-    const T* restrict A(M_up.data());
-    const T* restrict Ainv(Minv_up.data());
-    // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
-    // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
-    // THIS CASE
-    //  The T matrix should be calculated and stored for use
-    //  T = A^{-1} \widetilde A
-    // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
-    // and that BLAS commands assume column major
-    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel,
-        RealType(0.0), T_, nmo);
-
-    BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel,
-        RealType(0.0), Y1.data(), nel);
-    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel,
-        RealType(0.0), Y2.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel,
-        RealType(0.0), Y3.data(), nmo);
-
-    // possibly replace with BLAS call
-    Y4 = Y3 - Y2;
-
-    // Need to create the constants: (Oi, const0, const1, const2)to take
-    // advantage of minimal BLAS commands; Oi is the special operator applied to
-    // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}}
-    //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as
-    //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow}
-    std::vector<RealType> Oi(num_unique_dn_dets);
-
-    for (int index = 0; index < num_unique_dn_dets; index++)
-        for (int iat = 0; iat < NPother; iat++)
-            Oi[index] += lapls_dn(index, iat) +
-                2.0 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) +
-                myL_J[offset2 + iat] * detValues_dn[index];
-
-    // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
-    // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const1 =
-    // C_{0}*\hat{O} det(A_{0\downarrow})+\sum_{i=1}
-    // C_{i}*\hat{O}det(A_{i\downarrow})* det(\alpha_{i\uparrow}) const2 =
-    // \sum_{i=1} C_{i}*det(A_{i\downarrow})*
-    // Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i})
-    RealType const0(0.0), const1(0.0), const2(0.0);
-    for (size_t i = 0; i < nc; ++i) {
-        const RealType c = cptr[i];
-        const size_t up = upC[i];
-        const size_t down = dnC[i];
-
-        const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
-        const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]);
+  ValueMatrix Table;
+  ValueMatrix Bbar;
+  ValueMatrix Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y11, Y23, Y24, Y25, Y26;
+  ValueMatrix pK1, K1T, TK1T, pK2, K2AiB, TK2AiB, K2XA, TK2XA, K2T, TK2T, MK2T, pK3, K3T, TK3T, pK5, K5T, TK5T;
+
+  Table.resize(nel, nmo);
+
+  Bbar.resize(nel, nmo);
+
+  Y1.resize(nel, nel);
+  Y2.resize(nel, nmo);
+  Y3.resize(nel, nmo);
+  Y4.resize(nel, nmo);
+
+  pK1.resize(nmo, nel);
+  K1T.resize(nmo, nmo);
+  TK1T.resize(nel, nmo);
+
+  pK2.resize(nmo, nel);
+  K2AiB.resize(nmo, nmo);
+  TK2AiB.resize(nel, nmo);
+  K2XA.resize(nmo, nmo);
+  TK2XA.resize(nel, nmo);
+  K2T.resize(nmo, nmo);
+  TK2T.resize(nel, nmo);
+  MK2T.resize(nel, nmo);
+
+  pK3.resize(nmo, nel);
+  K3T.resize(nmo, nmo);
+  TK3T.resize(nel, nmo);
+
+  pK5.resize(nmo, nel);
+  K5T.resize(nmo, nmo);
+  TK5T.resize(nel, nmo);
+
+  const int parameters_size(m_act_rot_inds.size());
+  const int parameter_start_index(0);
+
+  const size_t num_unique_up_dets(detValues_up.size());
+  const size_t num_unique_dn_dets(detValues_dn.size());
+
+  const T* restrict cptr = Coeff.data();
+  const size_t nc        = Coeff.size();
+  const size_t* restrict upC(C2node_up.data());
+  const size_t* restrict dnC(C2node_dn.data());
+  // B_grad holds the gradient operator
+  // B_lapl holds the laplacian operator
+  // B_bar will hold our special O operator
+
+  const int offset1(N1);
+  const int offset2(N2);
+  const int NPother(NP2);
+
+  T* T_(Table.data());
+
+  // possibly replace with BLAS calls
+  for (int i = 0; i < nel; i++)
+    for (int j = 0; j < nmo; j++)
+      Bbar(i, j) = B_lapl(i, j) + 2.0 * dot(myG_J[i + offset1], B_grad(i, j)) + myL_J[i + offset1] * M_up(i, j);
+
+  const T* restrict B(Bbar.data());
+  const T* restrict A(M_up.data());
+  const T* restrict Ainv(Minv_up.data());
+  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+  // THIS CASE
+  //  The T matrix should be calculated and stored for use
+  //  T = A^{-1} \widetilde A
+  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
+  // and that BLAS commands assume column major
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
+
+  BLAS::gemm('N', 'N', nel, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y1.data(), nel);
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), T_, nmo, Y1.data(), nel, RealType(0.0), Y2.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nel, T(1.0), B, nmo, Ainv, nel, RealType(0.0), Y3.data(), nmo);
+
+  // possibly replace with BLAS call
+  Y4 = Y3 - Y2;
+
+  // Need to create the constants: (Oi, const0, const1, const2)to take
+  // advantage of minimal BLAS commands; Oi is the special operator applied to
+  // the slater matrix "A subscript i" from the total CI expansion \hat{O_{i}}
+  //= \hat{O}D_{i} with D_{i}=det(A_{i}) and Multi-Slater component defined as
+  //\sum_{i=0} C_{i} D_{i\uparrow}D_{i\downarrow}
+  std::vector<RealType> Oi(num_unique_dn_dets);
+
+  for (int index = 0; index < num_unique_dn_dets; index++)
+    for (int iat = 0; iat < NPother; iat++)
+      Oi[index] += lapls_dn(index, iat) + 2.0 * dot(grads_dn(index, iat), myG_J[offset2 + iat]) +
+          myL_J[offset2 + iat] * detValues_dn[index];
+
+  // const0 = C_{0}*det(A_{0\downarrow})
+  //          + \sum_{i=1} C_{i}*det(A_{i\downarrow})*det(\alpha_{i\uparrow})
+  // const1 = C_{0}*\hat{O}det(A_{0\downarrow})
+  //          + \sum_{i=1} C_{i}*\hat{O}det(A_{i\downarrow})*det(\alpha_{i\uparrow})
+  // const2 = \sum_{i=1} C_{i}*det(A_{i\downarrow})
+  //          * Tr[\alpha_{i}^{-1}M_{i}]*det(\alpha_{i})
+  RealType const0(0.0), const1(0.0), const2(0.0);
+  for (size_t i = 0; i < nc; ++i)
+  {
+    const RealType c  = cptr[i];
+    const size_t up   = upC[i];
+    const size_t down = dnC[i];
+
+    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
+    const1 += c * Oi[down] * (detValues_up[up] / detValues_up[0]);
+  }
+
+  std::fill(pK1.begin(), pK1.end(), 0.0);
+  std::fill(pK2.begin(), pK2.end(), 0.0);
+  std::fill(pK3.begin(), pK3.end(), 0.0);
+  std::fill(pK5.begin(), pK5.end(), 0.0);
+
+  // Now we are going to loop through all unique determinants.
+  // The few lines above are for the reference matrix contribution.
+  // Although I start the loop below from index 0, the loop only performs
+  // actions when the index is >= 1. The detData object contains all the
+  // information about the P^T and Q matrices (projection matrices) needed in
+  // the table method
+  const int* restrict data_it = detData_up.data();
+  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
+  {
+    const int k = data_it[datum];
+
+    if (k == 0)
+    {
+      datum += 3 * k + 1;
     }
 
-    std::fill(pK1.begin(), pK1.end(), 0.0);
-    std::fill(pK2.begin(), pK2.end(), 0.0);
-    std::fill(pK3.begin(), pK3.end(), 0.0);
-    std::fill(pK5.begin(), pK5.end(), 0.0);
-
-    // Now we are going to loop through all unique determinants.
-    // The few lines above are for the reference matrix contribution.
-    // Although I start the loop below from index 0, the loop only performs
-    // actions when the index is >= 1 the detData object contains all the
-    // information about the P^T and Q matrices (projection matrices) needed in
-    // the table method
-    const int* restrict data_it = detData_up.data();
-    for (int index = 0, datum = 0; index < num_unique_up_dets; index++) {
-        const int k = data_it[datum];
-
-        if (k == 0) {
-            datum += 3 * k + 1;
-        }
-
-        else {
-            // Number of rows and cols of P^T
-            const int prows = k;
-            const int pcols = nel;
-            // Number of rows and cols of Q
-            const int qrows = nmo;
-            const int qcols = k;
-
-            Y5.resize(nel, k);
-            Y6.resize(k, k);
-
-            // Any matrix multiplication of P^T or Q is simply a projection
-            // Explicit matrix multiplication can be avoided; instead column or
-            // row copying can be done BlAS::copy(size of col/row being copied,
-            //            Matrix pointer + place to begin copying,
-            //            storage spacing (number of elements btw next row/col
-            //            element), Pointer to resultant matrix + place to begin
-            //            pasting, storage spacing of resultant matrix)
-            // For example the next 4 lines is the matrix multiplication of T*Q
-            // = Y5
-            std::fill(Y5.begin(), Y5.end(), 0.0);
-            for (int i = 0; i < k; i++) {
-                BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo,
-                    Y5.data() + i, k);
-            }
-
-            std::fill(Y6.begin(), Y6.end(), 0.0);
-            for (int i = 0; i < k; i++) {
-                BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1,
-                    (Y6.data() + i * k), 1);
-            }
-
-            Vector<T> WS;
-            Vector<IndexType> Piv;
-            WS.resize(k);
-            Piv.resize(k);
-            std::complex<RealType> logdet = 0.0;
-            InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
-
-            Y11.resize(nel, k);
-            Y23.resize(k, k);
-            Y24.resize(k, k);
-            Y25.resize(k, k);
-            Y26.resize(k, nel);
-
-            std::fill(Y11.begin(), Y11.end(), 0.0);
-            for (int i = 0; i < k; i++) {
-                BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo,
-                    Y11.data() + i, k);
-            }
-
-            std::fill(Y23.begin(), Y23.end(), 0.0);
-            for (int i = 0; i < k; i++) {
-                BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1,
-                    (Y23.data() + i * k), 1);
-            }
-
-            BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k,
-                Y6.data(), k, RealType(0.0), Y24.data(), k);
-            BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k,
-                Y24.data(), k, RealType(0.0), Y25.data(), k);
-
-            Y26.resize(k, nel);
-
-            std::fill(Y26.begin(), Y26.end(), 0.0);
-            for (int i = 0; i < k; i++) {
-                BLAS::copy(k, Y25.data() + i, k,
-                    Y26.data() + (data_it[datum + 1 + i]), nel);
-            }
-
-            Y7.resize(k, nel);
-
-            std::fill(Y7.begin(), Y7.end(), 0.0);
-            for (int i = 0; i < k; i++) {
-                BLAS::copy(k, Y6.data() + i, k,
-                    Y7.data() + (data_it[datum + 1 + i]), nel);
-            }
-
-            // c_Tr_AlphaI_MI is a constant contributing to constant const2
-            // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)]
-            RealType c_Tr_AlphaI_MI = 0.0;
-            for (int i = 0; i < k; i++) {
-                c_Tr_AlphaI_MI += Y24(i, i);
-            }
-
-            for (int p = 0; p < lookup_tbl[index].size(); p++) {
-                // el_p is the element position that contains information about
-                // the CI coefficient, and det up/dn values associated with the
-                // current unique determinant
-                const int el_p(lookup_tbl[index][p]);
-                const RealType c = cptr[el_p];
-                const size_t up = upC[el_p];
-                const size_t down = dnC[el_p];
-
-                const RealType alpha_1(c * detValues_dn[down] *
-                    detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI);
-                const RealType alpha_2(c * detValues_dn[down] *
-                    detValues_up[up] / detValues_up[0]);
-                const RealType alpha_3(
-                    c * Oi[down] * detValues_up[up] / detValues_up[0]);
-
-                const2 += alpha_1;
-
-                for (int i = 0; i < k; i++) {
-                    BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1,
-                        pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-                    BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1,
-                        pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-                    BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1,
-                        pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-                    BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1,
-                        pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-                }
-            }
-            datum += 3 * k + 1;
+    else
+    {
+      // Number of rows and cols of P^T
+      const int prows = k;
+      const int pcols = nel;
+      // Number of rows and cols of Q
+      const int qrows = nmo;
+      const int qcols = k;
+
+      Y5.resize(nel, k);
+      Y6.resize(k, k);
+
+      // Any matrix multiplication of P^T or Q is simply a projection
+      // Explicit matrix multiplication can be avoided; instead column or
+      // row copying can be done with BLAS::copy(size of col/row being copied,
+      //            Matrix pointer + place to begin copying,
+      //            storage spacing (number of elements btw next row/col
+      //            element), Pointer to resultant matrix + place to begin
+      //            pasting, storage spacing of resultant matrix)
+      // For example the next 4 lines is the matrix multiplication of T*Q
+      // = Y5
+      std::fill(Y5.begin(), Y5.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
+      }
+
+      std::fill(Y6.begin(), Y6.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
+      }
+
+      Vector<T> WS;
+      Vector<IndexType> Piv;
+      WS.resize(k);
+      Piv.resize(k);
+      std::complex<RealType> logdet = 0.0;
+      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+
+      Y11.resize(nel, k);
+      Y23.resize(k, k);
+      Y24.resize(k, k);
+      Y25.resize(k, k);
+      Y26.resize(k, nel);
+
+      std::fill(Y11.begin(), Y11.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(nel, Y4.data() + (data_it[datum + 1 + k + i]), nmo, Y11.data() + i, k);
+      }
+
+      std::fill(Y23.begin(), Y23.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y11.data() + (data_it[datum + 1 + i]) * k, 1, (Y23.data() + i * k), 1);
+      }
+
+      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y23.data(), k, Y6.data(), k, RealType(0.0), Y24.data(), k);
+      BLAS::gemm('N', 'N', k, k, k, RealType(1.0), Y6.data(), k, Y24.data(), k, RealType(0.0), Y25.data(), k);
+
+      Y26.resize(k, nel);
+
+      std::fill(Y26.begin(), Y26.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y25.data() + i, k, Y26.data() + (data_it[datum + 1 + i]), nel);
+      }
+
+      Y7.resize(k, nel);
+
+      std::fill(Y7.begin(), Y7.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
+      }
+
+      // c_Tr_AlphaI_MI is a constant contributing to constant const2
+      // c_Tr_AlphaI_MI = Tr[\alpha_{I}^{-1}(P^{T}\widetilde{M} Q)]
+      RealType c_Tr_AlphaI_MI = 0.0;
+      for (int i = 0; i < k; i++)
+      {
+        c_Tr_AlphaI_MI += Y24(i, i);
+      }
+
+      for (int p = 0; p < lookup_tbl[index].size(); p++)
+      {
+        // el_p is the element position that contains information about
+        // the CI coefficient, and det up/dn values associated with the
+        // current unique determinant
+        const int el_p(lookup_tbl[index][p]);
+        const RealType c  = cptr[el_p];
+        const size_t up   = upC[el_p];
+        const size_t down = dnC[el_p];
+
+        const RealType alpha_1(c * detValues_dn[down] * detValues_up[up] / detValues_up[0] * c_Tr_AlphaI_MI);
+        const RealType alpha_2(c * detValues_dn[down] * detValues_up[up] / detValues_up[0]);
+        const RealType alpha_3(c * Oi[down] * detValues_up[up] / detValues_up[0]);
+
+        const2 += alpha_1;
+
+        for (int i = 0; i < k; i++)
+        {
+          BLAS::axpy(nel, alpha_1, Y7.data() + i * nel, 1, pK1.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+          BLAS::axpy(nel, alpha_2, Y7.data() + i * nel, 1, pK2.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+          BLAS::axpy(nel, alpha_3, Y7.data() + i * nel, 1, pK3.data() + (data_it[datum + 1 + k + i]) * nel, 1);
+          BLAS::axpy(nel, alpha_2, Y26.data() + i * nel, 1, pK5.data() + (data_it[datum + 1 + k + i]) * nel, 1);
         }
+      }
+      datum += 3 * k + 1;
     }
-
-    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel,
-        RealType(0.0), K1T.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo,
-        RealType(0.0), TK1T.data(), nmo);
-
-    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo,
-        pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_,
-        nmo, RealType(0.0), TK2AiB.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo,
-        pK2.data(), nel, RealType(0.0), K2XA.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_,
-        nmo, RealType(0.0), TK2XA.data(), nmo);
-
-    BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo,
-        pK2.data(), nel, RealType(0.0), K2T.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo,
-        RealType(0.0), TK2T.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo,
-        Y4.data(), nmo, RealType(0.0), MK2T.data(), nmo);
-
-    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel,
-        RealType(0.0), K3T.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo,
-        RealType(0.0), TK3T.data(), nmo);
-
-    BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel,
-        RealType(0.0), K5T.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo,
-        RealType(0.0), TK5T.data(), nmo);
-
-    for (int mu = 0, k = parameter_start_index;
-         k < (parameter_start_index + parameters_size); k++, mu++) {
-        int kk = this->myVars.where(k);
-        if (kk >= 0) {
-            const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
-            if (i <= nel - 1 && j > nel - 1) {
-                dhpsioverpsi[kk] += T(-0.5 * Y4(i, j) -
-                    0.5 *
-                        (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) -
-                            K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) +
-                            K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) -
-                            K1T(j, i) - TK1T(i, j) -
-                            const2 / const1 * K2T(i, j) +
-                            const2 / const1 * K2T(j, i) +
-                            const2 / const1 * TK2T(i, j) + K3T(i, j) -
-                            K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) +
-                            TK2T(i, j)));
-            }
-            else if (i <= nel - 1 && j <= nel - 1) {
-                dhpsioverpsi[kk] += T(-0.5 * (Y4(i, j) - Y4(j, i)) -
-                    0.5 *
-                        (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) +
-                            K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) +
-                            TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) +
-                            TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) +
-                            MK2T(j, i) + K1T(i, j) - K1T(j, i) - TK1T(i, j) +
-                            TK1T(j, i) - const2 / const1 * K2T(i, j) +
-                            const2 / const1 * K2T(j, i) +
-                            const2 / const1 * TK2T(i, j) -
-                            const2 / const1 * TK2T(j, i) + K3T(i, j) -
-                            K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) +
-                            K2T(j, i) + TK2T(i, j) - TK2T(j, i)));
-            }
-            else {
-                dhpsioverpsi[kk] += T(-0.5 *
-                    (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) -
-                        K2XA(i, j) + K2XA(j, i)
-
-                        + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) +
-                        const2 / const1 * K2T(j, i) + K3T(i, j) - K3T(j, i) -
-                        K2T(i, j) + K2T(j, i)));
-            }
-        }
+  }
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK1.data(), nel, RealType(0.0), K1T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K1T.data(), nmo, T_, nmo, RealType(0.0), TK1T.data(), nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y3.data(), nmo, pK2.data(), nel, RealType(0.0), K2AiB.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2AiB.data(), nmo, T_, nmo, RealType(0.0), TK2AiB.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, Y2.data(), nmo, pK2.data(), nel, RealType(0.0), K2XA.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2XA.data(), nmo, T_, nmo, RealType(0.0), TK2XA.data(), nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, const1 / (const0 * const0), T_, nmo, pK2.data(), nel, RealType(0.0), K2T.data(),
+             nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K2T.data(), nmo, T_, nmo, RealType(0.0), TK2T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, const0 / const1, K2T.data(), nmo, Y4.data(), nmo, RealType(0.0), MK2T.data(),
+             nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK3.data(), nel, RealType(0.0), K3T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K3T.data(), nmo, T_, nmo, RealType(0.0), TK3T.data(), nmo);
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, 1.0 / const0, T_, nmo, pK5.data(), nel, RealType(0.0), K5T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K5T.data(), nmo, T_, nmo, RealType(0.0), TK5T.data(), nmo);
+
+  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
+  {
+    int kk = this->myVars.where(k);
+    if (kk >= 0)
+    {
+      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+      if (i <= nel - 1 && j > nel - 1)
+      {
+        dhpsioverpsi[kk] +=
+            T(-0.5 * Y4(i, j) -
+              0.5 *
+                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) - K2XA(i, j) +
+                   K2XA(j, i) + TK2XA(i, j) - MK2T(i, j) + K1T(i, j) - K1T(j, i) - TK1T(i, j) -
+                   const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) +
+                   K3T(i, j) - K3T(j, i) - TK3T(i, j) - K2T(i, j) + K2T(j, i) + TK2T(i, j)));
+      }
+      else if (i <= nel - 1 && j <= nel - 1)
+      {
+        dhpsioverpsi[kk] +=
+            T(-0.5 * (Y4(i, j) - Y4(j, i)) -
+              0.5 *
+                  (-K5T(i, j) + K5T(j, i) + TK5T(i, j) - TK5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - TK2AiB(i, j) +
+                   TK2AiB(j, i) - K2XA(i, j) + K2XA(j, i) + TK2XA(i, j) - TK2XA(j, i) - MK2T(i, j) + MK2T(j, i) +
+                   K1T(i, j) - K1T(j, i) - TK1T(i, j) + TK1T(j, i) - const2 / const1 * K2T(i, j) +
+                   const2 / const1 * K2T(j, i) + const2 / const1 * TK2T(i, j) - const2 / const1 * TK2T(j, i) +
+                   K3T(i, j) - K3T(j, i) - TK3T(i, j) + TK3T(j, i) - K2T(i, j) + K2T(j, i) + TK2T(i, j) - TK2T(j, i)));
+      }
+      else
+      {
+        dhpsioverpsi[kk] += T(-0.5 *
+                              (-K5T(i, j) + K5T(j, i) + K2AiB(i, j) - K2AiB(j, i) - K2XA(i, j) + K2XA(j, i)
+
+                               + K1T(i, j) - K1T(j, i) - const2 / const1 * K2T(i, j) + const2 / const1 * K2T(j, i) +
+                               K3T(i, j) - K3T(j, i) - K2T(i, j) + K2T(j, i)));
+      }
     }
+  }
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::table_method_evalWF(Vector<T>& dlogpsi, const size_t nel,
-    const size_t nmo, const T& psiCurrent, const std::vector<T>& Coeff,
-    const std::vector<size_t>& C2node_up, const std::vector<size_t>& C2node_dn,
-    const ValueVector& detValues_up, const ValueVector& detValues_dn,
-    const ValueMatrix& M_up, const ValueMatrix& M_dn,
-    const ValueMatrix& Minv_up, const ValueMatrix& Minv_dn,
-    const std::vector<int>& detData_up,
-    const std::vector<std::vector<int>>& lookup_tbl)
+template<typename T>
+void RotatedSPOsT<T>::table_method_evalWF(Vector<T>& dlogpsi,
+                                          const size_t nel,
+                                          const size_t nmo,
+                                          const T& psiCurrent,
+                                          const std::vector<T>& Coeff,
+                                          const std::vector<size_t>& C2node_up,
+                                          const std::vector<size_t>& C2node_dn,
+                                          const ValueVector& detValues_up,
+                                          const ValueVector& detValues_dn,
+                                          const ValueMatrix& M_up,
+                                          const ValueMatrix& M_dn,
+                                          const ValueMatrix& Minv_up,
+                                          const ValueMatrix& Minv_dn,
+                                          const std::vector<int>& detData_up,
+                                          const std::vector<std::vector<int>>& lookup_tbl)
 {
-    ValueMatrix Table;
-    ValueMatrix Y5, Y6, Y7;
-    ValueMatrix pK4, K4T, TK4T;
-
-    Table.resize(nel, nmo);
-
-    Bbar.resize(nel, nmo);
-
-    pK4.resize(nmo, nel);
-    K4T.resize(nmo, nmo);
-    TK4T.resize(nel, nmo);
-
-    const int parameters_size(m_act_rot_inds.size());
-    const int parameter_start_index(0);
-
-    const size_t num_unique_up_dets(detValues_up.size());
-    const size_t num_unique_dn_dets(detValues_dn.size());
-
-    const T* restrict cptr = Coeff.data();
-    const size_t nc = Coeff.size();
-    const size_t* restrict upC(C2node_up.data());
-    const size_t* restrict dnC(C2node_dn.data());
-
-    T* T_(Table.data());
-
-    const T* restrict A(M_up.data());
-    const T* restrict Ainv(Minv_up.data());
-    // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
-    // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
-    // THIS CASE
-    //  The T matrix should be calculated and stored for use
-    //  T = A^{-1} \widetilde A
-    // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
-    // and that BLAS commands assume column major
-    BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel,
-        RealType(0.0), T_, nmo);
-
-    // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
-    // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow})
-    RealType const0(0.0), const1(0.0), const2(0.0);
-    for (size_t i = 0; i < nc; ++i) {
-        const RealType c = cptr[i];
-        const size_t up = upC[i];
-        const size_t down = dnC[i];
-
-        const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
+  ValueMatrix Table;
+  ValueMatrix Y5, Y6, Y7;
+  ValueMatrix pK4, K4T, TK4T;
+
+  Table.resize(nel, nmo);
+
+  Bbar.resize(nel, nmo);
+
+  pK4.resize(nmo, nel);
+  K4T.resize(nmo, nmo);
+  TK4T.resize(nel, nmo);
+
+  const int parameters_size(m_act_rot_inds.size());
+  const int parameter_start_index(0);
+
+  const size_t num_unique_up_dets(detValues_up.size());
+  const size_t num_unique_dn_dets(detValues_dn.size());
+
+  const T* restrict cptr = Coeff.data();
+  const size_t nc        = Coeff.size();
+  const size_t* restrict upC(C2node_up.data());
+  const size_t* restrict dnC(C2node_dn.data());
+
+  T* T_(Table.data());
+
+  const T* restrict A(M_up.data());
+  const T* restrict Ainv(Minv_up.data());
+  // IMPORTANT NOTE: THE Dets[0]->psiMinv OBJECT DOES NOT HOLD THE INVERSE IF
+  // THE MULTIDIRACDETERMINANTBASE ONLY CONTAINS ONE ELECTRON. NEED A FIX FOR
+  // THIS CASE
+  //  The T matrix should be calculated and stored for use
+  //  T = A^{-1} \widetilde A
+  // REMINDER: that the ValueMatrix "matrix" stores data in a row major order
+  // and that BLAS commands assume column major
+  BLAS::gemm('N', 'N', nmo, nel, nel, RealType(1.0), A, nmo, Ainv, nel, RealType(0.0), T_, nmo);
+
+  // const0 = C_{0}*det(A_{0\downarrow})+\sum_{i=1}
+  // C_{i}*det(A_{i\downarrow})* det(\alpha_{i\uparrow})
+  RealType const0(0.0), const1(0.0), const2(0.0);
+  for (size_t i = 0; i < nc; ++i)
+  {
+    const RealType c  = cptr[i];
+    const size_t up   = upC[i];
+    const size_t down = dnC[i];
+
+    const0 += c * detValues_dn[down] * (detValues_up[up] / detValues_up[0]);
+  }
+
+  std::fill(pK4.begin(), pK4.end(), 0.0);
+
+  // Now we are going to loop through all unique determinants.
+  // The few lines above are for the reference matrix contribution.
+  // Although I start the loop below from index 0, the loop only performs
+  // actions when the index is >= 1. The detData object contains all the
+  // information about the P^T and Q matrices (projection matrices) needed in
+  // the table method
+  const int* restrict data_it = detData_up.data();
+  for (int index = 0, datum = 0; index < num_unique_up_dets; index++)
+  {
+    const int k = data_it[datum];
+
+    if (k == 0)
+    {
+      datum += 3 * k + 1;
     }
 
-    std::fill(pK4.begin(), pK4.end(), 0.0);
-
-    // Now we are going to loop through all unique determinants.
-    // The few lines above are for the reference matrix contribution.
-    // Although I start the loop below from index 0, the loop only performs
-    // actions when the index is >= 1 the detData object contains all the
-    // information about the P^T and Q matrices (projection matrices) needed in
-    // the table method
-    const int* restrict data_it = detData_up.data();
-    for (int index = 0, datum = 0; index < num_unique_up_dets; index++) {
-        const int k = data_it[datum];
-
-        if (k == 0) {
-            datum += 3 * k + 1;
-        }
-
-        else {
-            // Number of rows and cols of P^T
-            const int prows = k;
-            const int pcols = nel;
-            // Number of rows and cols of Q
-            const int qrows = nmo;
-            const int qcols = k;
-
-            Y5.resize(nel, k);
-            Y6.resize(k, k);
-
-            // Any matrix multiplication of P^T or Q is simply a projection
-            // Explicit matrix multiplication can be avoided; instead column or
-            // row copying can be done BlAS::copy(size of col/row being copied,
-            //            Matrix pointer + place to begin copying,
-            //            storage spacing (number of elements btw next row/col
-            //            element), Pointer to resultant matrix + place to begin
-            //            pasting, storage spacing of resultant matrix)
-            // For example the next 4 lines is the matrix multiplication of T*Q
-            // = Y5
-            std::fill(Y5.begin(), Y5.end(), 0.0);
-            for (int i = 0; i < k; i++) {
-                BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo,
-                    Y5.data() + i, k);
-            }
-
-            std::fill(Y6.begin(), Y6.end(), 0.0);
-            for (int i = 0; i < k; i++) {
-                BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1,
-                    (Y6.data() + i * k), 1);
-            }
-
-            Vector<T> WS;
-            Vector<IndexType> Piv;
-            WS.resize(k);
-            Piv.resize(k);
-            std::complex<RealType> logdet = 0.0;
-            InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
-
-            Y7.resize(k, nel);
-
-            std::fill(Y7.begin(), Y7.end(), 0.0);
-            for (int i = 0; i < k; i++) {
-                BLAS::copy(k, Y6.data() + i, k,
-                    Y7.data() + (data_it[datum + 1 + i]), nel);
-            }
-
-            for (int p = 0; p < lookup_tbl[index].size(); p++) {
-                // el_p is the element position that contains information about
-                // the CI coefficient, and det up/dn values associated with the
-                // current unique determinant
-                const int el_p(lookup_tbl[index][p]);
-                const RealType c = cptr[el_p];
-                const size_t up = upC[el_p];
-                const size_t down = dnC[el_p];
-
-                const RealType alpha_4(c * detValues_dn[down] *
-                    detValues_up[up] * (1 / psiCurrent));
-
-                for (int i = 0; i < k; i++) {
-                    BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1,
-                        pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1);
-                }
-            }
-            datum += 3 * k + 1;
+    else
+    {
+      // Number of rows and cols of P^T
+      const int prows = k;
+      const int pcols = nel;
+      // Number of rows and cols of Q
+      const int qrows = nmo;
+      const int qcols = k;
+
+      Y5.resize(nel, k);
+      Y6.resize(k, k);
+
+      // Any matrix multiplication of P^T or Q is simply a projection.
+      // Explicit matrix multiplication can be avoided; instead, column or
+      // row copying can be done with BLAS::copy(size of col/row being copied,
+      //            Matrix pointer + place to begin copying,
+      //            storage spacing (number of elements between next row/col
+      //            element), Pointer to resultant matrix + place to begin
+      //            pasting, storage spacing of resultant matrix).
+      // For example, the fill and copy loop below perform the matrix
+      // multiplication T*Q = Y5.
+      std::fill(Y5.begin(), Y5.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(nel, T_ + data_it[datum + 1 + k + i], nmo, Y5.data() + i, k);
+      }
+
+      std::fill(Y6.begin(), Y6.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y5.data() + (data_it[datum + 1 + i]) * k, 1, (Y6.data() + i * k), 1);
+      }
+
+      Vector<T> WS;
+      Vector<IndexType> Piv;
+      WS.resize(k);
+      Piv.resize(k);
+      std::complex<RealType> logdet = 0.0;
+      InvertWithLog(Y6.data(), k, k, WS.data(), Piv.data(), logdet);
+
+      Y7.resize(k, nel);
+
+      std::fill(Y7.begin(), Y7.end(), 0.0);
+      for (int i = 0; i < k; i++)
+      {
+        BLAS::copy(k, Y6.data() + i, k, Y7.data() + (data_it[datum + 1 + i]), nel);
+      }
+
+      for (int p = 0; p < lookup_tbl[index].size(); p++)
+      {
+        // el_p is the element position that contains information about
+        // the CI coefficient, and det up/dn values associated with the
+        // current unique determinant
+        const int el_p(lookup_tbl[index][p]);
+        const RealType c  = cptr[el_p];
+        const size_t up   = upC[el_p];
+        const size_t down = dnC[el_p];
+
+        const RealType alpha_4(c * detValues_dn[down] * detValues_up[up] * (1 / psiCurrent));
+
+        for (int i = 0; i < k; i++)
+        {
+          BLAS::axpy(nel, alpha_4, Y7.data() + i * nel, 1, pK4.data() + (data_it[datum + 1 + k + i]) * nel, 1);
         }
+      }
+      datum += 3 * k + 1;
     }
-
-    BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel,
-        RealType(0.0), K4T.data(), nmo);
-    BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo,
-        RealType(0.0), TK4T.data(), nmo);
-
-    for (int mu = 0, k = parameter_start_index;
-         k < (parameter_start_index + parameters_size); k++, mu++) {
-        int kk = this->myVars.where(k);
-        if (kk >= 0) {
-            const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
-            if (i <= nel - 1 && j > nel - 1) {
-                dlogpsi[kk] += T(detValues_up[0] * (Table(i, j)) * const0 *
-                        (1 / psiCurrent) +
-                    (K4T(i, j) - K4T(j, i) - TK4T(i, j)));
-            }
-            else if (i <= nel - 1 && j <= nel - 1) {
-                dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) *
-                        const0 * (1 / psiCurrent) +
-                    (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i)));
-            }
-            else {
-                dlogpsi[kk] += T((K4T(i, j) - K4T(j, i)));
-            }
-        }
+  }
+
+  BLAS::gemm('N', 'N', nmo, nmo, nel, RealType(1.0), T_, nmo, pK4.data(), nel, RealType(0.0), K4T.data(), nmo);
+  BLAS::gemm('N', 'N', nmo, nel, nmo, RealType(1.0), K4T.data(), nmo, T_, nmo, RealType(0.0), TK4T.data(), nmo);
+
+  for (int mu = 0, k = parameter_start_index; k < (parameter_start_index + parameters_size); k++, mu++)
+  {
+    int kk = this->myVars.where(k);
+    if (kk >= 0)
+    {
+      const int i(m_act_rot_inds[mu].first), j(m_act_rot_inds[mu].second);
+      if (i <= nel - 1 && j > nel - 1)
+      {
+        dlogpsi[kk] +=
+            T(detValues_up[0] * (Table(i, j)) * const0 * (1 / psiCurrent) + (K4T(i, j) - K4T(j, i) - TK4T(i, j)));
+      }
+      else if (i <= nel - 1 && j <= nel - 1)
+      {
+        dlogpsi[kk] += T(detValues_up[0] * (Table(i, j) - Table(j, i)) * const0 * (1 / psiCurrent) +
+                         (K4T(i, j) - TK4T(i, j) - K4T(j, i) + TK4T(j, i)));
+      }
+      else
+      {
+        dlogpsi[kk] += T((K4T(i, j) - K4T(j, i)));
+      }
     }
+  }
 }
 
-template <typename T>
-std::unique_ptr<SPOSetT<T>>
-RotatedSPOsT<T>::makeClone() const
+template<typename T>
+std::unique_ptr<SPOSetT<T>> RotatedSPOsT<T>::makeClone() const
 {
-    auto myclone = std::make_unique<RotatedSPOsT>(
-        SPOSetT<T>::getName(), std::unique_ptr<SPOSetT<T>>(Phi->makeClone()));
-
-    myclone->params = this->params;
-    myclone->params_supplied = this->params_supplied;
-    myclone->m_act_rot_inds = this->m_act_rot_inds;
-    myclone->m_full_rot_inds = this->m_full_rot_inds;
-    myclone->myVars = this->myVars;
-    myclone->myVarsFull = this->myVarsFull;
-    myclone->history_params_ = this->history_params_;
-    myclone->use_global_rot_ = this->use_global_rot_;
-    return myclone;
+  auto myclone = std::make_unique<RotatedSPOsT>(SPOSetT<T>::getName(), std::unique_ptr<SPOSetT<T>>(Phi->makeClone()));
+
+  myclone->params          = this->params;
+  myclone->params_supplied = this->params_supplied;
+  myclone->m_act_rot_inds  = this->m_act_rot_inds;
+  myclone->m_full_rot_inds = this->m_full_rot_inds;
+  myclone->myVars          = this->myVars;
+  myclone->myVarsFull      = this->myVarsFull;
+  myclone->history_params_ = this->history_params_;
+  myclone->use_global_rot_ = this->use_global_rot_;
+  return myclone;
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::mw_evaluateDetRatios(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
-    const RefVector<ValueVector>& psi_list,
-    const std::vector<const ValueType*>& invRow_ptr_list,
-    std::vector<std::vector<ValueType>>& ratios_list) const
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                           const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
+                                           const RefVector<ValueVector>& psi_list,
+                                           const std::vector<const ValueType*>& invRow_ptr_list,
+                                           std::vector<std::vector<ValueType>>& ratios_list) const
 {
-    auto phi_list = extractPhiRefList(spo_list);
-    auto& leader = phi_list.getLeader();
-    leader.mw_evaluateDetRatios(
-        phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list);
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateDetRatios(phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::mw_evaluateValue(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const RefVector<ValueVector>& psi_v_list) const
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                       const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                       int iat,
+                                       const RefVector<ValueVector>& psi_v_list) const
 {
-    auto phi_list = extractPhiRefList(spo_list);
-    auto& leader = phi_list.getLeader();
-    leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list);
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const RefVector<ValueVector>& psi_v_list,
-    const RefVector<GradVector>& dpsi_v_list,
-    const RefVector<ValueVector>& d2psi_v_list) const
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                     const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                     int iat,
+                                     const RefVector<ValueVector>& psi_v_list,
+                                     const RefVector<GradVector>& dpsi_v_list,
+                                     const RefVector<ValueVector>& d2psi_v_list) const
 {
-    auto phi_list = extractPhiRefList(spo_list);
-    auto& leader = phi_list.getLeader();
-    leader.mw_evaluateVGL(
-        phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGL(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::mw_evaluateVGLWithSpin(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const RefVector<ValueVector>& psi_v_list,
-    const RefVector<GradVector>& dpsi_v_list,
-    const RefVector<ValueVector>& d2psi_v_list,
-    OffloadMatrix<ComplexType>& mw_dspin) const
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                             const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                             int iat,
+                                             const RefVector<ValueVector>& psi_v_list,
+                                             const RefVector<GradVector>& dpsi_v_list,
+                                             const RefVector<ValueVector>& d2psi_v_list,
+                                             OffloadMatrix<ComplexType>& mw_dspin) const
 {
-    auto phi_list = extractPhiRefList(spo_list);
-    auto& leader = phi_list.getLeader();
-    leader.mw_evaluateVGLWithSpin(
-        phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin);
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGLWithSpin(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::mw_evaluateVGLandDetRatioGrads(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const std::vector<const ValueType*>& invRow_ptr_list,
-    OffloadMWVGLArray& phi_vgl_v, std::vector<ValueType>& ratios,
-    std::vector<GradType>& grads) const
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                     const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                                     int iat,
+                                                     const std::vector<const ValueType*>& invRow_ptr_list,
+                                                     OffloadMWVGLArray& phi_vgl_v,
+                                                     std::vector<ValueType>& ratios,
+                                                     std::vector<GradType>& grads) const
 {
-    auto phi_list = extractPhiRefList(spo_list);
-    auto& leader = phi_list.getLeader();
-    leader.mw_evaluateVGLandDetRatioGrads(
-        phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads);
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGLandDetRatioGrads(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const std::vector<const ValueType*>& invRow_ptr_list,
-    OffloadMWVGLArray& phi_vgl_v, std::vector<ValueType>& ratios,
-    std::vector<GradType>& grads, std::vector<ValueType>& spingrads) const
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                             const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                                             int iat,
+                                                             const std::vector<const ValueType*>& invRow_ptr_list,
+                                                             OffloadMWVGLArray& phi_vgl_v,
+                                                             std::vector<ValueType>& ratios,
+                                                             std::vector<GradType>& grads,
+                                                             std::vector<ValueType>& spingrads) const
 {
-    auto phi_list = extractPhiRefList(spo_list);
-    auto& leader = phi_list.getLeader();
-    leader.mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, P_list, iat,
-        invRow_ptr_list, phi_vgl_v, ratios, grads, spingrads);
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads,
+                                                spingrads);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::mw_evaluate_notranspose(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
-    const RefVector<ValueMatrix>& logdet_list,
-    const RefVector<GradMatrix>& dlogdet_list,
-    const RefVector<ValueMatrix>& d2logdet_list) const
+template<typename T>
+void RotatedSPOsT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                              int first,
+                                              int last,
+                                              const RefVector<ValueMatrix>& logdet_list,
+                                              const RefVector<GradMatrix>& dlogdet_list,
+                                              const RefVector<ValueMatrix>& d2logdet_list) const
 {
-    auto phi_list = extractPhiRefList(spo_list);
-    auto& leader = phi_list.getLeader();
-    leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list,
-        dlogdet_list, d2logdet_list);
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list, dlogdet_list, d2logdet_list);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::createResource(ResourceCollection& collection) const
+template<typename T>
+void RotatedSPOsT<T>::createResource(ResourceCollection& collection) const
 {
-    Phi->createResource(collection);
+  Phi->createResource(collection);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::acquireResource(ResourceCollection& collection,
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template<typename T>
+void RotatedSPOsT<T>::acquireResource(ResourceCollection& collection,
+                                      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-    auto phi_list = extractPhiRefList(spo_list);
-    auto& leader = phi_list.getLeader();
-    leader.acquireResource(collection, phi_list);
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.acquireResource(collection, phi_list);
 }
 
-template <typename T>
-void
-RotatedSPOsT<T>::releaseResource(ResourceCollection& collection,
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template<typename T>
+void RotatedSPOsT<T>::releaseResource(ResourceCollection& collection,
+                                      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-    auto phi_list = extractPhiRefList(spo_list);
-    auto& leader = phi_list.getLeader();
-    leader.releaseResource(collection, phi_list);
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.releaseResource(collection, phi_list);
 }
 
-template <typename T>
-RefVectorWithLeader<SPOSetT<T>>
-RotatedSPOsT<T>::extractPhiRefList(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list)
+template<typename T>
+RefVectorWithLeader<SPOSetT<T>> RotatedSPOsT<T>::extractPhiRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list)
 {
-    auto& spo_leader = spo_list.template getCastedLeader<RotatedSPOsT>();
-    const auto nw = spo_list.size();
-    RefVectorWithLeader<SPOSetT<T>> phi_list(*spo_leader.Phi);
-    phi_list.reserve(nw);
-    for (int iw = 0; iw < nw; iw++) {
-        RotatedSPOsT& rot =
-            spo_list.template getCastedElement<RotatedSPOsT>(iw);
-        phi_list.emplace_back(*rot.Phi);
-    }
-    return phi_list;
+  auto& spo_leader = spo_list.template getCastedLeader<RotatedSPOsT>();
+  const auto nw    = spo_list.size();
+  RefVectorWithLeader<SPOSetT<T>> phi_list(*spo_leader.Phi);
+  phi_list.reserve(nw);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    RotatedSPOsT& rot = spo_list.template getCastedElement<RotatedSPOsT>(iw);
+    phi_list.emplace_back(*rot.Phi);
+  }
+  return phi_list;
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/RotatedSPOsT.h b/src/QMCWaveFunctions/RotatedSPOsT.h
index 1ee3b075332..dd3680a349d 100644
--- a/src/QMCWaveFunctions/RotatedSPOsT.h
+++ b/src/QMCWaveFunctions/RotatedSPOsT.h
@@ -1,17 +1,15 @@
 //////////////////////////////////////////////////////////////////////////////////////
-//// This file is distributed under the University of Illinois/NCSA Open Source
-/// License. / See LICENSE file in top directory for details.
-////
-//// Copyright (c) QMCPACK developers.
-////
-//// File developed by: Sergio D. Pineda Flores,
-/// sergio_pinedaflores@berkeley.edu, University of California, Berkeley / Eric
-/// Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley /
-/// Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-////
-//// File created by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu,
-/// University of California, Berkeley
-////////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2022 QMCPACK developers
+//
+// File developed by: Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley
+//                    Eric Neuscamman, eneuscamman@berkeley.edu, University of California, Berkeley
+//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+// File created by:  Sergio D. Pineda Flores, sergio_pinedaflores@berkeley.edu, University of California, Berkeley
+//////////////////////////////////////////////////////////////////////////////////////
+
 #ifndef QMCPLUSPLUS_ROTATEDSPOST_H
 #define QMCPLUSPLUS_ROTATEDSPOST_H
 
@@ -19,494 +17,465 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class RotatedSPOsT;
 namespace testing
 {
 OptVariablesTypeT<double>& getMyVarsFull(RotatedSPOsT<double>& rot);
 OptVariablesTypeT<float>& getMyVarsFull(RotatedSPOsT<float>& rot);
-std::vector<std::vector<double>>&
-getHistoryParams(RotatedSPOsT<double>& rot);
-std::vector<std::vector<float>>&
-getHistoryParams(RotatedSPOsT<float>& rot);
+std::vector<std::vector<double>>& getHistoryParams(RotatedSPOsT<double>& rot);
+std::vector<std::vector<float>>& getHistoryParams(RotatedSPOsT<float>& rot);
 } // namespace testing
 
-template <class T>
+template<class T>
 class RotatedSPOsT : public SPOSetT<T>, public OptimizableObjectT<T>
 {
 public:
-    using IndexType = typename SPOSetT<T>::IndexType;
-    using RealType = typename SPOSetT<T>::RealType;
-    using ValueType = typename SPOSetT<T>::ValueType;
-    using FullValueType     = typename SPOSetT<T>::FullValueType;
-    using GradType = typename SPOSetT<T>::GradType;
-    using ComplexType = typename SPOSetT<T>::ComplexType;
-    using FullRealType = typename SPOSetT<T>::FullRealType;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using HessVector = typename SPOSetT<T>::HessVector;
-    using HessMatrix = typename SPOSetT<T>::HessMatrix;
-    using GGGVector = typename SPOSetT<T>::GGGVector;
-    using GGGMatrix = typename SPOSetT<T>::GGGMatrix;
-    using OffloadMWVGLArray = typename SPOSetT<T>::OffloadMWVGLArray;
-    template <typename DT>
-    using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT>>;
-
-    // constructor
-    RotatedSPOsT(
-        const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos);
-    // destructor
-    ~RotatedSPOsT() override;
-
-    std::string
-    getClassName() const override
-    {
-        return "RotatedSPOsT";
-    }
-    bool
-    isOptimizable() const override
-    {
-        return true;
-    }
-    bool
-    isOMPoffload() const override
-    {
-        return Phi->isOMPoffload();
-    }
-    bool
-    hasIonDerivs() const override
-    {
-        return Phi->hasIonDerivs();
-    }
-
-    // Vector of rotation matrix indices
-    using RotationIndices = std::vector<std::pair<int, int>>;
-
-    // Active orbital rotation parameter indices
-    RotationIndices m_act_rot_inds;
-
-    // Full set of rotation values for global rotation
-    RotationIndices m_full_rot_inds;
-
-    // Construct a list of the matrix indices for non-zero rotation parameters.
-    // (The structure for a sparse representation of the matrix)
-    // Only core->active rotations are created.
-    static void
-    createRotationIndices(int nel, int nmo, RotationIndices& rot_indices);
-
-    // Construct a list for all the matrix indices, including core->active,
-    // core->core and active->active
-    static void
-    createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices);
-
-    // Fill in antisymmetric matrix from the list of rotation parameter indices
-    // and a list of parameter values.
-    // This function assumes rot_mat is properly sized upon input and is set to
-    // zero.
-    static void
-    constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
-        const std::vector<RealType>& param, ValueMatrix& rot_mat);
-
-    // Extract the list of rotation parameters from the entries in an
-    // antisymmetric matrix This function expects rot_indices and param are the
-    // same length.
-    static void
-    extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
-        const ValueMatrix& rot_mat, std::vector<RealType>& param);
-
-    // function to perform orbital rotations
-    void
-    apply_rotation(const std::vector<RealType>& param, bool use_stored_copy);
-
-    // For global rotation, inputs are the old parameters and the delta
-    // parameters. The corresponding rotation matrices are constructed,
-    // multiplied together, and the new parameters extracted. The new rotation
-    // is applied to the underlying SPO coefficients
-    void
-    applyDeltaRotation(const std::vector<RealType>& delta_param,
-        const std::vector<RealType>& old_param,
-        std::vector<RealType>& new_param);
-
-    // Perform the construction of matrices and extraction of parameters for a
-    // delta rotation. Split out and made static for testing.
-    static void
-    constructDeltaRotation(const std::vector<RealType>& delta_param,
-        const std::vector<RealType>& old_param,
-        const RotationIndices& act_rot_inds,
-        const RotationIndices& full_rot_inds, std::vector<RealType>& new_param,
-        ValueMatrix& new_rot_mat);
-
-    // When initializing the rotation from VP files
-    // This function applies the rotation history
-    void
-    applyRotationHistory();
-
-    // This function applies the global rotation (similar to apply_rotation, but
-    // for the full set of rotation parameters)
-    void
-    applyFullRotation(
-        const std::vector<RealType>& full_param, bool use_stored_copy);
-
-    // Compute matrix exponential of an antisymmetric matrix (result is rotation
-    // matrix)
-    static void
-    exponentiate_antisym_matrix(ValueMatrix& mat);
-
-    // Compute matrix log of rotation matrix to produce antisymmetric matrix
-    static void
-    log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output);
-
-    // A particular SPOSet used for Orbitals
-    std::unique_ptr<SPOSetT<T>> Phi;
-
-    /// Set the rotation parameters (usually from input file)
-    void
-    setRotationParameters(const std::vector<RealType>& param_list);
-
-    /// the number of electrons of the majority spin
-    size_t nel_major_;
-
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const override;
-
-    // myG_temp (myL_temp) is the Gradient (Laplacian) value of of the
-    // Determinant part of the wfn myG_J is the Gradient of the all other parts
-    // of the wavefunction (typically just the Jastrow).
-    //       It represents \frac{\nabla\psi_{J}}{\psi_{J}}
-    // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}} . The
-    // Laplacian portion IMPORTANT NOTE:  The value of P.L holds \nabla^2
-    // ln[\psi] but we need  \frac{\nabla^2 \psi}{\psi} and this is what myL_J
-    // will hold
-    typename ParticleSetT<T>::ParticleGradient myG_temp, myG_J;
-    typename ParticleSetT<T>::ParticleLaplacian myL_temp, myL_J;
-
-    ValueMatrix Bbar;
-    ValueMatrix psiM_inv;
-    ValueMatrix psiM_all;
-    GradMatrix dpsiM_all;
-    ValueMatrix d2psiM_all;
-
-    // Single Slater creation
-    void
-    buildOptVariables(size_t nel);
-
-    // For the MSD case rotations must be created in MultiSlaterDetTableMethod
-    // class
-    void
-    buildOptVariables(const RotationIndices& rotations,
-        const RotationIndices& full_rotations);
-
-    void evaluateDerivatives(ParticleSetT<T>& P,
+  using IndexType         = typename SPOSetT<T>::IndexType;
+  using RealType          = typename SPOSetT<T>::RealType;
+  using ValueType         = typename SPOSetT<T>::ValueType;
+  using FullValueType     = typename SPOSetT<T>::FullValueType;
+  using GradType          = typename SPOSetT<T>::GradType;
+  using ComplexType       = typename SPOSetT<T>::ComplexType;
+  using FullRealType      = typename SPOSetT<T>::FullRealType;
+  using ValueVector       = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix       = typename SPOSetT<T>::ValueMatrix;
+  using GradVector        = typename SPOSetT<T>::GradVector;
+  using GradMatrix        = typename SPOSetT<T>::GradMatrix;
+  using HessVector        = typename SPOSetT<T>::HessVector;
+  using HessMatrix        = typename SPOSetT<T>::HessMatrix;
+  using GGGVector         = typename SPOSetT<T>::GGGVector;
+  using GGGMatrix         = typename SPOSetT<T>::GGGMatrix;
+  using OffloadMWVGLArray = typename SPOSetT<T>::OffloadMWVGLArray;
+  template<typename DT>
+  using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT>>;
+
+  // constructor
+  RotatedSPOsT(const std::string& my_name, std::unique_ptr<SPOSetT<T>>&& spos);
+  // destructor
+  ~RotatedSPOsT() override;
+
+  std::string getClassName() const override { return "RotatedSPOsT"; }
+  bool isOptimizable() const override { return true; }
+  bool isOMPoffload() const override { return Phi->isOMPoffload(); }
+  bool hasIonDerivs() const override { return Phi->hasIonDerivs(); }
+
+  // Vector of rotation matrix indices
+  using RotationIndices = std::vector<std::pair<int, int>>;
+
+  // Active orbital rotation parameter indices
+  RotationIndices m_act_rot_inds;
+
+  // Full set of rotation indices for global rotation
+  RotationIndices m_full_rot_inds;
+
+  // Construct a list of the matrix indices for non-zero rotation parameters.
+  // (The structure for a sparse representation of the matrix)
+  // Only core->active rotations are created.
+  static void createRotationIndices(int nel, int nmo, RotationIndices& rot_indices);
+
+  // Construct a list for all the matrix indices, including core->active,
+  // core->core and active->active
+  static void createRotationIndicesFull(int nel, int nmo, RotationIndices& rot_indices);
+
+  // Fill in antisymmetric matrix from the list of rotation parameter indices
+  // and a list of parameter values.
+  // This function assumes rot_mat is properly sized upon input and is set to
+  // zero.
+  static void constructAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                           const std::vector<RealType>& param,
+                                           ValueMatrix& rot_mat);
+
+  // Extract the list of rotation parameters from the entries in an
+  // antisymmetric matrix. This function expects rot_indices and param to be
+  // the same length.
+  static void extractParamsFromAntiSymmetricMatrix(const RotationIndices& rot_indices,
+                                                   const ValueMatrix& rot_mat,
+                                                   std::vector<RealType>& param);
+
+  // function to perform orbital rotations
+  void apply_rotation(const std::vector<RealType>& param, bool use_stored_copy);
+
+  // For global rotation, inputs are the old parameters and the delta
+  // parameters. The corresponding rotation matrices are constructed,
+  // multiplied together, and the new parameters extracted. The new rotation
+  // is applied to the underlying SPO coefficients
+  void applyDeltaRotation(const std::vector<RealType>& delta_param,
+                          const std::vector<RealType>& old_param,
+                          std::vector<RealType>& new_param);
+
+  // Perform the construction of matrices and extraction of parameters for a
+  // delta rotation. Split out and made static for testing.
+  static void constructDeltaRotation(const std::vector<RealType>& delta_param,
+                                     const std::vector<RealType>& old_param,
+                                     const RotationIndices& act_rot_inds,
+                                     const RotationIndices& full_rot_inds,
+                                     std::vector<RealType>& new_param,
+                                     ValueMatrix& new_rot_mat);
+
+  // When initializing the rotation from VP files,
+  // this function applies the rotation history
+  void applyRotationHistory();
+
+  // This function applies the global rotation (similar to apply_rotation, but
+  // for the full set of rotation parameters)
+  void applyFullRotation(const std::vector<RealType>& full_param, bool use_stored_copy);
+
+  // Compute matrix exponential of an antisymmetric matrix (result is rotation
+  // matrix)
+  static void exponentiate_antisym_matrix(ValueMatrix& mat);
+
+  // Compute matrix log of rotation matrix to produce antisymmetric matrix
+  static void log_antisym_matrix(const ValueMatrix& mat, ValueMatrix& output);
+
+  // A particular SPOSet used for Orbitals
+  std::unique_ptr<SPOSetT<T>> Phi;
+
+  /// Set the rotation parameters (usually from input file)
+  void setRotationParameters(const std::vector<RealType>& param_list);
+
+  /// the number of electrons of the majority spin
+  size_t nel_major_;
+
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
+
+  // myG_temp (myL_temp) is the Gradient (Laplacian) value of the Determinant
+  // part of the wfn. myG_J is the Gradient of all the other parts of the
+  // wavefunction (typically just the Jastrow).
+  //       It represents \frac{\nabla\psi_{J}}{\psi_{J}}
+  // myL_J will be used to represent \frac{\nabla^2\psi_{J}}{\psi_{J}}, the
+  // Laplacian portion. IMPORTANT NOTE: The value of P.L holds \nabla^2
+  // ln[\psi] but we need \frac{\nabla^2 \psi}{\psi} and this is what myL_J
+  // will hold.
+  typename ParticleSetT<T>::ParticleGradient myG_temp, myG_J;
+  typename ParticleSetT<T>::ParticleLaplacian myL_temp, myL_J;
+
+  ValueMatrix Bbar;
+  ValueMatrix psiM_inv;
+  ValueMatrix psiM_all;
+  GradMatrix dpsiM_all;
+  ValueMatrix d2psiM_all;
+
+  // Single Slater creation
+  void buildOptVariables(size_t nel);
+
+  // For the MSD case rotations must be created in MultiSlaterDetTableMethod
+  // class
+  void buildOptVariables(const RotationIndices& rotations, const RotationIndices& full_rotations);
+
+  void evaluateDerivatives(ParticleSetT<T>& P,
+                           const OptVariablesTypeT<T>& optvars,
+                           Vector<T>& dlogpsi,
+                           Vector<T>& dhpsioverpsi,
+                           const int& FirstIndex,
+                           const int& LastIndex) override;
+
+  void evaluateDerivativesWF(ParticleSetT<T>& P,
                              const OptVariablesTypeT<T>& optvars,
                              Vector<T>& dlogpsi,
-                             Vector<T>& dhpsioverpsi,
-                             const int& FirstIndex,
-                             const int& LastIndex) override;
-
-    void evaluateDerivativesWF(ParticleSetT<T>& P,
-                               const OptVariablesTypeT<T>& optvars,
-                               Vector<T>& dlogpsi,
-                               int FirstIndex,
-                               int LastIndex) override;
+                             int FirstIndex,
+                             int LastIndex) override;
 
-    void evaluateDerivatives(ParticleSetT<T>& P,
+  void evaluateDerivatives(ParticleSetT<T>& P,
+                           const OptVariablesTypeT<T>& optvars,
+                           Vector<T>& dlogpsi,
+                           Vector<T>& dhpsioverpsi,
+                           const T& psiCurrent,
+                           const std::vector<T>& Coeff,
+                           const std::vector<size_t>& C2node_up,
+                           const std::vector<size_t>& C2node_dn,
+                           const ValueVector& detValues_up,
+                           const ValueVector& detValues_dn,
+                           const GradMatrix& grads_up,
+                           const GradMatrix& grads_dn,
+                           const ValueMatrix& lapls_up,
+                           const ValueMatrix& lapls_dn,
+                           const ValueMatrix& M_up,
+                           const ValueMatrix& M_dn,
+                           const ValueMatrix& Minv_up,
+                           const ValueMatrix& Minv_dn,
+                           const GradMatrix& B_grad,
+                           const ValueMatrix& B_lapl,
+                           const std::vector<int>& detData_up,
+                           const size_t N1,
+                           const size_t N2,
+                           const size_t NP1,
+                           const size_t NP2,
+                           const std::vector<std::vector<int>>& lookup_tbl) override;
+
+  void evaluateDerivativesWF(ParticleSetT<T>& P,
                              const OptVariablesTypeT<T>& optvars,
-                             Vector<T>& dlogpsi,
-                             Vector<T>& dhpsioverpsi,
-                             const T& psiCurrent,
-                             const std::vector<T>& Coeff,
+                             Vector<ValueType>& dlogpsi,
+                             const FullValueType& psiCurrent,
+                             const std::vector<ValueType>& Coeff,
                              const std::vector<size_t>& C2node_up,
                              const std::vector<size_t>& C2node_dn,
                              const ValueVector& detValues_up,
                              const ValueVector& detValues_dn,
-                             const GradMatrix& grads_up,
-                             const GradMatrix& grads_dn,
-                             const ValueMatrix& lapls_up,
-                             const ValueMatrix& lapls_dn,
                              const ValueMatrix& M_up,
                              const ValueMatrix& M_dn,
                              const ValueMatrix& Minv_up,
                              const ValueMatrix& Minv_dn,
-                             const GradMatrix& B_grad,
-                             const ValueMatrix& B_lapl,
                              const std::vector<int>& detData_up,
-                             const size_t N1,
-                             const size_t N2,
-                             const size_t NP1,
-                             const size_t NP2,
                              const std::vector<std::vector<int>>& lookup_tbl) override;
 
-    void evaluateDerivativesWF(ParticleSetT<T>& P,
-                               const OptVariablesTypeT<T>& optvars,
-                               Vector<ValueType>& dlogpsi,
-                               const FullValueType& psiCurrent,
-                               const std::vector<ValueType>& Coeff,
-                               const std::vector<size_t>& C2node_up,
-                               const std::vector<size_t>& C2node_dn,
-                               const ValueVector& detValues_up,
-                               const ValueVector& detValues_dn,
-                               const ValueMatrix& M_up,
-                               const ValueMatrix& M_dn,
-                               const ValueMatrix& Minv_up,
-                               const ValueMatrix& Minv_dn,
-                               const std::vector<int>& detData_up,
-                               const std::vector<std::vector<int>>& lookup_tbl) override;
-
-    // helper function to evaluatederivative; evaluate orbital rotation
-    // parameter derivative using table method
-    void
-    table_method_eval(Vector<T>& dlogpsi, Vector<T>& dhpsioverpsi,
-        const typename ParticleSetT<T>::ParticleLaplacian& myL_J,
-        const typename ParticleSetT<T>::ParticleGradient& myG_J,
-        const size_t nel, const size_t nmo, const T& psiCurrent,
-        const std::vector<T>& Coeff, const std::vector<size_t>& C2node_up,
-        const std::vector<size_t>& C2node_dn, const ValueVector& detValues_up,
-        const ValueVector& detValues_dn, const GradMatrix& grads_up,
-        const GradMatrix& grads_dn, const ValueMatrix& lapls_up,
-        const ValueMatrix& lapls_dn, const ValueMatrix& M_up,
-        const ValueMatrix& M_dn, const ValueMatrix& Minv_up,
-        const ValueMatrix& Minv_dn, const GradMatrix& B_grad,
-        const ValueMatrix& B_lapl, const std::vector<int>& detData_up,
-        const size_t N1, const size_t N2, const size_t NP1, const size_t NP2,
-        const std::vector<std::vector<int>>& lookup_tbl);
-
-    void
-    table_method_evalWF(Vector<T>& dlogpsi, const size_t nel, const size_t nmo,
-        const T& psiCurrent, const std::vector<T>& Coeff,
-        const std::vector<size_t>& C2node_up,
-        const std::vector<size_t>& C2node_dn, const ValueVector& detValues_up,
-        const ValueVector& detValues_dn, const ValueMatrix& M_up,
-        const ValueMatrix& M_dn, const ValueMatrix& Minv_up,
-        const ValueMatrix& Minv_dn, const std::vector<int>& detData_up,
-        const std::vector<std::vector<int>>& lookup_tbl);
-
-    void
-    extractOptimizableObjectRefs(UniqueOptObjRefsT<T>& opt_obj_refs) override
-    {
-        opt_obj_refs.push_back(*this);
-    }
-
-    void checkInVariablesExclusive(OptVariablesTypeT<T>& active) override
-    {
-        if (this->myVars.size())
-            active.insertFrom(this->myVars);
-    }
-
-    void checkOutVariables(const OptVariablesTypeT<T>& active) override { this->myVars.getIndex(active); }
-
-    /// reset
-    void resetParametersExclusive(const OptVariablesTypeT<T>& active) override;
-
-    void
-    writeVariationalParameters(hdf_archive& hout) override;
-
-    void
-    readVariationalParameters(hdf_archive& hin) override;
-
-    //*********************************************************************************
-    // the following functions simply call Phi's corresponding functions
-    void
-    setOrbitalSetSize(int norbs) override
-    {
-        Phi->setOrbitalSetSize(norbs);
-    }
-
-    void
-    checkObject() const override
-    {
-        Phi->checkObject();
-    }
-
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override
-    {
-        assert(psi.size() <= this->OrbitalSetSize);
-        Phi->evaluateValue(P, iat, psi);
-    }
-
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override
-    {
-        assert(psi.size() <= this->OrbitalSetSize);
-        Phi->evaluateVGL(P, iat, psi, dpsi, d2psi);
-    }
-
-    void
-    evaluateDetRatios(const VirtualParticleSetT<T>& VP, ValueVector& psi,
-        const ValueVector& psiinv, std::vector<T>& ratios) override
-    {
-        Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
-    }
-
-    void evaluateDerivRatios(const VirtualParticleSetT<T>& VP,
-                             const OptVariablesTypeT<T>& optvars,
-                             ValueVector& psi,
-                             const ValueVector& psiinv,
-                             std::vector<T>& ratios,
-                             Matrix<T>& dratios,
-                             int FirstIndex,
-                             int LastIndex) override;
-
-    void
-    evaluateVGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi) override
-    {
-        assert(psi.size() <= this->OrbitalSetSize);
-        Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi);
-    }
-
-    void
-    evaluateVGHGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi,
-        GGGVector& grad_grad_grad_psi) override
-    {
-        Phi->evaluateVGHGH(
-            P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi);
-    }
-
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override
-    {
-        Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
-    }
-
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        HessMatrix& grad_grad_logdet) override
-    {
-        Phi->evaluate_notranspose(
-            P, first, last, logdet, dlogdet, grad_grad_logdet);
-    }
-
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
-        GGGMatrix& grad_grad_grad_logdet) override
-    {
-        Phi->evaluate_notranspose(P, first, last, logdet, dlogdet,
-            grad_grad_logdet, grad_grad_grad_logdet);
-    }
-
-    void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src,
-        GradMatrix& grad_phi) override
-    {
-        Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi);
-    }
-
-    void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
-        HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi) override
-    {
-        Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi,
-            grad_grad_phi, grad_lapl_phi);
-    }
-
-    //  void evaluateThirdDeriv(const ParticleSet& P, int first, int last,
-    //  GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first,
-    //  last, grad_grad_grad_logdet); }
-
-    /// Use history list (false) or global rotation (true)
-    void
-    set_use_global_rotation(bool use_global_rotation)
-    {
-        use_global_rot_ = use_global_rotation;
-    }
-
-    void
-    mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
-        const RefVector<ValueVector>& psi_list,
-        const std::vector<const ValueType*>& invRow_ptr_list,
-        std::vector<std::vector<ValueType>>& ratios_list) const override;
-
-    void
-    mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list) const override;
-
-    void
-    mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list) const override;
-
-    void
-    mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list,
-        OffloadMatrix<ComplexType>& mw_dspin) const override;
-
-    void
-    mw_evaluateVGLandDetRatioGrads(
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const std::vector<const ValueType*>& invRow_ptr_list,
-        OffloadMWVGLArray& phi_vgl_v, std::vector<ValueType>& ratios,
-        std::vector<GradType>& grads) const override;
-
-    void
-    mw_evaluateVGLandDetRatioGradsWithSpin(
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const std::vector<const ValueType*>& invRow_ptr_list,
-        OffloadMWVGLArray& phi_vgl_v, std::vector<ValueType>& ratios,
-        std::vector<GradType>& grads,
-        std::vector<ValueType>& spingrads) const override;
-
-    void
-    mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
-        const RefVector<ValueMatrix>& logdet_list,
-        const RefVector<GradMatrix>& dlogdet_list,
-        const RefVector<ValueMatrix>& d2logdet_list) const override;
-
-    void
-    createResource(ResourceCollection& collection) const override;
-
-    void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
-
-    void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+  // helper function for evaluateDerivatives; evaluates the orbital rotation
+  // parameter derivatives using the table method
+  void table_method_eval(Vector<T>& dlogpsi,
+                         Vector<T>& dhpsioverpsi,
+                         const typename ParticleSetT<T>::ParticleLaplacian& myL_J,
+                         const typename ParticleSetT<T>::ParticleGradient& myG_J,
+                         const size_t nel,
+                         const size_t nmo,
+                         const T& psiCurrent,
+                         const std::vector<T>& Coeff,
+                         const std::vector<size_t>& C2node_up,
+                         const std::vector<size_t>& C2node_dn,
+                         const ValueVector& detValues_up,
+                         const ValueVector& detValues_dn,
+                         const GradMatrix& grads_up,
+                         const GradMatrix& grads_dn,
+                         const ValueMatrix& lapls_up,
+                         const ValueMatrix& lapls_dn,
+                         const ValueMatrix& M_up,
+                         const ValueMatrix& M_dn,
+                         const ValueMatrix& Minv_up,
+                         const ValueMatrix& Minv_dn,
+                         const GradMatrix& B_grad,
+                         const ValueMatrix& B_lapl,
+                         const std::vector<int>& detData_up,
+                         const size_t N1,
+                         const size_t N2,
+                         const size_t NP1,
+                         const size_t NP2,
+                         const std::vector<std::vector<int>>& lookup_tbl);
+
+  void table_method_evalWF(Vector<T>& dlogpsi,
+                           const size_t nel,
+                           const size_t nmo,
+                           const T& psiCurrent,
+                           const std::vector<T>& Coeff,
+                           const std::vector<size_t>& C2node_up,
+                           const std::vector<size_t>& C2node_dn,
+                           const ValueVector& detValues_up,
+                           const ValueVector& detValues_dn,
+                           const ValueMatrix& M_up,
+                           const ValueMatrix& M_dn,
+                           const ValueMatrix& Minv_up,
+                           const ValueMatrix& Minv_dn,
+                           const std::vector<int>& detData_up,
+                           const std::vector<std::vector<int>>& lookup_tbl);
+
+  void extractOptimizableObjectRefs(UniqueOptObjRefsT<T>& opt_obj_refs) override { opt_obj_refs.push_back(*this); }
+
+  void checkInVariablesExclusive(OptVariablesTypeT<T>& active) override
+  {
+    if (this->myVars.size())
+      active.insertFrom(this->myVars);
+  }
+
+  void checkOutVariables(const OptVariablesTypeT<T>& active) override { this->myVars.getIndex(active); }
+
+  /// reset
+  void resetParametersExclusive(const OptVariablesTypeT<T>& active) override;
+
+  void writeVariationalParameters(hdf_archive& hout) override;
+
+  void readVariationalParameters(hdf_archive& hin) override;
+
+  //*********************************************************************************
+  // the following functions simply call Phi's corresponding functions
+  void setOrbitalSetSize(int norbs) override { Phi->setOrbitalSetSize(norbs); }
+
+  void checkObject() const override { Phi->checkObject(); }
+
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    Phi->evaluateValue(P, iat, psi);
+  }
+
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    Phi->evaluateVGL(P, iat, psi, dpsi, d2psi);
+  }
+
+  void evaluateDetRatios(const VirtualParticleSetT<T>& VP,
+                         ValueVector& psi,
+                         const ValueVector& psiinv,
+                         std::vector<T>& ratios) override
+  {
+    Phi->evaluateDetRatios(VP, psi, psiinv, ratios);
+  }
+
+  void evaluateDerivRatios(const VirtualParticleSetT<T>& VP,
+                           const OptVariablesTypeT<T>& optvars,
+                           ValueVector& psi,
+                           const ValueVector& psiinv,
+                           std::vector<T>& ratios,
+                           Matrix<T>& dratios,
+                           int FirstIndex,
+                           int LastIndex) override;
+
+  void evaluateVGH(const ParticleSetT<T>& P,
+                   int iat,
+                   ValueVector& psi,
+                   GradVector& dpsi,
+                   HessVector& grad_grad_psi) override
+  {
+    assert(psi.size() <= this->OrbitalSetSize);
+    Phi->evaluateVGH(P, iat, psi, dpsi, grad_grad_psi);
+  }
+
+  void evaluateVGHGH(const ParticleSetT<T>& P,
+                     int iat,
+                     ValueVector& psi,
+                     GradVector& dpsi,
+                     HessVector& grad_grad_psi,
+                     GGGVector& grad_grad_grad_psi) override
+  {
+    Phi->evaluateVGHGH(P, iat, psi, dpsi, grad_grad_psi, grad_grad_grad_psi);
+  }
+
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override
+  {
+    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, d2logdet);
+  }
+
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet) override
+  {
+    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet);
+  }
+
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            HessMatrix& grad_grad_logdet,
+                            GGGMatrix& grad_grad_grad_logdet) override
+  {
+    Phi->evaluate_notranspose(P, first, last, logdet, dlogdet, grad_grad_logdet, grad_grad_grad_logdet);
+  }
+
+  void evaluateGradSource(const ParticleSetT<T>& P,
+                          int first,
+                          int last,
+                          const ParticleSetT<T>& source,
+                          int iat_src,
+                          GradMatrix& grad_phi) override
+  {
+    Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi);
+  }
+
+  void evaluateGradSource(const ParticleSetT<T>& P,
+                          int first,
+                          int last,
+                          const ParticleSetT<T>& source,
+                          int iat_src,
+                          GradMatrix& grad_phi,
+                          HessMatrix& grad_grad_phi,
+                          GradMatrix& grad_lapl_phi) override
+  {
+    Phi->evaluateGradSource(P, first, last, source, iat_src, grad_phi, grad_grad_phi, grad_lapl_phi);
+  }
+
+  //  void evaluateThirdDeriv(const ParticleSet& P, int first, int last,
+  //  GGGMatrix& grad_grad_grad_logdet) {Phi->evaluateThridDeriv(P, first,
+  //  last, grad_grad_grad_logdet); }
+
+  /// Use history list (false) or global rotation (true)
+  void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; }
+
+  void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                            const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
+                            const RefVector<ValueVector>& psi_list,
+                            const std::vector<const ValueType*>& invRow_ptr_list,
+                            std::vector<std::vector<ValueType>>& ratios_list) const override;
+
+  void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                        const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                        int iat,
+                        const RefVector<ValueVector>& psi_v_list) const override;
+
+  void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                      const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                      int iat,
+                      const RefVector<ValueVector>& psi_v_list,
+                      const RefVector<GradVector>& dpsi_v_list,
+                      const RefVector<ValueVector>& d2psi_v_list) const override;
+
+  void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list,
+                              OffloadMatrix<ComplexType>& mw_dspin) const override;
+
+  void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                      int iat,
+                                      const std::vector<const ValueType*>& invRow_ptr_list,
+                                      OffloadMWVGLArray& phi_vgl_v,
+                                      std::vector<ValueType>& ratios,
+                                      std::vector<GradType>& grads) const override;
+
+  void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                              int iat,
+                                              const std::vector<const ValueType*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<ValueType>& ratios,
+                                              std::vector<GradType>& grads,
+                                              std::vector<ValueType>& spingrads) const override;
+
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                               const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override;
+
+  void createResource(ResourceCollection& collection) const override;
+
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
 
 private:
-    /// true if SPO parameters (orbital rotation parameters) have been supplied
-    /// by input
-    bool params_supplied;
-    /// list of supplied orbital rotation parameters
-    std::vector<RealType> params;
-
-    /// Full set of rotation matrix parameters for use in global rotation method
-    OptVariablesTypeT<T> myVarsFull;
-
-    /// List of previously applied parameters
-    std::vector<std::vector<RealType>> history_params_;
-
-    /// Use global rotation or history list
-    bool use_global_rot_ = true;
-
-    static RefVectorWithLeader<SPOSetT<T>>
-    extractPhiRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list);
-    friend OptVariablesTypeT<double>& testing::getMyVarsFull(RotatedSPOsT<double>& rot);
-    friend OptVariablesTypeT<float>& testing::getMyVarsFull(RotatedSPOsT<float>& rot);
-    friend std::vector<std::vector<double>>&
-    testing::getHistoryParams(RotatedSPOsT<double>& rot);
-    friend std::vector<std::vector<float>>&
-    testing::getHistoryParams(RotatedSPOsT<float>& rot);
+  /// true if SPO parameters (orbital rotation parameters) have been supplied
+  /// by input
+  bool params_supplied;
+  /// list of supplied orbital rotation parameters
+  std::vector<RealType> params;
+
+  /// Full set of rotation matrix parameters for use in global rotation method
+  OptVariablesTypeT<T> myVarsFull;
+
+  /// List of previously applied parameters
+  std::vector<std::vector<RealType>> history_params_;
+
+  /// Use global rotation or history list
+  bool use_global_rot_ = true;
+
+  static RefVectorWithLeader<SPOSetT<T>> extractPhiRefList(const RefVectorWithLeader<SPOSetT<T>>& spo_list);
+  friend OptVariablesTypeT<double>& testing::getMyVarsFull(RotatedSPOsT<double>& rot);
+  friend OptVariablesTypeT<float>& testing::getMyVarsFull(RotatedSPOsT<float>& rot);
+  friend std::vector<std::vector<double>>& testing::getHistoryParams(RotatedSPOsT<double>& rot);
+  friend std::vector<std::vector<float>>& testing::getHistoryParams(RotatedSPOsT<float>& rot);
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
index e8342a693ba..85c5e9a50ce 100644
--- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
+++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
@@ -5,18 +5,14 @@
 // Copyright (c) 2020 QMCPACK developers.
 //
 // File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University
-//                    Ken Esler, kpesler@gmail.com, University of Illinois at
-//                    Urbana-Champaign Miguel Morales, moralessilva2@llnl.gov,
-//                    Lawrence Livermore National Laboratory Jeremy McMinnis,
-//                    jmcminis@gmail.com, University of Illinois at
-//                    Urbana-Champaign Jaron T. Krogel, krogeljt@ornl.gov, Oak
-//                    Ridge National Laboratory Jeongnim Kim,
-//                    jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Mark A. Berrill, berrillma@ornl.gov, Oak
-//                    Ridge National Laboratory
+//                    Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "SPOSetBuilderFactoryT.h"
@@ -144,10 +140,9 @@ std::unique_ptr<SPOSetBuilderT<T>> SPOSetBuilderFactoryT<T>::createSPOSetBuilder
     if (targetPtcl.isSpinor())
     {
 #ifdef QMC_COMPLEX
-            app_log() << "Einspline Spinor Set\n";
-            // FIXME
-            bb = std::make_unique<EinsplineSpinorSetBuilderT<T>>(targetPtcl,
-            ptclPool, myComm, rootNode);
+      app_log() << "Einspline Spinor Set\n";
+      // FIXME
+      bb = std::make_unique<EinsplineSpinorSetBuilderT<T>>(targetPtcl, ptclPool, myComm, rootNode);
 #else
       PRE.error("Use of einspline spinors requires QMC_COMPLEX=1.  "
                 "Rebuild with this option");
@@ -156,11 +151,10 @@ std::unique_ptr<SPOSetBuilderT<T>> SPOSetBuilderFactoryT<T>::createSPOSetBuilder
     else
     {
 #if defined(HAVE_EINSPLINE)
-            PRE << "EinsplineSetBuilder:  using libeinspline for B-spline "
-                   "orbitals.\n";
-            // FIXME
-            bb = std::make_unique<EinsplineSetBuilderT<T>>(targetPtcl, ptclPool,
-            myComm, rootNode);
+      PRE << "EinsplineSetBuilder:  using libeinspline for B-spline "
+             "orbitals.\n";
+      // FIXME
+      bb = std::make_unique<EinsplineSetBuilderT<T>>(targetPtcl, ptclPool, myComm, rootNode);
 #else
       PRE.error("Einspline is missing for B-spline orbitals", true);
 #endif
diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h
index 9841988d003..65f09b509f9 100644
--- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h
+++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.h
@@ -1,18 +1,15 @@
 //////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source
-// License. See LICENSE file in top directory for details.
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National
-//                    Laboratory Jeongnim Kim, jeongnim.kim@gmail.com,
-//                    University of Illinois at Urbana-Champaign Mark A.
-//                    Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_BASISSETFACTORYT_H
@@ -24,66 +21,53 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 class SPOSetBuilderFactoryT : public MPIObjectBase
 {
 public:
-    using SPOMap = typename SPOSetT<T>::SPOMap;
-    using PSetMap =
-        std::map<std::string, const std::unique_ptr<ParticleSetT<T>>>;
+  using SPOMap  = typename SPOSetT<T>::SPOMap;
+  using PSetMap = std::map<std::string, const std::unique_ptr<ParticleSetT<T>>>;
 
-    /** constructor
+  /** constructor
      * \param comm communicator
      * \param els reference to the electrons
      * \param ions reference to the ions
      */
-    SPOSetBuilderFactoryT(
-        Communicate* comm, ParticleSetT<T>& els, const PSetMap& psets);
+  SPOSetBuilderFactoryT(Communicate* comm, ParticleSetT<T>& els, const PSetMap& psets);
 
-    ~SPOSetBuilderFactoryT();
+  ~SPOSetBuilderFactoryT();
 
-    std::unique_ptr<SPOSetBuilderT<T>>
-    createSPOSetBuilder(xmlNodePtr rootNode);
+  std::unique_ptr<SPOSetBuilderT<T>> createSPOSetBuilder(xmlNodePtr rootNode);
 
-    /** returns a named sposet from the pool
+  /** returns a named sposet from the pool
      *  only use in serial portion of execution
      *  ie during initialization prior to threaded code
      */
-    const SPOSetT<T>*
-    getSPOSet(const std::string& name) const;
+  const SPOSetT<T>* getSPOSet(const std::string& name) const;
 
-    void
-    buildSPOSetCollection(xmlNodePtr cur);
+  void buildSPOSetCollection(xmlNodePtr cur);
 
-    bool
-    empty() const
-    {
-        return sposets.empty();
-    }
+  bool empty() const { return sposets.empty(); }
 
-    /** add an SPOSet to sposets map.
+  /** add an SPOSet to sposets map.
      * This is only used to handle legacy SPOSet input styles without using
      * sposet_collection
      */
-    void addSPOSet(std::unique_ptr<SPOSetT<T>>);
+  void addSPOSet(std::unique_ptr<SPOSetT<T>>);
 
-    SPOMap&&
-    exportSPOSets()
-    {
-        return std::move(sposets);
-    }
+  SPOMap&& exportSPOSets() { return std::move(sposets); }
 
 private:
-    /// reference to the target particle
-    ParticleSetT<T>& targetPtcl;
+  /// reference to the target particle
+  ParticleSetT<T>& targetPtcl;
 
-    /// reference to the particle pool
-    const PSetMap& ptclPool;
+  /// reference to the particle pool
+  const PSetMap& ptclPool;
 
-    /// list of all sposets created by the builders of this factory
-    SPOMap sposets;
+  /// list of all sposets created by the builders of this factory
+  SPOMap sposets;
 
-    static std::string basisset_tag;
+  static std::string basisset_tag;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/SPOSetScannerT.h b/src/QMCWaveFunctions/SPOSetScannerT.h
index 814601bbacb..9c9b0bdb729 100644
--- a/src/QMCWaveFunctions/SPOSetScannerT.h
+++ b/src/QMCWaveFunctions/SPOSetScannerT.h
@@ -19,266 +19,235 @@
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 struct OutputReportMakerBase
 {
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using GradVector = typename SPOSetT<T>::GradVector;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
 
-    const ValueVector& SPO_v_avg;
-    const ValueVector& SPO_l_avg;
-    const GradVector& SPO_g_avg;
-    int nknots;
+  const ValueVector& SPO_v_avg;
+  const ValueVector& SPO_l_avg;
+  const GradVector& SPO_g_avg;
+  int nknots;
 };
 
-template <typename T>
+template<typename T>
 struct OutputReportMaker : OutputReportMakerBase<T>
 {
-    using RealType = typename SPOSetT<T>::RealType;
+  using RealType = typename SPOSetT<T>::RealType;
 
-    void
-    operator()(std::ofstream& output_report) const
+  void operator()(std::ofstream& output_report) const
+  {
+    output_report << "#   Report: Orb   Value_avg   Gradients_avg   Laplacian_avg" << std::endl;
+    for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++)
     {
-        output_report
-            << "#   Report: Orb   Value_avg   Gradients_avg   Laplacian_avg"
-            << std::endl;
-        for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) {
-            auto one_over_nknots = static_cast<RealType>(1.0 / this->nknots);
-            output_report << "\t" << iorb << "    " << std::scientific
-                          << this->SPO_v_avg[iorb] * one_over_nknots << "   "
-                          << this->SPO_g_avg[iorb][0] * one_over_nknots << "   "
-                          << this->SPO_g_avg[iorb][1] * one_over_nknots << "   "
-                          << this->SPO_g_avg[iorb][2] * one_over_nknots << "   "
-                          << this->SPO_l_avg[iorb] * one_over_nknots
-                          << std::fixed << std::endl;
-        }
+      auto one_over_nknots = static_cast<RealType>(1.0 / this->nknots);
+      output_report << "\t" << iorb << "    " << std::scientific << this->SPO_v_avg[iorb] * one_over_nknots << "   "
+                    << this->SPO_g_avg[iorb][0] * one_over_nknots << "   " << this->SPO_g_avg[iorb][1] * one_over_nknots
+                    << "   " << this->SPO_g_avg[iorb][2] * one_over_nknots << "   "
+                    << this->SPO_l_avg[iorb] * one_over_nknots << std::fixed << std::endl;
     }
+  }
 };
 
-template <typename T>
-struct OutputReportMaker<std::complex<T>> :
-    OutputReportMakerBase<std::complex<T>>
+template<typename T>
+struct OutputReportMaker<std::complex<T>> : OutputReportMakerBase<std::complex<T>>
 {
-    using RealType = typename SPOSetT<T>::RealType;
+  using RealType = typename SPOSetT<T>::RealType;
 
-    void
-    operator()(std::ofstream& output_report) const
+  void operator()(std::ofstream& output_report) const
+  {
+    output_report << "#   Report: Orb   Value_avg I/R  Gradients_avg Laplacian_avg" << std::endl;
+    for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++)
     {
-        output_report
-            << "#   Report: Orb   Value_avg I/R  Gradients_avg Laplacian_avg"
-            << std::endl;
-        for (int iorb = 0; iorb < this->SPO_v_avg.size(); iorb++) {
-            auto one_over_nknots = static_cast<RealType>(1.0 / this->nknots);
-            output_report << "\t" << iorb << "    " << std::scientific
-                          << this->SPO_v_avg[iorb] * one_over_nknots << "   "
-                          << this->SPO_v_avg[iorb].imag() /
-                    this->SPO_v_avg[iorb].real()
-                          << "   " << this->SPO_g_avg[iorb][0] * one_over_nknots
-                          << "   " << this->SPO_g_avg[iorb][1] * one_over_nknots
-                          << "   " << this->SPO_g_avg[iorb][2] * one_over_nknots
-                          << "   " << this->SPO_l_avg[iorb] * one_over_nknots
-                          << std::fixed << std::endl;
-        }
+      auto one_over_nknots = static_cast<RealType>(1.0 / this->nknots);
+      output_report << "\t" << iorb << "    " << std::scientific << this->SPO_v_avg[iorb] * one_over_nknots << "   "
+                    << this->SPO_v_avg[iorb].imag() / this->SPO_v_avg[iorb].real() << "   "
+                    << this->SPO_g_avg[iorb][0] * one_over_nknots << "   " << this->SPO_g_avg[iorb][1] * one_over_nknots
+                    << "   " << this->SPO_g_avg[iorb][2] * one_over_nknots << "   "
+                    << this->SPO_l_avg[iorb] * one_over_nknots << std::fixed << std::endl;
     }
+  }
 };
 
 /** a scanner for all the SPO sets.
  */
-template <typename T>
+template<typename T>
 class SPOSetScannerT
 {
 public:
-    using PtclPool =
-        std::map<std::string, const std::unique_ptr<ParticleSetT<T>>>;
-    using SPOSetMap = typename SPOSetT<T>::SPOMap;
-    using RealType = typename SPOSetT<T>::RealType;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using HessVector = typename SPOSetT<T>::HessVector;
-
-    RealType
-    myfabs(RealType s)
-    {
-        return std::fabs(s);
-    }
-    template <typename U>
-    std::complex<U>
-    myfabs(std::complex<U>& s)
-    {
-        return std::complex<U>(myfabs(s.real()), myfabs(s.imag()));
-    }
-    template <typename U>
-    TinyVector<U, OHMMS_DIM>
-    myfabs(TinyVector<U, OHMMS_DIM>& s)
-    {
-        return TinyVector<U, OHMMS_DIM>(
-            myfabs(s[0]), myfabs(s[1]), myfabs(s[2]));
-    }
-
-    const SPOSetMap& sposets;
-    ParticleSetT<T>& target;
-    const PtclPool& ptcl_pool_;
-    ParticleSetT<T>* ions;
-
-    // construction/destruction
-    SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSetT<T>& targetPtcl,
-        const PtclPool& psets) :
-        sposets(sposets_in),
-        target(targetPtcl),
-        ptcl_pool_(psets),
-        ions(0){};
-    //~SPOSetScannerT(){};
-
-    // processing scanning
-    void
-    put(xmlNodePtr cur)
+  using PtclPool    = std::map<std::string, const std::unique_ptr<ParticleSetT<T>>>;
+  using SPOSetMap   = typename SPOSetT<T>::SPOMap;
+  using RealType    = typename SPOSetT<T>::RealType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using HessVector  = typename SPOSetT<T>::HessVector;
+
+  RealType myfabs(RealType s) { return std::fabs(s); }
+  template<typename U>
+  std::complex<U> myfabs(std::complex<U>& s)
+  {
+    return std::complex<U>(myfabs(s.real()), myfabs(s.imag()));
+  }
+  template<typename U>
+  TinyVector<U, OHMMS_DIM> myfabs(TinyVector<U, OHMMS_DIM>& s)
+  {
+    return TinyVector<U, OHMMS_DIM>(myfabs(s[0]), myfabs(s[1]), myfabs(s[2]));
+  }
+
+  const SPOSetMap& sposets;
+  ParticleSetT<T>& target;
+  const PtclPool& ptcl_pool_;
+  ParticleSetT<T>* ions;
+
+  // construction/destruction
+  SPOSetScannerT(const SPOSetMap& sposets_in, ParticleSetT<T>& targetPtcl, const PtclPool& psets)
+      : sposets(sposets_in), target(targetPtcl), ptcl_pool_(psets), ions(0){};
+  //~SPOSetScannerT(){};
+
+  // processing scanning
+  void put(xmlNodePtr cur)
+  {
+    app_log() << "Entering the SPO set scanner!" << std::endl;
+    // check in the source particle set and search for it in the pool.
+    std::string sourcePtcl("ion0");
+    OhmmsAttributeSet aAttrib;
+    aAttrib.add(sourcePtcl, "source");
+    aAttrib.put(cur);
+    auto pit(ptcl_pool_.find(sourcePtcl));
+    if (pit == ptcl_pool_.end())
+      app_log() << "Source particle set not found. Can not be used as "
+                   "reference point."
+                << std::endl;
+    else
+      ions = pit->second.get();
+
+    // scanning the SPO sets
+    xmlNodePtr cur_save = cur;
+    for (const auto& [name, sposet] : sposets)
     {
-        app_log() << "Entering the SPO set scanner!" << std::endl;
-        // check in the source particle set and search for it in the pool.
-        std::string sourcePtcl("ion0");
+      app_log() << "  Processing SPO " << sposet->getName() << std::endl;
+      // scanning the paths
+      cur = cur_save->children;
+      while (cur != NULL)
+      {
+        std::string trace_name("no name");
         OhmmsAttributeSet aAttrib;
-        aAttrib.add(sourcePtcl, "source");
+        aAttrib.add(trace_name, "name");
         aAttrib.put(cur);
-        auto pit(ptcl_pool_.find(sourcePtcl));
-        if (pit == ptcl_pool_.end())
-            app_log() << "Source particle set not found. Can not be used as "
-                         "reference point."
-                      << std::endl;
+        std::string cname(getNodeName(cur));
+        std::string prefix(sposet->getName() + "_" + cname + "_" + trace_name);
+        if (cname == "path")
+        {
+          app_log() << "    Scanning a " << cname << " called " << trace_name << " and writing to "
+                    << prefix + "_v/g/l/report.dat" << std::endl;
+          auto spo = sposet->makeClone();
+          scan_path(cur, *spo, prefix);
+        }
         else
-            ions = pit->second.get();
-
-        // scanning the SPO sets
-        xmlNodePtr cur_save = cur;
-        for (const auto& [name, sposet] : sposets) {
-            app_log() << "  Processing SPO " << sposet->getName() << std::endl;
-            // scanning the paths
-            cur = cur_save->children;
-            while (cur != NULL) {
-                std::string trace_name("no name");
-                OhmmsAttributeSet aAttrib;
-                aAttrib.add(trace_name, "name");
-                aAttrib.put(cur);
-                std::string cname(getNodeName(cur));
-                std::string prefix(
-                    sposet->getName() + "_" + cname + "_" + trace_name);
-                if (cname == "path") {
-                    app_log() << "    Scanning a " << cname << " called "
-                              << trace_name << " and writing to "
-                              << prefix + "_v/g/l/report.dat" << std::endl;
-                    auto spo = sposet->makeClone();
-                    scan_path(cur, *spo, prefix);
-                }
-                else {
-                    if (cname != "text" && cname != "comment")
-                        app_log() << "    Unknown type of scanning " << cname
-                                  << std::endl;
-                }
-                cur = cur->next;
-            }
+        {
+          if (cname != "text" && cname != "comment")
+            app_log() << "    Unknown type of scanning " << cname << std::endl;
         }
-        app_log() << "Exiting the SPO set scanner!" << std::endl << std::endl;
+        cur = cur->next;
+      }
     }
-
-    // scanning a path
-    void
-    scan_path(xmlNodePtr cur, SPOSetT<T>& sposet, std::string prefix)
+    app_log() << "Exiting the SPO set scanner!" << std::endl << std::endl;
+  }
+
+  // scanning a path
+  void scan_path(xmlNodePtr cur, SPOSetT<T>& sposet, std::string prefix)
+  {
+    std::string file_name;
+    file_name = prefix + "_v.dat";
+    std::ofstream output_v(file_name.c_str());
+    file_name = prefix + "_g.dat";
+    std::ofstream output_g(file_name.c_str());
+    file_name = prefix + "_l.dat";
+    std::ofstream output_l(file_name.c_str());
+    file_name = prefix + "_report.dat";
+    std::ofstream output_report(file_name.c_str());
+
+    int nknots(2);
+    int from_atom(-1);
+    int to_atom(-1);
+    TinyVector<double, OHMMS_DIM> from_pos(0.0, 0.0, 0.0);
+    TinyVector<double, OHMMS_DIM> to_pos(0.0, 0.0, 0.0);
+
+    OhmmsAttributeSet aAttrib;
+    aAttrib.add(nknots, "nknots");
+    aAttrib.add(from_atom, "from_atom");
+    aAttrib.add(to_atom, "to_atom");
+    aAttrib.add(from_pos, "from_pos");
+    aAttrib.add(to_pos, "to_pos");
+    aAttrib.put(cur);
+
+    // sanity check
+    if (nknots < 2)
+      nknots = 2;
+    // check out the reference atom coordinates
+    if (ions)
     {
-        std::string file_name;
-        file_name = prefix + "_v.dat";
-        std::ofstream output_v(file_name.c_str());
-        file_name = prefix + "_g.dat";
-        std::ofstream output_g(file_name.c_str());
-        file_name = prefix + "_l.dat";
-        std::ofstream output_l(file_name.c_str());
-        file_name = prefix + "_report.dat";
-        std::ofstream output_report(file_name.c_str());
-
-        int nknots(2);
-        int from_atom(-1);
-        int to_atom(-1);
-        TinyVector<double, OHMMS_DIM> from_pos(0.0, 0.0, 0.0);
-        TinyVector<double, OHMMS_DIM> to_pos(0.0, 0.0, 0.0);
-
-        OhmmsAttributeSet aAttrib;
-        aAttrib.add(nknots, "nknots");
-        aAttrib.add(from_atom, "from_atom");
-        aAttrib.add(to_atom, "to_atom");
-        aAttrib.add(from_pos, "from_pos");
-        aAttrib.add(to_pos, "to_pos");
-        aAttrib.put(cur);
-
-        // sanity check
-        if (nknots < 2)
-            nknots = 2;
-        // check out the reference atom coordinates
-        if (ions) {
-            if (from_atom >= 0 && from_atom < ions->R.size())
-                from_pos = ions->R[from_atom];
-            if (to_atom >= 0 && to_atom < ions->R.size())
-                to_pos = ions->R[to_atom];
-        }
+      if (from_atom >= 0 && from_atom < ions->R.size())
+        from_pos = ions->R[from_atom];
+      if (to_atom >= 0 && to_atom < ions->R.size())
+        to_pos = ions->R[to_atom];
+    }
 
-        // prepare a fake particle set
-        ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg;
-        GradVector SPO_g, SPO_g_avg;
-        int OrbitalSize(sposet.size());
-        SPO_v.resize(OrbitalSize);
-        SPO_g.resize(OrbitalSize);
-        SPO_l.resize(OrbitalSize);
-        SPO_v_avg.resize(OrbitalSize);
-        SPO_g_avg.resize(OrbitalSize);
-        SPO_l_avg.resize(OrbitalSize);
-        SPO_v_avg = 0.0;
-        SPO_g_avg = 0.0;
-        SPO_l_avg = 0.0;
-        double Delta = 1.0 / (nknots - 1);
-        int elec_count = target.R.size();
-        auto R_saved = target.R;
-        typename ParticleSetT<T>::SingleParticlePos zero_pos(0.0, 0.0, 0.0);
-        for (int icount = 0, ind = 0; icount < nknots; icount++, ind++) {
-            if (ind == elec_count)
-                ind = 0;
-            target.R[ind][0] =
-                (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0];
-            target.R[ind][1] =
-                (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1];
-            target.R[ind][2] =
-                (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2];
-            target.makeMove(ind, zero_pos);
-            sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l);
-            std::ostringstream o;
-            o << "x_y_z  " << std::fixed << std::setprecision(7)
-              << target.R[ind][0] << " " << target.R[ind][1] << " "
-              << target.R[ind][2];
-            output_v << o.str() << " : " << std::scientific
-                     << std::setprecision(12);
-            output_g << o.str() << " : " << std::scientific
-                     << std::setprecision(12);
-            output_l << o.str() << " : " << std::scientific
-                     << std::setprecision(12);
-            for (int iorb = 0; iorb < OrbitalSize; iorb++) {
-                SPO_v_avg[iorb] += myfabs(SPO_v[iorb]);
-                SPO_g_avg[iorb] += myfabs(SPO_g[iorb]);
-                SPO_l_avg[iorb] += myfabs(SPO_l[iorb]);
-                output_v << SPO_v[iorb] << "  ";
-                output_g << SPO_g[iorb][0] << "  " << SPO_g[iorb][1] << "  "
-                         << SPO_g[iorb][2] << "  ";
-                output_l << SPO_l[iorb] << "  ";
-            }
-            output_v << std::endl;
-            output_g << std::endl;
-            output_l << std::endl;
-        }
-        // restore the whole target.
-        target.R = R_saved;
-        target.update();
-        OutputReportMaker<T>{SPO_v_avg, SPO_l_avg, SPO_g_avg, nknots}(
-            output_report);
-        output_v.close();
-        output_g.close();
-        output_l.close();
-        output_report.close();
+    // prepare a fake particle set
+    ValueVector SPO_v, SPO_l, SPO_v_avg, SPO_l_avg;
+    GradVector SPO_g, SPO_g_avg;
+    int OrbitalSize(sposet.size());
+    SPO_v.resize(OrbitalSize);
+    SPO_g.resize(OrbitalSize);
+    SPO_l.resize(OrbitalSize);
+    SPO_v_avg.resize(OrbitalSize);
+    SPO_g_avg.resize(OrbitalSize);
+    SPO_l_avg.resize(OrbitalSize);
+    SPO_v_avg      = 0.0;
+    SPO_g_avg      = 0.0;
+    SPO_l_avg      = 0.0;
+    double Delta   = 1.0 / (nknots - 1);
+    int elec_count = target.R.size();
+    auto R_saved   = target.R;
+    typename ParticleSetT<T>::SingleParticlePos zero_pos(0.0, 0.0, 0.0);
+    for (int icount = 0, ind = 0; icount < nknots; icount++, ind++)
+    {
+      if (ind == elec_count)
+        ind = 0;
+      target.R[ind][0] = (to_pos[0] - from_pos[0]) * Delta * icount + from_pos[0];
+      target.R[ind][1] = (to_pos[1] - from_pos[1]) * Delta * icount + from_pos[1];
+      target.R[ind][2] = (to_pos[2] - from_pos[2]) * Delta * icount + from_pos[2];
+      target.makeMove(ind, zero_pos);
+      sposet.evaluateVGL(target, ind, SPO_v, SPO_g, SPO_l);
+      std::ostringstream o;
+      o << "x_y_z  " << std::fixed << std::setprecision(7) << target.R[ind][0] << " " << target.R[ind][1] << " "
+        << target.R[ind][2];
+      output_v << o.str() << " : " << std::scientific << std::setprecision(12);
+      output_g << o.str() << " : " << std::scientific << std::setprecision(12);
+      output_l << o.str() << " : " << std::scientific << std::setprecision(12);
+      for (int iorb = 0; iorb < OrbitalSize; iorb++)
+      {
+        SPO_v_avg[iorb] += myfabs(SPO_v[iorb]);
+        SPO_g_avg[iorb] += myfabs(SPO_g[iorb]);
+        SPO_l_avg[iorb] += myfabs(SPO_l[iorb]);
+        output_v << SPO_v[iorb] << "  ";
+        output_g << SPO_g[iorb][0] << "  " << SPO_g[iorb][1] << "  " << SPO_g[iorb][2] << "  ";
+        output_l << SPO_l[iorb] << "  ";
+      }
+      output_v << std::endl;
+      output_g << std::endl;
+      output_l << std::endl;
     }
+    // restore the whole target.
+    target.R = R_saved;
+    target.update();
+    OutputReportMaker<T>{SPO_v_avg, SPO_l_avg, SPO_g_avg, nknots}(output_report);
+    output_v.close();
+    output_g.close();
+    output_l.close();
+    output_report.close();
+  }
 };
 } // namespace qmcplusplus
 
diff --git a/src/QMCWaveFunctions/SPOSetT.cpp b/src/QMCWaveFunctions/SPOSetT.cpp
index 587156abf91..b3f85267616 100644
--- a/src/QMCWaveFunctions/SPOSetT.cpp
+++ b/src/QMCWaveFunctions/SPOSetT.cpp
@@ -4,22 +4,17 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
-// Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-//                    National Laboratory Raymond Clay III,
-//                    j.k.rofling@gmail.com, Lawrence Livermore National
-//                    Laboratory Jeremy McMinnis, jmcminis@gmail.com, University
-//                    of Illinois at Urbana-Champaign Jaron T. Krogel,
-//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Jeongnim
-//                    Kim, jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Ying Wai Li, yingwaili@ornl.gov, Oak
-//                    Ridge National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory William
-//                    F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "SPOSetT.h"
diff --git a/src/QMCWaveFunctions/SPOSetT.h b/src/QMCWaveFunctions/SPOSetT.h
index 4900c0499fd..b837056f540 100644
--- a/src/QMCWaveFunctions/SPOSetT.h
+++ b/src/QMCWaveFunctions/SPOSetT.h
@@ -4,22 +4,17 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at
-// Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
-//                    National Laboratory Raymond Clay III,
-//                    j.k.rofling@gmail.com, Lawrence Livermore National
-//                    Laboratory Jeremy McMinnis, jmcminis@gmail.com, University
-//                    of Illinois at Urbana-Champaign Jaron T. Krogel,
-//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Jeongnim
-//                    Kim, jeongnim.kim@gmail.com, University of Illinois at
-//                    Urbana-Champaign Ying Wai Li, yingwaili@ornl.gov, Oak
-//                    Ridge National Laboratory Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory William
-//                    F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Ying Wai Li, yingwaili@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    William F. Godoy, godoywf@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_SPOSETT_H
@@ -37,7 +32,7 @@ namespace qmcplusplus
 {
 class ResourceCollection;
 
-template <class T>
+template<class T>
 class SPOSetT;
 namespace testing
 {
@@ -53,217 +48,175 @@ OptVariablesTypeT<std::complex<double>>& getMyVars(SPOSetT<std::complex<double>>
  * a number of single-particle orbitals with capabilities of evaluating \f$
  * \psi_j({\bf r}_i)\f$
  */
-template <class T>
+template<class T>
 class SPOSetT : public QMCTraits
 {
 public:
-    using ValueVector = typename OrbitalSetTraits<T>::ValueVector;
-    using ValueMatrix = typename OrbitalSetTraits<T>::ValueMatrix;
-    using GradVector = typename OrbitalSetTraits<T>::GradVector;
-    using GradMatrix = typename OrbitalSetTraits<T>::GradMatrix;
-    using GradType = TinyVector<T, DIM>;
-    using HessVector = typename OrbitalSetTraits<T>::HessVector;
-    using HessMatrix = typename OrbitalSetTraits<T>::HessMatrix;
-    using GGGVector = typename OrbitalSetTraits<T>::GradHessVector;
-    using GGGMatrix = typename OrbitalSetTraits<T>::GradHessMatrix;
-    using SPOMap =
-        std::map<std::string, const std::unique_ptr<const SPOSetT<T>>>;
-    using OffloadMWVGLArray =
-        Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
-    using OffloadMWVArray =
-        Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
-    using PosType = typename OrbitalSetTraits<T>::PosType;
-    using RealType = typename OrbitalSetTraits<T>::RealType;
-    using ComplexType = typename OrbitalSetTraits<T>::ComplexType;
-    using ValueType = typename OrbitalSetTraits<T>::ValueType;
-    using FullRealType = typename OrbitalSetTraits<double>::RealType;
-    using FullValueType = typename OrbitalSetTraits<T>::FullValueType;
-    ;
-    template <typename DT>
-    using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT>>;
-
-    /** constructor */
-    SPOSetT<T>(const std::string& my_name);
-
-    /** destructor
+  using ValueVector       = typename OrbitalSetTraits<T>::ValueVector;
+  using ValueMatrix       = typename OrbitalSetTraits<T>::ValueMatrix;
+  using GradVector        = typename OrbitalSetTraits<T>::GradVector;
+  using GradMatrix        = typename OrbitalSetTraits<T>::GradMatrix;
+  using GradType          = TinyVector<T, DIM>;
+  using HessVector        = typename OrbitalSetTraits<T>::HessVector;
+  using HessMatrix        = typename OrbitalSetTraits<T>::HessMatrix;
+  using GGGVector         = typename OrbitalSetTraits<T>::GradHessVector;
+  using GGGMatrix         = typename OrbitalSetTraits<T>::GradHessMatrix;
+  using SPOMap            = std::map<std::string, const std::unique_ptr<const SPOSetT<T>>>;
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  using OffloadMWVArray   = Array<T, 2, OffloadPinnedAllocator<T>>; // [walker, Orbs]
+  using PosType           = typename OrbitalSetTraits<T>::PosType;
+  using RealType          = typename OrbitalSetTraits<T>::RealType;
+  using ComplexType       = typename OrbitalSetTraits<T>::ComplexType;
+  using ValueType         = typename OrbitalSetTraits<T>::ValueType;
+  using FullRealType      = typename OrbitalSetTraits<double>::RealType;
+  using FullValueType     = typename OrbitalSetTraits<T>::FullValueType;
+  ;
+  template<typename DT>
+  using OffloadMatrix = Matrix<DT, OffloadPinnedAllocator<DT>>;
+
+  /** constructor */
+  SPOSetT<T>(const std::string& my_name);
+
+  /** destructor
      *
      * Derived class destructor needs to pay extra attention to freeing memory
      * shared among clones of SPOSet.
      */
-    virtual ~SPOSetT<T>() = default;
+  virtual ~SPOSetT<T>() = default;
 
-    /** return the size of the orbital set
+  /** return the size of the orbital set
      * Ye: this needs to be replaced by getOrbitalSetSize();
      */
-    inline int
-    size() const
-    {
-        return OrbitalSetSize;
-    }
+  inline int size() const { return OrbitalSetSize; }
 
-    /** print basic SPOSet information
+  /** print basic SPOSet information
      */
-    void
-    basic_report(const std::string& pad = "") const;
+  void basic_report(const std::string& pad = "") const;
 
-    /** print SPOSet information
+  /** print SPOSet information
      */
-    virtual void
-    report(const std::string& pad = "") const
-    {
-        basic_report(pad);
-    }
+  virtual void report(const std::string& pad = "") const { basic_report(pad); }
 
-    /** return the size of the orbitals
+  /** return the size of the orbitals
      */
-    inline int
-    getOrbitalSetSize() const
-    {
-        return OrbitalSetSize;
-    }
+  inline int getOrbitalSetSize() const { return OrbitalSetSize; }
 
-    /// Query if this SPOSet is optimizable
-    virtual bool
-    isOptimizable() const
-    {
-        return false;
-    }
+  /// Query if this SPOSet is optimizable
+  virtual bool isOptimizable() const { return false; }
 
-    /** extract underlying OptimizableObject references
+  /** extract underlying OptimizableObject references
      * @param opt_obj_refs aggregated list of optimizable object references
      */
-    virtual void
-    extractOptimizableObjectRefs(UniqueOptObjRefsT<T>& opt_obj_refs);
+  virtual void extractOptimizableObjectRefs(UniqueOptObjRefsT<T>& opt_obj_refs);
 
-    /** check out variational optimizable variables
+  /** check out variational optimizable variables
      * @param active a super set of optimizable variables
      */
-    virtual void checkOutVariables(const OptVariablesTypeT<T>& active);
+  virtual void checkOutVariables(const OptVariablesTypeT<T>& active);
 
-    /// Query if this SPOSet uses OpenMP offload
-    virtual bool
-    isOMPoffload() const
-    {
-        return false;
-    }
+  /// Query if this SPOSet uses OpenMP offload
+  virtual bool isOMPoffload() const { return false; }
 
-    /** Query if this SPOSet has an explicit ion dependence. returns true if it
+  /** Query if this SPOSet has an explicit ion dependence. returns true if it
      * does.
      */
-    virtual bool
-    hasIonDerivs() const
-    {
-        return false;
-    }
-
-    /// check a few key parameters before putting the SPO into a determinant
-    virtual void
-    checkObject() const
-    {
-    }
-
-    /// return true if this SPOSet can be wrappered by RotatedSPO
-    virtual bool
-    isRotationSupported() const
-    {
-        return false;
-    }
-    /// store parameters before getting destroyed by rotation.
-    virtual void
-    storeParamsBeforeRotation()
-    {
-    }
-    /// apply rotation to all the orbitals
-    virtual void
-    applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false);
-
-    /// Parameter derivatives of the wavefunction and the Laplacian of the
-    /// wavefunction
-    virtual void evaluateDerivatives(ParticleSetT<T>& P,
+  virtual bool hasIonDerivs() const { return false; }
+
+  /// check a few key parameters before putting the SPO into a determinant
+  virtual void checkObject() const {}
+
+  /// return true if this SPOSet can be wrapped by RotatedSPO
+  virtual bool isRotationSupported() const { return false; }
+  /// store parameters before getting destroyed by rotation.
+  virtual void storeParamsBeforeRotation() {}
+  /// apply rotation to all the orbitals
+  virtual void applyRotation(const ValueMatrix& rot_mat, bool use_stored_copy = false);
+
+  /// Parameter derivatives of the wavefunction and the Laplacian of the
+  /// wavefunction
+  virtual void evaluateDerivatives(ParticleSetT<T>& P,
+                                   const OptVariablesTypeT<T>& optvars,
+                                   Vector<T>& dlogpsi,
+                                   Vector<T>& dhpsioverpsi,
+                                   const int& FirstIndex,
+                                   const int& LastIndex);
+
+  /// Parameter derivatives of the wavefunction
+  virtual void evaluateDerivativesWF(ParticleSetT<T>& P,
                                      const OptVariablesTypeT<T>& optvars,
                                      Vector<T>& dlogpsi,
-                                     Vector<T>& dhpsioverpsi,
-                                     const int& FirstIndex,
-                                     const int& LastIndex);
-
-    /// Parameter derivatives of the wavefunction
-    virtual void evaluateDerivativesWF(ParticleSetT<T>& P,
-                                       const OptVariablesTypeT<T>& optvars,
-                                       Vector<T>& dlogpsi,
-                                       int FirstIndex,
-                                       int LastIndex);
-
-    /** Evaluate the derivative of the optimized orbitals with respect to the
+                                     int FirstIndex,
+                                     int LastIndex);
+
+  /** Evaluate the derivative of the optimized orbitals with respect to the
+     * parameters this is used only for MSD, to be refined for better serving
+     * both single and multi SD
+     */
+  virtual void evaluateDerivatives(ParticleSetT<T>& P,
+                                   const OptVariablesTypeT<T>& optvars,
+                                   Vector<T>& dlogpsi,
+                                   Vector<T>& dhpsioverpsi,
+                                   const T& psiCurrent,
+                                   const std::vector<T>& Coeff,
+                                   const std::vector<size_t>& C2node_up,
+                                   const std::vector<size_t>& C2node_dn,
+                                   const ValueVector& detValues_up,
+                                   const ValueVector& detValues_dn,
+                                   const GradMatrix& grads_up,
+                                   const GradMatrix& grads_dn,
+                                   const ValueMatrix& lapls_up,
+                                   const ValueMatrix& lapls_dn,
+                                   const ValueMatrix& M_up,
+                                   const ValueMatrix& M_dn,
+                                   const ValueMatrix& Minv_up,
+                                   const ValueMatrix& Minv_dn,
+                                   const GradMatrix& B_grad,
+                                   const ValueMatrix& B_lapl,
+                                   const std::vector<int>& detData_up,
+                                   const size_t N1,
+                                   const size_t N2,
+                                   const size_t NP1,
+                                   const size_t NP2,
+                                   const std::vector<std::vector<int>>& lookup_tbl);
+
+  /** Evaluate the derivative of the optimized orbitals with respect to the
      * parameters this is used only for MSD, to be refined for better serving
      * both single and multi SD
      */
-    virtual void evaluateDerivatives(ParticleSetT<T>& P,
+  virtual void evaluateDerivativesWF(ParticleSetT<T>& P,
                                      const OptVariablesTypeT<T>& optvars,
-                                     Vector<T>& dlogpsi,
-                                     Vector<T>& dhpsioverpsi,
-                                     const T& psiCurrent,
+                                     Vector<ValueType>& dlogpsi,
+                                     const FullValueType& psiCurrent,
                                      const std::vector<T>& Coeff,
                                      const std::vector<size_t>& C2node_up,
                                      const std::vector<size_t>& C2node_dn,
                                      const ValueVector& detValues_up,
                                      const ValueVector& detValues_dn,
-                                     const GradMatrix& grads_up,
-                                     const GradMatrix& grads_dn,
-                                     const ValueMatrix& lapls_up,
-                                     const ValueMatrix& lapls_dn,
                                      const ValueMatrix& M_up,
                                      const ValueMatrix& M_dn,
                                      const ValueMatrix& Minv_up,
                                      const ValueMatrix& Minv_dn,
-                                     const GradMatrix& B_grad,
-                                     const ValueMatrix& B_lapl,
                                      const std::vector<int>& detData_up,
-                                     const size_t N1,
-                                     const size_t N2,
-                                     const size_t NP1,
-                                     const size_t NP2,
                                      const std::vector<std::vector<int>>& lookup_tbl);
 
-    /** Evaluate the derivative of the optimized orbitals with respect to the
-     * parameters this is used only for MSD, to be refined for better serving
-     * both single and multi SD
-     */
-    virtual void evaluateDerivativesWF(ParticleSetT<T>& P,
-                                       const OptVariablesTypeT<T>& optvars,
-                                       Vector<ValueType>& dlogpsi,
-                                       const FullValueType& psiCurrent,
-                                       const std::vector<T>& Coeff,
-                                       const std::vector<size_t>& C2node_up,
-                                       const std::vector<size_t>& C2node_dn,
-                                       const ValueVector& detValues_up,
-                                       const ValueVector& detValues_dn,
-                                       const ValueMatrix& M_up,
-                                       const ValueMatrix& M_dn,
-                                       const ValueMatrix& Minv_up,
-                                       const ValueMatrix& Minv_dn,
-                                       const std::vector<int>& detData_up,
-                                       const std::vector<std::vector<int>>& lookup_tbl);
-
-    /** set the OrbitalSetSize
+  /** set the OrbitalSetSize
      * @param norbs number of single-particle orbitals
      * Ye: I prefer to remove this interface in the future. SPOSet builders need
      * to handle the size correctly. It doesn't make sense allowing to set the
      * value at any place in the code.
      * @TODO make it purely virtual
      */
-    virtual void
-    setOrbitalSetSize(int norbs){};
+  virtual void setOrbitalSetSize(int norbs){};
 
-    /** evaluate the values of this single-particle orbital set
+  /** evaluate the values of this single-particle orbital set
      * @param P current ParticleSet
      * @param iat active particle
      * @param psi values of the SPO
      * @TODO make it purely virtual
      */
-    virtual void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi){};
+  virtual void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi){};
 
-    /** evaluate determinant ratios for virtual moves, e.g., sphere move for
+  /** evaluate determinant ratios for virtual moves, e.g., sphere move for
      * nonlocalPP
      * @param VP virtual particle set
      * @param psi values of the SPO, used as a scratch space if needed
@@ -271,22 +224,23 @@ class SPOSetT : public QMCTraits
      * particle moved virtually
      * @param ratios return determinant ratios
      */
-    virtual void
-    evaluateDetRatios(const VirtualParticleSetT<T>& VP, ValueVector& psi,
-        const ValueVector& psiinv, std::vector<T>& ratios);
-
-    /// Determinant ratios and parameter derivatives of the wavefunction for
-    /// virtual moves
-    virtual void evaluateDerivRatios(const VirtualParticleSetT<T>& VP,
-                                     const OptVariablesTypeT<T>& optvars,
-                                     ValueVector& psi,
-                                     const ValueVector& psiinv,
-                                     std::vector<T>& ratios,
-                                     Matrix<T>& dratios,
-                                     int FirstIndex,
-                                     int LastIndex);
-
-    /** evaluate determinant ratios for virtual moves, e.g., sphere move for
+  virtual void evaluateDetRatios(const VirtualParticleSetT<T>& VP,
+                                 ValueVector& psi,
+                                 const ValueVector& psiinv,
+                                 std::vector<T>& ratios);
+
+  /// Determinant ratios and parameter derivatives of the wavefunction for
+  /// virtual moves
+  virtual void evaluateDerivRatios(const VirtualParticleSetT<T>& VP,
+                                   const OptVariablesTypeT<T>& optvars,
+                                   ValueVector& psi,
+                                   const ValueVector& psiinv,
+                                   std::vector<T>& ratios,
+                                   Matrix<T>& dratios,
+                                   int FirstIndex,
+                                   int LastIndex);
+
+  /** evaluate determinant ratios for virtual moves, e.g., sphere move for
      * nonlocalPP, of multiple walkers
      * @param spo_list the list of SPOSet pointers in a walker batch
      * @param vp_list a list of virtual particle sets in a walker batch
@@ -296,14 +250,13 @@ class SPOSetT : public QMCTraits
      * matrix corresponding to the particles moved virtually
      * @param ratios_list a list of returning determinant ratios
      */
-    virtual void
-    mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
-        const RefVector<ValueVector>& psi_list,
-        const std::vector<const T*>& invRow_ptr_list,
-        std::vector<std::vector<T>>& ratios_list) const;
+  virtual void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                    const RefVectorWithLeader<const VirtualParticleSetT<T>>& vp_list,
+                                    const RefVector<ValueVector>& psi_list,
+                                    const std::vector<const T*>& invRow_ptr_list,
+                                    std::vector<std::vector<T>>& ratios_list) const;
 
-    /** evaluate the values, gradients and laplacians of this single-particle
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital set
      * @param P current ParticleSet
      * @param iat active particle
@@ -312,11 +265,9 @@ class SPOSetT : public QMCTraits
      * @param d2psi laplacians of the SPO
      * @TODO make this purely virtual
      */
-    virtual void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi){};
+  virtual void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi){};
 
-    /** evaluate the values, gradients and laplacians and spin gradient of this
+  /** evaluate the values, gradients and laplacians and spin gradient of this
      * single-particle orbital set
      * @param P current ParticleSet
      * @param iat active particle
@@ -325,23 +276,26 @@ class SPOSetT : public QMCTraits
      * @param d2psi laplacians of the SPO
      * @param dspin spin gradients of the SPO
      */
-    virtual void
-    evaluateVGL_spin(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin);
+  virtual void evaluateVGL_spin(const ParticleSetT<T>& P,
+                                int iat,
+                                ValueVector& psi,
+                                GradVector& dpsi,
+                                ValueVector& d2psi,
+                                ValueVector& dspin);
 
-    /** evaluate the values this single-particle orbital sets of multiple
+  /** evaluate the values of this single-particle orbital set for multiple
      * walkers
      * @param spo_list the list of SPOSet pointers in a walker batch
      * @param P_list the list of ParticleSet pointers in a walker batch
      * @param iat active particle
      * @param psi_v_list the list of value vector pointers in a walker batch
      */
-    virtual void
-    mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list) const;
+  virtual void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                int iat,
+                                const RefVector<ValueVector>& psi_v_list) const;
 
-    /** evaluate the values, gradients and laplacians of this single-particle
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital sets of multiple walkers
      * @param spo_list the list of SPOSet pointers in a walker batch
      * @param P_list the list of ParticleSet pointers in a walker batch
@@ -351,14 +305,14 @@ class SPOSetT : public QMCTraits
      * @param d2psi_v_list the list of laplacian vector pointers in a walker
      * batch
      */
-    virtual void
-    mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list) const;
+  virtual void mw_evaluateVGL(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list) const;
 
-    /** evaluate the values, gradients and laplacians and spin gradient of this
+  /** evaluate the values, gradients and laplacians and spin gradient of this
      * single-particle orbital sets of multiple walkers
      * @param spo_list the list of SPOSet pointers in a walker batch
      * @param P_list the list of ParticleSet pointers in a walker batch
@@ -370,15 +324,15 @@ class SPOSetT : public QMCTraits
      * @param mw_dspin is a dual matrix of spin gradients [nw][norb]
      * Note that the device side of mw_dspin is up to date
      */
-    virtual void
-    mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list,
-        OffloadMatrix<ComplexType>& mw_dspin) const;
+  virtual void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                      const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                      int iat,
+                                      const RefVector<ValueVector>& psi_v_list,
+                                      const RefVector<GradVector>& dpsi_v_list,
+                                      const RefVector<ValueVector>& d2psi_v_list,
+                                      OffloadMatrix<ComplexType>& mw_dspin) const;
 
-    /** evaluate the values, gradients and laplacians of this single-particle
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital sets and determinant ratio and grads of multiple walkers. Device
      * data of phi_vgl_v must be up-to-date upon return
      * @param spo_list the list of SPOSet pointers in a walker batch
@@ -388,15 +342,15 @@ class SPOSetT : public QMCTraits
      * walkers
      * @param psi_ratio_grads_v determinant ratio and grads of all the walkers
      */
-    virtual void
-    mw_evaluateVGLandDetRatioGrads(
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const std::vector<const T*>& invRow_ptr_list,
-        OffloadMWVGLArray& phi_vgl_v, std::vector<T>& ratios,
-        std::vector<GradType>& grads) const;
+  virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                              int iat,
+                                              const std::vector<const T*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<T>& ratios,
+                                              std::vector<GradType>& grads) const;
 
-    /** evaluate the values, gradients and laplacians of this single-particle
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital sets and determinant ratio and grads of multiple walkers. Device
      * data of phi_vgl_v must be up-to-date upon return. Includes spin gradients
      * @param spo_list the list of SPOSet pointers in a walker batch
@@ -408,15 +362,16 @@ class SPOSetT : public QMCTraits
      * @param grads, spatial gradients of all walkers
      * @param spingrads, spin gradients of all walkers
      */
-    virtual void
-    mw_evaluateVGLandDetRatioGradsWithSpin(
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const std::vector<const T*>& invRow_ptr_list,
-        OffloadMWVGLArray& phi_vgl_v, std::vector<T>& ratios,
-        std::vector<GradType>& grads, std::vector<T>& spingrads) const;
+  virtual void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                      const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                                      int iat,
+                                                      const std::vector<const T*>& invRow_ptr_list,
+                                                      OffloadMWVGLArray& phi_vgl_v,
+                                                      std::vector<T>& ratios,
+                                                      std::vector<GradType>& grads,
+                                                      std::vector<T>& spingrads) const;
 
-    /** evaluate the values, gradients and hessians of this single-particle
+  /** evaluate the values, gradients and hessians of this single-particle
      * orbital set
      * @param P current ParticleSet
      * @param iat active particle
@@ -424,11 +379,13 @@ class SPOSetT : public QMCTraits
      * @param dpsi gradients of the SPO
      * @param grad_grad_psi hessians of the SPO
      */
-    virtual void
-    evaluateVGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi);
+  virtual void evaluateVGH(const ParticleSetT<T>& P,
+                           int iat,
+                           ValueVector& psi,
+                           GradVector& dpsi,
+                           HessVector& grad_grad_psi);
 
-    /** evaluate the values, gradients, hessians, and grad hessians of this
+  /** evaluate the values, gradients, hessians, and grad hessians of this
      * single-particle orbital set
      * @param P current ParticleSet
      * @param iat active particle
@@ -437,31 +394,29 @@ class SPOSetT : public QMCTraits
      * @param grad_grad_psi hessians of the SPO
      * @param grad_grad_grad_psi grad hessians of the SPO
      */
-    virtual void
-    evaluateVGHGH(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, HessVector& grad_grad_psi,
-        GGGVector& grad_grad_grad_psi);
+  virtual void evaluateVGHGH(const ParticleSetT<T>& P,
+                             int iat,
+                             ValueVector& psi,
+                             GradVector& dpsi,
+                             HessVector& grad_grad_psi,
+                             GGGVector& grad_grad_grad_psi);
 
-    /** evaluate the values of this single-particle orbital set
+  /** evaluate the values of this single-particle orbital set
      * @param P current ParticleSet
      * @param iat active particle
      * @param psi values of the SPO
      */
-    virtual void
-    evaluate_spin(
-        const ParticleSetT<T>& P, int iat, ValueVector& psi, ValueVector& dpsi);
+  virtual void evaluate_spin(const ParticleSetT<T>& P, int iat, ValueVector& psi, ValueVector& dpsi);
 
-    /** evaluate the third derivatives of this single-particle orbital set
+  /** evaluate the third derivatives of this single-particle orbital set
      * @param P current ParticleSet
      * @param first first particle
      * @param last last particle
      * @param grad_grad_grad_logdet third derivatives of the SPO
      */
-    virtual void
-    evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last,
-        GGGMatrix& grad_grad_grad_logdet);
+  virtual void evaluateThirdDeriv(const ParticleSetT<T>& P, int first, int last, GGGMatrix& grad_grad_grad_logdet);
 
-    /** evaluate the values, gradients and laplacians of this single-particle
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital for [first,last) particles
      * @param[in] P current ParticleSet
      * @param[in] first starting index of the particles
@@ -471,11 +426,14 @@ class SPOSetT : public QMCTraits
      * @param[out] d2logdet laplacians
      * @TODO make this pure virtual
      */
-    virtual void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet){};
+  virtual void evaluate_notranspose(const ParticleSetT<T>& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    ValueMatrix& d2logdet){};
 
-    /** evaluate the values, gradients and laplacians of this single-particle
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital for [first,last) particles, including the spin gradient
      * @param P current ParticleSet
      * @param first starting index of the particles
@@ -488,19 +446,23 @@ class SPOSetT : public QMCTraits
      * default implementation will abort for all SPOSets except SpinorSet
      *
      */
-    virtual void
-    evaluate_notranspose_spin(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet,
-        ValueMatrix& dspinlogdet);
-
-    virtual void
-    mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
-        const RefVector<ValueMatrix>& logdet_list,
-        const RefVector<GradMatrix>& dlogdet_list,
-        const RefVector<ValueMatrix>& d2logdet_list) const;
-
-    /** evaluate the values, gradients and hessians of this single-particle
+  virtual void evaluate_notranspose_spin(const ParticleSetT<T>& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         ValueMatrix& d2logdet,
+                                         ValueMatrix& dspinlogdet);
+
+  virtual void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                       const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                       int first,
+                                       int last,
+                                       const RefVector<ValueMatrix>& logdet_list,
+                                       const RefVector<GradMatrix>& dlogdet_list,
+                                       const RefVector<ValueMatrix>& d2logdet_list) const;
+
+  /** evaluate the values, gradients and hessians of this single-particle
      * orbital for [first,last) particles
      * @param P current ParticleSet
      * @param first starting index of the particles
@@ -510,11 +472,14 @@ class SPOSetT : public QMCTraits
      * @param grad_grad_logdet hessians
      *
      */
-    virtual void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet);
+  virtual void evaluate_notranspose(const ParticleSetT<T>& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    HessMatrix& grad_grad_logdet);
 
-    /** evaluate the values, gradients, hessians and third derivatives of this
+  /** evaluate the values, gradients, hessians and third derivatives of this
      * single-particle orbital for [first,last) particles
      * @param P current ParticleSet
      * @param first starting index of the particles
@@ -525,12 +490,15 @@ class SPOSetT : public QMCTraits
      * @param grad_grad_grad_logdet third derivatives
      *
      */
-    virtual void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, HessMatrix& grad_grad_logdet,
-        GGGMatrix& grad_grad_grad_logdet);
+  virtual void evaluate_notranspose(const ParticleSetT<T>& P,
+                                    int first,
+                                    int last,
+                                    ValueMatrix& logdet,
+                                    GradMatrix& dlogdet,
+                                    HessMatrix& grad_grad_logdet,
+                                    GGGMatrix& grad_grad_grad_logdet);
 
-    /** evaluate the gradients of this single-particle orbital
+  /** evaluate the gradients of this single-particle orbital
      *  for [first,last) target particles with respect to the given source
      * particle
      * @param P current ParticleSet
@@ -540,11 +508,14 @@ class SPOSetT : public QMCTraits
      * @param gradphi gradients
      *
      */
-    virtual void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src, GradMatrix& gradphi);
+  virtual void evaluateGradSource(const ParticleSetT<T>& P,
+                                  int first,
+                                  int last,
+                                  const ParticleSetT<T>& source,
+                                  int iat_src,
+                                  GradMatrix& gradphi);
 
-    /** evaluate the gradients of values, gradients, laplacians of this
+  /** evaluate the gradients of values, gradients, laplacians of this
      * single-particle orbital for [first,last) target particles with respect to
      * the given source particle
      * @param P current ParticleSet
@@ -556,12 +527,16 @@ class SPOSetT : public QMCTraits
      * @param grad_lapl_phi gradients of laplacians
      *
      */
-    virtual void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src, GradMatrix& grad_phi,
-        HessMatrix& grad_grad_phi, GradMatrix& grad_lapl_phi);
+  virtual void evaluateGradSource(const ParticleSetT<T>& P,
+                                  int first,
+                                  int last,
+                                  const ParticleSetT<T>& source,
+                                  int iat_src,
+                                  GradMatrix& grad_phi,
+                                  HessMatrix& grad_grad_phi,
+                                  GradMatrix& grad_lapl_phi);
 
-    /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r.
+  /** @brief Returns a row of d/dR_iat phi_j(r) evaluated at position r.
      *
      *  @param[in] P particle set.
      *  @param[in] iel The electron at which to evaluate phi(r_iel)
@@ -570,96 +545,67 @@ class SPOSetT : public QMCTraits
      *  @param[in,out] gradphi Vector of d/dR_iat phi_j(r).
      *  @return Void
      */
-    virtual void
-    evaluateGradSourceRow(const ParticleSetT<T>& P, int iel,
-        const ParticleSetT<T>& source, int iat_src, GradVector& gradphi);
+  virtual void evaluateGradSourceRow(const ParticleSetT<T>& P,
+                                     int iel,
+                                     const ParticleSetT<T>& source,
+                                     int iat_src,
+                                     GradVector& gradphi);
 
-    /** access the k point related to the given orbital */
-    virtual PosType
-    get_k(int orb)
-    {
-        return PosType();
-    }
+  /** access the k point related to the given orbital */
+  virtual PosType get_k(int orb) { return PosType(); }
 
-    /** initialize a shared resource and hand it to collection
+  /** initialize a shared resource and hand it to collection
      */
-    virtual void
-    createResource(ResourceCollection& collection) const
-    {
-    }
+  virtual void createResource(ResourceCollection& collection) const {}
 
-    /** acquire a shared resource from collection
+  /** acquire a shared resource from collection
      */
-    virtual void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
-    {
-    }
+  virtual void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const {}
 
-    /** return a shared resource to collection
+  /** return a shared resource to collection
      */
-    virtual void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
-    {
-    }
+  virtual void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const {}
 
-    /** make a clone of itself
+  /** make a clone of itself
      * every derived class must implement this to have threading working
      * correctly.
      */
-    [[noreturn]] virtual std::unique_ptr<SPOSetT<T>>
-    makeClone() const;
+  [[noreturn]] virtual std::unique_ptr<SPOSetT<T>> makeClone() const;
 
-    /** Used only by cusp correction in AOS LCAO.
+  /** Used only by cusp correction in AOS LCAO.
      * Ye: the SoA LCAO moves all this responsibility to the builder.
      * This interface should be removed with AoS.
      */
-    virtual bool
-    transformSPOSet()
-    {
-        return true;
-    }
+  virtual bool transformSPOSet() { return true; }
 
-    /** finalize the construction of SPOSet
+  /** finalize the construction of SPOSet
      *
      * for example, classes serving accelerators may need to transfer data from
      * host to device after the host side objects are built.
      */
-    virtual void
-    finalizeConstruction()
-    {
-    }
-
-    /// return object name
-    const std::string&
-    getName() const
-    {
-        return my_name_;
-    }
-
-    /// @TODO make this purely virutal return class name
-    virtual std::string
-    getClassName() const
-    {
-        return "";
-    };
+  virtual void finalizeConstruction() {}
+
+  /// return object name
+  const std::string& getName() const { return my_name_; }
+
+  /// @TODO make this purely virtual return class name
+  virtual std::string getClassName() const { return ""; };
 
 protected:
-    /// name of the object, unique identifier
-    const std::string my_name_;
-    /// number of Single-particle orbitals
-    IndexType OrbitalSetSize;
-    /// Optimizable variables
-    OptVariablesTypeT<T> myVars;
-
-    friend OptVariablesTypeT<float>& testing::getMyVars(SPOSetT<float>& spo);
-    friend OptVariablesTypeT<double>& testing::getMyVars(SPOSetT<double>& spo);
-    friend OptVariablesTypeT<std::complex<float>>& testing::getMyVars(SPOSetT<std::complex<float>>& spo);
-    friend OptVariablesTypeT<std::complex<double>>& testing::getMyVars(SPOSetT<std::complex<double>>& spo);
+  /// name of the object, unique identifier
+  const std::string my_name_;
+  /// number of Single-particle orbitals
+  IndexType OrbitalSetSize;
+  /// Optimizable variables
+  OptVariablesTypeT<T> myVars;
+
+  friend OptVariablesTypeT<float>& testing::getMyVars(SPOSetT<float>& spo);
+  friend OptVariablesTypeT<double>& testing::getMyVars(SPOSetT<double>& spo);
+  friend OptVariablesTypeT<std::complex<float>>& testing::getMyVars(SPOSetT<std::complex<float>>& spo);
+  friend OptVariablesTypeT<std::complex<double>>& testing::getMyVars(SPOSetT<std::complex<double>>& spo);
 };
 
-template <class T>
+template<class T>
 using SPOSetTPtr = SPOSetT<T>*;
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/SpinorSetT.cpp b/src/QMCWaveFunctions/SpinorSetT.cpp
index bac10a6ec8a..036b04c88d0 100644
--- a/src/QMCWaveFunctions/SpinorSetT.cpp
+++ b/src/QMCWaveFunctions/SpinorSetT.cpp
@@ -4,13 +4,10 @@
 //
 // Copyright (c) 2022 QMCPACK developers
 //
-// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National
-// Laboratories
-//                    Cody A. Melton, cmelton@sandia.gov, Sandia National
-//                    Laboratories
+// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
 //
-// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National
-// Laboratories
+// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "SpinorSetT.h"
@@ -20,602 +17,573 @@
 
 namespace qmcplusplus
 {
-template <class T>
+template<class T>
 struct SpinorSetT<T>::SpinorSetMultiWalkerResource : public Resource
 {
-    SpinorSetMultiWalkerResource() : Resource("SpinorSet")
-    {
-    }
-    SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) :
-        SpinorSetMultiWalkerResource()
-    {
-    }
-    std::unique_ptr<Resource>
-    makeClone() const override
-    {
-        return std::make_unique<SpinorSetMultiWalkerResource>(*this);
-    }
-    OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v;
-    std::vector<T> up_ratios, dn_ratios;
-    std::vector<GradType> up_grads, dn_grads;
-    std::vector<RealType> spins;
+  SpinorSetMultiWalkerResource() : Resource("SpinorSet") {}
+  SpinorSetMultiWalkerResource(const SpinorSetMultiWalkerResource&) : SpinorSetMultiWalkerResource() {}
+  std::unique_ptr<Resource> makeClone() const override { return std::make_unique<SpinorSetMultiWalkerResource>(*this); }
+  OffloadMWVGLArray up_phi_vgl_v, dn_phi_vgl_v;
+  std::vector<T> up_ratios, dn_ratios;
+  std::vector<GradType> up_grads, dn_grads;
+  std::vector<RealType> spins;
 };
 
-template <class T>
-SpinorSetT<T>::SpinorSetT(const std::string& my_name) :
-    SPOSetT<T>(my_name),
-    spo_up(nullptr),
-    spo_dn(nullptr)
-{
-}
+template<class T>
+SpinorSetT<T>::SpinorSetT(const std::string& my_name) : SPOSetT<T>(my_name), spo_up(nullptr), spo_dn(nullptr)
+{}
 
-template <class T>
+template<class T>
 SpinorSetT<T>::~SpinorSetT() = default;
 
-template <class T>
-void
-SpinorSetT<T>::set_spos(
-    std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn)
+template<class T>
+void SpinorSetT<T>::set_spos(std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn)
 {
-    // Sanity check for input SPO's.  They need to be the same size or
-    IndexType spo_size_up = up->getOrbitalSetSize();
-    IndexType spo_size_down = dn->getOrbitalSetSize();
+  // Sanity check for input SPOs: up and down components must be the same size, otherwise an exception is thrown.
+  IndexType spo_size_up   = up->getOrbitalSetSize();
+  IndexType spo_size_down = dn->getOrbitalSetSize();
 
-    if (spo_size_up != spo_size_down)
-        throw std::runtime_error("SpinorSet::set_spos(...):  up and down SPO "
-                                 "components have different sizes.");
+  if (spo_size_up != spo_size_down)
+    throw std::runtime_error("SpinorSet::set_spos(...):  up and down SPO "
+                             "components have different sizes.");
 
-    setOrbitalSetSize(spo_size_up);
+  setOrbitalSetSize(spo_size_up);
 
-    spo_up = std::move(up);
-    spo_dn = std::move(dn);
+  spo_up = std::move(up);
+  spo_dn = std::move(dn);
 
-    psi_work_up.resize(this->OrbitalSetSize);
-    psi_work_down.resize(this->OrbitalSetSize);
+  psi_work_up.resize(this->OrbitalSetSize);
+  psi_work_down.resize(this->OrbitalSetSize);
 
-    dpsi_work_up.resize(this->OrbitalSetSize);
-    dpsi_work_down.resize(this->OrbitalSetSize);
+  dpsi_work_up.resize(this->OrbitalSetSize);
+  dpsi_work_down.resize(this->OrbitalSetSize);
 
-    d2psi_work_up.resize(this->OrbitalSetSize);
-    d2psi_work_down.resize(this->OrbitalSetSize);
+  d2psi_work_up.resize(this->OrbitalSetSize);
+  d2psi_work_down.resize(this->OrbitalSetSize);
 }
 
-template <class T>
-void
-SpinorSetT<T>::setOrbitalSetSize(int norbs)
+template<class T>
+void SpinorSetT<T>::setOrbitalSetSize(int norbs)
 {
-    this->OrbitalSetSize = norbs;
+  this->OrbitalSetSize = norbs;
 };
 
-template <class T>
-void
-SpinorSetT<T>::evaluateValue(
-    const ParticleSetT<T>& P, int iat, ValueVector& psi)
+template<class T>
+void SpinorSetT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-    psi_work_up = 0.0;
-    psi_work_down = 0.0;
+  psi_work_up   = 0.0;
+  psi_work_down = 0.0;
 
-    spo_up->evaluateValue(P, iat, psi_work_up);
-    spo_dn->evaluateValue(P, iat, psi_work_down);
+  spo_up->evaluateValue(P, iat, psi_work_up);
+  spo_dn->evaluateValue(P, iat, psi_work_down);
 
-    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
+  typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
 
-    RealType coss(0.0), sins(0.0);
+  RealType coss(0.0), sins(0.0);
 
-    coss = std::cos(s);
-    sins = std::sin(s);
+  coss = std::cos(s);
+  sins = std::sin(s);
 
-    // This is only supported in the complex build, so T is some complex number
-    // depending on the precision.
-    T eis(coss, sins);
-    T emis(coss, -sins);
+  // This is only supported in the complex build, so T is some complex number
+  // depending on the precision.
+  T eis(coss, sins);
+  T emis(coss, -sins);
 
-    psi = eis * psi_work_up + emis * psi_work_down;
+  psi = eis * psi_work_up + emis * psi_work_down;
 }
 
-template <class T>
-void
-SpinorSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-    GradVector& dpsi, ValueVector& d2psi)
+template<class T>
+void SpinorSetT<T>::evaluateVGL(const ParticleSetT<T>& P,
+                                int iat,
+                                ValueVector& psi,
+                                GradVector& dpsi,
+                                ValueVector& d2psi)
 {
-    psi_work_up = 0.0;
-    psi_work_down = 0.0;
-    dpsi_work_up = 0.0;
-    dpsi_work_down = 0.0;
-    d2psi_work_up = 0.0;
-    d2psi_work_down = 0.0;
+  psi_work_up     = 0.0;
+  psi_work_down   = 0.0;
+  dpsi_work_up    = 0.0;
+  dpsi_work_down  = 0.0;
+  d2psi_work_up   = 0.0;
+  d2psi_work_down = 0.0;
 
-    spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
-    spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
+  spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
+  spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
 
-    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
+  typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
 
-    RealType coss(0.0), sins(0.0);
+  RealType coss(0.0), sins(0.0);
 
-    coss = std::cos(s);
-    sins = std::sin(s);
+  coss = std::cos(s);
+  sins = std::sin(s);
 
-    T eis(coss, sins);
-    T emis(coss, -sins);
+  T eis(coss, sins);
+  T emis(coss, -sins);
 
-    psi = eis * psi_work_up + emis * psi_work_down;
-    dpsi = eis * dpsi_work_up + emis * dpsi_work_down;
-    d2psi = eis * d2psi_work_up + emis * d2psi_work_down;
+  psi   = eis * psi_work_up + emis * psi_work_down;
+  dpsi  = eis * dpsi_work_up + emis * dpsi_work_down;
+  d2psi = eis * d2psi_work_up + emis * d2psi_work_down;
 }
 
-template <class T>
-void
-SpinorSetT<T>::evaluateVGL_spin(const ParticleSetT<T>& P, int iat,
-    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin)
+template<class T>
+void SpinorSetT<T>::evaluateVGL_spin(const ParticleSetT<T>& P,
+                                     int iat,
+                                     ValueVector& psi,
+                                     GradVector& dpsi,
+                                     ValueVector& d2psi,
+                                     ValueVector& dspin)
 {
-    psi_work_up = 0.0;
-    psi_work_down = 0.0;
-    dpsi_work_up = 0.0;
-    dpsi_work_down = 0.0;
-    d2psi_work_up = 0.0;
-    d2psi_work_down = 0.0;
+  psi_work_up     = 0.0;
+  psi_work_down   = 0.0;
+  dpsi_work_up    = 0.0;
+  dpsi_work_down  = 0.0;
+  d2psi_work_up   = 0.0;
+  d2psi_work_down = 0.0;
 
-    spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
-    spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
+  spo_up->evaluateVGL(P, iat, psi_work_up, dpsi_work_up, d2psi_work_up);
+  spo_dn->evaluateVGL(P, iat, psi_work_down, dpsi_work_down, d2psi_work_down);
 
-    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
+  typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
 
-    RealType coss(0.0), sins(0.0);
+  RealType coss(0.0), sins(0.0);
 
-    coss = std::cos(s);
-    sins = std::sin(s);
+  coss = std::cos(s);
+  sins = std::sin(s);
 
-    T eis(coss, sins);
-    T emis(coss, -sins);
-    T eye(0, 1.0);
+  T eis(coss, sins);
+  T emis(coss, -sins);
+  T eye(0, 1.0);
 
-    psi = eis * psi_work_up + emis * psi_work_down;
-    dpsi = eis * dpsi_work_up + emis * dpsi_work_down;
-    d2psi = eis * d2psi_work_up + emis * d2psi_work_down;
-    dspin = eye * (eis * psi_work_up - emis * psi_work_down);
+  psi   = eis * psi_work_up + emis * psi_work_down;
+  dpsi  = eis * dpsi_work_up + emis * dpsi_work_down;
+  d2psi = eis * d2psi_work_up + emis * d2psi_work_down;
+  dspin = eye * (eis * psi_work_up - emis * psi_work_down);
 }
 
-template <class T>
-void
-SpinorSetT<T>::mw_evaluateVGLWithSpin(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const RefVector<ValueVector>& psi_v_list,
-    const RefVector<GradVector>& dpsi_v_list,
-    const RefVector<ValueVector>& d2psi_v_list,
-    OffloadMatrix<ComplexType>& mw_dspin) const
+template<class T>
+void SpinorSetT<T>::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                           const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                           int iat,
+                                           const RefVector<ValueVector>& psi_v_list,
+                                           const RefVector<GradVector>& dpsi_v_list,
+                                           const RefVector<ValueVector>& d2psi_v_list,
+                                           OffloadMatrix<ComplexType>& mw_dspin) const
 {
-    auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
-    auto& P_leader = P_list.getLeader();
-    assert(this == &spo_leader);
-
-    IndexType nw = spo_list.size();
-    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-    auto& up_spo_leader = up_spo_list.getLeader();
-    auto& dn_spo_leader = dn_spo_list.getLeader();
-
-    RefVector<ValueVector> up_psi_v_list, dn_psi_v_list;
-    RefVector<GradVector> up_dpsi_v_list, dn_dpsi_v_list;
-    RefVector<ValueVector> up_d2psi_v_list, dn_d2psi_v_list;
-    for (int iw = 0; iw < nw; iw++) {
-        auto& spo = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
-        up_psi_v_list.push_back(spo.psi_work_up);
-        dn_psi_v_list.push_back(spo.psi_work_down);
-        up_dpsi_v_list.push_back(spo.dpsi_work_up);
-        dn_dpsi_v_list.push_back(spo.dpsi_work_down);
-        up_d2psi_v_list.push_back(spo.d2psi_work_up);
-        dn_d2psi_v_list.push_back(spo.d2psi_work_down);
-    }
+  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  auto& P_leader   = P_list.getLeader();
+  assert(this == &spo_leader);
+
+  IndexType nw                    = spo_list.size();
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+
+  RefVector<ValueVector> up_psi_v_list, dn_psi_v_list;
+  RefVector<GradVector> up_dpsi_v_list, dn_dpsi_v_list;
+  RefVector<ValueVector> up_d2psi_v_list, dn_d2psi_v_list;
+  for (int iw = 0; iw < nw; iw++)
+  {
+    auto& spo = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
+    up_psi_v_list.push_back(spo.psi_work_up);
+    dn_psi_v_list.push_back(spo.psi_work_down);
+    up_dpsi_v_list.push_back(spo.dpsi_work_up);
+    dn_dpsi_v_list.push_back(spo.dpsi_work_down);
+    up_d2psi_v_list.push_back(spo.d2psi_work_up);
+    dn_d2psi_v_list.push_back(spo.d2psi_work_down);
+  }
+
+  up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list, up_dpsi_v_list, up_d2psi_v_list);
+  dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list, dn_dpsi_v_list, dn_d2psi_v_list);
+
+  for (int iw = 0; iw < nw; iw++)
+  {
+    typename ParticleSetT<T>::Scalar_t s = P_list[iw].activeSpin(iat);
+    RealType coss                        = std::cos(s);
+    RealType sins                        = std::sin(s);
 
-    up_spo_leader.mw_evaluateVGL(up_spo_list, P_list, iat, up_psi_v_list,
-        up_dpsi_v_list, up_d2psi_v_list);
-    dn_spo_leader.mw_evaluateVGL(dn_spo_list, P_list, iat, dn_psi_v_list,
-        dn_dpsi_v_list, dn_d2psi_v_list);
-
-    for (int iw = 0; iw < nw; iw++) {
-        typename ParticleSetT<T>::Scalar_t s = P_list[iw].activeSpin(iat);
-        RealType coss = std::cos(s);
-        RealType sins = std::sin(s);
-
-        T eis(coss, sins);
-        T emis(coss, -sins);
-        T eye(0, 1.0);
-
-        psi_v_list[iw].get() =
-            eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get();
-        dpsi_v_list[iw].get() =
-            eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get();
-        d2psi_v_list[iw].get() =
-            eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get();
-        for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
-            mw_dspin(iw, iorb) = eye *
-                (eis * (up_psi_v_list[iw].get())[iorb] -
-                    emis * (dn_psi_v_list[iw].get())[iorb]);
-    }
-    // Data above is all on host, but since mw_dspin is DualMatrix we need to
-    // sync the host and device
-    mw_dspin.updateTo();
+    T eis(coss, sins);
+    T emis(coss, -sins);
+    T eye(0, 1.0);
+
+    psi_v_list[iw].get()   = eis * up_psi_v_list[iw].get() + emis * dn_psi_v_list[iw].get();
+    dpsi_v_list[iw].get()  = eis * up_dpsi_v_list[iw].get() + emis * dn_dpsi_v_list[iw].get();
+    d2psi_v_list[iw].get() = eis * up_d2psi_v_list[iw].get() + emis * dn_d2psi_v_list[iw].get();
+    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+      mw_dspin(iw, iorb) = eye * (eis * (up_psi_v_list[iw].get())[iorb] - emis * (dn_psi_v_list[iw].get())[iorb]);
+  }
+  // Data above is all on host, but since mw_dspin is DualMatrix we need to
+  // sync the host and device
+  mw_dspin.updateTo();
 }
 
-template <class T>
-void
-SpinorSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-    const std::vector<const T*>& invRow_ptr_list, OffloadMWVGLArray& phi_vgl_v,
-    std::vector<T>& ratios, std::vector<GradType>& grads,
-    std::vector<T>& spingrads) const
+template<class T>
+void SpinorSetT<T>::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                                           const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                                           int iat,
+                                                           const std::vector<const T*>& invRow_ptr_list,
+                                                           OffloadMWVGLArray& phi_vgl_v,
+                                                           std::vector<T>& ratios,
+                                                           std::vector<GradType>& grads,
+                                                           std::vector<T>& spingrads) const
 {
-    auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
-    auto& P_leader = P_list.getLeader();
-    assert(this == &spo_leader);
-    assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
-    assert(phi_vgl_v.size(1) == spo_list.size());
-    const size_t nw = spo_list.size();
-    const size_t norb_requested = phi_vgl_v.size(2);
-
-    auto& mw_res = spo_leader.mw_res_handle_.getResource();
-    auto& up_phi_vgl_v = mw_res.up_phi_vgl_v;
-    auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v;
-    auto& up_ratios = mw_res.up_ratios;
-    auto& dn_ratios = mw_res.dn_ratios;
-    auto& up_grads = mw_res.up_grads;
-    auto& dn_grads = mw_res.dn_grads;
-    auto& spins = mw_res.spins;
-
-    up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
-    dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
-    up_ratios.resize(nw);
-    dn_ratios.resize(nw);
-    up_grads.resize(nw);
-    dn_grads.resize(nw);
-    spins.resize(nw);
-
-    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-    auto& up_spo_leader = up_spo_list.getLeader();
-    auto& dn_spo_leader = dn_spo_list.getLeader();
-
-    up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat,
-        invRow_ptr_list, up_phi_vgl_v, up_ratios, up_grads);
-    dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat,
-        invRow_ptr_list, dn_phi_vgl_v, dn_ratios, dn_grads);
-    for (int iw = 0; iw < nw; iw++) {
-        typename ParticleSetT<T>::Scalar_t s = P_list[iw].activeSpin(iat);
-        spins[iw] = s;
-        RealType coss = std::cos(s);
-        RealType sins = std::sin(s);
-
-        T eis(coss, sins);
-        T emis(coss, -sins);
-        T eye(0, 1.0);
-
-        ratios[iw] = eis * up_ratios[iw] + emis * dn_ratios[iw];
-        grads[iw] = (eis * up_grads[iw] * up_ratios[iw] +
-                        emis * dn_grads[iw] * dn_ratios[iw]) /
-            ratios[iw];
-        spingrads[iw] =
-            eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw];
-    }
+  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  auto& P_leader   = P_list.getLeader();
+  assert(this == &spo_leader);
+  assert(phi_vgl_v.size(0) == QMCTraits::DIM_VGL);
+  assert(phi_vgl_v.size(1) == spo_list.size());
+  const size_t nw             = spo_list.size();
+  const size_t norb_requested = phi_vgl_v.size(2);
+
+  auto& mw_res       = spo_leader.mw_res_handle_.getResource();
+  auto& up_phi_vgl_v = mw_res.up_phi_vgl_v;
+  auto& dn_phi_vgl_v = mw_res.dn_phi_vgl_v;
+  auto& up_ratios    = mw_res.up_ratios;
+  auto& dn_ratios    = mw_res.dn_ratios;
+  auto& up_grads     = mw_res.up_grads;
+  auto& dn_grads     = mw_res.dn_grads;
+  auto& spins        = mw_res.spins;
+
+  up_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
+  dn_phi_vgl_v.resize(QMCTraits::DIM_VGL, nw, norb_requested);
+  up_ratios.resize(nw);
+  dn_ratios.resize(nw);
+  up_grads.resize(nw);
+  dn_grads.resize(nw);
+  spins.resize(nw);
+
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+
+  up_spo_leader.mw_evaluateVGLandDetRatioGrads(up_spo_list, P_list, iat, invRow_ptr_list, up_phi_vgl_v, up_ratios,
+                                               up_grads);
+  dn_spo_leader.mw_evaluateVGLandDetRatioGrads(dn_spo_list, P_list, iat, invRow_ptr_list, dn_phi_vgl_v, dn_ratios,
+                                               dn_grads);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    typename ParticleSetT<T>::Scalar_t s = P_list[iw].activeSpin(iat);
+    spins[iw]                            = s;
+    RealType coss                        = std::cos(s);
+    RealType sins                        = std::sin(s);
 
-    auto* spins_ptr = spins.data();
-    // This data lives on the device
-    auto* phi_vgl_ptr = phi_vgl_v.data();
-    auto* up_phi_vgl_ptr = up_phi_vgl_v.data();
-    auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data();
-    PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])")
-    for (int iw = 0; iw < nw; iw++) {
-        RealType c, s;
-        omptarget::sincos(spins_ptr[iw], &s, &c);
-        T eis(c, s), emis(c, -s);
-        PRAGMA_OFFLOAD("omp parallel for collapse(2)")
-        for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++)
-            for (int iorb = 0; iorb < norb_requested; iorb++) {
-                auto offset =
-                    idim * nw * norb_requested + iw * norb_requested + iorb;
-                phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] +
-                    emis * dn_phi_vgl_ptr[offset];
-            }
-    }
+    T eis(coss, sins);
+    T emis(coss, -sins);
+    T eye(0, 1.0);
+
+    ratios[iw]    = eis * up_ratios[iw] + emis * dn_ratios[iw];
+    grads[iw]     = (eis * up_grads[iw] * up_ratios[iw] + emis * dn_grads[iw] * dn_ratios[iw]) / ratios[iw];
+    spingrads[iw] = eye * (eis * up_ratios[iw] - emis * dn_ratios[iw]) / ratios[iw];
+  }
+
+  auto* spins_ptr = spins.data();
+  // This data lives on the device
+  auto* phi_vgl_ptr    = phi_vgl_v.data();
+  auto* up_phi_vgl_ptr = up_phi_vgl_v.data();
+  auto* dn_phi_vgl_ptr = dn_phi_vgl_v.data();
+  PRAGMA_OFFLOAD("omp target teams distribute map(to:spins_ptr[0:nw])")
+  for (int iw = 0; iw < nw; iw++)
+  {
+    RealType c, s;
+    omptarget::sincos(spins_ptr[iw], &s, &c);
+    T eis(c, s), emis(c, -s);
+    PRAGMA_OFFLOAD("omp parallel for collapse(2)")
+    for (int idim = 0; idim < QMCTraits::DIM_VGL; idim++)
+      for (int iorb = 0; iorb < norb_requested; iorb++)
+      {
+        auto offset         = idim * nw * norb_requested + iw * norb_requested + iorb;
+        phi_vgl_ptr[offset] = eis * up_phi_vgl_ptr[offset] + emis * dn_phi_vgl_ptr[offset];
+      }
+  }
 }
 
-template <class T>
-void
-SpinorSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
+template<class T>
+void SpinorSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                         int first,
+                                         int last,
+                                         ValueMatrix& logdet,
+                                         GradMatrix& dlogdet,
+                                         ValueMatrix& d2logdet)
 {
-    IndexType nelec = P.getTotalNum();
+  IndexType nelec = P.getTotalNum();
 
-    logpsi_work_up.resize(nelec, this->OrbitalSetSize);
-    logpsi_work_down.resize(nelec, this->OrbitalSetSize);
+  logpsi_work_up.resize(nelec, this->OrbitalSetSize);
+  logpsi_work_down.resize(nelec, this->OrbitalSetSize);
 
-    dlogpsi_work_up.resize(nelec, this->OrbitalSetSize);
-    dlogpsi_work_down.resize(nelec, this->OrbitalSetSize);
+  dlogpsi_work_up.resize(nelec, this->OrbitalSetSize);
+  dlogpsi_work_down.resize(nelec, this->OrbitalSetSize);
 
-    d2logpsi_work_up.resize(nelec, this->OrbitalSetSize);
-    d2logpsi_work_down.resize(nelec, this->OrbitalSetSize);
+  d2logpsi_work_up.resize(nelec, this->OrbitalSetSize);
+  d2logpsi_work_down.resize(nelec, this->OrbitalSetSize);
 
-    spo_up->evaluate_notranspose(
-        P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
-    spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down,
-        dlogpsi_work_down, d2logpsi_work_down);
+  spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
+  spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down);
 
-    for (int iat = 0; iat < nelec; iat++) {
-        typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
+  for (int iat = 0; iat < nelec; iat++)
+  {
+    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
 
-        RealType coss(0.0), sins(0.0);
+    RealType coss(0.0), sins(0.0);
 
-        coss = std::cos(s);
-        sins = std::sin(s);
+    coss = std::cos(s);
+    sins = std::sin(s);
 
-        T eis(coss, sins);
-        T emis(coss, -sins);
+    T eis(coss, sins);
+    T emis(coss, -sins);
 
-        for (int no = 0; no < this->OrbitalSetSize; no++) {
-            logdet(iat, no) = eis * logpsi_work_up(iat, no) +
-                emis * logpsi_work_down(iat, no);
-            dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) +
-                emis * dlogpsi_work_down(iat, no);
-            d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) +
-                emis * d2logpsi_work_down(iat, no);
-        }
+    for (int no = 0; no < this->OrbitalSetSize; no++)
+    {
+      logdet(iat, no)   = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no);
+      dlogdet(iat, no)  = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no);
+      d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no);
     }
+  }
 }
 
-template <class T>
-void
-SpinorSetT<T>::mw_evaluate_notranspose(
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-    const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
-    const RefVector<ValueMatrix>& logdet_list,
-    const RefVector<GradMatrix>& dlogdet_list,
-    const RefVector<ValueMatrix>& d2logdet_list) const
+template<class T>
+void SpinorSetT<T>::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                            const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                            int first,
+                                            int last,
+                                            const RefVector<ValueMatrix>& logdet_list,
+                                            const RefVector<GradMatrix>& dlogdet_list,
+                                            const RefVector<ValueMatrix>& d2logdet_list) const
 {
-    auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
-    auto& P_leader = P_list.getLeader();
-    assert(this == &spo_leader);
-
-    IndexType nw = spo_list.size();
-    IndexType nelec = P_leader.getTotalNum();
-
-    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-    auto& up_spo_leader = up_spo_list.getLeader();
-    auto& dn_spo_leader = dn_spo_list.getLeader();
-
-    std::vector<ValueMatrix> mw_up_logdet, mw_dn_logdet;
-    std::vector<GradMatrix> mw_up_dlogdet, mw_dn_dlogdet;
-    std::vector<ValueMatrix> mw_up_d2logdet, mw_dn_d2logdet;
-    mw_up_logdet.reserve(nw);
-    mw_dn_logdet.reserve(nw);
-    mw_up_dlogdet.reserve(nw);
-    mw_dn_dlogdet.reserve(nw);
-    mw_up_d2logdet.reserve(nw);
-    mw_dn_d2logdet.reserve(nw);
-
-    RefVector<ValueMatrix> up_logdet_list, dn_logdet_list;
-    RefVector<GradMatrix> up_dlogdet_list, dn_dlogdet_list;
-    RefVector<ValueMatrix> up_d2logdet_list, dn_d2logdet_list;
-    up_logdet_list.reserve(nw);
-    dn_logdet_list.reserve(nw);
-    up_dlogdet_list.reserve(nw);
-    dn_dlogdet_list.reserve(nw);
-    up_d2logdet_list.reserve(nw);
-    dn_d2logdet_list.reserve(nw);
-
-    ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize);
-    GradMatrix tmp_grad_mat(nelec, this->OrbitalSetSize);
-    for (int iw = 0; iw < nw; iw++) {
-        mw_up_logdet.emplace_back(tmp_val_mat);
-        up_logdet_list.emplace_back(mw_up_logdet.back());
-        mw_dn_logdet.emplace_back(tmp_val_mat);
-        dn_logdet_list.emplace_back(mw_dn_logdet.back());
-
-        mw_up_dlogdet.emplace_back(tmp_grad_mat);
-        up_dlogdet_list.emplace_back(mw_up_dlogdet.back());
-        mw_dn_dlogdet.emplace_back(tmp_grad_mat);
-        dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back());
-
-        mw_up_d2logdet.emplace_back(tmp_val_mat);
-        up_d2logdet_list.emplace_back(mw_up_d2logdet.back());
-        mw_dn_d2logdet.emplace_back(tmp_val_mat);
-        dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back());
+  auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  auto& P_leader   = P_list.getLeader();
+  assert(this == &spo_leader);
+
+  IndexType nw    = spo_list.size();
+  IndexType nelec = P_leader.getTotalNum();
+
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+
+  std::vector<ValueMatrix> mw_up_logdet, mw_dn_logdet;
+  std::vector<GradMatrix> mw_up_dlogdet, mw_dn_dlogdet;
+  std::vector<ValueMatrix> mw_up_d2logdet, mw_dn_d2logdet;
+  mw_up_logdet.reserve(nw);
+  mw_dn_logdet.reserve(nw);
+  mw_up_dlogdet.reserve(nw);
+  mw_dn_dlogdet.reserve(nw);
+  mw_up_d2logdet.reserve(nw);
+  mw_dn_d2logdet.reserve(nw);
+
+  RefVector<ValueMatrix> up_logdet_list, dn_logdet_list;
+  RefVector<GradMatrix> up_dlogdet_list, dn_dlogdet_list;
+  RefVector<ValueMatrix> up_d2logdet_list, dn_d2logdet_list;
+  up_logdet_list.reserve(nw);
+  dn_logdet_list.reserve(nw);
+  up_dlogdet_list.reserve(nw);
+  dn_dlogdet_list.reserve(nw);
+  up_d2logdet_list.reserve(nw);
+  dn_d2logdet_list.reserve(nw);
+
+  ValueMatrix tmp_val_mat(nelec, this->OrbitalSetSize);
+  GradMatrix tmp_grad_mat(nelec, this->OrbitalSetSize);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    mw_up_logdet.emplace_back(tmp_val_mat);
+    up_logdet_list.emplace_back(mw_up_logdet.back());
+    mw_dn_logdet.emplace_back(tmp_val_mat);
+    dn_logdet_list.emplace_back(mw_dn_logdet.back());
+
+    mw_up_dlogdet.emplace_back(tmp_grad_mat);
+    up_dlogdet_list.emplace_back(mw_up_dlogdet.back());
+    mw_dn_dlogdet.emplace_back(tmp_grad_mat);
+    dn_dlogdet_list.emplace_back(mw_dn_dlogdet.back());
+
+    mw_up_d2logdet.emplace_back(tmp_val_mat);
+    up_d2logdet_list.emplace_back(mw_up_d2logdet.back());
+    mw_dn_d2logdet.emplace_back(tmp_val_mat);
+    dn_d2logdet_list.emplace_back(mw_dn_d2logdet.back());
+  }
+
+  up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last, up_logdet_list, up_dlogdet_list,
+                                        up_d2logdet_list);
+  dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last, dn_logdet_list, dn_dlogdet_list,
+                                        dn_d2logdet_list);
+
+  for (int iw = 0; iw < nw; iw++)
+    for (int iat = 0; iat < nelec; iat++)
+    {
+      typename ParticleSetT<T>::Scalar_t s = P_list[iw].activeSpin(iat);
+      RealType coss                        = std::cos(s);
+      RealType sins                        = std::sin(s);
+      T eis(coss, sins);
+      T emis(coss, -sins);
+
+      for (int no = 0; no < this->OrbitalSetSize; no++)
+      {
+        logdet_list[iw].get()(iat, no) =
+            eis * up_logdet_list[iw].get()(iat, no) + emis * dn_logdet_list[iw].get()(iat, no);
+        dlogdet_list[iw].get()(iat, no) =
+            eis * up_dlogdet_list[iw].get()(iat, no) + emis * dn_dlogdet_list[iw].get()(iat, no);
+        d2logdet_list[iw].get()(iat, no) =
+            eis * up_d2logdet_list[iw].get()(iat, no) + emis * dn_d2logdet_list[iw].get()(iat, no);
+      }
     }
-
-    up_spo_leader.mw_evaluate_notranspose(up_spo_list, P_list, first, last,
-        up_logdet_list, up_dlogdet_list, up_d2logdet_list);
-    dn_spo_leader.mw_evaluate_notranspose(dn_spo_list, P_list, first, last,
-        dn_logdet_list, dn_dlogdet_list, dn_d2logdet_list);
-
-    for (int iw = 0; iw < nw; iw++)
-        for (int iat = 0; iat < nelec; iat++) {
-            typename ParticleSetT<T>::Scalar_t s = P_list[iw].activeSpin(iat);
-            RealType coss = std::cos(s);
-            RealType sins = std::sin(s);
-            T eis(coss, sins);
-            T emis(coss, -sins);
-
-            for (int no = 0; no < this->OrbitalSetSize; no++) {
-                logdet_list[iw].get()(iat, no) =
-                    eis * up_logdet_list[iw].get()(iat, no) +
-                    emis * dn_logdet_list[iw].get()(iat, no);
-                dlogdet_list[iw].get()(iat, no) =
-                    eis * up_dlogdet_list[iw].get()(iat, no) +
-                    emis * dn_dlogdet_list[iw].get()(iat, no);
-                d2logdet_list[iw].get()(iat, no) =
-                    eis * up_d2logdet_list[iw].get()(iat, no) +
-                    emis * dn_d2logdet_list[iw].get()(iat, no);
-            }
-        }
 }
 
-template <class T>
-void
-SpinorSetT<T>::evaluate_notranspose_spin(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet,
-    ValueMatrix& dspinlogdet)
+template<class T>
+void SpinorSetT<T>::evaluate_notranspose_spin(const ParticleSetT<T>& P,
+                                              int first,
+                                              int last,
+                                              ValueMatrix& logdet,
+                                              GradMatrix& dlogdet,
+                                              ValueMatrix& d2logdet,
+                                              ValueMatrix& dspinlogdet)
 {
-    IndexType nelec = P.getTotalNum();
+  IndexType nelec = P.getTotalNum();
 
-    logpsi_work_up.resize(nelec, this->OrbitalSetSize);
-    logpsi_work_down.resize(nelec, this->OrbitalSetSize);
+  logpsi_work_up.resize(nelec, this->OrbitalSetSize);
+  logpsi_work_down.resize(nelec, this->OrbitalSetSize);
 
-    dlogpsi_work_up.resize(nelec, this->OrbitalSetSize);
-    dlogpsi_work_down.resize(nelec, this->OrbitalSetSize);
+  dlogpsi_work_up.resize(nelec, this->OrbitalSetSize);
+  dlogpsi_work_down.resize(nelec, this->OrbitalSetSize);
 
-    d2logpsi_work_up.resize(nelec, this->OrbitalSetSize);
-    d2logpsi_work_down.resize(nelec, this->OrbitalSetSize);
+  d2logpsi_work_up.resize(nelec, this->OrbitalSetSize);
+  d2logpsi_work_down.resize(nelec, this->OrbitalSetSize);
 
-    spo_up->evaluate_notranspose(
-        P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
-    spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down,
-        dlogpsi_work_down, d2logpsi_work_down);
+  spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
+  spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down);
 
-    for (int iat = 0; iat < nelec; iat++) {
-        typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
+  for (int iat = 0; iat < nelec; iat++)
+  {
+    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
 
-        RealType coss(0.0), sins(0.0);
+    RealType coss(0.0), sins(0.0);
 
-        coss = std::cos(s);
-        sins = std::sin(s);
+    coss = std::cos(s);
+    sins = std::sin(s);
 
-        T eis(coss, sins);
-        T emis(coss, -sins);
-        T eye(0, 1.0);
+    T eis(coss, sins);
+    T emis(coss, -sins);
+    T eye(0, 1.0);
 
-        for (int no = 0; no < this->OrbitalSetSize; no++) {
-            logdet(iat, no) = eis * logpsi_work_up(iat, no) +
-                emis * logpsi_work_down(iat, no);
-            dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) +
-                emis * dlogpsi_work_down(iat, no);
-            d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) +
-                emis * d2logpsi_work_down(iat, no);
-            dspinlogdet(iat, no) = eye *
-                (eis * logpsi_work_up(iat, no) -
-                    emis * logpsi_work_down(iat, no));
-        }
+    for (int no = 0; no < this->OrbitalSetSize; no++)
+    {
+      logdet(iat, no)      = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no);
+      dlogdet(iat, no)     = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no);
+      d2logdet(iat, no)    = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no);
+      dspinlogdet(iat, no) = eye * (eis * logpsi_work_up(iat, no) - emis * logpsi_work_down(iat, no));
     }
+  }
 }
 
-template <class T>
-void
-SpinorSetT<T>::evaluate_spin(
-    const ParticleSetT<T>& P, int iat, ValueVector& psi, ValueVector& dpsi)
+template<class T>
+void SpinorSetT<T>::evaluate_spin(const ParticleSetT<T>& P, int iat, ValueVector& psi, ValueVector& dpsi)
 {
-    psi_work_up = 0.0;
-    psi_work_down = 0.0;
+  psi_work_up   = 0.0;
+  psi_work_down = 0.0;
 
-    spo_up->evaluateValue(P, iat, psi_work_up);
-    spo_dn->evaluateValue(P, iat, psi_work_down);
+  spo_up->evaluateValue(P, iat, psi_work_up);
+  spo_dn->evaluateValue(P, iat, psi_work_down);
 
-    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
+  typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
 
-    RealType coss(0.0), sins(0.0);
+  RealType coss(0.0), sins(0.0);
 
-    coss = std::cos(s);
-    sins = std::sin(s);
+  coss = std::cos(s);
+  sins = std::sin(s);
 
-    T eis(coss, sins);
-    T emis(coss, -sins);
-    T eye(0, 1.0);
+  T eis(coss, sins);
+  T emis(coss, -sins);
+  T eye(0, 1.0);
 
-    psi = eis * psi_work_up + emis * psi_work_down;
-    dpsi = eye * (eis * psi_work_up - emis * psi_work_down);
+  psi  = eis * psi_work_up + emis * psi_work_down;
+  dpsi = eye * (eis * psi_work_up - emis * psi_work_down);
 }
 
-template <class T>
-void
-SpinorSetT<T>::evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-    const ParticleSetT<T>& source, int iat_src, GradMatrix& gradphi)
+template<class T>
+void SpinorSetT<T>::evaluateGradSource(const ParticleSetT<T>& P,
+                                       int first,
+                                       int last,
+                                       const ParticleSetT<T>& source,
+                                       int iat_src,
+                                       GradMatrix& gradphi)
 {
-    IndexType nelec = P.getTotalNum();
-
-    GradMatrix gradphi_up(nelec, this->OrbitalSetSize);
-    GradMatrix gradphi_dn(nelec, this->OrbitalSetSize);
-    spo_up->evaluateGradSource(P, first, last, source, iat_src, gradphi_up);
-    spo_dn->evaluateGradSource(P, first, last, source, iat_src, gradphi_dn);
-
-    for (int iat = 0; iat < nelec; iat++) {
-        typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
-        RealType coss = std::cos(s);
-        RealType sins = std::sin(s);
-        T eis(coss, sins);
-        T emis(coss, -sins);
-        for (int imo = 0; imo < this->OrbitalSetSize; imo++)
-            gradphi(iat, imo) =
-                gradphi_up(iat, imo) * eis + gradphi_dn(iat, imo) * emis;
-    }
+  IndexType nelec = P.getTotalNum();
+
+  GradMatrix gradphi_up(nelec, this->OrbitalSetSize);
+  GradMatrix gradphi_dn(nelec, this->OrbitalSetSize);
+  spo_up->evaluateGradSource(P, first, last, source, iat_src, gradphi_up);
+  spo_dn->evaluateGradSource(P, first, last, source, iat_src, gradphi_dn);
+
+  for (int iat = 0; iat < nelec; iat++)
+  {
+    typename ParticleSetT<T>::Scalar_t s = P.activeSpin(iat);
+    RealType coss                        = std::cos(s);
+    RealType sins                        = std::sin(s);
+    T eis(coss, sins);
+    T emis(coss, -sins);
+    for (int imo = 0; imo < this->OrbitalSetSize; imo++)
+      gradphi(iat, imo) = gradphi_up(iat, imo) * eis + gradphi_dn(iat, imo) * emis;
+  }
 }
 
-template <class T>
-std::unique_ptr<SPOSetT<T>>
-SpinorSetT<T>::makeClone() const
+template<class T>
+std::unique_ptr<SPOSetT<T>> SpinorSetT<T>::makeClone() const
 {
-    auto myclone = std::make_unique<SpinorSetT<T>>(this->my_name_);
-    std::unique_ptr<SPOSetT<T>> cloneup(spo_up->makeClone());
-    std::unique_ptr<SPOSetT<T>> clonedn(spo_dn->makeClone());
-    myclone->set_spos(std::move(cloneup), std::move(clonedn));
-    return myclone;
+  auto myclone = std::make_unique<SpinorSetT<T>>(this->my_name_);
+  std::unique_ptr<SPOSetT<T>> cloneup(spo_up->makeClone());
+  std::unique_ptr<SPOSetT<T>> clonedn(spo_dn->makeClone());
+  myclone->set_spos(std::move(cloneup), std::move(clonedn));
+  return myclone;
 }
 
-template <class T>
-void
-SpinorSetT<T>::createResource(ResourceCollection& collection) const
+template<class T>
+void SpinorSetT<T>::createResource(ResourceCollection& collection) const
 {
-    spo_up->createResource(collection);
-    spo_dn->createResource(collection);
-    auto index = collection.addResource(
-        std::make_unique<SpinorSetMultiWalkerResource>());
+  spo_up->createResource(collection);
+  spo_dn->createResource(collection);
+  auto index = collection.addResource(std::make_unique<SpinorSetMultiWalkerResource>());
 }
 
-template <class T>
-void
-SpinorSetT<T>::acquireResource(ResourceCollection& collection,
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template<class T>
+void SpinorSetT<T>::acquireResource(ResourceCollection& collection,
+                                    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-    auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
-    auto& up_spo_leader = up_spo_list.getLeader();
-    auto& dn_spo_leader = dn_spo_list.getLeader();
-    up_spo_leader.acquireResource(collection, up_spo_list);
-    dn_spo_leader.acquireResource(collection, dn_spo_list);
-    spo_leader.mw_res_handle_ =
-        collection.lendResource<SpinorSetMultiWalkerResource>();
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& spo_leader                = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+  up_spo_leader.acquireResource(collection, up_spo_list);
+  dn_spo_leader.acquireResource(collection, dn_spo_list);
+  spo_leader.mw_res_handle_ = collection.lendResource<SpinorSetMultiWalkerResource>();
 }
 
-template <class T>
-void
-SpinorSetT<T>::releaseResource(ResourceCollection& collection,
-    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
+template<class T>
+void SpinorSetT<T>::releaseResource(ResourceCollection& collection,
+                                    const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-    auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
-    auto& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
-    auto& up_spo_leader = up_spo_list.getLeader();
-    auto& dn_spo_leader = dn_spo_list.getLeader();
-    up_spo_leader.releaseResource(collection, up_spo_list);
-    dn_spo_leader.releaseResource(collection, dn_spo_list);
-    collection.takebackResource(spo_leader.mw_res_handle_);
+  auto [up_spo_list, dn_spo_list] = extractSpinComponentRefList(spo_list);
+  auto& spo_leader                = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  auto& up_spo_leader             = up_spo_list.getLeader();
+  auto& dn_spo_leader             = dn_spo_list.getLeader();
+  up_spo_leader.releaseResource(collection, up_spo_list);
+  dn_spo_leader.releaseResource(collection, dn_spo_list);
+  collection.takebackResource(spo_leader.mw_res_handle_);
 }
 
-template <class T>
-std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>>
-SpinorSetT<T>::extractSpinComponentRefList(
+template<class T>
+std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>> SpinorSetT<T>::extractSpinComponentRefList(
     const RefVectorWithLeader<SPOSetT<T>>& spo_list) const
 {
-    SpinorSetT<T>& spo_leader =
-        spo_list.template getCastedLeader<SpinorSetT<T>>();
-    IndexType nw = spo_list.size();
-    SPOSetT<T>& up_spo_leader = *(spo_leader.spo_up);
-    SPOSetT<T>& dn_spo_leader = *(spo_leader.spo_dn);
-    RefVectorWithLeader<SPOSetT<T>> up_spo_list(up_spo_leader);
-    RefVectorWithLeader<SPOSetT<T>> dn_spo_list(dn_spo_leader);
-    up_spo_list.reserve(nw);
-    dn_spo_list.reserve(nw);
-    for (int iw = 0; iw < nw; iw++) {
-        SpinorSetT<T>& spinor =
-            spo_list.template getCastedElement<SpinorSetT<T>>(iw);
-        up_spo_list.emplace_back(*(spinor.spo_up));
-        dn_spo_list.emplace_back(*(spinor.spo_dn));
-    }
-    return std::make_pair(up_spo_list, dn_spo_list);
+  SpinorSetT<T>& spo_leader = spo_list.template getCastedLeader<SpinorSetT<T>>();
+  IndexType nw              = spo_list.size();
+  SPOSetT<T>& up_spo_leader = *(spo_leader.spo_up);
+  SPOSetT<T>& dn_spo_leader = *(spo_leader.spo_dn);
+  RefVectorWithLeader<SPOSetT<T>> up_spo_list(up_spo_leader);
+  RefVectorWithLeader<SPOSetT<T>> dn_spo_list(dn_spo_leader);
+  up_spo_list.reserve(nw);
+  dn_spo_list.reserve(nw);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    SpinorSetT<T>& spinor = spo_list.template getCastedElement<SpinorSetT<T>>(iw);
+    up_spo_list.emplace_back(*(spinor.spo_up));
+    dn_spo_list.emplace_back(*(spinor.spo_dn));
+  }
+  return std::make_pair(up_spo_list, dn_spo_list);
 }
 
 template class SpinorSetT<std::complex<double>>;
diff --git a/src/QMCWaveFunctions/SpinorSetT.h b/src/QMCWaveFunctions/SpinorSetT.h
index 08d869b1129..6dc1a130d5e 100644
--- a/src/QMCWaveFunctions/SpinorSetT.h
+++ b/src/QMCWaveFunctions/SpinorSetT.h
@@ -4,13 +4,10 @@
 //
 // Copyright (c) 2022 QMCPACK developers
 //
-// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National
-// Laboratories
-//                    Cody A. Melton, cmelton@sandia.gov, Sandia National
-//                    Laboratories
+// File developed by: Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
+//                    Cody A. Melton, cmelton@sandia.gov, Sandia National Laboratories
 //
-// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National
-// Laboratories
+// File created by:  Raymond Clay III, rclay@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
 #ifndef QMCPLUSPLUS_SPINORSETT_H
@@ -24,71 +21,50 @@ namespace qmcplusplus
 /** Class for Melton & Mitas style Spinors.
  *
  */
-template <class T>
+template<class T>
 class SpinorSetT : public SPOSetT<T>
 {
 public:
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using GradType = typename SPOSetT<T>::GradType;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using OffloadMWVGLArray =
-        Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
-    // using OffloadMWVGLArray = typename SPOSetT<T>::template
-    // OffloadMWCGLArray;
-    template <typename DT>
-    using OffloadMatrix = typename SPOSetT<T>::template OffloadMatrix<DT>;
-    using RealType = typename SPOSetT<T>::RealType;
-    using ComplexType = typename SPOSetT<T>::ComplexType;
-    using IndexType = OHMMS_INDEXTYPE;
-
-    /** constructor */
-    SpinorSetT(const std::string& my_name);
-    ~SpinorSetT() override;
-
-    std::string
-    getClassName() const override
-    {
-        return "SpinorSet";
-    }
-    bool
-    isOptimizable() const override
-    {
-        return spo_up->isOptimizable() || spo_dn->isOptimizable();
-    }
-    bool
-    isOMPoffload() const override
-    {
-        return spo_up->isOMPoffload() || spo_dn->isOMPoffload();
-    }
-    bool
-    hasIonDerivs() const override
-    {
-        return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs();
-    }
-
-    // This class is initialized by separately building the up and down channels
-    // of the spinor set and then registering them.
-    void
-    set_spos(
-        std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn);
-
-    /** set the OrbitalSetSize
+  using ValueMatrix       = typename SPOSetT<T>::ValueMatrix;
+  using ValueVector       = typename SPOSetT<T>::ValueVector;
+  using GradMatrix        = typename SPOSetT<T>::GradMatrix;
+  using GradType          = typename SPOSetT<T>::GradType;
+  using GradVector        = typename SPOSetT<T>::GradVector;
+  using OffloadMWVGLArray = Array<T, 3, OffloadPinnedAllocator<T>>; // [VGL, walker, Orbs]
+  // using OffloadMWVGLArray = typename SPOSetT<T>::template
+  // OffloadMWCGLArray;
+  template<typename DT>
+  using OffloadMatrix = typename SPOSetT<T>::template OffloadMatrix<DT>;
+  using RealType      = typename SPOSetT<T>::RealType;
+  using ComplexType   = typename SPOSetT<T>::ComplexType;
+  using IndexType     = OHMMS_INDEXTYPE;
+
+  /** constructor */
+  SpinorSetT(const std::string& my_name);
+  ~SpinorSetT() override;
+
+  std::string getClassName() const override { return "SpinorSet"; }
+  bool isOptimizable() const override { return spo_up->isOptimizable() || spo_dn->isOptimizable(); }
+  bool isOMPoffload() const override { return spo_up->isOMPoffload() || spo_dn->isOMPoffload(); }
+  bool hasIonDerivs() const override { return spo_up->hasIonDerivs() || spo_dn->hasIonDerivs(); }
+
+  // This class is initialized by separately building the up and down channels
+  // of the spinor set and then registering them.
+  void set_spos(std::unique_ptr<SPOSetT<T>>&& up, std::unique_ptr<SPOSetT<T>>&& dn);
+
+  /** set the OrbitalSetSize
      * @param norbs number of single-particle orbitals
      */
-    void
-    setOrbitalSetSize(int norbs) override;
+  void setOrbitalSetSize(int norbs) override;
 
-    /** evaluate the values of this spinor set
+  /** evaluate the values of this spinor set
      * @param P current ParticleSet
      * @param iat active particle
      * @param psi values of the SPO
      */
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
 
-    /** evaluate the values, gradients and laplacians of this single-particle
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital set
      * @param P current ParticleSet
      * @param iat active particle
@@ -96,11 +72,9 @@ class SpinorSetT : public SPOSetT<T>
      * @param dpsi gradients of the SPO
      * @param d2psi laplacians of the SPO
      */
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
 
-    /** evaluate the values, gradients and laplacians of this single-particle
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital set
      * @param P current ParticleSet
      * @param iat active particle
@@ -109,11 +83,14 @@ class SpinorSetT : public SPOSetT<T>
      * @param d2psi laplacians of the SPO
      * @param dspin spin gradient of the SPO
      */
-    void
-    evaluateVGL_spin(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi, ValueVector& dspin) override;
-
-    /** evaluate the values, gradients and laplacians and spin gradient of this
+  void evaluateVGL_spin(const ParticleSetT<T>& P,
+                        int iat,
+                        ValueVector& psi,
+                        GradVector& dpsi,
+                        ValueVector& d2psi,
+                        ValueVector& dspin) override;
+
+  /** evaluate the values, gradients and laplacians and spin gradient of this
      * single-particle orbital sets of multiple walkers
      * @param spo_list the list of SPOSet pointers in a walker batch
      * @param P_list the list of ParticleSet pointers in a walker batch
@@ -124,15 +101,15 @@ class SpinorSetT : public SPOSetT<T>
      * batch
      * @param mw_dspin dual matrix of spin gradients. nw x num_orbitals
      */
-    void
-    mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list,
-        const RefVector<GradVector>& dpsi_v_list,
-        const RefVector<ValueVector>& d2psi_v_list,
-        OffloadMatrix<ComplexType>& mw_dspin) const override;
-
-    /** evaluate the values, gradients and laplacians of this single-particle
+  void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                              const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list,
+                              OffloadMatrix<ComplexType>& mw_dspin) const override;
+
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital sets and determinant ratio and grads of multiple walkers. Device
      * data of phi_vgl_v must be up-to-date upon return. Includes spin gradients
      * @param spo_list the list of SPOSet pointers in a walker batch
@@ -144,15 +121,16 @@ class SpinorSetT : public SPOSetT<T>
      * @param grads, spatial gradients of all walkers
      * @param spingrads, spin gradients of all walkers
      */
-    void
-    mw_evaluateVGLandDetRatioGradsWithSpin(
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const std::vector<const T*>& invRow_ptr_list,
-        OffloadMWVGLArray& phi_vgl_v, std::vector<T>& ratios,
-        std::vector<GradType>& grads, std::vector<T>& spingrads) const override;
-
-    /** evaluate the values, gradients and laplacians of this single-particle
+  void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                                              const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                                              int iat,
+                                              const std::vector<const T*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<T>& ratios,
+                                              std::vector<GradType>& grads,
+                                              std::vector<T>& spingrads) const override;
+
+  /** evaluate the values, gradients and laplacians of this single-particle
      * orbital for [first,last) particles
      * @param P current ParticleSet
      * @param first starting index of the particles
@@ -162,23 +140,29 @@ class SpinorSetT : public SPOSetT<T>
      * @param d2logdet laplacians
      *
      */
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override;
-
-    void
-    mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int first, int last,
-        const RefVector<ValueMatrix>& logdet_list,
-        const RefVector<GradMatrix>& dlogdet_list,
-        const RefVector<ValueMatrix>& d2logdet_list) const override;
-
-    void
-    evaluate_notranspose_spin(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet,
-        ValueMatrix& dspinlogdet) override;
-    /** Evaluate the values, spin gradients, and spin laplacians of single
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
+
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                               const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override;
+
+  void evaluate_notranspose_spin(const ParticleSetT<T>& P,
+                                 int first,
+                                 int last,
+                                 ValueMatrix& logdet,
+                                 GradMatrix& dlogdet,
+                                 ValueMatrix& d2logdet,
+                                 ValueMatrix& dspinlogdet) override;
+  /** Evaluate the values, spin gradients, and spin laplacians of single
      * particle spinors corresponding to electron iat.
      *  @param P current particle set.
      *  @param iat electron index.
@@ -186,11 +170,9 @@ class SpinorSetT : public SPOSetT<T>
      *  @param spin gradient values. d/ds phi(r,s).
      *
      */
-    void
-    evaluate_spin(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        ValueVector& dpsi) override;
+  void evaluate_spin(const ParticleSetT<T>& P, int iat, ValueVector& psi, ValueVector& dpsi) override;
 
-    /** evaluate the gradients of this single-particle orbital
+  /** evaluate the gradients of this single-particle orbital
      *  for [first,last) target particles with respect to the given source
      * particle
      * @param P current ParticleSet
@@ -200,69 +182,60 @@ class SpinorSetT : public SPOSetT<T>
      * @param gradphi gradients
      *
      */
-    virtual void
-    evaluateGradSource(const ParticleSetT<T>& P, int first, int last,
-        const ParticleSetT<T>& source, int iat_src,
-        GradMatrix& gradphi) override;
+  virtual void evaluateGradSource(const ParticleSetT<T>& P,
+                                  int first,
+                                  int last,
+                                  const ParticleSetT<T>& source,
+                                  int iat_src,
+                                  GradMatrix& gradphi) override;
 
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const override;
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
 
-    void
-    createResource(ResourceCollection& collection) const override;
+  void createResource(ResourceCollection& collection) const override;
 
-    void
-    acquireResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
 
-    void
-    releaseResource(ResourceCollection& collection,
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSetT<T>>& spo_list) const override;
 
-    /// check if the multi walker resource is owned. For testing only.
-    bool
-    isResourceOwned() const
-    {
-        return bool(mw_res_handle_);
-    }
+  /// check if the multi walker resource is owned. For testing only.
+  bool isResourceOwned() const { return bool(mw_res_handle_); }
 
 private:
-    struct SpinorSetMultiWalkerResource;
-    ResourceHandle<SpinorSetMultiWalkerResource> mw_res_handle_;
+  struct SpinorSetMultiWalkerResource;
+  ResourceHandle<SpinorSetMultiWalkerResource> mw_res_handle_;
 
-    std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>>
-    extractSpinComponentRefList(
-        const RefVectorWithLeader<SPOSetT<T>>& spo_list) const;
+  std::pair<RefVectorWithLeader<SPOSetT<T>>, RefVectorWithLeader<SPOSetT<T>>> extractSpinComponentRefList(
+      const RefVectorWithLeader<SPOSetT<T>>& spo_list) const;
 
-    // Sposet for the up and down channels of our spinors.
-    std::unique_ptr<SPOSetT<T>> spo_up;
-    std::unique_ptr<SPOSetT<T>> spo_dn;
+  // Sposet for the up and down channels of our spinors.
+  std::unique_ptr<SPOSetT<T>> spo_up;
+  std::unique_ptr<SPOSetT<T>> spo_dn;
 
-    // temporary arrays for holding the values of the up and down channels
-    // respectively.
-    ValueVector psi_work_up;
-    ValueVector psi_work_down;
+  // temporary arrays for holding the values of the up and down channels
+  // respectively.
+  ValueVector psi_work_up;
+  ValueVector psi_work_down;
 
-    // temporary arrays for holding the gradients of the up and down channels
-    // respectively.
-    GradVector dpsi_work_up;
-    GradVector dpsi_work_down;
+  // temporary arrays for holding the gradients of the up and down channels
+  // respectively.
+  GradVector dpsi_work_up;
+  GradVector dpsi_work_down;
 
-    // temporary arrays for holding the laplacians of the up and down channels
-    // respectively.
-    ValueVector d2psi_work_up;
-    ValueVector d2psi_work_down;
+  // temporary arrays for holding the laplacians of the up and down channels
+  // respectively.
+  ValueVector d2psi_work_up;
+  ValueVector d2psi_work_down;
 
-    // Same as above, but these are the full matrices containing all
-    // spinor/particle combinations.
-    ValueMatrix logpsi_work_up;
-    ValueMatrix logpsi_work_down;
+  // Same as above, but these are the full matrices containing all
+  // spinor/particle combinations.
+  ValueMatrix logpsi_work_up;
+  ValueMatrix logpsi_work_down;
 
-    GradMatrix dlogpsi_work_up;
-    GradMatrix dlogpsi_work_down;
+  GradMatrix dlogpsi_work_up;
+  GradMatrix dlogpsi_work_down;
 
-    ValueMatrix d2logpsi_work_up;
-    ValueMatrix d2logpsi_work_down;
+  ValueMatrix d2logpsi_work_up;
+  ValueMatrix d2logpsi_work_down;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/VariableSetT.cpp b/src/QMCWaveFunctions/VariableSetT.cpp
index 2c494010666..d6aea105fed 100644
--- a/src/QMCWaveFunctions/VariableSetT.cpp
+++ b/src/QMCWaveFunctions/VariableSetT.cpp
@@ -4,14 +4,11 @@
 //
 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
 //
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of
-// Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of
-//                    Illinois at Urbana-Champaign Mark A. Berrill,
-//                    berrillma@ornl.gov, Oak Ridge National Laboratory
+// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
 //
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
-// at Urbana-Champaign
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "VariableSetT.h"
@@ -29,313 +26,305 @@ using std::setw;
 
 namespace optimize
 {
-template <typename T>
-void
-VariableSetT<T>::clear()
+template<typename T>
+void VariableSetT<T>::clear()
 {
-    num_active_vars = 0;
-    Index.clear();
-    NameAndValue.clear();
-    Recompute.clear();
-    ParameterType.clear();
+  num_active_vars = 0;
+  Index.clear();
+  NameAndValue.clear();
+  Recompute.clear();
+  ParameterType.clear();
 }
 
-template <typename T>
-void
-VariableSetT<T>::insertFrom(const VariableSetT& input)
+template<typename T>
+void VariableSetT<T>::insertFrom(const VariableSetT& input)
 {
-    for (int i = 0; i < input.size(); ++i) {
-        iterator loc = find(input.name(i));
-        if (loc == NameAndValue.end()) {
-            Index.push_back(input.Index[i]);
-            NameAndValue.push_back(input.NameAndValue[i]);
-            ParameterType.push_back(input.ParameterType[i]);
-            Recompute.push_back(input.Recompute[i]);
-        }
-        else
-            (*loc).second = input.NameAndValue[i].second;
+  for (int i = 0; i < input.size(); ++i)
+  {
+    iterator loc = find(input.name(i));
+    if (loc == NameAndValue.end())
+    {
+      Index.push_back(input.Index[i]);
+      NameAndValue.push_back(input.NameAndValue[i]);
+      ParameterType.push_back(input.ParameterType[i]);
+      Recompute.push_back(input.Recompute[i]);
     }
-    num_active_vars = input.num_active_vars;
+    else
+      (*loc).second = input.NameAndValue[i].second;
+  }
+  num_active_vars = input.num_active_vars;
 }
 
-template <typename T>
-void
-VariableSetT<T>::insertFromSum(
-    const VariableSetT& input_1, const VariableSetT& input_2)
+template<typename T>
+void VariableSetT<T>::insertFromSum(const VariableSetT& input_1, const VariableSetT& input_2)
 {
-    value_type sum_val;
-    std::string vname;
-
-    // Check that objects to be summed together have the same number of active
-    // variables.
-    if (input_1.num_active_vars != input_2.num_active_vars)
-        throw std::runtime_error(
-            "Inconsistent number of parameters in two provided "
-            "variable sets.");
-
-    for (int i = 0; i < input_1.size(); ++i) {
-        // Check that each of the equivalent variables in both VariableSet
-        // objects have the same name - otherwise we certainly shouldn't be
-        // adding them.
-        if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first)
-            throw std::runtime_error(
-                "Inconsistent parameters exist in the two provided "
-                "variable sets.");
-
-        sum_val =
-            input_1.NameAndValue[i].second + input_2.NameAndValue[i].second;
-
-        iterator loc = find(input_1.name(i));
-        if (loc == NameAndValue.end()) {
-            Index.push_back(input_1.Index[i]);
-            ParameterType.push_back(input_1.ParameterType[i]);
-            Recompute.push_back(input_1.Recompute[i]);
-
-            // We can reuse the above values, which aren't summed between the
-            // objects, but the parameter values themselves need to use the
-            // summed values.
-            vname = input_1.NameAndValue[i].first;
-            NameAndValue.push_back(pair_type(vname, sum_val));
-        }
-        else
-            (*loc).second = sum_val;
+  value_type sum_val;
+  std::string vname;
+
+  // Check that objects to be summed together have the same number of active
+  // variables.
+  if (input_1.num_active_vars != input_2.num_active_vars)
+    throw std::runtime_error("Inconsistent number of parameters in two provided "
+                             "variable sets.");
+
+  for (int i = 0; i < input_1.size(); ++i)
+  {
+    // Check that each pair of corresponding variables in the two VariableSet
+    // objects has the same name - otherwise we certainly shouldn't be
+    // adding them.
+    if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first)
+      throw std::runtime_error("Inconsistent parameters exist in the two provided "
+                               "variable sets.");
+
+    sum_val = input_1.NameAndValue[i].second + input_2.NameAndValue[i].second;
+
+    iterator loc = find(input_1.name(i));
+    if (loc == NameAndValue.end())
+    {
+      Index.push_back(input_1.Index[i]);
+      ParameterType.push_back(input_1.ParameterType[i]);
+      Recompute.push_back(input_1.Recompute[i]);
+
+      // We can reuse the above values, which aren't summed between the
+      // objects, but the parameter values themselves need to use the
+      // summed values.
+      vname = input_1.NameAndValue[i].first;
+      NameAndValue.push_back(pair_type(vname, sum_val));
     }
-    num_active_vars = input_1.num_active_vars;
+    else
+      (*loc).second = sum_val;
+  }
+  num_active_vars = input_1.num_active_vars;
 }
 
-template <typename T>
-void
-VariableSetT<T>::insertFromDiff(
-    const VariableSetT& input_1, const VariableSetT& input_2)
+template<typename T>
+void VariableSetT<T>::insertFromDiff(const VariableSetT& input_1, const VariableSetT& input_2)
 {
-    value_type diff_val;
-    std::string vname;
-
-    // Check that objects to be subtracted have the same number of active
-    // variables.
-    if (input_1.num_active_vars != input_2.num_active_vars)
-        throw std::runtime_error(
-            "Inconsistent number of parameters in two provided "
-            "variable sets.");
-
-    for (int i = 0; i < input_1.size(); ++i) {
-        // Check that each of the equivalent variables in both VariableSet
-        // objects have the same name - otherwise we certainly shouldn't be
-        // subtracting them.
-        if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first)
-            throw std::runtime_error(
-                "Inconsistent parameters exist in the two provided "
-                "variable sets.");
-
-        diff_val =
-            input_1.NameAndValue[i].second - input_2.NameAndValue[i].second;
-
-        iterator loc = find(input_1.name(i));
-        if (loc == NameAndValue.end()) {
-            Index.push_back(input_1.Index[i]);
-            ParameterType.push_back(input_1.ParameterType[i]);
-            Recompute.push_back(input_1.Recompute[i]);
-
-            // We can reuse the above values, which aren't subtracted between
-            // the objects, but the parameter values themselves need to use the
-            // subtracted values.
-            vname = input_1.NameAndValue[i].first;
-            NameAndValue.push_back(pair_type(vname, diff_val));
-        }
-        else
-            (*loc).second = diff_val;
+  value_type diff_val;
+  std::string vname;
+
+  // Check that objects to be subtracted have the same number of active
+  // variables.
+  if (input_1.num_active_vars != input_2.num_active_vars)
+    throw std::runtime_error("Inconsistent number of parameters in two provided "
+                             "variable sets.");
+
+  for (int i = 0; i < input_1.size(); ++i)
+  {
+    // Check that each pair of corresponding variables in the two VariableSet
+    // objects has the same name - otherwise we certainly shouldn't be
+    // subtracting them.
+    if (input_1.NameAndValue[i].first != input_2.NameAndValue[i].first)
+      throw std::runtime_error("Inconsistent parameters exist in the two provided "
+                               "variable sets.");
+
+    diff_val = input_1.NameAndValue[i].second - input_2.NameAndValue[i].second;
+
+    iterator loc = find(input_1.name(i));
+    if (loc == NameAndValue.end())
+    {
+      Index.push_back(input_1.Index[i]);
+      ParameterType.push_back(input_1.ParameterType[i]);
+      Recompute.push_back(input_1.Recompute[i]);
+
+      // We can reuse the above values, which aren't subtracted between
+      // the objects, but the parameter values themselves need to use the
+      // subtracted values.
+      vname = input_1.NameAndValue[i].first;
+      NameAndValue.push_back(pair_type(vname, diff_val));
     }
-    num_active_vars = input_1.num_active_vars;
+    else
+      (*loc).second = diff_val;
+  }
+  num_active_vars = input_1.num_active_vars;
 }
 
-template <typename T>
-void
-VariableSetT<T>::removeInactive()
+template<typename T>
+void VariableSetT<T>::removeInactive()
 {
-    std::vector<int> valid(Index);
-    std::vector<pair_type> acopy(NameAndValue);
-    std::vector<index_pair_type> bcopy(Recompute), ccopy(ParameterType);
-    num_active_vars = 0;
-    Index.clear();
-    NameAndValue.clear();
-    Recompute.clear();
-    ParameterType.clear();
-    for (int i = 0; i < valid.size(); ++i) {
-        if (valid[i] > -1) {
-            Index.push_back(num_active_vars++);
-            NameAndValue.push_back(acopy[i]);
-            Recompute.push_back(bcopy[i]);
-            ParameterType.push_back(ccopy[i]);
-        }
+  std::vector<int> valid(Index);
+  std::vector<pair_type> acopy(NameAndValue);
+  std::vector<index_pair_type> bcopy(Recompute), ccopy(ParameterType);
+  num_active_vars = 0;
+  Index.clear();
+  NameAndValue.clear();
+  Recompute.clear();
+  ParameterType.clear();
+  for (int i = 0; i < valid.size(); ++i)
+  {
+    if (valid[i] > -1)
+    {
+      Index.push_back(num_active_vars++);
+      NameAndValue.push_back(acopy[i]);
+      Recompute.push_back(bcopy[i]);
+      ParameterType.push_back(ccopy[i]);
     }
+  }
 }
 
-template <typename T>
-void
-VariableSetT<T>::resetIndex()
+template<typename T>
+void VariableSetT<T>::resetIndex()
 {
-    num_active_vars = 0;
-    for (int i = 0; i < Index.size(); ++i) {
-        Index[i] = (Index[i] < 0) ? -1 : num_active_vars++;
-    }
+  num_active_vars = 0;
+  for (int i = 0; i < Index.size(); ++i)
+  {
+    Index[i] = (Index[i] < 0) ? -1 : num_active_vars++;
+  }
 }
 
-template <typename T>
-void
-VariableSetT<T>::getIndex(const VariableSetT& selected)
+template<typename T>
+void VariableSetT<T>::getIndex(const VariableSetT& selected)
 {
-    num_active_vars = 0;
-    for (int i = 0; i < NameAndValue.size(); ++i) {
-        Index[i] = selected.getIndex(NameAndValue[i].first);
-        if (Index[i] >= 0)
-            num_active_vars++;
-    }
+  num_active_vars = 0;
+  for (int i = 0; i < NameAndValue.size(); ++i)
+  {
+    Index[i] = selected.getIndex(NameAndValue[i].first);
+    if (Index[i] >= 0)
+      num_active_vars++;
+  }
 }
 
-template <typename T>
-int
-VariableSetT<T>::getIndex(const std::string& vname) const
+template<typename T>
+int VariableSetT<T>::getIndex(const std::string& vname) const
 {
-    int loc = 0;
-    while (loc != NameAndValue.size()) {
-        if (NameAndValue[loc].first == vname)
-            return Index[loc];
-        ++loc;
-    }
-    return -1;
+  int loc = 0;
+  while (loc != NameAndValue.size())
+  {
+    if (NameAndValue[loc].first == vname)
+      return Index[loc];
+    ++loc;
+  }
+  return -1;
 }
 
-template <typename T>
-void
-VariableSetT<T>::setIndexDefault()
+template<typename T>
+void VariableSetT<T>::setIndexDefault()
 {
-    for (int i = 0; i < Index.size(); ++i)
-        Index[i] = i;
+  for (int i = 0; i < Index.size(); ++i)
+    Index[i] = i;
 }
 
-template <typename T>
-void
-VariableSetT<T>::print(
-    std::ostream& os, int leftPadSpaces, bool printHeader) const
+template<typename T>
+void VariableSetT<T>::print(std::ostream& os, int leftPadSpaces, bool printHeader) const
 {
-    std::string pad_str = std::string(leftPadSpaces, ' ');
-    int max_name_len = 0;
-    if (NameAndValue.size() > 0)
-        max_name_len = std::max_element(NameAndValue.begin(),
-            NameAndValue.end(), [](const pair_type& e1, const pair_type& e2) {
-                return e1.first.length() < e2.first.length();
-            })->first.length();
-
-    int max_value_len = 28; // 6 for the precision and 7 for minus sign, leading
-                            // value, period, and exponent.
-    int max_type_len = 1;
-    int max_recompute_len = 1;
-    int max_use_len = 3;
-    int max_index_len = 1;
-    if (printHeader) {
-        max_name_len = std::max(max_name_len, 4); // size of "Name" header
-        max_type_len = 4;
-        max_recompute_len = 9;
-        max_index_len = 5;
-        os << pad_str << setw(max_name_len) << "Name"
-           << " " << setw(max_value_len) << "Value"
-           << " " << setw(max_type_len) << "Type"
-           << " " << setw(max_recompute_len) << "Recompute"
-           << " " << setw(max_use_len) << "Use"
-           << " " << setw(max_index_len) << "Index" << std::endl;
-        os << pad_str << std::setfill('-') << setw(max_name_len) << ""
-           << " " << setw(max_value_len) << ""
-           << " " << setw(max_type_len) << ""
-           << " " << setw(max_recompute_len) << ""
-           << " " << setw(max_use_len) << ""
-           << " " << setw(max_index_len) << "" << std::endl;
-        os << std::setfill(' ');
-    }
-
-    for (int i = 0; i < NameAndValue.size(); ++i) {
-        os << pad_str << setw(max_name_len) << NameAndValue[i].first << " "
-           << std::setprecision(6) << std::scientific << setw(max_value_len)
-           << NameAndValue[i].second << " " << setw(max_type_len)
-           << ParameterType[i].second << " " << setw(max_recompute_len)
-           << Recompute[i].second << " ";
-
-        os << std::defaultfloat;
-
-        if (Index[i] < 0)
-            os << setw(max_use_len) << "OFF" << std::endl;
-        else
-            os << setw(max_use_len) << "ON"
-               << " " << setw(max_index_len) << Index[i] << std::endl;
-    }
+  std::string pad_str = std::string(leftPadSpaces, ' ');
+  int max_name_len    = 0;
+  if (NameAndValue.size() > 0)
+    max_name_len =
+        std::max_element(NameAndValue.begin(), NameAndValue.end(), [](const pair_type& e1, const pair_type& e2) {
+          return e1.first.length() < e2.first.length();
+        })->first.length();
+
+  int max_value_len = 28; // 6 for the precision and 7 for minus sign, leading
+                          // value, period, and exponent.
+  int max_type_len      = 1;
+  int max_recompute_len = 1;
+  int max_use_len       = 3;
+  int max_index_len     = 1;
+  if (printHeader)
+  {
+    max_name_len      = std::max(max_name_len, 4); // size of "Name" header
+    max_type_len      = 4;
+    max_recompute_len = 9;
+    max_index_len     = 5;
+    os << pad_str << setw(max_name_len) << "Name"
+       << " " << setw(max_value_len) << "Value"
+       << " " << setw(max_type_len) << "Type"
+       << " " << setw(max_recompute_len) << "Recompute"
+       << " " << setw(max_use_len) << "Use"
+       << " " << setw(max_index_len) << "Index" << std::endl;
+    os << pad_str << std::setfill('-') << setw(max_name_len) << ""
+       << " " << setw(max_value_len) << ""
+       << " " << setw(max_type_len) << ""
+       << " " << setw(max_recompute_len) << ""
+       << " " << setw(max_use_len) << ""
+       << " " << setw(max_index_len) << "" << std::endl;
+    os << std::setfill(' ');
+  }
+
+  for (int i = 0; i < NameAndValue.size(); ++i)
+  {
+    os << pad_str << setw(max_name_len) << NameAndValue[i].first << " " << std::setprecision(6) << std::scientific
+       << setw(max_value_len) << NameAndValue[i].second << " " << setw(max_type_len) << ParameterType[i].second << " "
+       << setw(max_recompute_len) << Recompute[i].second << " ";
+
+    os << std::defaultfloat;
+
+    if (Index[i] < 0)
+      os << setw(max_use_len) << "OFF" << std::endl;
+    else
+      os << setw(max_use_len) << "ON"
+         << " " << setw(max_index_len) << Index[i] << std::endl;
+  }
 }
 
-template <typename T>
-void
-VariableSetT<T>::writeToHDF(
-    const std::string& filename, qmcplusplus::hdf_archive& hout) const
+template<typename T>
+void VariableSetT<T>::writeToHDF(const std::string& filename, qmcplusplus::hdf_archive& hout) const
 {
-    hout.create(filename);
-
-    // File Versioning
-    // 1.0.0  Initial file version
-    // 1.1.0  Files could have object-specific data from
-    // OptimizableObject::read/writeVariationalParameters
-    std::vector<int> vp_file_version{1, 1, 0};
-    hout.write(vp_file_version, "version");
-
-    std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z"));
-    hout.write(timestamp, "timestamp");
-
-    hout.push("name_value_lists");
-
-    std::vector<value_type> param_values;
-    std::vector<std::string> param_names;
-    for (auto& pair_it : NameAndValue) {
-        param_names.push_back(pair_it.first);
-        param_values.push_back(pair_it.second);
-    }
-
-    hout.write(param_names, "parameter_names");
-    hout.write(param_values, "parameter_values");
-    hout.pop();
+  hout.create(filename);
+
+  // File Versioning
+  // 1.0.0  Initial file version
+  // 1.1.0  Files could have object-specific data from
+  // OptimizableObject::read/writeVariationalParameters
+  std::vector<int> vp_file_version{1, 1, 0};
+  hout.write(vp_file_version, "version");
+
+  std::string timestamp(getDateAndTime("%Y-%m-%d %H:%M:%S %Z"));
+  hout.write(timestamp, "timestamp");
+
+  hout.push("name_value_lists");
+
+  std::vector<value_type> param_values;
+  std::vector<std::string> param_names;
+  for (auto& pair_it : NameAndValue)
+  {
+    param_names.push_back(pair_it.first);
+    param_values.push_back(pair_it.second);
+  }
+
+  hout.write(param_names, "parameter_names");
+  hout.write(param_values, "parameter_values");
+  hout.pop();
 }
 
-template <typename T>
-void
-VariableSetT<T>::readFromHDF(
-    const std::string& filename, qmcplusplus::hdf_archive& hin)
+template<typename T>
+void VariableSetT<T>::readFromHDF(const std::string& filename, qmcplusplus::hdf_archive& hin)
 {
-    if (!hin.open(filename, H5F_ACC_RDONLY)) {
-        std::ostringstream err_msg;
-        err_msg << "Unable to open VP file: " << filename;
-        throw std::runtime_error(err_msg.str());
-    }
-
-    try {
-        hin.push("name_value_lists", false);
-    }
-    catch (std::runtime_error&) {
-        std::ostringstream err_msg;
-        err_msg << "The group name_value_lists in not present in file: "
-                << filename;
-        throw std::runtime_error(err_msg.str());
-    }
-
-    std::vector<value_type> param_values;
-    hin.read(param_values, "parameter_values");
-
-    std::vector<std::string> param_names;
-    hin.read(param_names, "parameter_names");
-
-    for (int i = 0; i < param_names.size(); i++) {
-        std::string& vp_name = param_names[i];
-        // Find and set values by name.
-        // Values that are not present do not get added.
-        if (find(vp_name) != end())
-            (*this)[vp_name] = param_values[i];
-    }
-
-    hin.pop();
+  if (!hin.open(filename, H5F_ACC_RDONLY))
+  {
+    std::ostringstream err_msg;
+    err_msg << "Unable to open VP file: " << filename;
+    throw std::runtime_error(err_msg.str());
+  }
+
+  try
+  {
+    hin.push("name_value_lists", false);
+  }
+  catch (std::runtime_error&)
+  {
+    std::ostringstream err_msg;
+    err_msg << "The group name_value_lists in not present in file: " << filename;
+    throw std::runtime_error(err_msg.str());
+  }
+
+  std::vector<value_type> param_values;
+  hin.read(param_values, "parameter_values");
+
+  std::vector<std::string> param_names;
+  hin.read(param_names, "parameter_names");
+
+  for (int i = 0; i < param_names.size(); i++)
+  {
+    std::string& vp_name = param_names[i];
+    // Find and set values by name.
+    // Entries in the file whose names are not already in this set are skipped, not added.
+    if (find(vp_name) != end())
+      (*this)[vp_name] = param_values[i];
+  }
+
+  hin.pop();
 }
 
 template class VariableSetT<double>;
diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
index 5ada9b4f9d3..7814d952e88 100644
--- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
+++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.cpp
@@ -4,8 +4,7 @@
 //
 // Copyright (c) 2023 Raymond Clay and QMCPACK developers.
 //
-// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National
-// Laboratories
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
 //
 // File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
@@ -15,109 +14,106 @@
 namespace qmcplusplus
 {
 
-template <class T>
-ConstantSPOSetT<T>::ConstantSPOSetT(
-    const std::string& my_name, const int nparticles, const int norbitals) :
-    SPOSetT<T>(my_name),
-    numparticles_(nparticles)
+template<class T>
+ConstantSPOSetT<T>::ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals)
+    : SPOSetT<T>(my_name), numparticles_(nparticles)
 {
-    this->OrbitalSetSize = norbitals;
-    ref_psi_.resize(numparticles_, this->OrbitalSetSize);
-    ref_egrad_.resize(numparticles_, this->OrbitalSetSize);
-    ref_elapl_.resize(numparticles_, this->OrbitalSetSize);
+  this->OrbitalSetSize = norbitals;
+  ref_psi_.resize(numparticles_, this->OrbitalSetSize);
+  ref_egrad_.resize(numparticles_, this->OrbitalSetSize);
+  ref_elapl_.resize(numparticles_, this->OrbitalSetSize);
 
-    ref_psi_ = 0.0;
-    ref_egrad_ = 0.0;
-    ref_elapl_ = 0.0;
+  ref_psi_   = 0.0;
+  ref_egrad_ = 0.0;
+  ref_elapl_ = 0.0;
 }
 
-template <class T>
-std::unique_ptr<SPOSetT<T>>
-ConstantSPOSetT<T>::makeClone() const
+template<class T>
+std::unique_ptr<SPOSetT<T>> ConstantSPOSetT<T>::makeClone() const
 {
-    auto myclone = std::make_unique<ConstantSPOSetT<T>>(
-        this->my_name_, numparticles_, this->OrbitalSetSize);
-    myclone->setRefVals(ref_psi_);
-    myclone->setRefEGrads(ref_egrad_);
-    myclone->setRefELapls(ref_elapl_);
-    return myclone;
+  auto myclone = std::make_unique<ConstantSPOSetT<T>>(this->my_name_, numparticles_, this->OrbitalSetSize);
+  myclone->setRefVals(ref_psi_);
+  myclone->setRefEGrads(ref_egrad_);
+  myclone->setRefELapls(ref_elapl_);
+  return myclone;
 }
 
 template<class T>
 void ConstantSPOSetT<T>::checkOutVariables(const OptVariablesTypeT<T>& active)
 {
-    APP_ABORT("ConstantSPOSet should not call checkOutVariables");
+  APP_ABORT("ConstantSPOSet should not call checkOutVariables");
 };
 
-template <class T>
-void
-ConstantSPOSetT<T>::setOrbitalSetSize(int norbs)
+template<class T>
+void ConstantSPOSetT<T>::setOrbitalSetSize(int norbs)
 {
-    APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()");
+  APP_ABORT("ConstantSPOSet should not call setOrbitalSetSize()");
 }
 
-template <class T>
-void
-ConstantSPOSetT<T>::setRefVals(const ValueMatrix& vals)
+template<class T>
+void ConstantSPOSetT<T>::setRefVals(const ValueMatrix& vals)
 {
-    assert(vals.cols() == this->OrbitalSetSize);
-    assert(vals.rows() == numparticles_);
-    ref_psi_ = vals;
+  assert(vals.cols() == this->OrbitalSetSize);
+  assert(vals.rows() == numparticles_);
+  ref_psi_ = vals;
 }
 
-template <class T>
-void
-ConstantSPOSetT<T>::setRefEGrads(const GradMatrix& grads)
+template<class T>
+void ConstantSPOSetT<T>::setRefEGrads(const GradMatrix& grads)
 {
-    assert(grads.cols() == this->OrbitalSetSize);
-    assert(grads.rows() == numparticles_);
-    ref_egrad_ = grads;
+  assert(grads.cols() == this->OrbitalSetSize);
+  assert(grads.rows() == numparticles_);
+  ref_egrad_ = grads;
 }
 
-template <class T>
-void
-ConstantSPOSetT<T>::setRefELapls(const ValueMatrix& lapls)
+template<class T>
+void ConstantSPOSetT<T>::setRefELapls(const ValueMatrix& lapls)
 {
-    assert(lapls.cols() == this->OrbitalSetSize);
-    assert(lapls.rows() == numparticles_);
-    ref_elapl_ = lapls;
+  assert(lapls.cols() == this->OrbitalSetSize);
+  assert(lapls.rows() == numparticles_);
+  ref_elapl_ = lapls;
 }
 
-template <class T>
-void
-ConstantSPOSetT<T>::evaluateValue(
-    const ParticleSetT<T>& P, int iat, ValueVector& psi)
+template<class T>
+void ConstantSPOSetT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-    const auto* vp = dynamic_cast<const VirtualParticleSetT<T>*>(&P);
-    int ptcl = vp ? vp->refPtcl : iat;
-    assert(psi.size() == this->OrbitalSetSize);
-    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
-        psi[iorb] = ref_psi_(ptcl, iorb);
+  const auto* vp = dynamic_cast<const VirtualParticleSetT<T>*>(&P);
+  int ptcl       = vp ? vp->refPtcl : iat;
+  assert(psi.size() == this->OrbitalSetSize);
+  for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+    psi[iorb] = ref_psi_(ptcl, iorb);
 }
 
-template <class T>
-void
-ConstantSPOSetT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat,
-    ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
+template<class T>
+void ConstantSPOSetT<T>::evaluateVGL(const ParticleSetT<T>& P,
+                                     int iat,
+                                     ValueVector& psi,
+                                     GradVector& dpsi,
+                                     ValueVector& d2psi)
 {
-    for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++) {
-        psi[iorb] = ref_psi_(iat, iorb);
-        dpsi[iorb] = ref_egrad_(iat, iorb);
-        d2psi[iorb] = ref_elapl_(iat, iorb);
-    }
+  for (int iorb = 0; iorb < this->OrbitalSetSize; iorb++)
+  {
+    psi[iorb]   = ref_psi_(iat, iorb);
+    dpsi[iorb]  = ref_egrad_(iat, iorb);
+    d2psi[iorb] = ref_elapl_(iat, iorb);
+  }
 }
 
-template <class T>
-void
-ConstantSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first,
-    int last, ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
+template<class T>
+void ConstantSPOSetT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                              int first,
+                                              int last,
+                                              ValueMatrix& logdet,
+                                              GradMatrix& dlogdet,
+                                              ValueMatrix& d2logdet)
 {
-    for (int iat = first, i = 0; iat < last; ++iat, ++i) {
-        ValueVector v(logdet[i], logdet.cols());
-        GradVector g(dlogdet[i], dlogdet.cols());
-        ValueVector l(d2logdet[i], d2logdet.cols());
-        evaluateVGL(P, iat, v, g, l);
-    }
+  for (int iat = first, i = 0; iat < last; ++iat, ++i)
+  {
+    ValueVector v(logdet[i], logdet.cols());
+    GradVector g(dlogdet[i], dlogdet.cols());
+    ValueVector l(d2logdet[i], d2logdet.cols());
+    evaluateVGL(P, iat, v, g, l);
+  }
 }
 
 template class ConstantSPOSetT<float>;
diff --git a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
index 335796df964..16942cd1e01 100644
--- a/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
+++ b/src/QMCWaveFunctions/tests/ConstantSPOSetT.h
@@ -4,8 +4,7 @@
 //
 // Copyright (c) 2023 Raymond Clay and QMCPACK developers.
 //
-// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National
-// Laboratories
+// File developed by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
 //
 // File created by: Raymond Clay, rclay@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
@@ -23,85 +22,75 @@ namespace qmcplusplus
  * deterministic and known output to objects requiring SPOSet evaluations.
  *
  */
-template <class T>
+template<class T>
 class ConstantSPOSetT : public SPOSetT<T>
 {
 public:
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using GradVector = typename SPOSetT<T>::GradVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using GradVector  = typename SPOSetT<T>::GradVector;
 
-    ConstantSPOSetT(const std::string& my_name) = delete;
+  ConstantSPOSetT(const std::string& my_name) = delete;
 
-    // Constructor needs number of particles and number of orbitals.  This is
-    // the minimum amount of information needed to sanely construct all data
-    // members and perform size checks later.
-    ConstantSPOSetT(
-        const std::string& my_name, const int nparticles, const int norbitals);
+  // Constructor needs number of particles and number of orbitals.  This is
+  // the minimum amount of information needed to sanely construct all data
+  // members and perform size checks later.
+  ConstantSPOSetT(const std::string& my_name, const int nparticles, const int norbitals);
 
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const final;
+  std::unique_ptr<SPOSetT<T>> makeClone() const final;
 
-    std::string
-    getClassName() const final
-    {
-        return "ConstantSPOSet";
-    };
+  std::string getClassName() const final { return "ConstantSPOSet"; };
 
-    void checkOutVariables(const OptVariablesTypeT<T>& active) final;
+  void checkOutVariables(const OptVariablesTypeT<T>& active) final;
 
-    void
-    setOrbitalSetSize(int norbs) final;
+  void setOrbitalSetSize(int norbs) final;
 
-    /**
+  /**
      * @brief Setter method to set \phi_j(r_i). Stores input matrix in ref_psi_.
      * @param Nelec x Nion ValueType matrix of \phi_j(r_i)
      * @return void
      */
-    void
-    setRefVals(const ValueMatrix& vals);
-    /**
+  void setRefVals(const ValueMatrix& vals);
+  /**
      * @brief Setter method to set \nabla_i \phi_j(r_i). Stores input matrix in
      * ref_egrad_.
      * @param Nelec x Nion GradType matrix of \grad_i \phi_j(r_i)
      * @return void
      */
-    void
-    setRefEGrads(const GradMatrix& grads);
-    /**
+  void setRefEGrads(const GradMatrix& grads);
+  /**
      * @brief Setter method to set \nabla^2_i \phi_j(r_i). Stores input matrix
      * in ref_elapl_.
      * @param Nelec x Nion GradType matrix of \grad^2_i \phi_j(r_i)
      * @return void
      */
-    void
-    setRefELapls(const ValueMatrix& lapls);
+  void setRefELapls(const ValueMatrix& lapls);
 
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) final;
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) final;
 
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) final;
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) final;
 
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet) final;
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) final;
 
 private:
-    const int numparticles_; /// evaluate_notranspose arrays are nparticle x
-                             /// norb matrices. To ensure consistent array
-                             /// sizing and enforcement, we agree at
-                             /// construction how large these matrices will be.
-                             /// norb is stored in SPOSet::OrbitalSetSize.
-
-    // Value, electron gradient, and electron laplacian at "reference
-    // configuration". i.e. before any attempted moves.
-
-    ValueMatrix ref_psi_;
-    GradMatrix ref_egrad_;
-    ValueMatrix ref_elapl_;
+  const int numparticles_; /// evaluate_notranspose arrays are nparticle x
+                           /// norb matrices. To ensure consistent array
+                           /// sizing and enforcement, we agree at
+                           /// construction how large these matrices will be.
+                           /// norb is stored in SPOSet::OrbitalSetSize.
+
+  // Value, electron gradient, and electron laplacian at "reference
+  // configuration". i.e. before any attempted moves.
+
+  ValueMatrix ref_psi_;
+  GradMatrix ref_egrad_;
+  ValueMatrix ref_elapl_;
 };
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.cpp b/src/QMCWaveFunctions/tests/FakeSPOT.cpp
index 85678ce5f39..2f5e486082a 100644
--- a/src/QMCWaveFunctions/tests/FakeSPOT.cpp
+++ b/src/QMCWaveFunctions/tests/FakeSPOT.cpp
@@ -13,135 +13,141 @@
 
 namespace qmcplusplus
 {
-template <class T>
+template<class T>
 FakeSPOT<T>::FakeSPOT() : SPOSetT<T>("one_FakeSPO")
 {
-    a.resize(3, 3);
+  a.resize(3, 3);
 
-    a(0, 0) = 2.3;
-    a(0, 1) = 4.5;
-    a(0, 2) = 2.6;
-    a(1, 0) = 0.5;
-    a(1, 1) = 8.5;
-    a(1, 2) = 3.3;
-    a(2, 0) = 1.8;
-    a(2, 1) = 4.4;
-    a(2, 2) = 4.9;
+  a(0, 0) = 2.3;
+  a(0, 1) = 4.5;
+  a(0, 2) = 2.6;
+  a(1, 0) = 0.5;
+  a(1, 1) = 8.5;
+  a(1, 2) = 3.3;
+  a(2, 0) = 1.8;
+  a(2, 1) = 4.4;
+  a(2, 2) = 4.9;
 
-    v.resize(3);
-    v[0] = 1.9;
-    v[1] = 2.0;
-    v[2] = 3.1;
+  v.resize(3);
+  v[0] = 1.9;
+  v[1] = 2.0;
+  v[2] = 3.1;
 
-    a2.resize(4, 4);
-    a2(0, 0) = 2.3;
-    a2(0, 1) = 4.5;
-    a2(0, 2) = 2.6;
-    a2(0, 3) = 1.2;
-    a2(1, 0) = 0.5;
-    a2(1, 1) = 8.5;
-    a2(1, 2) = 3.3;
-    a2(1, 3) = 0.3;
-    a2(2, 0) = 1.8;
-    a2(2, 1) = 4.4;
-    a2(2, 2) = 4.9;
-    a2(2, 3) = 2.8;
-    a2(3, 0) = 0.8;
-    a2(3, 1) = 4.1;
-    a2(3, 2) = 3.2;
-    a2(3, 3) = 1.1;
+  a2.resize(4, 4);
+  a2(0, 0) = 2.3;
+  a2(0, 1) = 4.5;
+  a2(0, 2) = 2.6;
+  a2(0, 3) = 1.2;
+  a2(1, 0) = 0.5;
+  a2(1, 1) = 8.5;
+  a2(1, 2) = 3.3;
+  a2(1, 3) = 0.3;
+  a2(2, 0) = 1.8;
+  a2(2, 1) = 4.4;
+  a2(2, 2) = 4.9;
+  a2(2, 3) = 2.8;
+  a2(3, 0) = 0.8;
+  a2(3, 1) = 4.1;
+  a2(3, 2) = 3.2;
+  a2(3, 3) = 1.1;
 
-    v2.resize(4, 4);
+  v2.resize(4, 4);
 
-    v2(0, 0) = 3.2;
-    v2(0, 1) = 0.5;
-    v2(0, 2) = 5.9;
-    v2(0, 3) = 3.7;
-    v2(1, 0) = 0.3;
-    v2(1, 1) = 1.4;
-    v2(1, 2) = 3.9;
-    v2(1, 3) = 8.2;
-    v2(2, 0) = 3.3;
-    v2(2, 1) = 5.4;
-    v2(2, 2) = 4.9;
-    v2(2, 3) = 2.2;
-    v2(3, 1) = 5.4;
-    v2(3, 2) = 4.9;
-    v2(3, 3) = 2.2;
+  v2(0, 0) = 3.2;
+  v2(0, 1) = 0.5;
+  v2(0, 2) = 5.9;
+  v2(0, 3) = 3.7;
+  v2(1, 0) = 0.3;
+  v2(1, 1) = 1.4;
+  v2(1, 2) = 3.9;
+  v2(1, 3) = 8.2;
+  v2(2, 0) = 3.3;
+  v2(2, 1) = 5.4;
+  v2(2, 2) = 4.9;
+  v2(2, 3) = 2.2;
+  v2(3, 1) = 5.4;
+  v2(3, 2) = 4.9;
+  v2(3, 3) = 2.2;
 
-    gv.resize(4);
-    gv[0] = GradType(1.0, 0.0, 0.1);
-    gv[1] = GradType(1.0, 2.0, 0.1);
-    gv[2] = GradType(2.0, 1.0, 0.1);
-    gv[3] = GradType(0.4, 0.3, 0.1);
+  gv.resize(4);
+  gv[0] = GradType(1.0, 0.0, 0.1);
+  gv[1] = GradType(1.0, 2.0, 0.1);
+  gv[2] = GradType(2.0, 1.0, 0.1);
+  gv[3] = GradType(0.4, 0.3, 0.1);
 }
-template <class T>
-std::unique_ptr<SPOSetT<T>>
-FakeSPOT<T>::makeClone() const
+template<class T>
+std::unique_ptr<SPOSetT<T>> FakeSPOT<T>::makeClone() const
 {
-    return std::make_unique<FakeSPOT<T>>(*this);
+  return std::make_unique<FakeSPOT<T>>(*this);
 }
 
-template <class T>
-void
-FakeSPOT<T>::setOrbitalSetSize(int norbs)
+template<class T>
+void FakeSPOT<T>::setOrbitalSetSize(int norbs)
 {
-    this->OrbitalSetSize = norbs;
+  this->OrbitalSetSize = norbs;
 }
 
-template <class T>
-void
-FakeSPOT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
+template<class T>
+void FakeSPOT<T>::evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi)
 {
-    if (iat < 0)
-        for (int i = 0; i < psi.size(); i++)
-            psi[i] = 1.2 * i - i * i;
-    else if (this->OrbitalSetSize == 3)
-        for (int i = 0; i < 3; i++)
-            psi[i] = a(iat, i);
-    else if (this->OrbitalSetSize == 4)
-        for (int i = 0; i < 4; i++)
-            psi[i] = a2(iat, i);
+  if (iat < 0)
+    for (int i = 0; i < psi.size(); i++)
+      psi[i] = 1.2 * i - i * i;
+  else if (this->OrbitalSetSize == 3)
+    for (int i = 0; i < 3; i++)
+      psi[i] = a(iat, i);
+  else if (this->OrbitalSetSize == 4)
+    for (int i = 0; i < 4; i++)
+      psi[i] = a2(iat, i);
 }
 
-template <class T>
-void
-FakeSPOT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-    GradVector& dpsi, ValueVector& d2psi)
+template<class T>
+void FakeSPOT<T>::evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi)
 {
-    if (this->OrbitalSetSize == 3) {
-        for (int i = 0; i < 3; i++) {
-            psi[i] = v[i];
-            dpsi[i] = gv[i];
-        }
+  if (this->OrbitalSetSize == 3)
+  {
+    for (int i = 0; i < 3; i++)
+    {
+      psi[i]  = v[i];
+      dpsi[i] = gv[i];
     }
-    else if (this->OrbitalSetSize == 4) {
-        for (int i = 0; i < 4; i++) {
-            psi[i] = v2(iat, i);
-            dpsi[i] = gv[i];
-        }
+  }
+  else if (this->OrbitalSetSize == 4)
+  {
+    for (int i = 0; i < 4; i++)
+    {
+      psi[i]  = v2(iat, i);
+      dpsi[i] = gv[i];
     }
+  }
 }
 
-template <class T>
-void
-FakeSPOT<T>::evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-    ValueMatrix& logdet, GradMatrix& dlogdet, ValueMatrix& d2logdet)
+template<class T>
+void FakeSPOT<T>::evaluate_notranspose(const ParticleSetT<T>& P,
+                                       int first,
+                                       int last,
+                                       ValueMatrix& logdet,
+                                       GradMatrix& dlogdet,
+                                       ValueMatrix& d2logdet)
 {
-    if (this->OrbitalSetSize == 3) {
-        for (int i = 0; i < 3; i++)
-            for (int j = 0; j < 3; j++) {
-                logdet(j, i) = a(i, j);
-                dlogdet[i][j] = gv[j] + GradType(i);
-            }
-    }
-    else if (this->OrbitalSetSize == 4) {
-        for (int i = 0; i < 4; i++)
-            for (int j = 0; j < 4; j++) {
-                logdet(j, i) = a2(i, j);
-                dlogdet[i][j] = gv[j] + GradType(i);
-            }
-    }
+  if (this->OrbitalSetSize == 3)
+  {
+    for (int i = 0; i < 3; i++)
+      for (int j = 0; j < 3; j++)
+      {
+        logdet(j, i)  = a(i, j);
+        dlogdet[i][j] = gv[j] + GradType(i);
+      }
+  }
+  else if (this->OrbitalSetSize == 4)
+  {
+    for (int i = 0; i < 4; i++)
+      for (int j = 0; j < 4; j++)
+      {
+        logdet(j, i)  = a2(i, j);
+        dlogdet[i][j] = gv[j] + GradType(i);
+      }
+  }
 }
 
 // Class concrete types from ValueType
diff --git a/src/QMCWaveFunctions/tests/FakeSPOT.h b/src/QMCWaveFunctions/tests/FakeSPOT.h
index f0a6f1ef80a..6603f8f52a1 100644
--- a/src/QMCWaveFunctions/tests/FakeSPOT.h
+++ b/src/QMCWaveFunctions/tests/FakeSPOT.h
@@ -16,55 +16,45 @@
 
 namespace qmcplusplus
 {
-template <class T>
+template<class T>
 class FakeSPOT : public SPOSetT<T>
 {
 public:
-    Matrix<T> a;
-    Matrix<T> a2;
-    Vector<T> v;
-    Matrix<T> v2;
+  Matrix<T> a;
+  Matrix<T> a2;
+  Vector<T> v;
+  Matrix<T> v2;
 
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-    using GradType = typename SPOSetT<T>::GradType;
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+  using GradType    = typename SPOSetT<T>::GradType;
 
-    typename SPOSetT<T>::GradVector gv;
+  typename SPOSetT<T>::GradVector gv;
 
-    FakeSPOT();
+  FakeSPOT();
 
-    ~FakeSPOT() override = default;
+  ~FakeSPOT() override = default;
 
-    std::string
-    getClassName() const override
-    {
-        return "FakeSPO";
-    }
+  std::string getClassName() const override { return "FakeSPO"; }
 
-    std::unique_ptr<SPOSetT<T>>
-    makeClone() const override;
+  std::unique_ptr<SPOSetT<T>> makeClone() const override;
 
-    virtual void
-    report()
-    {
-    }
+  virtual void report() {}
 
-    void
-    setOrbitalSetSize(int norbs) override;
+  void setOrbitalSetSize(int norbs) override;
 
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
+  void evaluateValue(const ParticleSetT<T>& P, int iat, ValueVector& psi) override;
 
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override;
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;
 
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override;
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp
index 708e4780535..496d1b1f463 100644
--- a/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp
+++ b/src/QMCWaveFunctions/tests/test_RotatedSPOsT.cpp
@@ -4,11 +4,9 @@
 //
 // Copyright (c) 2022 QMCPACK developers.
 //
-// File developed by: Joshua Townsend, jptowns@sandia.gov, Sandia National
-// Laboratories
+// File developed by: Joshua Townsend, jptowns@sandia.gov, Sandia National Laboratories
 //
-// File created by: Joshua Townsend, jptowns@sandia.gov, Sandia National
-// Laboratories
+// File created by: Joshua Townsend, jptowns@sandia.gov, Sandia National Laboratories
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "FakeSPOT.h"
@@ -34,53 +32,29 @@ using std::string;
 
 namespace qmcplusplus
 {
-template <typename T>
+template<typename T>
 struct ValueApproxHelper
 {
-    using Type = Catch::Detail::Approx;
+  using Type = Catch::Detail::Approx;
 };
-template <typename T>
+template<typename T>
 struct ValueApproxHelper<std::complex<T>>
 {
-    using Type = Catch::Detail::ComplexApprox;
+  using Type = Catch::Detail::ComplexApprox;
 };
 
-template <typename T>
+template<typename T>
 using ValueApprox = typename ValueApproxHelper<T>::Type;
 
 namespace testing
 {
-OptVariablesTypeT<float>&
-getMyVars(SPOSetT<float>& rot)
-{
-    return rot.myVars;
-}
-OptVariablesTypeT<double>&
-getMyVars(SPOSetT<double>& rot)
-{
-    return rot.myVars;
-}
-OptVariablesTypeT<float>&
-getMyVarsFull(RotatedSPOsT<float>& rot)
-{
-    return rot.myVarsFull;
-}
-OptVariablesTypeT<double>&
-getMyVarsFull(RotatedSPOsT<double>& rot)
-{
-    return rot.myVarsFull;
-}
-std::vector<std::vector<float>>&
-getHistoryParams(RotatedSPOsT<float>& rot)
-{
-    return rot.history_params_;
-}
+OptVariablesTypeT<float>& getMyVars(SPOSetT<float>& rot) { return rot.myVars; }
+OptVariablesTypeT<double>& getMyVars(SPOSetT<double>& rot) { return rot.myVars; }
+OptVariablesTypeT<float>& getMyVarsFull(RotatedSPOsT<float>& rot) { return rot.myVarsFull; }
+OptVariablesTypeT<double>& getMyVarsFull(RotatedSPOsT<double>& rot) { return rot.myVarsFull; }
+std::vector<std::vector<float>>& getHistoryParams(RotatedSPOsT<float>& rot) { return rot.history_params_; }
 
-std::vector<std::vector<double>>&
-getHistoryParams(RotatedSPOsT<double>& rot)
-{
-    return rot.history_params_;
-}
+std::vector<std::vector<double>>& getHistoryParams(RotatedSPOsT<double>& rot) { return rot.history_params_; }
 } // namespace testing
 
 #ifndef QMC_COMPLEX
@@ -97,119 +71,104 @@ using TestTypeList = std::tuple<>;
   JPT 04.01.2022: Adapted from test_einset.cpp
   Test the spline rotated machinery for SplineR2R (extend to others later).
 */
-TEMPLATE_LIST_TEST_CASE(
-    "RotatedSPOs via SplineR2R", "[wavefunction][template]", TestTypeList)
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs via SplineR2R", "[wavefunction][template]", TestTypeList)
 {
-    using RealType = typename SPOSetT<TestType>::RealType;
+  using RealType = typename SPOSetT<TestType>::RealType;
 
-    /*
+  /*
       BEGIN Boilerplate stuff to make a simple SPOSet. Copied from
       test_einset.cpp
     */
 
-    Communicate* c = OHMMS::Controller;
-
-    // We get a "Mismatched supercell lattices" error due to default ctor?
-    typename ParticleSetT<TestType>::ParticleLayout lattice;
-
-    // diamondC_1x1x1
-    lattice.R = {3.37316115, 3.37316115, 0.0, 0.0, 3.37316115, 3.37316115,
-        3.37316115, 0.0, 3.37316115};
-
-    ParticleSetPoolT<TestType> ptcl = ParticleSetPoolT<TestType>(c);
-    ptcl.setSimulationCell(lattice);
-    // LAttice seems fine after this point...
-
-    auto ions_uptr =
-        std::make_unique<ParticleSetT<TestType>>(ptcl.getSimulationCell());
-    auto elec_uptr =
-        std::make_unique<ParticleSetT<TestType>>(ptcl.getSimulationCell());
-    ParticleSetT<TestType>& ions_(*ions_uptr);
-    ParticleSetT<TestType>& elec_(*elec_uptr);
-
-    ions_.setName("ion");
-    ptcl.addParticleSet(std::move(ions_uptr));
-    ions_.create({2});
-    ions_.R[0] = {0.0, 0.0, 0.0};
-    ions_.R[1] = {1.68658058, 1.68658058, 1.68658058};
-    elec_.setName("elec");
-    ptcl.addParticleSet(std::move(elec_uptr));
-    elec_.create({2});
-    elec_.R[0] = {0.0, 0.0, 0.0};
-    elec_.R[1] = {0.0, 1.0, 0.0};
-    SpeciesSet& tspecies = elec_.getSpeciesSet();
-    int upIdx = tspecies.addSpecies("u");
-    int chargeIdx = tspecies.addAttribute("charge");
-    tspecies(chargeIdx, upIdx) = -1;
-
-    // diamondC_1x1x1 - 8 bands available
-    const char* particles = R"(<tmp>
+  Communicate* c = OHMMS::Controller;
+
+  // We get a "Mismatched supercell lattices" error due to default ctor?
+  typename ParticleSetT<TestType>::ParticleLayout lattice;
+
+  // diamondC_1x1x1
+  lattice.R = {3.37316115, 3.37316115, 0.0, 0.0, 3.37316115, 3.37316115, 3.37316115, 0.0, 3.37316115};
+
+  ParticleSetPoolT<TestType> ptcl = ParticleSetPoolT<TestType>(c);
+  ptcl.setSimulationCell(lattice);
+  // Lattice seems fine after this point...
+
+  auto ions_uptr = std::make_unique<ParticleSetT<TestType>>(ptcl.getSimulationCell());
+  auto elec_uptr = std::make_unique<ParticleSetT<TestType>>(ptcl.getSimulationCell());
+  ParticleSetT<TestType>& ions_(*ions_uptr);
+  ParticleSetT<TestType>& elec_(*elec_uptr);
+
+  ions_.setName("ion");
+  ptcl.addParticleSet(std::move(ions_uptr));
+  ions_.create({2});
+  ions_.R[0] = {0.0, 0.0, 0.0};
+  ions_.R[1] = {1.68658058, 1.68658058, 1.68658058};
+  elec_.setName("elec");
+  ptcl.addParticleSet(std::move(elec_uptr));
+  elec_.create({2});
+  elec_.R[0]                 = {0.0, 0.0, 0.0};
+  elec_.R[1]                 = {0.0, 1.0, 0.0};
+  SpeciesSet& tspecies       = elec_.getSpeciesSet();
+  int upIdx                  = tspecies.addSpecies("u");
+  int chargeIdx              = tspecies.addAttribute("charge");
+  tspecies(chargeIdx, upIdx) = -1;
+
+  // diamondC_1x1x1 - 8 bands available
+  const char* particles = R"(<tmp>
 <determinantset type="einspline" href="diamondC_1x1x1.pwscf.h5" tilematrix="1 0 0 0 1 0 0 0 1" twistnum="0" source="ion" meshfactor="1.0" precision="float" size="8"/>
 </tmp>
 )";
 
-    Libxml2Document doc;
-    bool okay = doc.parseFromString(particles);
-    REQUIRE(okay);
+  Libxml2Document doc;
+  bool okay = doc.parseFromString(particles);
+  REQUIRE(okay);
 
-    xmlNodePtr root = doc.getRoot();
+  xmlNodePtr root = doc.getRoot();
 
-    xmlNodePtr ein1 = xmlFirstElementChild(root);
+  xmlNodePtr ein1 = xmlFirstElementChild(root);
 
-    EinsplineSetBuilderT<TestType> einSet(elec_, ptcl.getPool(), c, ein1);
-    auto spo = einSet.createSPOSetFromXML(ein1);
-    REQUIRE(spo);
+  EinsplineSetBuilderT<TestType> einSet(elec_, ptcl.getPool(), c, ein1);
+  auto spo = einSet.createSPOSetFromXML(ein1);
+  REQUIRE(spo);
 
-    /*
+  /*
       END Boilerplate stuff. Now we have a SplineR2R wavefunction
       ready for rotation. What follows is the actual test.
     */
 
-    // SplineR2R only for the moment, so skip if QMC_COMPLEX is set
+  // SplineR2R only for the moment, so skip if QMC_COMPLEX is set
 #if !defined(QMC_COMPLEX)
 
-    spo->storeParamsBeforeRotation();
-    // 1.) Make a RotatedSPOs object so that we can use the rotation routines
-    auto rot_spo = std::make_unique<RotatedSPOsT<TestType>>(
-        "one_rotated_set", std::move(spo));
-
-    // Sanity check for orbs. Expect 2 electrons, 8 orbitals, & 79507 coefs/orb.
-    const auto orbitalsetsize = rot_spo->getOrbitalSetSize();
-    REQUIRE(orbitalsetsize == 8);
-
-    // 2.) Get data for unrotated orbitals. Check that there's no rotation
-    rot_spo->buildOptVariables(elec_.R.size());
-    typename SPOSetT<TestType>::ValueMatrix psiM_bare(
-        elec_.R.size(), orbitalsetsize);
-    typename SPOSetT<TestType>::GradMatrix dpsiM_bare(
-        elec_.R.size(), orbitalsetsize);
-    typename SPOSetT<TestType>::ValueMatrix d2psiM_bare(
-        elec_.R.size(), orbitalsetsize);
-    rot_spo->evaluate_notranspose(
-        elec_, 0, elec_.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare);
-
-    // This stuff checks that no rotation was applied. Copied from
-    // test_einset.cpp. value
-    CHECK(std::real(psiM_bare[1][0]) == ValueApprox<TestType>(-0.8886948824));
-    CHECK(std::real(psiM_bare[1][1]) == ValueApprox<TestType>(1.4194120169));
-    // grad
-    CHECK(
-        std::real(dpsiM_bare[1][0][0]) == ValueApprox<TestType>(-0.0000183403));
-    CHECK(
-        std::real(dpsiM_bare[1][0][1]) == ValueApprox<TestType>(0.1655139178));
-    CHECK(
-        std::real(dpsiM_bare[1][0][2]) == ValueApprox<TestType>(-0.0000193077));
-    CHECK(
-        std::real(dpsiM_bare[1][1][0]) == ValueApprox<TestType>(-1.3131694794));
-    CHECK(
-        std::real(dpsiM_bare[1][1][1]) == ValueApprox<TestType>(-1.1174004078));
-    CHECK(
-        std::real(dpsiM_bare[1][1][2]) == ValueApprox<TestType>(-0.8462534547));
-    // lapl
-    CHECK(std::real(d2psiM_bare[1][0]) == ValueApprox<TestType>(1.3313053846));
-    CHECK(std::real(d2psiM_bare[1][1]) == ValueApprox<TestType>(-4.712583065));
-
-    /*
+  spo->storeParamsBeforeRotation();
+  // 1.) Make a RotatedSPOs object so that we can use the rotation routines
+  auto rot_spo = std::make_unique<RotatedSPOsT<TestType>>("one_rotated_set", std::move(spo));
+
+  // Sanity check for orbs. Expect 2 electrons, 8 orbitals, & 79507 coefs/orb.
+  const auto orbitalsetsize = rot_spo->getOrbitalSetSize();
+  REQUIRE(orbitalsetsize == 8);
+
+  // 2.) Get data for unrotated orbitals. Check that there's no rotation
+  rot_spo->buildOptVariables(elec_.R.size());
+  typename SPOSetT<TestType>::ValueMatrix psiM_bare(elec_.R.size(), orbitalsetsize);
+  typename SPOSetT<TestType>::GradMatrix dpsiM_bare(elec_.R.size(), orbitalsetsize);
+  typename SPOSetT<TestType>::ValueMatrix d2psiM_bare(elec_.R.size(), orbitalsetsize);
+  rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare);
+
+  // This stuff checks that no rotation was applied. Copied from test_einset.cpp.
+  // value
+  CHECK(std::real(psiM_bare[1][0]) == ValueApprox<TestType>(-0.8886948824));
+  CHECK(std::real(psiM_bare[1][1]) == ValueApprox<TestType>(1.4194120169));
+  // grad
+  CHECK(std::real(dpsiM_bare[1][0][0]) == ValueApprox<TestType>(-0.0000183403));
+  CHECK(std::real(dpsiM_bare[1][0][1]) == ValueApprox<TestType>(0.1655139178));
+  CHECK(std::real(dpsiM_bare[1][0][2]) == ValueApprox<TestType>(-0.0000193077));
+  CHECK(std::real(dpsiM_bare[1][1][0]) == ValueApprox<TestType>(-1.3131694794));
+  CHECK(std::real(dpsiM_bare[1][1][1]) == ValueApprox<TestType>(-1.1174004078));
+  CHECK(std::real(dpsiM_bare[1][1][2]) == ValueApprox<TestType>(-0.8462534547));
+  // lapl
+  CHECK(std::real(d2psiM_bare[1][0]) == ValueApprox<TestType>(1.3313053846));
+  CHECK(std::real(d2psiM_bare[1][1]) == ValueApprox<TestType>(-4.712583065));
+
+  /*
        3.) Apply a rotation to the orbitals
            To do this, construct a params vector and call the
        RotatedSPOs::apply_rotation(params) method. That should do the
@@ -217,26 +176,22 @@ TEMPLATE_LIST_TEST_CASE(
 
        For 2 electrons in 8 orbs, we expect 2*(8-2) = 12 params.
     */
-    const auto rot_size = rot_spo->m_act_rot_inds.size();
-    REQUIRE(rot_size == 12); // = Nelec*(Norbs - Nelec) = 2*(8-2) = 12
-    std::vector<RealType> param(rot_size);
-    for (auto i = 0; i < rot_size; i++) {
-        param[i] = 0.01 * static_cast<RealType>(i);
-    }
-    rot_spo->apply_rotation(
-        param, false); // Expect this to call SplineR2R::applyRotation()
-
-    // 4.) Get data for rotated orbitals.
-    typename SPOSetT<TestType>::ValueMatrix psiM_rot(
-        elec_.R.size(), orbitalsetsize);
-    typename SPOSetT<TestType>::GradMatrix dpsiM_rot(
-        elec_.R.size(), orbitalsetsize);
-    typename SPOSetT<TestType>::ValueMatrix d2psiM_rot(
-        elec_.R.size(), orbitalsetsize);
-    rot_spo->evaluate_notranspose(
-        elec_, 0, elec_.R.size(), psiM_rot, dpsiM_rot, d2psiM_rot);
-
-    /*
+  const auto rot_size = rot_spo->m_act_rot_inds.size();
+  REQUIRE(rot_size == 12); // = Nelec*(Norbs - Nelec) = 2*(8-2) = 12
+  std::vector<RealType> param(rot_size);
+  for (auto i = 0; i < rot_size; i++)
+  {
+    param[i] = 0.01 * static_cast<RealType>(i);
+  }
+  rot_spo->apply_rotation(param, false); // Expect this to call SplineR2R::applyRotation()
+
+  // 4.) Get data for rotated orbitals.
+  typename SPOSetT<TestType>::ValueMatrix psiM_rot(elec_.R.size(), orbitalsetsize);
+  typename SPOSetT<TestType>::GradMatrix dpsiM_rot(elec_.R.size(), orbitalsetsize);
+  typename SPOSetT<TestType>::ValueMatrix d2psiM_rot(elec_.R.size(), orbitalsetsize);
+  rot_spo->evaluate_notranspose(elec_, 0, elec_.R.size(), psiM_rot, dpsiM_rot, d2psiM_rot);
+
+  /*
        Manually encode the unitary transformation. Ugly, but it works.
        @TODO: Use the total rotation machinery when it's implemented
 
@@ -246,232 +201,217 @@ TEMPLATE_LIST_TEST_CASE(
        probably be ditched once we have a way to grab the actual
        rotation matrix...
     */
-    typename SPOSetT<TestType>::ValueMatrix rot_mat(
-        orbitalsetsize, orbitalsetsize);
-    rot_mat[0][0] = 0.99726;
-    rot_mat[0][1] = -0.00722;
-    rot_mat[0][2] = 0.00014;
-    rot_mat[0][3] = -0.00982;
-    rot_mat[0][4] = -0.01979;
-    rot_mat[0][5] = -0.02976;
-    rot_mat[0][6] = -0.03972;
-    rot_mat[0][7] = -0.04969;
-    rot_mat[1][0] = -0.00722;
-    rot_mat[1][1] = 0.97754;
-    rot_mat[1][2] = -0.05955;
-    rot_mat[1][3] = -0.06945;
-    rot_mat[1][4] = -0.07935;
-    rot_mat[1][5] = -0.08925;
-    rot_mat[1][6] = -0.09915;
-    rot_mat[1][7] = -0.10905;
-    rot_mat[2][0] = -0.00014;
-    rot_mat[2][1] = 0.05955;
-    rot_mat[2][2] = 0.99821;
-    rot_mat[2][3] = -0.00209;
-    rot_mat[2][4] = -0.00239;
-    rot_mat[2][5] = -0.00269;
-    rot_mat[2][6] = -0.00299;
-    rot_mat[2][7] = -0.00329;
-    rot_mat[3][0] = 0.00982;
-    rot_mat[3][1] = 0.06945;
-    rot_mat[3][2] = -0.00209;
-    rot_mat[3][3] = 0.99751;
-    rot_mat[3][4] = -0.00289;
-    rot_mat[3][5] = -0.00329;
-    rot_mat[3][6] = -0.00368;
-    rot_mat[3][7] = -0.00408;
-    rot_mat[4][0] = 0.01979;
-    rot_mat[4][1] = 0.07935;
-    rot_mat[4][2] = -0.00239;
-    rot_mat[4][3] = -0.00289;
-    rot_mat[4][4] = 0.99661;
-    rot_mat[4][5] = -0.00388;
-    rot_mat[4][6] = -0.00438;
-    rot_mat[4][7] = -0.00488;
-    rot_mat[5][0] = 0.02976;
-    rot_mat[5][1] = 0.08925;
-    rot_mat[5][2] = -0.00269;
-    rot_mat[5][3] = -0.00329;
-    rot_mat[5][4] = -0.00388;
-    rot_mat[5][5] = 0.99552;
-    rot_mat[5][6] = -0.00508;
-    rot_mat[5][7] = -0.00568;
-    rot_mat[6][0] = 0.03972;
-    rot_mat[6][1] = 0.09915;
-    rot_mat[6][2] = -0.00299;
-    rot_mat[6][3] = -0.00368;
-    rot_mat[6][4] = -0.00438;
-    rot_mat[6][5] = -0.00508;
-    rot_mat[6][6] = 0.99422;
-    rot_mat[6][7] = -0.00647;
-    rot_mat[7][0] = 0.04969;
-    rot_mat[7][1] = 0.10905;
-    rot_mat[7][2] = -0.00329;
-    rot_mat[7][3] = -0.00408;
-    rot_mat[7][4] = -0.00488;
-    rot_mat[7][5] = -0.00568;
-    rot_mat[7][6] = -0.00647;
-    rot_mat[7][7] = 0.99273;
-
-    // Now compute the expected values by hand using the transformation above
-    double val1 = 0.;
-    double val2 = 0.;
-    for (auto i = 0; i < rot_mat.size1(); i++) {
-        val1 += psiM_bare[0][i] * rot_mat[i][0];
-        val2 += psiM_bare[1][i] * rot_mat[i][0];
-    }
-
-    // value
-    CHECK(std::real(psiM_rot[0][0]) == ValueApprox<TestType>(val1));
-    CHECK(std::real(psiM_rot[1][0]) == ValueApprox<TestType>(val2));
-
-    std::vector<double> grad1(3);
-    std::vector<double> grad2(3);
-    for (auto j = 0; j < grad1.size(); j++) {
-        for (auto i = 0; i < rot_mat.size1(); i++) {
-            grad1[j] += dpsiM_bare[0][i][j] * rot_mat[i][0];
-            grad2[j] += dpsiM_bare[1][i][j] * rot_mat[i][0];
-        }
-    }
-
-    // grad
-    CHECK(
-        dpsiM_rot[0][0][0] == ValueApprox<TestType>(grad1[0]).epsilon(0.0001));
-    CHECK(
-        dpsiM_rot[0][0][1] == ValueApprox<TestType>(grad1[1]).epsilon(0.0001));
-    CHECK(
-        dpsiM_rot[0][0][2] == ValueApprox<TestType>(grad1[2]).epsilon(0.0001));
-    CHECK(
-        dpsiM_rot[1][0][0] == ValueApprox<TestType>(grad2[0]).epsilon(0.0001));
-    CHECK(
-        dpsiM_rot[1][0][1] == ValueApprox<TestType>(grad2[1]).epsilon(0.0001));
-    CHECK(
-        dpsiM_rot[1][0][2] == ValueApprox<TestType>(grad2[2]).epsilon(0.0001));
-
-    double lap1 = 0.;
-    double lap2 = 0.;
-    for (auto i = 0; i < rot_mat.size1(); i++) {
-        lap1 += d2psiM_bare[0][i] * rot_mat[i][0];
-        lap2 += d2psiM_bare[1][i] * rot_mat[i][0];
+  typename SPOSetT<TestType>::ValueMatrix rot_mat(orbitalsetsize, orbitalsetsize);
+  rot_mat[0][0] = 0.99726;
+  rot_mat[0][1] = -0.00722;
+  rot_mat[0][2] = 0.00014;
+  rot_mat[0][3] = -0.00982;
+  rot_mat[0][4] = -0.01979;
+  rot_mat[0][5] = -0.02976;
+  rot_mat[0][6] = -0.03972;
+  rot_mat[0][7] = -0.04969;
+  rot_mat[1][0] = -0.00722;
+  rot_mat[1][1] = 0.97754;
+  rot_mat[1][2] = -0.05955;
+  rot_mat[1][3] = -0.06945;
+  rot_mat[1][4] = -0.07935;
+  rot_mat[1][5] = -0.08925;
+  rot_mat[1][6] = -0.09915;
+  rot_mat[1][7] = -0.10905;
+  rot_mat[2][0] = -0.00014;
+  rot_mat[2][1] = 0.05955;
+  rot_mat[2][2] = 0.99821;
+  rot_mat[2][3] = -0.00209;
+  rot_mat[2][4] = -0.00239;
+  rot_mat[2][5] = -0.00269;
+  rot_mat[2][6] = -0.00299;
+  rot_mat[2][7] = -0.00329;
+  rot_mat[3][0] = 0.00982;
+  rot_mat[3][1] = 0.06945;
+  rot_mat[3][2] = -0.00209;
+  rot_mat[3][3] = 0.99751;
+  rot_mat[3][4] = -0.00289;
+  rot_mat[3][5] = -0.00329;
+  rot_mat[3][6] = -0.00368;
+  rot_mat[3][7] = -0.00408;
+  rot_mat[4][0] = 0.01979;
+  rot_mat[4][1] = 0.07935;
+  rot_mat[4][2] = -0.00239;
+  rot_mat[4][3] = -0.00289;
+  rot_mat[4][4] = 0.99661;
+  rot_mat[4][5] = -0.00388;
+  rot_mat[4][6] = -0.00438;
+  rot_mat[4][7] = -0.00488;
+  rot_mat[5][0] = 0.02976;
+  rot_mat[5][1] = 0.08925;
+  rot_mat[5][2] = -0.00269;
+  rot_mat[5][3] = -0.00329;
+  rot_mat[5][4] = -0.00388;
+  rot_mat[5][5] = 0.99552;
+  rot_mat[5][6] = -0.00508;
+  rot_mat[5][7] = -0.00568;
+  rot_mat[6][0] = 0.03972;
+  rot_mat[6][1] = 0.09915;
+  rot_mat[6][2] = -0.00299;
+  rot_mat[6][3] = -0.00368;
+  rot_mat[6][4] = -0.00438;
+  rot_mat[6][5] = -0.00508;
+  rot_mat[6][6] = 0.99422;
+  rot_mat[6][7] = -0.00647;
+  rot_mat[7][0] = 0.04969;
+  rot_mat[7][1] = 0.10905;
+  rot_mat[7][2] = -0.00329;
+  rot_mat[7][3] = -0.00408;
+  rot_mat[7][4] = -0.00488;
+  rot_mat[7][5] = -0.00568;
+  rot_mat[7][6] = -0.00647;
+  rot_mat[7][7] = 0.99273;
+
+  // Now compute the expected values by hand using the transformation above
+  double val1 = 0.;
+  double val2 = 0.;
+  for (auto i = 0; i < rot_mat.size1(); i++)
+  {
+    val1 += psiM_bare[0][i] * rot_mat[i][0];
+    val2 += psiM_bare[1][i] * rot_mat[i][0];
+  }
+
+  // value
+  CHECK(std::real(psiM_rot[0][0]) == ValueApprox<TestType>(val1));
+  CHECK(std::real(psiM_rot[1][0]) == ValueApprox<TestType>(val2));
+
+  std::vector<double> grad1(3);
+  std::vector<double> grad2(3);
+  for (auto j = 0; j < grad1.size(); j++)
+  {
+    for (auto i = 0; i < rot_mat.size1(); i++)
+    {
+      grad1[j] += dpsiM_bare[0][i][j] * rot_mat[i][0];
+      grad2[j] += dpsiM_bare[1][i][j] * rot_mat[i][0];
     }
-
-    // Lapl
-    CHECK(std::real(d2psiM_rot[0][0]) ==
-        ValueApprox<TestType>(lap1).epsilon(0.0001));
-    CHECK(std::real(d2psiM_rot[1][0]) ==
-        ValueApprox<TestType>(lap2).epsilon(0.0001));
+  }
+
+  // grad
+  CHECK(dpsiM_rot[0][0][0] == ValueApprox<TestType>(grad1[0]).epsilon(0.0001));
+  CHECK(dpsiM_rot[0][0][1] == ValueApprox<TestType>(grad1[1]).epsilon(0.0001));
+  CHECK(dpsiM_rot[0][0][2] == ValueApprox<TestType>(grad1[2]).epsilon(0.0001));
+  CHECK(dpsiM_rot[1][0][0] == ValueApprox<TestType>(grad2[0]).epsilon(0.0001));
+  CHECK(dpsiM_rot[1][0][1] == ValueApprox<TestType>(grad2[1]).epsilon(0.0001));
+  CHECK(dpsiM_rot[1][0][2] == ValueApprox<TestType>(grad2[2]).epsilon(0.0001));
+
+  double lap1 = 0.;
+  double lap2 = 0.;
+  for (auto i = 0; i < rot_mat.size1(); i++)
+  {
+    lap1 += d2psiM_bare[0][i] * rot_mat[i][0];
+    lap2 += d2psiM_bare[1][i] * rot_mat[i][0];
+  }
+
+  // Lapl
+  CHECK(std::real(d2psiM_rot[0][0]) == ValueApprox<TestType>(lap1).epsilon(0.0001));
+  CHECK(std::real(d2psiM_rot[1][0]) == ValueApprox<TestType>(lap2).epsilon(0.0001));
 
 #endif
 }
 
-TEMPLATE_LIST_TEST_CASE("RotatedSPOs createRotationIndices",
-    "[wavefunction][template]", TestTypeList)
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs createRotationIndices", "[wavefunction][template]", TestTypeList)
 {
-    // No active-active or virtual-virtual rotations
-    // Only active-virtual
-    typename RotatedSPOsT<TestType>::RotationIndices rot_ind;
-    int nel = 1;
-    int nmo = 3;
-    RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind);
-    CHECK(rot_ind.size() == 2);
-
-    // Full rotation contains all rotations
-    // Size should be number of pairs of orbitals: nmo*(nmo-1)/2
-    typename RotatedSPOsT<TestType>::RotationIndices full_rot_ind;
-    RotatedSPOsT<TestType>::createRotationIndicesFull(nel, nmo, full_rot_ind);
-    CHECK(full_rot_ind.size() == 3);
-
-    nel = 2;
-    typename RotatedSPOsT<TestType>::RotationIndices rot_ind2;
-    RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind2);
-    CHECK(rot_ind2.size() == 2);
-
-    typename RotatedSPOsT<TestType>::RotationIndices full_rot_ind2;
-    RotatedSPOsT<TestType>::createRotationIndicesFull(nel, nmo, full_rot_ind2);
-    CHECK(full_rot_ind2.size() == 3);
-
-    nmo = 4;
-    typename RotatedSPOsT<TestType>::RotationIndices rot_ind3;
-    RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind3);
-    CHECK(rot_ind3.size() == 4);
-
-    typename RotatedSPOsT<TestType>::RotationIndices full_rot_ind3;
-    RotatedSPOsT<TestType>::createRotationIndicesFull(nel, nmo, full_rot_ind3);
-    CHECK(full_rot_ind3.size() == 6);
+  // createRotationIndices: no active-active or virtual-virtual rotations,
+  // only active-virtual; the sizes checked for it below match nel*(nmo-nel)
+  typename RotatedSPOsT<TestType>::RotationIndices rot_ind;
+  int nel = 1;
+  int nmo = 3;
+  RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind);
+  CHECK(rot_ind.size() == 2); // 1*(3-1)
+
+  // createRotationIndicesFull: the full rotation contains all rotations
+  // Size should be number of pairs of orbitals: nmo*(nmo-1)/2
+  typename RotatedSPOsT<TestType>::RotationIndices full_rot_ind;
+  RotatedSPOsT<TestType>::createRotationIndicesFull(nel, nmo, full_rot_ind);
+  CHECK(full_rot_ind.size() == 3); // 3*(3-1)/2
+
+  nel = 2; // second case: nel = 2, nmo still 3
+  typename RotatedSPOsT<TestType>::RotationIndices rot_ind2;
+  RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind2);
+  CHECK(rot_ind2.size() == 2); // 2*(3-2)
+
+  typename RotatedSPOsT<TestType>::RotationIndices full_rot_ind2;
+  RotatedSPOsT<TestType>::createRotationIndicesFull(nel, nmo, full_rot_ind2);
+  CHECK(full_rot_ind2.size() == 3); // 3*(3-1)/2, same value as for nel = 1
+
+  nmo = 4; // third case: nel = 2, nmo = 4
+  typename RotatedSPOsT<TestType>::RotationIndices rot_ind3;
+  RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind3);
+  CHECK(rot_ind3.size() == 4); // 2*(4-2)
+
+  typename RotatedSPOsT<TestType>::RotationIndices full_rot_ind3;
+  RotatedSPOsT<TestType>::createRotationIndicesFull(nel, nmo, full_rot_ind3);
+  CHECK(full_rot_ind3.size() == 6); // 4*(4-1)/2
 }
 
-TEMPLATE_LIST_TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix",
-    "[wavefunction][template]", TestTypeList)
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs constructAntiSymmetricMatrix", "[wavefunction][template]", TestTypeList)
 {
-    using ValueType = typename SPOSetT<TestType>::ValueType;
-    using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
+  using ValueType   = typename SPOSetT<TestType>::ValueType;
+  using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
 
-    typename RotatedSPOsT<TestType>::RotationIndices rot_ind;
-    int nel = 1;
-    int nmo = 3;
-    RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind);
+  typename RotatedSPOsT<TestType>::RotationIndices rot_ind;
+  int nel = 1;
+  int nmo = 3;
+  RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind); // nel=1, nmo=3: one index pair per entry of params
 
-    ValueMatrix m3(nmo, nmo);
-    m3 = ValueType(0);
-    std::vector<ValueType> params = {0.1, 0.2};
+  ValueMatrix m3(nmo, nmo);
+  m3                            = ValueType(0); // NOTE(review): presumably zero-fills m3 before it is populated - confirm
+  std::vector<ValueType> params = {0.1, 0.2};
 
-    RotatedSPOsT<TestType>::constructAntiSymmetricMatrix(rot_ind, params, m3);
+  RotatedSPOsT<TestType>::constructAntiSymmetricMatrix(rot_ind, params, m3); // result lands in m3
 
-    // clang-format off
+  // clang-format off
   std::vector<ValueType> expected_data = { 0.0,  -0.1, -0.2,
                                            0.1,   0.0,  0.0,
                                            0.2,   0.0,  0.0 };
-    // clang-format on
+  // clang-format on
 
-    ValueMatrix expected_m3(expected_data.data(), 3, 3);
+  ValueMatrix expected_m3(expected_data.data(), 3, 3); // 3x3 reference built from expected_data
 
-    CheckMatrixResult check_matrix_result = checkMatrix(m3, expected_m3, true);
-    CHECKED_ELSE(check_matrix_result.result)
-    {
-        FAIL(check_matrix_result.result_message);
-    }
+  CheckMatrixResult check_matrix_result = checkMatrix(m3, expected_m3, true);
+  CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); } // fail with the comparison message
 
-    std::vector<ValueType> params_out(2);
-    RotatedSPOsT<TestType>::extractParamsFromAntiSymmetricMatrix(
-        rot_ind, m3, params_out);
-    CHECK(params_out[0] == ValueApprox<TestType>(0.1));
-    CHECK(params_out[1] == ValueApprox<TestType>(0.2));
+  std::vector<ValueType> params_out(2); // round trip: read the parameters back out of m3
+  RotatedSPOsT<TestType>::extractParamsFromAntiSymmetricMatrix(rot_ind, m3, params_out);
+  CHECK(params_out[0] == ValueApprox<TestType>(0.1));
+  CHECK(params_out[1] == ValueApprox<TestType>(0.2));
 }
 
 // Expected values of the matrix exponential come from gen_matrix_ops.py
-TEMPLATE_LIST_TEST_CASE(
-    "RotatedSPOs exponentiate matrix", "[wavefunction][template]", TestTypeList)
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs exponentiate matrix", "[wavefunction][template]", TestTypeList)
 {
-    using ValueType = typename SPOSetT<TestType>::ValueType;
-    using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
+  using ValueType   = typename SPOSetT<TestType>::ValueType;
+  using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
 
-    std::vector<typename SPOSetT<TestType>::ValueType> mat1_data = {0.0};
-    typename SPOSetT<TestType>::ValueMatrix m1(mat1_data.data(), 1, 1);
-    RotatedSPOsT<TestType>::exponentiate_antisym_matrix(m1);
-    // Always return 1.0 (the only possible anti-symmetric 1x1 matrix is 0)
-    CHECK(m1(0, 0) == ValueApprox<TestType>(1.0));
+  std::vector<typename SPOSetT<TestType>::ValueType> mat1_data = {0.0};
+  typename SPOSetT<TestType>::ValueMatrix m1(mat1_data.data(), 1, 1);
+  RotatedSPOsT<TestType>::exponentiate_antisym_matrix(m1); // m1 now holds the exponential
+  // exp(0) = 1: always returns 1.0 (the only possible anti-symmetric 1x1 matrix is 0)
+  CHECK(m1(0, 0) == ValueApprox<TestType>(1.0));
 
-    // clang-format off
+  // clang-format off
   std::vector<typename SPOSetT<TestType>::ValueType> mat2_data = { 0.0, -0.1,
                                                0.1,  0.0 };
-    // clang-format on
+  // clang-format on
 
-    typename SPOSetT<TestType>::ValueMatrix m2(mat2_data.data(), 2, 2);
-    RotatedSPOsT<TestType>::exponentiate_antisym_matrix(m2);
+  typename SPOSetT<TestType>::ValueMatrix m2(mat2_data.data(), 2, 2);
+  RotatedSPOsT<TestType>::exponentiate_antisym_matrix(m2); // expected entries below are cos(0.1) and -/+ sin(0.1)
 
-    // clang-format off
+  // clang-format off
   std::vector<ValueType> expected_rot2 = {  0.995004165278026,  -0.0998334166468282,
                                             0.0998334166468282,  0.995004165278026 };
-    // clang-format on
+  // clang-format on
 
-    ValueMatrix expected_m2(expected_rot2.data(), 2, 2);
-    CheckMatrixResult check_matrix_result2 = checkMatrix(m2, expected_m2, true);
-    CHECKED_ELSE(check_matrix_result2.result)
-    {
-        FAIL(check_matrix_result2.result_message);
-    }
+  ValueMatrix expected_m2(expected_rot2.data(), 2, 2);
+  CheckMatrixResult check_matrix_result2 = checkMatrix(m2, expected_m2, true);
+  CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } // fail with the comparison message
 
-    // clang-format off
+  // clang-format off
   std::vector<ValueType> m3_input_data = { 0.0,  -0.3, -0.1,
                                            0.3,   0.0, -0.2,
                                            0.1,   0.2,  0.0 };
@@ -481,53 +421,46 @@ TEMPLATE_LIST_TEST_CASE(
                                             0.283164960565074,  0.935754803277919, -0.210191705950743,
                                             0.127334574917630,  0.180540076694398,  0.975290308953046 };
 
-    // clang-format on
+  // clang-format on
 
-    ValueMatrix m3(m3_input_data.data(), 3, 3);
-    ValueMatrix expected_m3(expected_rot3.data(), 3, 3);
+  ValueMatrix m3(m3_input_data.data(), 3, 3);
+  ValueMatrix expected_m3(expected_rot3.data(), 3, 3);
 
-    RotatedSPOsT<TestType>::exponentiate_antisym_matrix(m3);
+  RotatedSPOsT<TestType>::exponentiate_antisym_matrix(m3); // m3 is overwritten with the result compared below
 
-    CheckMatrixResult check_matrix_result3 = checkMatrix(m3, expected_m3, true);
-    CHECKED_ELSE(check_matrix_result3.result)
-    {
-        FAIL(check_matrix_result3.result_message);
-    }
+  CheckMatrixResult check_matrix_result3 = checkMatrix(m3, expected_m3, true);
+  CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); }
 }
 
-TEMPLATE_LIST_TEST_CASE(
-    "RotatedSPOs log matrix", "[wavefunction][template]", TestTypeList)
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs log matrix", "[wavefunction][template]", TestTypeList)
 {
-    using ValueType = typename SPOSetT<TestType>::ValueType;
-    using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
+  using ValueType   = typename SPOSetT<TestType>::ValueType;
+  using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
 
-    std::vector<typename SPOSetT<TestType>::ValueType> mat1_data = {1.0};
-    typename SPOSetT<TestType>::ValueMatrix m1(mat1_data.data(), 1, 1);
-    typename SPOSetT<TestType>::ValueMatrix out_m1(1, 1);
-    RotatedSPOsT<TestType>::log_antisym_matrix(m1, out_m1);
-    // Should always be 1.0 (the only possible anti-symmetric 1x1 matrix is 0)
-    CHECK(out_m1(0, 0) == ValueApprox<TestType>(0.0));
+  std::vector<typename SPOSetT<TestType>::ValueType> mat1_data = {1.0};
+  typename SPOSetT<TestType>::ValueMatrix m1(mat1_data.data(), 1, 1);
+  typename SPOSetT<TestType>::ValueMatrix out_m1(1, 1);
+  RotatedSPOsT<TestType>::log_antisym_matrix(m1, out_m1); // the result is written to out_m1
+  // Should always be 0.0: log(1) = 0 (the only possible anti-symmetric 1x1 matrix is 0)
+  CHECK(out_m1(0, 0) == ValueApprox<TestType>(0.0));
 
-    // clang-format off
+  // clang-format off
   std::vector<ValueType> start_rot2 = {  0.995004165278026,  -0.0998334166468282,
                                          0.0998334166468282,  0.995004165278026 };
 
   std::vector<typename SPOSetT<TestType>::ValueType> mat2_data = { 0.0, -0.1,
                                                0.1,  0.0 };
-    // clang-format on
+  // clang-format on
 
-    ValueMatrix rot_m2(start_rot2.data(), 2, 2);
-    ValueMatrix out_m2(2, 2);
-    RotatedSPOsT<TestType>::log_antisym_matrix(rot_m2, out_m2);
+  ValueMatrix rot_m2(start_rot2.data(), 2, 2);
+  ValueMatrix out_m2(2, 2);
+  RotatedSPOsT<TestType>::log_antisym_matrix(rot_m2, out_m2); // expected to recover mat2_data
 
-    typename SPOSetT<TestType>::ValueMatrix m2(mat2_data.data(), 2, 2);
-    CheckMatrixResult check_matrix_result2 = checkMatrix(m2, out_m2, true);
-    CHECKED_ELSE(check_matrix_result2.result)
-    {
-        FAIL(check_matrix_result2.result_message);
-    }
+  typename SPOSetT<TestType>::ValueMatrix m2(mat2_data.data(), 2, 2);
+  CheckMatrixResult check_matrix_result2 = checkMatrix(m2, out_m2, true);
+  CHECKED_ELSE(check_matrix_result2.result) { FAIL(check_matrix_result2.result_message); } // fail with the comparison message
 
-    // clang-format off
+  // clang-format off
   std::vector<ValueType> start_rot3 = {  0.950580617906092, -0.302932713402637, -0.0680313164049401,
                                          0.283164960565074,  0.935754803277919, -0.210191705950743,
                                          0.127334574917630,  0.180540076694398,  0.975290308953046 };
@@ -535,502 +468,442 @@ TEMPLATE_LIST_TEST_CASE(
   std::vector<ValueType> m3_input_data = { 0.0,  -0.3, -0.1,
                                            0.3,   0.0, -0.2,
                                            0.1,   0.2,  0.0 };
-    // clang-format on
-    ValueMatrix rot_m3(start_rot3.data(), 3, 3);
-    ValueMatrix out_m3(3, 3);
-    RotatedSPOsT<TestType>::log_antisym_matrix(rot_m3, out_m3);
-
-    typename SPOSetT<TestType>::ValueMatrix m3(m3_input_data.data(), 3, 3);
-    CheckMatrixResult check_matrix_result3 = checkMatrix(m3, out_m3, true);
-    CHECKED_ELSE(check_matrix_result3.result)
-    {
-        FAIL(check_matrix_result3.result_message);
-    }
+  // clang-format on
+  ValueMatrix rot_m3(start_rot3.data(), 3, 3);
+  ValueMatrix out_m3(3, 3);
+  RotatedSPOsT<TestType>::log_antisym_matrix(rot_m3, out_m3); // expected to recover m3_input_data
+
+  typename SPOSetT<TestType>::ValueMatrix m3(m3_input_data.data(), 3, 3);
+  CheckMatrixResult check_matrix_result3 = checkMatrix(m3, out_m3, true);
+  CHECKED_ELSE(check_matrix_result3.result) { FAIL(check_matrix_result3.result_message); }
 }
 
 // Test round trip A -> exp(A) -> log(exp(A))
 // The log is multi-valued so this test may fail if the rotation parameters are
 // too large. The exponentials will be the same, though
 //   exp(log(exp(A))) == exp(A)
-TEMPLATE_LIST_TEST_CASE(
-    "RotatedSPOs exp-log matrix", "[wavefunction][template]", TestTypeList)
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs exp-log matrix", "[wavefunction][template]", TestTypeList)
 {
-    using ValueType = typename SPOSetT<TestType>::ValueType;
-    using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
+  using ValueType   = typename SPOSetT<TestType>::ValueType;
+  using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
 
-    typename RotatedSPOsT<TestType>::RotationIndices rot_ind;
-    int nel = 2;
-    int nmo = 4;
-    RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind);
+  typename RotatedSPOsT<TestType>::RotationIndices rot_ind;
+  int nel = 2;
+  int nmo = 4;
+  RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind); // nel=2, nmo=4: one index pair per entry of params4
 
-    ValueMatrix rot_m4(nmo, nmo);
-    rot_m4 = ValueType(0);
+  ValueMatrix rot_m4(nmo, nmo);
+  rot_m4 = ValueType(0); // NOTE(review): presumably zero-fills rot_m4 before it is populated - confirm
 
-    std::vector<ValueType> params4 = {-1.1, 1.5, 0.2, -0.15};
+  std::vector<ValueType> params4 = {-1.1, 1.5, 0.2, -0.15};
 
-    RotatedSPOsT<TestType>::constructAntiSymmetricMatrix(
-        rot_ind, params4, rot_m4);
-    ValueMatrix orig_rot_m4 = rot_m4;
-    ValueMatrix out_m4(nmo, nmo);
+  RotatedSPOsT<TestType>::constructAntiSymmetricMatrix(rot_ind, params4, rot_m4); // rot_m4 = A
+  ValueMatrix orig_rot_m4 = rot_m4; // keep A: rot_m4 is handed to the exponentiation below
+  ValueMatrix out_m4(nmo, nmo);
 
-    RotatedSPOsT<TestType>::exponentiate_antisym_matrix(rot_m4);
+  RotatedSPOsT<TestType>::exponentiate_antisym_matrix(rot_m4); // rot_m4 = exp(A)
 
-    RotatedSPOsT<TestType>::log_antisym_matrix(rot_m4, out_m4);
+  RotatedSPOsT<TestType>::log_antisym_matrix(rot_m4, out_m4); // out_m4 = log(exp(A))
 
-    CheckMatrixResult check_matrix_result4 =
-        checkMatrix(out_m4, orig_rot_m4, true);
-    CHECKED_ELSE(check_matrix_result4.result)
-    {
-        FAIL(check_matrix_result4.result_message);
-    }
+  CheckMatrixResult check_matrix_result4 = checkMatrix(out_m4, orig_rot_m4, true);
+  CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); } // fail with the comparison message
 
-    std::vector<ValueType> params4out(4);
-    RotatedSPOsT<TestType>::extractParamsFromAntiSymmetricMatrix(
-        rot_ind, out_m4, params4out);
-    for (int i = 0; i < params4.size(); i++) {
-        CHECK(params4[i] == ValueApprox<TestType>(params4out[i]));
-    }
+  std::vector<ValueType> params4out(4); // parameters read back from the round-tripped matrix
+  RotatedSPOsT<TestType>::extractParamsFromAntiSymmetricMatrix(rot_ind, out_m4, params4out);
+  for (int i = 0; i < params4.size(); i++) // NOTE(review): int vs size_t comparison; harmless for 4 entries
+  {
+    CHECK(params4[i] == ValueApprox<TestType>(params4out[i]));
+  }
 }
 
-TEMPLATE_LIST_TEST_CASE(
-    "RotatedSPOs hcpBe", "[wavefunction][template]", TestTypeList)
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs hcpBe", "[wavefunction][template]", TestTypeList)
 {
-    using RealType = typename OrbitalSetTraits<TestType>::RealType;
-    Communicate* c = OHMMS::Controller;
-
-    typename ParticleSetT<TestType>::ParticleLayout lattice;
-    lattice.R = {4.32747284, 0.00000000, 0.00000000, -2.16373642, 3.74770142,
-        0.00000000, 0.00000000, 0.00000000, 6.78114995};
-
-    ParticleSetPoolT<TestType> ptcl = ParticleSetPoolT<TestType>(c);
-    ptcl.setSimulationCell(lattice);
-    auto ions_uptr =
-        std::make_unique<ParticleSetT<TestType>>(ptcl.getSimulationCell());
-    auto elec_uptr =
-        std::make_unique<ParticleSetT<TestType>>(ptcl.getSimulationCell());
-    ParticleSetT<TestType>& ions(*ions_uptr);
-    ParticleSetT<TestType>& elec(*elec_uptr);
-
-    ions.setName("ion");
-    ptcl.addParticleSet(std::move(ions_uptr));
-    ions.create({1});
-    ions.R[0] = {0.0, 0.0, 0.0};
-
-    elec.setName("elec");
-    ptcl.addParticleSet(std::move(elec_uptr));
-    elec.create({1});
-    elec.R[0] = {0.0, 0.0, 0.0};
-
-    SpeciesSet& tspecies = elec.getSpeciesSet();
-    int upIdx = tspecies.addSpecies("u");
-    int chargeIdx = tspecies.addAttribute("charge");
-    tspecies(chargeIdx, upIdx) = -1;
-
-    // Add the attribute save_coefs="yes" to the sposet_builder tag to generate
-    // the spline file for use in eval_bspline_spo.py
-
-    const char* particles = R"(<tmp>
+  using RealType = typename OrbitalSetTraits<TestType>::RealType;
+  Communicate* c = OHMMS::Controller;
+
+  typename ParticleSetT<TestType>::ParticleLayout lattice;
+  lattice.R = {4.32747284, 0.00000000, 0.00000000, -2.16373642, 3.74770142,
+               0.00000000, 0.00000000, 0.00000000, 6.78114995};
+
+  ParticleSetPoolT<TestType> ptcl = ParticleSetPoolT<TestType>(c);
+  ptcl.setSimulationCell(lattice);
+  auto ions_uptr = std::make_unique<ParticleSetT<TestType>>(ptcl.getSimulationCell());
+  auto elec_uptr = std::make_unique<ParticleSetT<TestType>>(ptcl.getSimulationCell());
+  ParticleSetT<TestType>& ions(*ions_uptr);
+  ParticleSetT<TestType>& elec(*elec_uptr);
+
+  ions.setName("ion");
+  ptcl.addParticleSet(std::move(ions_uptr));
+  ions.create({1});
+  ions.R[0] = {0.0, 0.0, 0.0};
+
+  elec.setName("elec");
+  ptcl.addParticleSet(std::move(elec_uptr));
+  elec.create({1});
+  elec.R[0] = {0.0, 0.0, 0.0};
+
+  SpeciesSet& tspecies       = elec.getSpeciesSet();
+  int upIdx                  = tspecies.addSpecies("u");
+  int chargeIdx              = tspecies.addAttribute("charge");
+  tspecies(chargeIdx, upIdx) = -1;
+
+  // Add the attribute save_coefs="yes" to the sposet_builder tag to generate
+  // the spline file for use in eval_bspline_spo.py
+
+  const char* particles = R"(<tmp>
 <sposet_builder type="bspline" href="hcpBe.pwscf.h5" tilematrix="1 0 0 0 1 0 0 0 1" twistnum="0" source="ion" meshfactor="1.0" precision="double">
       <sposet type="bspline" name="spo_ud" spindataset="0" size="2"/>
 </sposet_builder>
 </tmp>)";
 
-    Libxml2Document doc;
-    bool okay = doc.parseFromString(particles);
-    REQUIRE(okay);
-
-    xmlNodePtr root = doc.getRoot();
-
-    xmlNodePtr sposet_builder = xmlFirstElementChild(root);
-    xmlNodePtr sposet_ptr = xmlFirstElementChild(sposet_builder);
-
-    EinsplineSetBuilderT<TestType> einSet(
-        elec, ptcl.getPool(), c, sposet_builder);
-    auto spo = einSet.createSPOSetFromXML(sposet_ptr);
-    REQUIRE(spo);
-
-    spo->storeParamsBeforeRotation();
-    auto rot_spo = std::make_unique<RotatedSPOsT<TestType>>(
-        "one_rotated_set", std::move(spo));
-
-    // Sanity check for orbs. Expect 1 electron, 2 orbitals
-    const auto orbitalsetsize = rot_spo->getOrbitalSetSize();
-    REQUIRE(orbitalsetsize == 2);
-
-    rot_spo->buildOptVariables(elec.R.size());
-
-    typename SPOSetT<TestType>::ValueMatrix psiM_bare(
-        elec.R.size(), orbitalsetsize);
-    typename SPOSetT<TestType>::GradMatrix dpsiM_bare(
-        elec.R.size(), orbitalsetsize);
-    typename SPOSetT<TestType>::ValueMatrix d2psiM_bare(
-        elec.R.size(), orbitalsetsize);
-    rot_spo->evaluate_notranspose(
-        elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare);
-
-    // Values generated from eval_bspline_spo.py, the
-    // generate_point_values_hcpBe function
-    CHECK(
-        std::real(psiM_bare[0][0]) == ValueApprox<TestType>(0.210221765375514));
-    CHECK(std::real(psiM_bare[0][1]) ==
-        ValueApprox<TestType>(-2.984345024542937e-06));
-
-    CHECK(std::real(d2psiM_bare[0][0]) ==
-        ValueApprox<TestType>(5.303848362116568));
-
-    OptVariablesTypeT<TestType> opt_vars;
-    rot_spo->checkInVariablesExclusive(opt_vars);
-    opt_vars.resetIndex();
-    rot_spo->checkOutVariables(opt_vars);
-    rot_spo->resetParametersExclusive(opt_vars);
-
-    using ValueType = TestType;
-    Vector<ValueType> dlogpsi(1);
-    Vector<ValueType> dhpsioverpsi(1);
-    rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1);
-
-    CHECK(dlogpsi[0] == ValueApprox<TestType>(-1.41961753e-05));
-    CHECK(dhpsioverpsi[0] == ValueApprox<TestType>(-0.00060853));
-
-    std::vector<RealType> params = {0.1};
-    rot_spo->apply_rotation(params, false);
-
-    rot_spo->evaluate_notranspose(
-        elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare);
-    CHECK(std::real(psiM_bare[0][0]) ==
-        ValueApprox<TestType>(0.20917123424337608));
-    CHECK(std::real(psiM_bare[0][1]) ==
-        ValueApprox<TestType>(-0.02099012652669549));
-
-    CHECK(std::real(d2psiM_bare[0][0]) ==
-        ValueApprox<TestType>(5.277362065087747));
-
-    dlogpsi[0] = 0.0;
-    dhpsioverpsi[0] = 0.0;
-
-    rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1);
-    CHECK(dlogpsi[0] == ValueApprox<TestType>(-0.10034901119468914));
-    CHECK(dhpsioverpsi[0] == ValueApprox<TestType>(32.96939041498753));
-}
+  Libxml2Document doc;
+  bool okay = doc.parseFromString(particles);
+  REQUIRE(okay);
 
-// Test construction of delta rotation
-TEMPLATE_LIST_TEST_CASE("RotatedSPOs construct delta matrix",
-    "[wavefunction][template]", TestTypeList)
-{
-    using ValueType = typename SPOSetT<TestType>::ValueType;
-    using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
-
-    int nel = 2;
-    int nmo = 4;
-    typename RotatedSPOsT<TestType>::RotationIndices rot_ind;
-    RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind);
-    typename RotatedSPOsT<TestType>::RotationIndices full_rot_ind;
-    RotatedSPOsT<TestType>::createRotationIndicesFull(nel, nmo, full_rot_ind);
-    // rot_ind size is 4 and full rot_ind size is 6
-
-    ValueMatrix rot_m4(nmo, nmo);
-    rot_m4 = ValueType(0);
-
-    // When comparing with gen_matrix_ops.py, be aware of the order of indices
-    // in full_rot
-    // rot_ind is (0,2) (0,3) (1,2) (1,3)
-    // full_rot_ind is (0,2) (0,3) (1,2) (1,3) (0,1) (2,3)
-    // The extra indices go at the back
-    std::vector<ValueType> old_params = {1.5, 0.2, -0.15, 0.03, -1.1, 0.05};
-    std::vector<ValueType> delta_params = {0.1, 0.3, 0.2, -0.1};
-    std::vector<ValueType> new_params(6);
-
-    RotatedSPOsT<TestType>::constructDeltaRotation(
-        delta_params, old_params, rot_ind, full_rot_ind, new_params, rot_m4);
-
-    // clang-format off
-  std::vector<ValueType> rot_data4 =
-    { -0.371126931484737,  0.491586564957393,   -0.784780958819798,   0.0687480658200083,
-      -0.373372784561548,  0.66111547793048,     0.610450337985578,   0.225542620014052,
-       0.751270334458895,  0.566737323353515,   -0.0297901110611425, -0.336918744155143,
-       0.398058348785074,  0.00881931472604944, -0.102867783149713,   0.911531672428406 };
-    // clang-format on
+  xmlNodePtr root = doc.getRoot();
 
-    ValueMatrix new_rot_m4(rot_data4.data(), 4, 4);
+  xmlNodePtr sposet_builder = xmlFirstElementChild(root);
+  xmlNodePtr sposet_ptr     = xmlFirstElementChild(sposet_builder);
 
-    CheckMatrixResult check_matrix_result4 =
-        checkMatrix(rot_m4, new_rot_m4, true);
-    CHECKED_ELSE(check_matrix_result4.result)
-    {
-        FAIL(check_matrix_result4.result_message);
-    }
+  EinsplineSetBuilderT<TestType> einSet(elec, ptcl.getPool(), c, sposet_builder);
+  auto spo = einSet.createSPOSetFromXML(sposet_ptr);
+  REQUIRE(spo);
 
-    // Reminder: Ordering!
-    std::vector<ValueType> expected_new_param = {1.6813965019790489,
-        0.3623564254653294, -0.05486544454559908, -0.20574472941408453,
-        -0.9542513302873077, 0.27497788909911774};
-    for (int i = 0; i < new_params.size(); i++)
-        CHECK(new_params[i] == ValueApprox<TestType>(expected_new_param[i]));
-
-    // Rotated back to original position
-
-    std::vector<ValueType> new_params2(6);
-    std::vector<ValueType> reverse_delta_params = {-0.1, -0.3, -0.2, 0.1};
-    RotatedSPOsT<TestType>::constructDeltaRotation(reverse_delta_params,
-        new_params, rot_ind, full_rot_ind, new_params2, rot_m4);
-    for (int i = 0; i < new_params2.size(); i++)
-        CHECK(new_params2[i] == ValueApprox<TestType>(old_params[i]));
-}
+  spo->storeParamsBeforeRotation();
+  auto rot_spo = std::make_unique<RotatedSPOsT<TestType>>("one_rotated_set", std::move(spo));
 
-// Test using global rotation
-TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters",
-    "[wavefunction][template]", TestTypeList)
-{
-    auto fake_spo = std::make_unique<FakeSPOT<TestType>>();
-    fake_spo->setOrbitalSetSize(4);
-    RotatedSPOsT<TestType> rot("fake_rot", std::move(fake_spo));
-    int nel = 2;
-    rot.buildOptVariables(nel);
-
-    optimize::VariableSetT<TestType> vs;
-    rot.checkInVariablesExclusive(vs);
-    vs[0] = 0.1;
-    vs[1] = 0.15;
-    vs[2] = 0.2;
-    vs[3] = 0.25;
-    rot.resetParametersExclusive(vs);
+  // Sanity check for orbs. Expect 1 electron, 2 orbitals
+  const auto orbitalsetsize = rot_spo->getOrbitalSetSize();
+  REQUIRE(orbitalsetsize == 2);
 
-    {
-        hdf_archive hout;
-        vs.writeToHDF("rot_vp.h5", hout);
+  rot_spo->buildOptVariables(elec.R.size());
 
-        rot.writeVariationalParameters(hout);
-    }
+  typename SPOSetT<TestType>::ValueMatrix psiM_bare(elec.R.size(), orbitalsetsize);
+  typename SPOSetT<TestType>::GradMatrix dpsiM_bare(elec.R.size(), orbitalsetsize);
+  typename SPOSetT<TestType>::ValueMatrix d2psiM_bare(elec.R.size(), orbitalsetsize);
+  rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare);
+
+  // Values generated from eval_bspline_spo.py, the
+  // generate_point_values_hcpBe function
+  CHECK(std::real(psiM_bare[0][0]) == ValueApprox<TestType>(0.210221765375514));
+  CHECK(std::real(psiM_bare[0][1]) == ValueApprox<TestType>(-2.984345024542937e-06));
 
-    auto fake_spo2 = std::make_unique<FakeSPOT<TestType>>();
-    fake_spo2->setOrbitalSetSize(4);
+  CHECK(std::real(d2psiM_bare[0][0]) == ValueApprox<TestType>(5.303848362116568));
 
-    RotatedSPOsT<TestType> rot2("fake_rot", std::move(fake_spo2));
-    rot2.buildOptVariables(nel);
+  OptVariablesTypeT<TestType> opt_vars;
+  rot_spo->checkInVariablesExclusive(opt_vars);
+  opt_vars.resetIndex();
+  rot_spo->checkOutVariables(opt_vars);
+  rot_spo->resetParametersExclusive(opt_vars);
 
-    optimize::VariableSetT<TestType> vs2;
-    rot2.checkInVariablesExclusive(vs2);
+  using ValueType = TestType;
+  Vector<ValueType> dlogpsi(1);
+  Vector<ValueType> dhpsioverpsi(1);
+  rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1);
 
-    hdf_archive hin;
-    vs2.readFromHDF("rot_vp.h5", hin);
-    rot2.readVariationalParameters(hin);
+  CHECK(dlogpsi[0] == ValueApprox<TestType>(-1.41961753e-05));
+  CHECK(dhpsioverpsi[0] == ValueApprox<TestType>(-0.00060853));
 
-    auto& var = testing::getMyVars(rot2);
-    CHECK(var[0] == ValueApprox<TestType>(vs[0]));
-    CHECK(var[1] == ValueApprox<TestType>(vs[1]));
-    CHECK(var[2] == ValueApprox<TestType>(vs[2]));
-    CHECK(var[3] == ValueApprox<TestType>(vs[3]));
+  std::vector<RealType> params = {0.1};
+  rot_spo->apply_rotation(params, false);
 
-    auto& full_var = testing::getMyVarsFull(rot2);
-    CHECK(full_var[0] == ValueApprox<TestType>(vs[0]));
-    CHECK(full_var[1] == ValueApprox<TestType>(vs[1]));
-    CHECK(full_var[2] == ValueApprox<TestType>(vs[2]));
-    CHECK(full_var[3] == ValueApprox<TestType>(vs[3]));
-    CHECK(full_var[4] == ValueApprox<TestType>(0.0));
-    CHECK(full_var[5] == ValueApprox<TestType>(0.0));
+  rot_spo->evaluate_notranspose(elec, 0, elec.R.size(), psiM_bare, dpsiM_bare, d2psiM_bare);
+  CHECK(std::real(psiM_bare[0][0]) == ValueApprox<TestType>(0.20917123424337608));
+  CHECK(std::real(psiM_bare[0][1]) == ValueApprox<TestType>(-0.02099012652669549));
+
+  CHECK(std::real(d2psiM_bare[0][0]) == ValueApprox<TestType>(5.277362065087747));
+
+  dlogpsi[0]      = 0.0;
+  dhpsioverpsi[0] = 0.0;
+
+  rot_spo->evaluateDerivatives(elec, opt_vars, dlogpsi, dhpsioverpsi, 0, 1);
+  CHECK(dlogpsi[0] == ValueApprox<TestType>(-0.10034901119468914));
+  CHECK(dhpsioverpsi[0] == ValueApprox<TestType>(32.96939041498753));
 }
 
-// Test using history list.
-TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters history",
-    "[wavefunction][template]", TestTypeList)
+// Test construction of delta rotation
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs construct delta matrix", "[wavefunction][template]", TestTypeList)
 {
-    auto fake_spo = std::make_unique<FakeSPOT<TestType>>();
-    fake_spo->setOrbitalSetSize(4);
-    RotatedSPOsT<TestType> rot("fake_rot", std::move(fake_spo));
-    rot.set_use_global_rotation(false);
-    int nel = 2;
-    rot.buildOptVariables(nel);
-
-    optimize::VariableSetT<TestType> vs;
-    rot.checkInVariablesExclusive(vs);
-    vs[0] = 0.1;
-    vs[1] = 0.15;
-    vs[2] = 0.2;
-    vs[3] = 0.25;
-    rot.resetParametersExclusive(vs);
-
-    {
-        hdf_archive hout;
-        vs.writeToHDF("rot_vp_hist.h5", hout);
+  using ValueType   = typename SPOSetT<TestType>::ValueType;
+  using ValueMatrix = typename SPOSetT<TestType>::ValueMatrix;
+
+  int nel = 2;
+  int nmo = 4;
+  typename RotatedSPOsT<TestType>::RotationIndices rot_ind;
+  RotatedSPOsT<TestType>::createRotationIndices(nel, nmo, rot_ind);
+  typename RotatedSPOsT<TestType>::RotationIndices full_rot_ind;
+  RotatedSPOsT<TestType>::createRotationIndicesFull(nel, nmo, full_rot_ind);
+  // rot_ind size is 4 and full_rot_ind size is 6
+
+  ValueMatrix rot_m4(nmo, nmo);
+  rot_m4 = ValueType(0);
+
+  // When comparing with gen_matrix_ops.py, be aware of the order of indices
+  // in full_rot
+  // rot_ind is (0,2) (0,3) (1,2) (1,3)
+  // full_rot_ind is (0,2) (0,3) (1,2) (1,3) (0,1) (2,3)
+  // The extra indices go at the back
+  std::vector<ValueType> old_params   = {1.5, 0.2, -0.15, 0.03, -1.1, 0.05};
+  std::vector<ValueType> delta_params = {0.1, 0.3, 0.2, -0.1};
+  std::vector<ValueType> new_params(6);
+
+  RotatedSPOsT<TestType>::constructDeltaRotation(delta_params, old_params, rot_ind, full_rot_ind, new_params, rot_m4);
+
+  // clang-format off
+  std::vector<ValueType> rot_data4 =
+    { -0.371126931484737,  0.491586564957393,   -0.784780958819798,   0.0687480658200083,
+      -0.373372784561548,  0.66111547793048,     0.610450337985578,   0.225542620014052,
+       0.751270334458895,  0.566737323353515,   -0.0297901110611425, -0.336918744155143,
+       0.398058348785074,  0.00881931472604944, -0.102867783149713,   0.911531672428406 };
+  // clang-format on
 
-        rot.writeVariationalParameters(hout);
-    }
+  ValueMatrix new_rot_m4(rot_data4.data(), 4, 4);
 
-    auto fake_spo2 = std::make_unique<FakeSPOT<TestType>>();
-    fake_spo2->setOrbitalSetSize(4);
+  CheckMatrixResult check_matrix_result4 = checkMatrix(rot_m4, new_rot_m4, true);
+  CHECKED_ELSE(check_matrix_result4.result) { FAIL(check_matrix_result4.result_message); }
 
-    RotatedSPOsT<TestType> rot2("fake_rot", std::move(fake_spo2));
-    rot2.buildOptVariables(nel);
+  // Reminder: Ordering!
+  std::vector<ValueType> expected_new_param = {1.6813965019790489,   0.3623564254653294,  -0.05486544454559908,
+                                               -0.20574472941408453, -0.9542513302873077, 0.27497788909911774};
+  for (int i = 0; i < new_params.size(); i++)
+    CHECK(new_params[i] == ValueApprox<TestType>(expected_new_param[i]));
 
-    optimize::VariableSetT<TestType> vs2;
-    rot2.checkInVariablesExclusive(vs2);
+  // Rotated back to original position
 
-    hdf_archive hin;
-    vs2.readFromHDF("rot_vp_hist.h5", hin);
-    rot2.readVariationalParameters(hin);
+  std::vector<ValueType> new_params2(6);
+  std::vector<ValueType> reverse_delta_params = {-0.1, -0.3, -0.2, 0.1};
+  RotatedSPOsT<TestType>::constructDeltaRotation(reverse_delta_params, new_params, rot_ind, full_rot_ind, new_params2,
+                                                 rot_m4);
+  for (int i = 0; i < new_params2.size(); i++)
+    CHECK(new_params2[i] == ValueApprox<TestType>(old_params[i]));
+}
 
-    auto& var = testing::getMyVars(rot2);
-    CHECK(var[0] == ValueApprox<TestType>(vs[0]));
-    CHECK(var[1] == ValueApprox<TestType>(vs[1]));
-    CHECK(var[2] == ValueApprox<TestType>(vs[2]));
-    CHECK(var[3] == ValueApprox<TestType>(vs[3]));
+// Test using global rotation
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters", "[wavefunction][template]", TestTypeList)
+{
+  auto fake_spo = std::make_unique<FakeSPOT<TestType>>();
+  fake_spo->setOrbitalSetSize(4);
+  RotatedSPOsT<TestType> rot("fake_rot", std::move(fake_spo));
+  int nel = 2;
+  rot.buildOptVariables(nel);
+
+  optimize::VariableSetT<TestType> vs;
+  rot.checkInVariablesExclusive(vs);
+  vs[0] = 0.1;
+  vs[1] = 0.15;
+  vs[2] = 0.2;
+  vs[3] = 0.25;
+  rot.resetParametersExclusive(vs);
+
+  {
+    hdf_archive hout;
+    vs.writeToHDF("rot_vp.h5", hout);
+
+    rot.writeVariationalParameters(hout);
+  }
+
+  auto fake_spo2 = std::make_unique<FakeSPOT<TestType>>();
+  fake_spo2->setOrbitalSetSize(4);
+
+  RotatedSPOsT<TestType> rot2("fake_rot", std::move(fake_spo2));
+  rot2.buildOptVariables(nel);
+
+  optimize::VariableSetT<TestType> vs2;
+  rot2.checkInVariablesExclusive(vs2);
+
+  hdf_archive hin;
+  vs2.readFromHDF("rot_vp.h5", hin);
+  rot2.readVariationalParameters(hin);
+
+  auto& var = testing::getMyVars(rot2);
+  CHECK(var[0] == ValueApprox<TestType>(vs[0]));
+  CHECK(var[1] == ValueApprox<TestType>(vs[1]));
+  CHECK(var[2] == ValueApprox<TestType>(vs[2]));
+  CHECK(var[3] == ValueApprox<TestType>(vs[3]));
+
+  auto& full_var = testing::getMyVarsFull(rot2);
+  CHECK(full_var[0] == ValueApprox<TestType>(vs[0]));
+  CHECK(full_var[1] == ValueApprox<TestType>(vs[1]));
+  CHECK(full_var[2] == ValueApprox<TestType>(vs[2]));
+  CHECK(full_var[3] == ValueApprox<TestType>(vs[3]));
+  CHECK(full_var[4] == ValueApprox<TestType>(0.0));
+  CHECK(full_var[5] == ValueApprox<TestType>(0.0));
+}
 
-    auto hist = testing::getHistoryParams(rot2);
-    REQUIRE(hist.size() == 1);
-    REQUIRE(hist[0].size() == 4);
+// Test using history list.
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs read and write parameters history", "[wavefunction][template]", TestTypeList)
+{
+  auto fake_spo = std::make_unique<FakeSPOT<TestType>>();
+  fake_spo->setOrbitalSetSize(4);
+  RotatedSPOsT<TestType> rot("fake_rot", std::move(fake_spo));
+  rot.set_use_global_rotation(false);
+  int nel = 2;
+  rot.buildOptVariables(nel);
+
+  optimize::VariableSetT<TestType> vs;
+  rot.checkInVariablesExclusive(vs);
+  vs[0] = 0.1;
+  vs[1] = 0.15;
+  vs[2] = 0.2;
+  vs[3] = 0.25;
+  rot.resetParametersExclusive(vs);
+
+  {
+    hdf_archive hout;
+    vs.writeToHDF("rot_vp_hist.h5", hout);
+
+    rot.writeVariationalParameters(hout);
+  }
+
+  auto fake_spo2 = std::make_unique<FakeSPOT<TestType>>();
+  fake_spo2->setOrbitalSetSize(4);
+
+  RotatedSPOsT<TestType> rot2("fake_rot", std::move(fake_spo2));
+  rot2.buildOptVariables(nel);
+
+  optimize::VariableSetT<TestType> vs2;
+  rot2.checkInVariablesExclusive(vs2);
+
+  hdf_archive hin;
+  vs2.readFromHDF("rot_vp_hist.h5", hin);
+  rot2.readVariationalParameters(hin);
+
+  auto& var = testing::getMyVars(rot2);
+  CHECK(var[0] == ValueApprox<TestType>(vs[0]));
+  CHECK(var[1] == ValueApprox<TestType>(vs[1]));
+  CHECK(var[2] == ValueApprox<TestType>(vs[2]));
+  CHECK(var[3] == ValueApprox<TestType>(vs[3]));
+
+  auto hist = testing::getHistoryParams(rot2);
+  REQUIRE(hist.size() == 1);
+  REQUIRE(hist[0].size() == 4);
 }
 
-template <typename T>
+template<typename T>
 class DummySPOSetWithoutMWT : public SPOSetT<T>
 {
 public:
-    using ValueVector = typename SPOSetT<T>::ValueVector;
-    using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
-    using GradVector = typename SPOSetT<T>::GradVector;
-    using GradMatrix = typename SPOSetT<T>::GradMatrix;
-
-    DummySPOSetWithoutMWT(const std::string& my_name) : SPOSetT<T>(my_name)
-    {
-    }
-    void
-    setOrbitalSetSize(int norbs) override
-    {
-    }
-    void
-    evaluateValue(const ParticleSetT<T>& P, int iat,
-        typename SPOSetT<T>::ValueVector& psi) override
-    {
-        assert(psi.size() == 3);
-        psi[0] = 123;
-        psi[1] = 456;
-        psi[2] = 789;
-    }
-    void
-    evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi,
-        GradVector& dpsi, ValueVector& d2psi) override
-    {
-    }
-    void
-    evaluate_notranspose(const ParticleSetT<T>& P, int first, int last,
-        ValueMatrix& logdet, GradMatrix& dlogdet,
-        ValueMatrix& d2logdet) override
-    {
-    }
-    std::string
-    getClassName() const override
-    {
-        return this->my_name_;
-    }
+  using ValueVector = typename SPOSetT<T>::ValueVector;
+  using ValueMatrix = typename SPOSetT<T>::ValueMatrix;
+  using GradVector  = typename SPOSetT<T>::GradVector;
+  using GradMatrix  = typename SPOSetT<T>::GradMatrix;
+
+  DummySPOSetWithoutMWT(const std::string& my_name) : SPOSetT<T>(my_name) {}
+  void setOrbitalSetSize(int norbs) override {}
+  void evaluateValue(const ParticleSetT<T>& P, int iat, typename SPOSetT<T>::ValueVector& psi) override
+  {
+    assert(psi.size() == 3);
+    psi[0] = 123;
+    psi[1] = 456;
+    psi[2] = 789;
+  }
+  void evaluateVGL(const ParticleSetT<T>& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override
+  {}
+  void evaluate_notranspose(const ParticleSetT<T>& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override
+  {}
+  std::string getClassName() const override { return this->my_name_; }
 };
 
-template <typename T>
+template<typename T>
 class DummySPOSetWithMWT : public DummySPOSetWithoutMWT<T>
 {
 public:
-    using ValueVector = typename DummySPOSetWithoutMWT<T>::ValueVector;
-
-    DummySPOSetWithMWT(const std::string& my_name) :
-        DummySPOSetWithoutMWT<T>(my_name)
-    {
-    }
-    void
-    mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
-        const RefVectorWithLeader<ParticleSetT<T>>& P_list, int iat,
-        const RefVector<ValueVector>& psi_v_list) const override
+  using ValueVector = typename DummySPOSetWithoutMWT<T>::ValueVector;
+
+  DummySPOSetWithMWT(const std::string& my_name) : DummySPOSetWithoutMWT<T>(my_name) {}
+  void mw_evaluateValue(const RefVectorWithLeader<SPOSetT<T>>& spo_list,
+                        const RefVectorWithLeader<ParticleSetT<T>>& P_list,
+                        int iat,
+                        const RefVector<ValueVector>& psi_v_list) const override
+  {
+    for (auto& psi : psi_v_list)
     {
-        for (auto& psi : psi_v_list) {
-            assert(psi.get().size() == 3);
-            psi.get()[0] = 321;
-            psi.get()[1] = 654;
-            psi.get()[2] = 987;
-        }
+      assert(psi.get().size() == 3);
+      psi.get()[0] = 321;
+      psi.get()[1] = 654;
+      psi.get()[2] = 987;
     }
+  }
 };
 
-TEMPLATE_LIST_TEST_CASE(
-    "RotatedSPOs mw_ APIs", "[wavefunction][template]", TestTypeList)
+TEMPLATE_LIST_TEST_CASE("RotatedSPOs mw_ APIs", "[wavefunction][template]", TestTypeList)
 {
-    // checking that mw_ API works in RotatedSPOs and is not defaulting to
-    // SPOSet default implementation
+  // checking that mw_ API works in RotatedSPOs and is not defaulting to
+  // SPOSet default implementation
+  {
+    // First check calling the mw_ APIs for RotatedSPOs, for which the
+    // underlying implementation just calls the underlying SPOSet mw_ API.
+    // In the case that the underlying SPOSet doesn't specialize the mw_
+    // API, the underlying SPOSet will fall back to the default SPOSet mw_,
+    // which is just a loop over the single walker API.
+    RotatedSPOsT<TestType> rot_spo0("rotated0", std::make_unique<DummySPOSetWithoutMWT<TestType>>("no mw 0"));
+    RotatedSPOsT<TestType> rot_spo1("rotated1", std::make_unique<DummySPOSetWithoutMWT<TestType>>("no mw 1"));
+    RefVectorWithLeader<SPOSetT<TestType>> spo_list(rot_spo0, {rot_spo0, rot_spo1});
+
+    ResourceCollection spo_res("test_rot_res");
+    rot_spo0.createResource(spo_res);
+    ResourceCollectionTeamLock<SPOSetT<TestType>> mw_sposet_lock(spo_res, spo_list);
+
+    const SimulationCellT<TestType> simulation_cell;
+    ParticleSetT<TestType> elec0(simulation_cell);
+    ParticleSetT<TestType> elec1(simulation_cell);
+    RefVectorWithLeader<ParticleSetT<TestType>> p_list(elec0, {elec0, elec1});
+
+    typename SPOSetT<TestType>::ValueVector psi0(3);
+    typename SPOSetT<TestType>::ValueVector psi1(3);
+    RefVector<typename SPOSetT<TestType>::ValueVector> psi_v_list{psi0, psi1};
+
+    rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list);
+    for (int iw = 0; iw < spo_list.size(); iw++)
     {
-        // First check calling the mw_ APIs for RotatedSPOs, for which the
-        // underlying implementation just calls the underlying SPOSet mw_ API
-        // In the case that the underlying SPOSet doesn't specialize the mw_
-        // API, the underlying SPOSet will fall back to the default SPOSet mw_,
-        // which is just a loop over the single walker API.
-        RotatedSPOsT<TestType> rot_spo0("rotated0",
-            std::make_unique<DummySPOSetWithoutMWT<TestType>>("no mw 0"));
-        RotatedSPOsT<TestType> rot_spo1("rotated1",
-            std::make_unique<DummySPOSetWithoutMWT<TestType>>("no mw 1"));
-        RefVectorWithLeader<SPOSetT<TestType>> spo_list(
-            rot_spo0, {rot_spo0, rot_spo1});
-
-        ResourceCollection spo_res("test_rot_res");
-        rot_spo0.createResource(spo_res);
-        ResourceCollectionTeamLock<SPOSetT<TestType>> mw_sposet_lock(
-            spo_res, spo_list);
-
-        const SimulationCellT<TestType> simulation_cell;
-        ParticleSetT<TestType> elec0(simulation_cell);
-        ParticleSetT<TestType> elec1(simulation_cell);
-        RefVectorWithLeader<ParticleSetT<TestType>> p_list(
-            elec0, {elec0, elec1});
-
-        typename SPOSetT<TestType>::ValueVector psi0(3);
-        typename SPOSetT<TestType>::ValueVector psi1(3);
-        RefVector<typename SPOSetT<TestType>::ValueVector> psi_v_list{
-            psi0, psi1};
-
-        rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list);
-        for (int iw = 0; iw < spo_list.size(); iw++) {
-            CHECK(psi_v_list[iw].get()[0] == ValueApprox<TestType>(123));
-            CHECK(psi_v_list[iw].get()[1] == ValueApprox<TestType>(456));
-            CHECK(psi_v_list[iw].get()[2] == ValueApprox<TestType>(789));
-        }
+      CHECK(psi_v_list[iw].get()[0] == ValueApprox<TestType>(123));
+      CHECK(psi_v_list[iw].get()[1] == ValueApprox<TestType>(456));
+      CHECK(psi_v_list[iw].get()[2] == ValueApprox<TestType>(789));
     }
+  }
+  {
+    // In the case that the underlying SPOSet DOES have mw_ specializations,
+    // we want to make sure that RotatedSPOs are triggering that
+    // appropriately. This will mean that the underlying SPOSets will do the
+    // appropriate offloading. To check this, DummySPOSetWithMWT has an
+    // explicit mw_evaluateValue which sets different values than what gets
+    // set in evaluateValue. By doing this, we are ensuring that
+    // RotatedSPOs->mw_evaluateValue is calling the specialization in the
+    // underlying SPO and not using the default SPOSet implementation which
+    // loops over single walker APIs (which have different values enforced
+    // in
+    //  DummySPOSetWithoutMWT).
+
+    RotatedSPOsT<TestType> rot_spo0("rotated0", std::make_unique<DummySPOSetWithMWT<TestType>>("mw 0"));
+    RotatedSPOsT<TestType> rot_spo1("rotated1", std::make_unique<DummySPOSetWithMWT<TestType>>("mw 1"));
+    RefVectorWithLeader<SPOSetT<TestType>> spo_list(rot_spo0, {rot_spo0, rot_spo1});
+
+    ResourceCollection spo_res("test_rot_res");
+    rot_spo0.createResource(spo_res);
+    ResourceCollectionTeamLock<SPOSetT<TestType>> mw_sposet_lock(spo_res, spo_list);
+
+    const SimulationCellT<TestType> simulation_cell;
+    ParticleSetT<TestType> elec0(simulation_cell);
+    ParticleSetT<TestType> elec1(simulation_cell);
+    RefVectorWithLeader<ParticleSetT<TestType>> p_list(elec0, {elec0, elec1});
+
+    typename SPOSetT<TestType>::ValueVector psi0(3);
+    typename SPOSetT<TestType>::ValueVector psi1(3);
+    RefVector<typename SPOSetT<TestType>::ValueVector> psi_v_list{psi0, psi1};
+
+    rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list);
+    for (int iw = 0; iw < spo_list.size(); iw++)
     {
-        // In the case that the underlying SPOSet DOES have mw_ specializations,
-        // we want to make sure that RotatedSPOs are triggering that
-        // appropriately This will mean that the underlying SPOSets will do the
-        // appropriate offloading To check this, DummySPOSetWithMW has an
-        // explicit mw_evaluateValue which sets different values than what gets
-        // set in evaluateValue. By doing this, we are ensuring that
-        // RotatedSPOs->mw_evaluaeValue is calling the specialization in the
-        // underlying SPO and not using the default SPOSet implementation which
-        // loops over single walker APIs (which have different values enforced
-        // in
-        //  DummySPOSetWithoutMW
-
-        RotatedSPOsT<TestType> rot_spo0(
-            "rotated0", std::make_unique<DummySPOSetWithMWT<TestType>>("mw 0"));
-        RotatedSPOsT<TestType> rot_spo1(
-            "rotated1", std::make_unique<DummySPOSetWithMWT<TestType>>("mw 1"));
-        RefVectorWithLeader<SPOSetT<TestType>> spo_list(
-            rot_spo0, {rot_spo0, rot_spo1});
-
-        ResourceCollection spo_res("test_rot_res");
-        rot_spo0.createResource(spo_res);
-        ResourceCollectionTeamLock<SPOSetT<TestType>> mw_sposet_lock(
-            spo_res, spo_list);
-
-        const SimulationCellT<TestType> simulation_cell;
-        ParticleSetT<TestType> elec0(simulation_cell);
-        ParticleSetT<TestType> elec1(simulation_cell);
-        RefVectorWithLeader<ParticleSetT<TestType>> p_list(
-            elec0, {elec0, elec1});
-
-        typename SPOSetT<TestType>::ValueVector psi0(3);
-        typename SPOSetT<TestType>::ValueVector psi1(3);
-        RefVector<typename SPOSetT<TestType>::ValueVector> psi_v_list{
-            psi0, psi1};
-
-        rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list);
-        for (int iw = 0; iw < spo_list.size(); iw++) {
-            CHECK(psi_v_list[iw].get()[0] == ValueApprox<TestType>(321));
-            CHECK(psi_v_list[iw].get()[1] == ValueApprox<TestType>(654));
-            CHECK(psi_v_list[iw].get()[2] == ValueApprox<TestType>(987));
-        }
+      CHECK(psi_v_list[iw].get()[0] == ValueApprox<TestType>(321));
+      CHECK(psi_v_list[iw].get()[1] == ValueApprox<TestType>(654));
+      CHECK(psi_v_list[iw].get()[2] == ValueApprox<TestType>(987));
     }
+  }
 }
 
 } // namespace qmcplusplus