diff --git a/cmake/dca_config.cmake b/cmake/dca_config.cmake index d47d68434..1cf88a856 100644 --- a/cmake/dca_config.cmake +++ b/cmake/dca_config.cmake @@ -201,20 +201,34 @@ configure_file("${PROJECT_SOURCE_DIR}/include/dca/config/rng.hpp.in" ################################################################################ # Select the cluster solver. set(DCA_CLUSTER_SOLVER "CT-AUX" CACHE STRING - "The cluster solver for the DCA(+) loop. Options are: CT-AUX | SS-CT-HYB.") -set_property(CACHE DCA_CLUSTER_SOLVER PROPERTY STRINGS CT-AUX SS-CT-HYB) + "The cluster solver for the DCA(+) loop. Options are: CT-AUX | CT-INT | SS-CT-HYB.") +set_property(CACHE DCA_CLUSTER_SOLVER PROPERTY STRINGS CT-AUX CT-INT SS-CT-HYB) + +if (DCA_CLUSTER_SOLVER STREQUAL "CT-INT") + set(DCA_CLUSTER_SOLVER_NAME dca::phys::solver::CT_INT) + set(DCA_CLUSTER_SOLVER_INCLUDE "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp") + + set(DCA_USE_CTINT_SUBMATRIX ON CACHE BOOL "Use submatrix updates if the CT-INT solver is selected.") + if(DCA_USE_CTINT_SUBMATRIX) + set(DCA_CLUSTER_SOLVER_TYPE + "dca::phys::solver::CtintClusterSolver") + else() + set(DCA_CLUSTER_SOLVER_TYPE + "dca::phys::solver::CtintClusterSolver") + endif() -if (DCA_CLUSTER_SOLVER STREQUAL "CT-AUX") +elseif (DCA_CLUSTER_SOLVER STREQUAL "CT-AUX") set(DCA_CLUSTER_SOLVER_NAME dca::phys::solver::CT_AUX) set(DCA_CLUSTER_SOLVER_TYPE "dca::phys::solver::CtauxClusterSolver") set(DCA_CLUSTER_SOLVER_INCLUDE - "dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp") + "dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp") + elseif (DCA_CLUSTER_SOLVER STREQUAL "SS-CT-HYB") set(DCA_CLUSTER_SOLVER_NAME dca::phys::solver::SS_CT_HYB) set(DCA_CLUSTER_SOLVER_TYPE "dca::phys::solver::SsCtHybClusterSolver") set(DCA_CLUSTER_SOLVER_INCLUDE - "dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_cluster_solver.hpp") + "dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_cluster_solver.hpp") # elseif (DCA_CLUSTER_SOLVER STREQUAL "HTS") # set(DCA_CLUSTER_SOLVER_NAME dca::phys::solver::HIGH_TEMPERATURE_SERIES) @@ -222,7 +236,8 @@ elseif (DCA_CLUSTER_SOLVER STREQUAL "SS-CT-HYB") # "dca/phys/dca_step/cluster_solver/high_temperature_series_expansion/high_temperature_series_expansion_solver.hpp") else() - message(FATAL_ERROR "Please set DCA_CLUSTER_SOLVER to a valid option: CT-AUX | SS-CT-HYB.") + message(FATAL_ERROR "Please set DCA_CLUSTER_SOLVER to a valid option: CT-AUX | CT_INT | + SS-CT-HYB.") endif() ################################################################################ @@ -312,6 +327,15 @@ configure_file("${PROJECT_SOURCE_DIR}/include/dca/config/mc_options.hpp.in" "${CMAKE_BINARY_DIR}/include/dca/config/mc_options.hpp" @ONLY) +################################################################################ +# Symmetrization +option(DCA_SYMMETRIZE "Apply cluster, time and frequency symmetries to single particle functions." + ON) + +if(DCA_SYMMETRIZE) + add_compile_definitions(DCA_WITH_SYMMETRIZATION) +endif() + ################################################################################ # Generate applications' config files. configure_file("${PROJECT_SOURCE_DIR}/include/dca/config/analysis.hpp.in" diff --git a/include/dca/config/cmake_options.hpp b/include/dca/config/cmake_options.hpp index 7c49452d8..ae1e95730 100644 --- a/include/dca/config/cmake_options.hpp +++ b/include/dca/config/cmake_options.hpp @@ -26,7 +26,6 @@ struct CMakeOptions { // Parallelization static const std::string dca_with_mpi; static const std::string dca_with_threaded_solver; - static const std::string dca_threading_library; // Others static const std::string dca_cluster_solver; diff --git a/include/dca/linalg/lapack/magma.hpp b/include/dca/linalg/lapack/magma.hpp index 8a47425f7..9e6d7cd83 100644 --- a/include/dca/linalg/lapack/magma.hpp +++ b/include/dca/linalg/lapack/magma.hpp @@ -124,7 +124,7 @@ inline magma_trans_t toMagmaTrans(const char x) { inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m, int* n, int* k, const float alpha, const float* const* a, int* lda, const float* const* b, int* ldb, const float beta, float** c, - int* ldc, const int batch_count, magma_queue_t& queue) { + int* ldc, const int batch_count, magma_queue_t queue) { magmablas_sgemm_vbatched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, queue); checkErrorsCudaDebug(); @@ -132,7 +132,7 @@ inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m, int* n, int* k, const double alpha, const double* const* a, int* lda, const double* const* b, int* ldb, const double beta, double** c, - int* ldc, const int batch_count, const magma_queue_t& queue) { + int* ldc, const int batch_count, const magma_queue_t queue) { magmablas_dgemm_vbatched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, queue); checkErrorsCudaDebug(); @@ -142,7 +142,7 @@ inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m const std::complex* const* a, int* lda, const std::complex* const* b, int* ldb, const std::complex beta, std::complex** c, - int* ldc, const int batch_count, const magma_queue_t& queue) { + int* ldc, const int batch_count, const magma_queue_t queue) { using util::castCudaComplex; magmablas_cgemm_vbatched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, *castCudaComplex(alpha), castCudaComplex(a), lda, castCudaComplex(b), @@ -154,7 +154,7 @@ inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m const std::complex* const* a, int* lda, const std::complex* const* b, int* ldb, const std::complex beta, std::complex** c, - int* ldc, const int batch_count, const magma_queue_t& queue) { + int* ldc, const int batch_count, const magma_queue_t queue) { using util::castCudaComplex; magmablas_zgemm_vbatched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, *castCudaComplex(alpha), castCudaComplex(a), lda, castCudaComplex(b), @@ -168,7 +168,7 @@ inline void magmablas_gemm_vbatched_max_nocheck(const char transa, const char tr const float* const* b, int* ldb, const float beta, float** c, int* ldc, const int batch_count, const int m_max, const int n_max, const int k_max, - magma_queue_t& queue) { + magma_queue_t queue) { magmablas_sgemm_vbatched_max_nocheck(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, m_max, n_max, k_max, queue); @@ -181,7 +181,7 @@ inline void magmablas_gemm_vbatched_max_nocheck(const char transa, const char tr const double* const* b, int* ldb, const double beta, double** c, int* ldc, const int batch_count, const int m_max, const int n_max, const int k_max, - magma_queue_t& queue) { + magma_queue_t queue) { magmablas_dgemm_vbatched_max_nocheck(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, m_max, n_max, k_max, queue); @@ -192,7 +192,7 @@ inline void magmablas_gemm_vbatched_max_nocheck( const char transa, const char transb, int* m, int* n, int* k, const std::complex alpha, const std::complex* const* a, int* lda, const std::complex* const* b, int* ldb, const std::complex beta, std::complex** c, int* ldc, const int batch_count, - const int m_max, const int n_max, const int k_max, magma_queue_t& queue) { + const int m_max, const int n_max, const int k_max, magma_queue_t queue) { using util::castCudaComplex; magmablas_cgemm_vbatched_max_nocheck( toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, *castCudaComplex(alpha), @@ -205,7 +205,7 @@ inline void magmablas_gemm_vbatched_max_nocheck( const char transa, const char transb, int* m, int* n, int* k, const std::complex alpha, const std::complex* const* a, int* lda, const std::complex* const* b, int* ldb, const std::complex beta, std::complex** c, int* ldc, const int batch_count, - const int m_max, const int n_max, const int k_max, magma_queue_t& queue) { + const int m_max, const int n_max, const int k_max, magma_queue_t queue) { using util::castCudaComplex; magmablas_zgemm_vbatched_max_nocheck( toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, *castCudaComplex(alpha), @@ -218,7 +218,7 @@ inline void magmablas_gemm_batched(const char transa, const char transb, const i const int k, const float alpha, const float* const* a, const int lda, const float* const* b, const int ldb, const float beta, float** c, const int ldc, - const int batch_count, magma_queue_t& queue) { + const int batch_count, magma_queue_t queue) { magmablas_sgemm_batched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, queue); checkErrorsCudaDebug(); @@ -227,7 +227,7 @@ inline void magmablas_gemm_batched(const char transa, const char transb, const i const int k, const double alpha, const double* const* a, const int lda, const double* const* b, const int ldb, const double beta, double** c, const int ldc, - const int batch_count, const magma_queue_t& queue) { + const int batch_count, const magma_queue_t queue) { magmablas_dgemm_batched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, queue); checkErrorsCudaDebug(); @@ -237,7 +237,7 @@ inline void magmablas_gemm_batched(const char transa, const char transb, const i const std::complex* const* a, const int lda, const std::complex* const* b, const int ldb, const std::complex beta, std::complex** c, - const int ldc, const int batch_count, const magma_queue_t& queue) { + const int ldc, const int batch_count, const magma_queue_t queue) { using util::castCudaComplex; magmablas_cgemm_batched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, *castCudaComplex(alpha), castCudaComplex(a), lda, castCudaComplex(b), ldb, @@ -249,7 +249,7 @@ inline void magmablas_gemm_batched(const char transa, const char transb, const i const std::complex* const* a, const int lda, const std::complex* const* b, const int ldb, const std::complex beta, std::complex** c, - const int ldc, const int batch_count, const magma_queue_t& queue) { + const int ldc, const int batch_count, const magma_queue_t queue) { using util::castCudaComplex; magmablas_zgemm_batched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, *castCudaComplex(alpha), castCudaComplex(a), lda, castCudaComplex(b), ldb, @@ -276,8 +276,8 @@ inline int get_getri_nb>(int n) { return magma_get_zgetri_nb(n); } -} // magma -} // linalg -} // dca +} // namespace magma +} // namespace linalg +} // namespace dca #endif // DCA_LINALG_LAPACK_MAGMA_HPP diff --git a/include/dca/linalg/util/cuda_stream.hpp b/include/dca/linalg/util/cuda_stream.hpp index ea935e141..b087fa162 100644 --- a/include/dca/linalg/util/cuda_stream.hpp +++ b/include/dca/linalg/util/cuda_stream.hpp @@ -31,9 +31,14 @@ class CudaStream { } CudaStream(const CudaStream& other) = delete; + CudaStream& operator=(const CudaStream& other) = delete; - CudaStream(CudaStream&& other) { - std::swap(stream_, other.stream_); + CudaStream(CudaStream&& other) noexcept { + swap(other); + } + CudaStream& operator=(CudaStream&& other) noexcept { + swap(other); + return *this; } void sync() const { @@ -49,6 +54,10 @@ class CudaStream { return stream_; } + void swap(CudaStream& other) noexcept { + std::swap(stream_, other.stream_); + } + private: cudaStream_t stream_ = nullptr; }; diff --git a/include/dca/linalg/util/magma_batched_gemm.hpp b/include/dca/linalg/util/magma_batched_gemm.hpp index e3d1fda48..b3d9515cd 100644 --- a/include/dca/linalg/util/magma_batched_gemm.hpp +++ b/include/dca/linalg/util/magma_batched_gemm.hpp @@ -19,6 +19,7 @@ #include "dca/linalg/lapack/magma.hpp" #include "dca/linalg/util/allocators/vectors_typedefs.hpp" #include "dca/linalg/util/cuda_event.hpp" +#include "dca/linalg/util/magma_queue.hpp" #include "dca/linalg/vector.hpp" namespace dca { @@ -30,7 +31,7 @@ template class MagmaBatchedGemm { public: // Creates a plan for a batched gemm. - MagmaBatchedGemm(magma_queue_t queue); + MagmaBatchedGemm(const linalg::util::MagmaQueue& queue); // Creates a plan for a batched gemm and allocates the memory for the arguments of `size` // multiplications. MagmaBatchedGemm(int size, magma_queue_t queue); @@ -52,20 +53,19 @@ class MagmaBatchedGemm { void synchronizeCopy(); private: - magma_queue_t queue_; - const cudaStream_t stream_; + const linalg::util::MagmaQueue& queue_; CudaEvent copied_; - linalg::util::HostVector a_ptr_, b_ptr_; + linalg::util::HostVector a_ptr_, b_ptr_; linalg::util::HostVector c_ptr_; - linalg::Vector a_ptr_dev_, b_ptr_dev_; + linalg::Vector a_ptr_dev_, b_ptr_dev_; linalg::Vector c_ptr_dev_; }; template -MagmaBatchedGemm::MagmaBatchedGemm(magma_queue_t queue) - : queue_(queue), stream_(magma_queue_get_cuda_stream(queue_)) {} +MagmaBatchedGemm::MagmaBatchedGemm(const linalg::util::MagmaQueue& queue) + : queue_(queue) {} template MagmaBatchedGemm::MagmaBatchedGemm(const int size, magma_queue_t queue) @@ -99,10 +99,11 @@ void MagmaBatchedGemm::execute(const char transa, const char transb, const int n, const int k, const ScalarType alpha, const ScalarType beta, const int lda, const int ldb, const int ldc) { - a_ptr_dev_.setAsync(a_ptr_, stream_); - b_ptr_dev_.setAsync(b_ptr_, stream_); - c_ptr_dev_.setAsync(c_ptr_, stream_); - copied_.record(stream_); + // TODO: store in a buffer if the performance gain is necessary. + a_ptr_dev_.setAsync(a_ptr_, queue_); + b_ptr_dev_.setAsync(b_ptr_, queue_); + c_ptr_dev_.setAsync(c_ptr_, queue_); + copied_.record(queue_); const int n_batched = a_ptr_.size(); magma::magmablas_gemm_batched(transa, transb, m, n, k, alpha, a_ptr_dev_.ptr(), lda, @@ -111,9 +112,9 @@ void MagmaBatchedGemm::execute(const char transa, const char transb, assert(cudaPeekAtLastError() == cudaSuccess); } -} // util -} // linalg -} // dca +} // namespace util +} // namespace linalg +} // namespace dca #endif // DCA_HAVE_CUDA #endif // DCA_LINALG_UTIL_MAGMA_BATCHED_GEMM_HPP diff --git a/include/dca/linalg/util/magma_queue.hpp b/include/dca/linalg/util/magma_queue.hpp index decc571b8..8c2f6a7d7 100644 --- a/include/dca/linalg/util/magma_queue.hpp +++ b/include/dca/linalg/util/magma_queue.hpp @@ -13,8 +13,12 @@ #define DCA_LINALG_UTIL_MAGMA_QUEUE_HPP #ifdef DCA_HAVE_CUDA +#include #include -#include +#include +#include + +#include "dca/linalg/util/cuda_stream.hpp" namespace dca { namespace linalg { @@ -24,28 +28,65 @@ namespace util { class MagmaQueue { public: MagmaQueue() { - magma_queue_create(&queue_); + cublasCreate(&cublas_handle_); + cusparseCreate(&cusparse_handle_); + int device; + cudaGetDevice(&device); + magma_queue_create_from_cuda(device, stream_, cublas_handle_, cusparse_handle_, &queue_); + } + + MagmaQueue(const MagmaQueue& rhs) = delete; + MagmaQueue& operator=(const MagmaQueue& rhs) = delete; + + MagmaQueue(MagmaQueue&& rhs) noexcept : queue_(std::move(rhs.queue_)) { + std::swap(cublas_handle_, rhs.cublas_handle_); + std::swap(cusparse_handle_, rhs.cusparse_handle_); + std::swap(queue_, rhs.queue_); + } + + MagmaQueue& operator=(MagmaQueue&& rhs) noexcept { + swap(rhs); + return *this; } ~MagmaQueue() { magma_queue_destroy(queue_); + cublasDestroy(cublas_handle_); + cusparseDestroy(cusparse_handle_); } - inline operator magma_queue_t() { + operator magma_queue_t() const { return queue_; } - cudaStream_t getStream() const { - return magma_queue_get_cuda_stream(queue_); + // Allows a large number of calls that previously took a stream + // take a MagmaQueue, this makes all this code less intelligible + // but less verbose. Consider this carefully. + operator cudaStream_t() const { + return static_cast(stream_); + } + + const CudaStream& getStream() const { + return stream_; + } + + void swap(MagmaQueue& rhs) noexcept { + std::swap(stream_, rhs.stream_); + std::swap(cublas_handle_, rhs.cublas_handle_); + std::swap(cusparse_handle_, rhs.cusparse_handle_); + std::swap(queue_, rhs.queue_); } private: + CudaStream stream_; magma_queue_t queue_ = nullptr; + cublasHandle_t cublas_handle_ = nullptr; + cusparseHandle_t cusparse_handle_ = nullptr; }; -} // util -} // linalg -} // dca +} // namespace util +} // namespace linalg +} // namespace dca #endif // DCA_HAVE_CUDA #endif // DCA_LINALG_UTIL_MAGMA_QUEUE_HPP diff --git a/include/dca/math/function_transform/special_transforms/space_transform_2D_gpu.hpp b/include/dca/math/function_transform/special_transforms/space_transform_2D_gpu.hpp index 8ca5114d6..d990a92ae 100644 --- a/include/dca/math/function_transform/special_transforms/space_transform_2D_gpu.hpp +++ b/include/dca/math/function_transform/special_transforms/space_transform_2D_gpu.hpp @@ -48,7 +48,7 @@ class SpaceTransform2DGpu : private SpaceTransform2D { // Constructor // In: nw_pos: number of extended positive frequencies. // In: queue: the magma queue on which execute will run. - SpaceTransform2DGpu(int nw_pos, magma_queue_t queue); + SpaceTransform2DGpu(int nw_pos, const linalg::util::MagmaQueue& queue); // Performs the 2D fourier transform from real to momentum space in place and rearranges the // order of M's labels from (r, b, w) to (b, r, w). @@ -63,7 +63,7 @@ class SpaceTransform2DGpu : private SpaceTransform2D { // Returns: the stream associated with the magma queue. cudaStream_t get_stream() const { - return stream_; + return queue_.getStream(); } std::size_t deviceFingerprint() const { @@ -86,8 +86,7 @@ class SpaceTransform2DGpu : private SpaceTransform2D { const int nw_; const int nc_; - magma_queue_t queue_; - cudaStream_t stream_; + const linalg::util::MagmaQueue& queue_; std::shared_ptr workspace_; @@ -96,12 +95,12 @@ class SpaceTransform2DGpu : private SpaceTransform2D { }; template -SpaceTransform2DGpu::SpaceTransform2DGpu(const int nw_pos, magma_queue_t queue) +SpaceTransform2DGpu::SpaceTransform2DGpu(const int nw_pos, + const linalg::util::MagmaQueue& queue) : n_bands_(BDmn::dmn_size()), nw_(2 * nw_pos), nc_(RDmn::dmn_size()), queue_(queue), - stream_(magma_queue_get_cuda_stream(queue_)), plan1_(queue_), plan2_(queue_) { workspace_ = std::make_shared(); @@ -158,7 +157,7 @@ void SpaceTransform2DGpu::phaseFactorsAndRearrange(const RMatr BaseClass::hasPhaseFactors() ? getPhaseFactors().ptr() : nullptr; details::phaseFactorsAndRearrange(in.ptr(), in.leadingDimension(), out.ptr(), out.leadingDimension(), n_bands_, nc_, nw_, phase_factors_ptr, - stream_); + queue_); } template diff --git a/include/dca/phys/dca_step/cluster_solver/ctaux/accumulator/tp/tp_equal_time_accumulator.hpp b/include/dca/phys/dca_step/cluster_solver/ctaux/accumulator/tp/tp_equal_time_accumulator.hpp index 08d6d0417..f4f963a74 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctaux/accumulator/tp/tp_equal_time_accumulator.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctaux/accumulator/tp/tp_equal_time_accumulator.hpp @@ -71,7 +71,7 @@ class TpEqualTimeAccumulator { typedef func::dmn_variadic akima_nu_nu_r_dmn_t_shifted_t; public: - TpEqualTimeAccumulator(Parameters& parameters_ref, Data& MOMS_ref, int id); + TpEqualTimeAccumulator(const Parameters& parameters_ref, Data& MOMS_ref, int id); void resetAccumulation(); @@ -155,8 +155,8 @@ class TpEqualTimeAccumulator { }; private: - Parameters& parameters; - concurrency_type& concurrency; + const Parameters& parameters; + const concurrency_type& concurrency; Data& MOMS; int thread_id; @@ -235,7 +235,7 @@ class TpEqualTimeAccumulator { }; template -TpEqualTimeAccumulator::TpEqualTimeAccumulator(Parameters& parameters_ref, +TpEqualTimeAccumulator::TpEqualTimeAccumulator(const Parameters& parameters_ref, Data& MOMS_ref, int id) : parameters(parameters_ref), concurrency(parameters.get_concurrency()), diff --git a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_accumulator.hpp b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_accumulator.hpp index 718ea952e..f1fc5aa35 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_accumulator.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_accumulator.hpp @@ -85,7 +85,7 @@ class CtauxAccumulator : public MC_accumulator_data { typedef CT_AUX_HS_configuration configuration_type; - CtauxAccumulator(Parameters& parameters_ref, Data& data_ref, int id); + CtauxAccumulator(const Parameters& parameters_ref, Data& data_ref, int id); template void write(Writer& writer); @@ -176,9 +176,9 @@ class CtauxAccumulator : public MC_accumulator_data { void accumulate_two_particle_quantities(); protected: - Parameters& parameters_; + const Parameters& parameters_; Data& data_; - concurrency_type& concurrency; + const concurrency_type& concurrency; int thread_id; @@ -212,7 +212,7 @@ class CtauxAccumulator : public MC_accumulator_data { }; template -CtauxAccumulator::CtauxAccumulator(Parameters& parameters_ref, +CtauxAccumulator::CtauxAccumulator(const Parameters& parameters_ref, Data& data_ref, int id) : MC_accumulator_data(), @@ -278,7 +278,7 @@ void CtauxAccumulator::finalize() { for (int l = 0; l < M_r_w_stddev.size(); l++) M_r_w_stddev(l) = std::sqrt(abs(M_r_w_squared(l)) - std::pow(abs(M_r_w(l)), 2)); - Real factor = 1. / std::sqrt(parameters_.get_measurements() - 1); + Real factor = 1. / std::sqrt(parameters_.get_measurements().at(DCA_iteration) - 1); M_r_w_stddev *= factor; } diff --git a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp index 996b740a5..dfd575312 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp @@ -225,7 +225,7 @@ void CtauxClusterSolver::integrate() { Walker walker(parameters_, data_, rng_, 0); - walker.initialize(); + walker.initialize(dca_iteration_); { dca::profiling::WallTime start_time; @@ -252,7 +252,8 @@ void CtauxClusterSolver::integrate() { if (concurrency_.id() == concurrency_.first()) { std::cout << "On-node integration has ended: " << dca::util::print_time() - << "\n\nTotal number of measurements: " << parameters_.get_measurements() << std::endl; + << "\n\nTotal number of measurements: " + << parameters_.get_measurements()[dca_iteration_] << std::endl; walker.printSummary(); } @@ -345,7 +346,8 @@ void CtauxClusterSolver::measure(Walker& walke if (concurrency_.id() == concurrency_.first()) std::cout << "\n\t\t measuring has started \n" << std::endl; - const int n_meas = parallel::util::getWorkload(parameters_.get_measurements(), concurrency_); + const int n_meas = + parallel::util::getWorkload(parameters_.get_measurements()[dca_iteration_], concurrency_); for (int i = 0; i < n_meas; i++) { { @@ -486,7 +488,8 @@ void CtauxClusterSolver::collect_measurements( if (parameters_.additional_time_measurements()) { accumulator_.get_G_r_t() /= accumulated_sign_; data_.G_r_t = accumulator_.get_G_r_t(); - accumulator_.get_G_r_t_stddev() /= accumulated_sign_ * std::sqrt(parameters_.get_measurements()); + accumulator_.get_G_r_t_stddev() /= + accumulated_sign_ * std::sqrt(parameters_.get_measurements()[dca_iteration_]); accumulator_.get_charge_cluster_moment() /= accumulated_sign_; accumulator_.get_magnetic_cluster_moment() /= accumulated_sign_; accumulator_.get_dwave_pp_correlator() /= accumulated_sign_; @@ -498,8 +501,8 @@ void CtauxClusterSolver::collect_measurements( << "\n\t\t\t QMC-total-time : " << total_time_ << " [sec]" << "\n\t\t\t Gflop : " << accumulator_.get_Gflop() << " [Gf]" << "\n\t\t\t Gflop/s : " << accumulator_.get_Gflop() / local_time << " [Gf/s]" - << "\n\t\t\t sign : " << accumulated_sign_ / parameters_.get_measurements() - << " \n"; + << "\n\t\t\t sign : " + << accumulated_sign_ / parameters_.get_measurements()[dca_iteration_] << " \n"; averaged_ = true; } diff --git a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp index 958b6e159..965883bd9 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctaux/ctaux_walker.hpp @@ -63,7 +63,7 @@ class CtauxWalker : public WalkerBIT, public: CtauxWalker(/*const*/ Parameters& parameters, Data& MOMS_ref, Rng& rng_ref, int id); - void initialize(); + void initialize(int iteration); bool is_thermalized() const { return thermalized_; @@ -314,6 +314,8 @@ class CtauxWalker : public WalkerBIT, std::array m_computed_events_; bool config_initialized_; + + double sweeps_per_measurement_ = 1.; }; template @@ -428,8 +430,9 @@ void CtauxWalker::markThermalized() { } template -void CtauxWalker::initialize() { +void CtauxWalker::initialize(int iteration) { WalkerBIT::initialize(); + sweeps_per_measurement_ = parameters_.get_sweeps_per_measurement().at(iteration); number_of_creations = 0; number_of_annihilations = 0; @@ -490,7 +493,7 @@ void CtauxWalker::initialize() { template void CtauxWalker::doSweep() { Profiler profiler("do_sweep", "CT-AUX walker", __LINE__, thread_id); - const double sweeps_per_measurement{thermalized_ ? parameters_.get_sweeps_per_measurement() : 1.}; + const double sweeps_per_measurement{thermalized_ ? sweeps_per_measurement_ : 1.}; // Do at least one single spin update per sweep. const int single_spin_updates_per_sweep{warm_up_expansion_order_.count() > 0 && diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/accumulator/ctint_accumulator.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/accumulator/ctint_accumulator.hpp index ce62ec496..8e4dac51a 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/accumulator/ctint_accumulator.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/accumulator/ctint_accumulator.hpp @@ -15,6 +15,7 @@ #include #include +#include "dca/linalg/util/cuda_event.hpp" #include "dca/linalg/util/cuda_stream.hpp" #include "dca/phys/dca_data/dca_data.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/structs/ct_int_matrix_configuration.hpp" @@ -33,10 +34,10 @@ namespace solver { namespace ctint { // dca::phys::solver::ctint:: -template +template class CtintAccumulator { public: - using this_type = CtintAccumulator; + using this_type = CtintAccumulator; using ParametersType = Parameters; using DataType = phys::DcaData; @@ -95,18 +96,19 @@ class CtintAccumulator { const Parameters& parameters_; // Internal instantaneous configuration. - std::array, 2> M_; + std::array, 2> M_; MatrixConfiguration configuration_; int sign_ = 0; - std::vector streams_; + std::vector streams_; + linalg::util::CudaEvent event_; util::Accumulator accumulated_sign_; util::Accumulator accumulated_order_; const int thread_id_; - accumulator::SpAccumulator sp_accumulator_; + accumulator::SpAccumulator sp_accumulator_; accumulator::TpAccumulator tp_accumulator_; bool perform_tp_accumulation_ = false; @@ -116,9 +118,9 @@ class CtintAccumulator { float flop_ = 0.; }; -template +template template -CtintAccumulator::CtintAccumulator(const Parameters& pars, const Data& data, +CtintAccumulator::CtintAccumulator(const Parameters& pars, const Data& data, int id) : parameters_(pars), thread_id_(id), @@ -129,8 +131,8 @@ CtintAccumulator::CtintAccumulator(const Parameters& pars, c streams_.push_back(tp_accumulator_.get_stream()); } -template -void CtintAccumulator::initialize(const int dca_iteration) { +template +void CtintAccumulator::initialize(const int dca_iteration) { perform_tp_accumulation_ = parameters_.isAccumulatingG4() && dca_iteration == parameters_.get_dca_iterations() - 1; accumulated_order_.reset(); @@ -143,26 +145,38 @@ void CtintAccumulator::initialize(const int dca_iteration) { finalized_ = false; } -template +template template -void CtintAccumulator::updateFrom(Walker& walker) { - walker.computeM(M_, streams_); +void CtintAccumulator::updateFrom(Walker& walker) { + // Compute M. + walker.computeM(M_); + + if (device == linalg::GPU) { + for (int s = 0; s < 2; ++s) { + event_.record(walker.get_stream(s)); + // Synchronize sp accumulator streams with walker. + event_.block(*sp_accumulator_.get_streams()[s]); + // Synchronize both walker streams with tp accumulator. + event_.block(*tp_accumulator_.get_stream()); + } + } + configuration_ = walker.getConfiguration(); - sign_ = walker.getSign(); + sign_ = walker.get_sign(); flop_ += walker.stealFLOPs(); ready_ = true; } -template +template template -void CtintAccumulator::accumulate(Walker& walker) { +void CtintAccumulator::accumulate(Walker& walker) { updateFrom(walker); measure(); } -template -void CtintAccumulator::measure() { +template +void CtintAccumulator::measure() { if (!ready_ || sign_ == 0) throw(std::logic_error("No or invalid configuration to accumulate.")); @@ -176,8 +190,8 @@ void CtintAccumulator::measure() { ready_ = false; } -template -void CtintAccumulator::sumTo(this_type& other_one) { +template +void CtintAccumulator::sumTo(this_type& other_one) { other_one.accumulated_order_ += accumulated_order_; other_one.accumulated_sign_ += accumulated_sign_; @@ -189,8 +203,8 @@ void CtintAccumulator::sumTo(this_type& other_one) { other_one.flop_ += flop_; } -template -void CtintAccumulator::finalize() { +template +void CtintAccumulator::finalize() { if (finalized_) return; @@ -201,13 +215,13 @@ void CtintAccumulator::finalize() { finalized_ = true; } -template -const auto& CtintAccumulator::get_sign_times_M_r_w() const { +template +const auto& CtintAccumulator::get_sign_times_M_r_w() const { return sp_accumulator_.get_sign_times_M_r_w(); } -template -const auto& CtintAccumulator::get_sign_times_G4() const { +template +const auto& CtintAccumulator::get_sign_times_G4() const { if (!perform_tp_accumulation_) throw(std::logic_error("G4 was not accumulated.")); return tp_accumulator_.get_sign_times_G4(); diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp new file mode 100644 index 000000000..3aec98ac6 --- /dev/null +++ b/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp @@ -0,0 +1,510 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE.txt for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi(gbalduzz@itp.phys.ethz.ch) +// +// Cluster Monte Carlo integrator based on a CT-INT algorithm. + +#ifndef DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_CTINT_CTINT_CLUSTER_SOLVER_HPP +#define DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_CTINT_CTINT_CLUSTER_SOLVER_HPP + +#include +#include +#include +#include +#include +#include + +#include "dca/config/mc_options.hpp" +#include "dca/function/function.hpp" +#include "dca/linalg/matrix.hpp" +#include "dca/linalg/matrixop.hpp" +#include "dca/math/function_transform/function_transform.hpp" +#include "dca/math/statistics/util.hpp" +#include "dca/parallel/util/get_workload.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/accumulator/ctint_accumulator.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/details/solver_methods.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/domains/common_domains.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_choice.hpp" +//#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/time_correlator.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_loop/dca_loop_data.hpp" +#include "dca/phys/dca_step/symmetrization/symmetrize.hpp" +#include "dca/phys/domains/cluster/cluster_domain_aliases.hpp" +#include "dca/phys/four_point_type.hpp" +#include "dca/profiling/events/time.hpp" +#include "dca/util/print_time.hpp" + +namespace dca { +namespace phys { +namespace solver { +// dca::phys::solver:: + +template +class CtintClusterSolver { +public: + using Real = typename config::McOptions::MCScalar; + + using Data = DcaData; + static constexpr linalg::DeviceType device = device_t; + + CtintClusterSolver(Parameters& parameters_ref, Data& Data_ref); + + ~CtintClusterSolver(); + + // Initialize g0_interpolation and reset internal state. Must be called before integrate. + void initialize(int dca_iteration = 0); + + // Monte Carlo walk and accumulation. + void integrate(); + + // gather the walker's measurements and average them across the processes. + // Then it computes the final integration results. + // Postcondition: DcaData contains Sigma, G_r_t, G_r_w, G_k_w, G_k_t + double finalize(); + + // Calls finalize(). In addition: + // Postcondition: dca_info_struct contains metadata on the integration. + // Returns: L2 difference between Sigma and Sigma_cluster. + double finalize(DcaLoopData& dca_info_struct); + + double computeDensity() const; + + template + void write(Writer& /*writer*/) {} + + // For testing purposes. + // Returns the function G(k,w) without averaging across MPI ranks. + auto local_G_k_w() const; + +protected: // thread jacket interface. + using ParametersType = Parameters; + using DataType = Data; + using Rng = typename Parameters::random_number_generator; + using Profiler = typename Parameters::profiler_type; + using Concurrency = typename Parameters::concurrency_type; + using Lattice = typename Parameters::lattice_type; + + using Walker = ctint::CtintWalkerChoice; + using Accumulator = ctint::CtintAccumulator; + +private: + using BDmn = func::dmn_0; + using SDmn = func::dmn_0; + using CDA = ClusterDomainAliases; + using KDmn = typename CDA::KClusterDmn; + using RDmn = typename CDA::RClusterDmn; + using Nu = func::dmn_variadic; + using TDmn = func::dmn_0; + using WDmn = func::dmn_0; + using LabelDomain = func::dmn_variadic; + + using SpGreensFunction = + func::function, func::dmn_variadic>; + +protected: // Protected for testing purposes. + void warmUp(); + + void measure(); + + void computeG_k_w(const SpGreensFunction& G0, const SpGreensFunction& M_k_w, + SpGreensFunction& G_k_w) const; + + void computeSigma(const SpGreensFunction& G, const SpGreensFunction& G0, + SpGreensFunction& Sigma) const; + + // Returns: average sign. + double gatherMAndG4(SpGreensFunction& M, bool compute_error) const; + + double L2Difference() const; + + void computeErrorBars() const {} + +protected: + Parameters& parameters_; + Concurrency& concurrency_; + Data& data_; + + // Stores the integration result + Accumulator accumulator_; + + double total_time_ = 0; + double warm_up_time_ = 0; + int dca_iteration_ = 0; + +private: + bool perform_tp_accumulation_; + const LabelDomain label_dmn_; + std::unique_ptr walker_; + // Walker input. + ctint::G0Interpolation g0_; + Rng rng_; +}; + +template +CtintClusterSolver::CtintClusterSolver(Parameters& parameters_ref, + Data& data_ref) + : parameters_(parameters_ref), + concurrency_(parameters_.get_concurrency()), + data_(data_ref), + + accumulator_(parameters_, data_), + + rng_(concurrency_.id(), concurrency_.number_of_processors(), parameters_.get_seed()) { + Walker::setDMatrixBuilder(g0_); + // TimeCorrelator::setG0(g0_); + Walker::setInteractionVertices(data_, parameters_); + + if (concurrency_.id() == concurrency_.first()) + std::cout << "\n\n\t CT-INT Integrator is born \n\n"; +} + +template +CtintClusterSolver::~CtintClusterSolver() { + if (concurrency_.id() == concurrency_.first()) + std::cout << "\n\n\t CT-INT Integrator has died \n\n"; +} + +template +void CtintClusterSolver::initialize(int dca_iteration) { + dca_iteration_ = dca_iteration; + + g0_.initialize(ctint::details::shrinkG0(data_.G0_r_t_cluster_excluded)); + + Walker::setDMatrixAlpha(parameters_.getAlphas(), parameters_.adjustAlphaDd()); + + perform_tp_accumulation_ = + parameters_.isAccumulatingG4() && dca_iteration == parameters_.get_dca_iterations() - 1; + accumulator_.initialize(dca_iteration_); +} + +template +void CtintClusterSolver::integrate() { + walker_ = std::make_unique(parameters_, data_, rng_, 0); + walker_->initialize(dca_iteration_); + + dca::profiling::WallTime start_time; + auto getTime = [&]() { + dca::profiling::Duration split_time(dca::profiling::WallTime(), start_time); + return split_time.sec + 1.e-6 * split_time.usec; + }; + + if (concurrency_.id() == concurrency_.first()) + std::cout << "\n\t\t Warm up has started.\n" << std::endl; + warmUp(); + warm_up_time_ = getTime(); + + if (concurrency_.id() == concurrency_.first()) + std::cout << "\n\t\t Measuring has started.\n\n"; + measure(); + + total_time_ = getTime(); + + if (concurrency_.id() == concurrency_.first()) { + std::cout << "\n\tMeasuring has ended. Done " << parameters_.get_measurements()[dca_iteration_] + << " measurements.\n"; + walker_->printSummary(); + } +} + +template +double CtintClusterSolver::finalize() { + bool compute_error = false; + if (dca_iteration_ == parameters_.get_dca_iterations() - 1) { + if (parameters_.get_error_computation_type() == ErrorComputationType::JACK_KNIFE) { + if (concurrency_.id() == concurrency_.first()) + std::cout << "Computing jack knife error.\n\n"; + compute_error = true; + } + else if (parameters_.get_error_computation_type() == ErrorComputationType::STANDARD_DEVIATION) + std::cout << "CT-INT does not support ErrorComputationType::STANDARD_DEVIATION.\n" + << "Error computation will be skipped.\n"; + } + + SpGreensFunction M; + + // average M across ranks. + double avg_sign = gatherMAndG4(M, compute_error); + + // compute G_r_t and save it into data_. + computeG_k_w(data_.G0_k_w_cluster_excluded, M, data_.G_k_w); + symmetrize::execute(data_.G_k_w); + + // transform G_k_w and save into data_. + math::transform::FunctionTransform::execute(data_.G_k_w, data_.G_r_w); + symmetrize::execute(data_.G_r_w); + + // compute and save Sigma into data_ + // TODO: check if a better estimate exists + computeSigma(data_.G_k_w, data_.G0_k_w_cluster_excluded, data_.Sigma); + + // compute error + if (compute_error) { + data_.get_Sigma_error() = concurrency_.jackknifeError(data_.Sigma); + data_.get_G_k_w_error() = concurrency_.jackknifeError(data_.G_k_w); + if (perform_tp_accumulation_) { + for (int channel = 0; channel < data_.get_G4().size(); ++channel) + data_.get_G4_error()[channel] = concurrency_.jackknifeError(data_.get_G4()[channel]); + } + } + + // Fourier transform the Green's function. + // TODO check and write function + auto G_k_w_copy = data_.G_k_w; + G_k_w_copy -= data_.G0_k_w_cluster_excluded; + math::transform::FunctionTransform::execute(G_k_w_copy, data_.G_k_t); + data_.G_k_t += data_.G0_k_t_cluster_excluded; + math::transform::FunctionTransform::execute(data_.G_k_t, data_.G_r_t); + + auto local_time = total_time_; + concurrency_.sum(total_time_); + auto gflop = accumulator_.getFLOPs() * 1e-9; + concurrency_.sum(gflop); + + if (concurrency_.id() == 0) { + std::cout << "\n\t\t Collected measurements \t" << dca::util::print_time() << "\n" + << "\n\t\t\t QMC-local-time : " << local_time << " [sec]" + << "\n\t\t\t QMC-total-time : " << total_time_ << " [sec]" + << "\n\t\t\t Gflop : " << gflop << " [Gf]" + << "\n\t\t\t Gflop/s : " << gflop / local_time << " [Gf/s]" + << "\n\t\t\t sign : " << avg_sign << "\n\t\t\t Density = " << computeDensity() + << "\n" + << std::endl; + } + + return avg_sign; +} + +template +double CtintClusterSolver::finalize( + DcaLoopData& loop_data) { + double avg_sign = finalize(); + // Compute and save into loop_data Sigma_zero_mom and std deviation + for (int nu = 0; nu < Nu::dmn_size(); nu++) { + for (int k = 0; k < KDmn::dmn_size(); k++) { + std::vector x; + for (int l = 0; l < WDmn::dmn_size() / 4; l++) + x.push_back(real(data_.Sigma(nu, nu, k, l))); + + loop_data.Sigma_zero_moment(nu, k, dca_iteration_) = math::statistics::util::mean(x); + loop_data.standard_deviation(nu, k, dca_iteration_) = + math::statistics::util::standard_deviation(x); + } + } + + loop_data.average_expansion_order(dca_iteration_) = accumulator_.avgOrder(); + loop_data.sign(dca_iteration_) = avg_sign; // This is already averaged. + loop_data.MC_integration_per_mpi_task(dca_iteration_) = total_time_; // This is already averaged. + loop_data.thermalization_per_mpi_task(dca_iteration_) = warm_up_time_; + + if (dca_iteration_ == parameters_.get_dca_iterations() - 1) { + concurrency_.delayedSum(loop_data.Sigma_zero_moment); + concurrency_.delayedSum(loop_data.standard_deviation); + concurrency_.delayedSum(loop_data.average_expansion_order); + concurrency_.delayedSum(loop_data.thermalization_per_mpi_task); + + concurrency_.resolveSums(); + + loop_data.Sigma_zero_moment /= concurrency_.number_of_processors(); + loop_data.Sigma_zero_moment /= concurrency_.number_of_processors(); + loop_data.standard_deviation /= concurrency_.number_of_processors(); + loop_data.average_expansion_order /= concurrency_.number_of_processors(); + loop_data.thermalization_per_mpi_task /= concurrency_.number_of_processors(); + } + + // Free walker memory for the dca loop. + walker_.release(); + // Compute and return L2 difference between Sigma and Sigma cluster. + return loop_data.L2_Sigma_difference(dca_iteration_) = L2Difference(); +} + +template +void CtintClusterSolver::warmUp() { + const int n_sweep = parameters_.get_warm_up_sweeps(); + for (int i = 0; i < n_sweep; i++) { + walker_->doSweep(); + + walker_->updateShell(i, n_sweep); + } + + walker_->markThermalized(); +} + +template +void CtintClusterSolver::measure() { + const int n_meas = parallel::util::getWorkload(parameters_.get_measurements()[dca_iteration_], 1, + 0, concurrency_); + + for (int i = 0; i < n_meas; i++) { + { + Profiler profiler("updating", "QMCI", __LINE__); + walker_->doSweep(); + } + { + Profiler profiler("measurements", "QMCI", __LINE__); + accumulator_.accumulate(*walker_); + } + walker_->updateShell(i, n_meas); + } + + accumulator_.finalize(); +} + +template +void CtintClusterSolver::computeG_k_w( + const SpGreensFunction& G0, const SpGreensFunction& M_k_w, SpGreensFunction& G_k_w) const { + const int matrix_dim = Nu::dmn_size(); + dca::linalg::Matrix, dca::linalg::CPU> G0_M(matrix_dim, matrix_dim); + + const char op = 'N'; + const double one_over_beta = 1. / parameters_.get_beta(); + + // G(w) = g0_(w) - g0_(w) *M(w)* g0_(w) / beta + G_k_w = G0; + for (int k_ind = 0; k_ind < KDmn::dmn_size(); k_ind++) { + for (int w_ind = 0; w_ind < WDmn::dmn_size(); w_ind++) { + // G0_M <- G0 * M + dca::linalg::blas::gemm(&op, &op, matrix_dim, matrix_dim, matrix_dim, 1., + &G0(0, 0, k_ind, w_ind), matrix_dim, &M_k_w(0, 0, k_ind, w_ind), + matrix_dim, 0., G0_M.ptr(), G0_M.leadingDimension()); + + // G -= G0 M G0 / beta + dca::linalg::blas::gemm(&op, &op, matrix_dim, matrix_dim, matrix_dim, -one_over_beta, + G0_M.ptr(), G0_M.leadingDimension(), &G0(0, 0, k_ind, w_ind), + matrix_dim, 1., &G_k_w(0, 0, k_ind, w_ind), matrix_dim); + } + } +} + +template +void CtintClusterSolver::computeSigma( + const SpGreensFunction& G, const SpGreensFunction& G0, SpGreensFunction& Sigma) const { + const int matrix_dim = Nu::dmn_size(); + + dca::linalg::Matrix, dca::linalg::CPU> G_inv(matrix_dim); + dca::linalg::Matrix, dca::linalg::CPU> G0_inv(matrix_dim); + dca::linalg::Vector ipiv; + dca::linalg::Vector, dca::linalg::CPU> work; + + // Sigma = 1/G0 - 1/G + for (int k_ind = 0; k_ind < KDmn::dmn_size(); k_ind++) { + for (int w_ind = 0; w_ind < WDmn::dmn_size(); w_ind++) { + dca::linalg::matrixop::copyArrayToMatrix(matrix_dim, matrix_dim, &G(0, 0, k_ind, w_ind), + matrix_dim, G_inv); + dca::linalg::matrixop::smallInverse(G_inv, ipiv, work); + + dca::linalg::matrixop::copyArrayToMatrix(matrix_dim, matrix_dim, &G0(0, 0, k_ind, w_ind), + matrix_dim, G0_inv); + dca::linalg::matrixop::smallInverse(G0_inv, ipiv, work); + + for (int nu2 = 0; nu2 < matrix_dim; ++nu2) + for (int nu1 = 0; nu1 < matrix_dim; ++nu1) + Sigma(nu1, nu2, k_ind, w_ind) = G0_inv(nu1, nu2) - G_inv(nu1, nu2); + } + } + + symmetrize::execute(data_.Sigma, data_.H_symmetry); + // TODO : if it is needed implement. + // if (parameters_.adjust_self_energy_for_double_counting()) + // adjust_self_energy_for_double_counting(); +} + +template +double CtintClusterSolver::L2Difference() const { + const double alpha = parameters_.get_self_energy_mixing_factor(); + + for (int l = 0; l < data_.Sigma.size(); l++) + data_.Sigma(l) = alpha * data_.Sigma(l) + (1. - alpha) * data_.Sigma_cluster(l); + + const int offset = std::min(1, WDmn::dmn_size() / 2); + + double diff_L2 = 0.; + for (int w_ind = WDmn::dmn_size() / 2; w_ind < WDmn::dmn_size() / 2 + offset; w_ind++) { + for (int k_ind = 0; k_ind < KDmn::dmn_size(); k_ind++) { + for (int l1 = 0; l1 < Nu::dmn_size(); l1++) { + diff_L2 += std::pow( + std::abs(data_.Sigma(l1, l1, k_ind, w_ind) - data_.Sigma_cluster(l1, l1, k_ind, w_ind)), + 2); + } + } + } + + double L2_error = std::sqrt(diff_L2) / double(KDmn::dmn_size()); + if (concurrency_.id() == concurrency_.first()) + std::cout << "\n\t\t |Sigma_QMC - Sigma_cg|_2 ~ " << L2_error << "\n\n"; + + return L2_error; +} + +template +double CtintClusterSolver::computeDensity() const { + double result(0.); + const int t0_minus = TDmn::dmn_size() / 2 - 1; + for (int i = 0; i < Nu::dmn_size(); i++) + result += data_.G_r_t(i, i, RDmn::parameter_type::origin_index(), t0_minus); + + return result; +} + +template +double CtintClusterSolver::gatherMAndG4(SpGreensFunction& M, + bool compute_error) const { + const auto& M_r = accumulator_.get_sign_times_M_r_w(); + math::transform::FunctionTransform::execute(M_r, M); + + double sign = accumulator_.get_accumulated_sign(); + + symmetrize::execute(M, data_.H_symmetry); + + // TODO: delay sum. + auto collect = [&](auto& f) { + if (compute_error) + concurrency_.leaveOneOutSum(f); + else + concurrency_.sum(f); + }; + + collect(M); + collect(sign); + + std::size_t n_meas = accumulator_.get_number_of_measurements(); + concurrency_.sum(n_meas); + + M /= std::complex(sign, 0.); + + if (perform_tp_accumulation_) { + for (int channel = 0; channel < data_.get_G4().size(); ++channel) { + auto& G4 = data_.get_G4()[channel]; + G4 = accumulator_.get_sign_times_G4()[channel]; + collect(G4); + G4 /= sign * parameters_.get_beta() * parameters_.get_beta(); + } + } + + return sign / double(n_meas); +} + +template +auto CtintClusterSolver::local_G_k_w() const { + const auto& M_r = accumulator_.get_sign_times_M_r_w(); + SpGreensFunction M; + math::transform::FunctionTransform::execute(M_r, M); + + const double sign = accumulator_.get_accumulated_sign(); + + M /= sign; + SpGreensFunction G_k_w("G_k_w"); + computeG_k_w(data_.G0_k_w_cluster_excluded, M, G_k_w); + + return G_k_w; +} + +} // namespace solver +} // namespace phys +} // namespace dca + +#endif // DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_CTINT_CTINT_CLUSTER_SOLVER_HPP diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp index 0932ec613..c063ddf2c 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp @@ -61,6 +61,9 @@ class CtintWalkerBase { using MatrixPair = std::array, 2>; using CudaStream = linalg::util::CudaStream; + using Scalar = Real; + constexpr static linalg::DeviceType device = linalg::CPU; + protected: // The class is not instantiable. CtintWalkerBase(const Parameters& pars_ref, Rng& rng_ref, int id = 0); @@ -102,7 +105,7 @@ class CtintWalkerBase { return Real(n_accepted_) / Real(n_steps_); } - void initialize(); + void initialize(int iter); const auto& get_configuration() const { return configuration_; @@ -158,6 +161,8 @@ class CtintWalkerBase { return *streams_[s]; } + static void sumConcurrency(const Concurrency&) {} + protected: // typedefs using RDmn = typename Parameters::RClusterDmn; @@ -202,6 +207,8 @@ class CtintWalkerBase { float flop_ = 0.; + double sweeps_per_meas_ = 1.; + double mc_log_weight_ = 0; private: @@ -229,8 +236,11 @@ CtintWalkerBase::CtintWalkerBase(const Parameters& parameters_ total_interaction_(vertices_.integratedInteraction()) {} template -void CtintWalkerBase::initialize() { +void CtintWalkerBase::initialize(int iteration) { assert(total_interaction_); + + sweeps_per_meas_ = parameters_.get_sweeps_per_measurement().at(iteration); + sign_ = 1; mc_log_weight_ = 1.; @@ -247,7 +257,7 @@ void CtintWalkerBase::initialize() { template void CtintWalkerBase::setMFromConfig() { - mc_log_weight_ = 1.; + mc_log_weight_ = 0.; sign_ = 1; for (int s = 0; s < 2; ++s) { @@ -265,7 +275,7 @@ void CtintWalkerBase::setMFromConfig() { if (M.nrRows()) { const auto [log_det, sign] = linalg::matrixop::inverseAndLogDeterminant(M); - mc_log_weight_ += log_det; + mc_log_weight_ -= log_det; // Weight proportional to det(M^{-1}) sign_ *= sign; } } @@ -291,8 +301,7 @@ template void CtintWalkerBase::markThermalized() { thermalized_ = true; - nb_steps_per_sweep_ = - std::ceil(parameters_.get_sweeps_per_measurement() * partial_order_avg_.mean()); + nb_steps_per_sweep_ = std::max(1., std::ceil(sweeps_per_meas_ * partial_order_avg_.mean())); order_avg_.reset(); sign_avg_.reset(); diff --git a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator.hpp b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator.hpp index 617081591..582742386 100644 --- a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator.hpp +++ b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator.hpp @@ -24,6 +24,7 @@ #include "dca/math/nfft/dnfft_1d.hpp" #include "dca/linalg/device_type.hpp" #include "dca/linalg/matrix.hpp" +#include "dca/linalg/util/cuda_stream.hpp" #include "dca/phys/error_computation_type.hpp" #include "dca/phys/domains/quantum/electron_band_domain.hpp" #include "dca/phys/domains/quantum/electron_spin_domain.hpp" @@ -55,7 +56,7 @@ class SpAccumulator { using PDmn = func::dmn_variadic; public: - SpAccumulator(/*const*/ Parameters& parameters_ref, bool accumulate_m_squared = false); + SpAccumulator(const Parameters& parameters_ref, bool accumulate_m_squared = false); void resetAccumulation(); @@ -81,9 +82,13 @@ class SpAccumulator { return 0; } + std::vector get_streams() const { + return std::vector(); + } + protected: constexpr static int oversampling = 8; - /*const*/ Parameters& parameters_; + const Parameters& parameters_; bool initialized_ = false; bool finalized_ = false; @@ -101,7 +106,7 @@ class SpAccumulator { }; template -SpAccumulator::SpAccumulator(/*const*/ Parameters& parameters_ref, +SpAccumulator::SpAccumulator(const Parameters& parameters_ref, const bool accumulate_m_sqr) : parameters_(parameters_ref), accumulate_m_sqr_(accumulate_m_sqr) {} diff --git a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp index c768db23b..20665eb0d 100644 --- a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp +++ b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp @@ -25,8 +25,8 @@ #include #include -#include "dca/linalg/util/cuda_stream.hpp" #include "dca/linalg/util/cuda_event.hpp" +#include "dca/linalg/util/cuda_stream.hpp" #include "dca/math/nfft/dnfft_1d_gpu.hpp" #include "dca/phys/error_computation_type.hpp" @@ -50,7 +50,7 @@ class SpAccumulator using typename BaseClass::Profiler; public: - SpAccumulator(/*const*/ Parameters& parameters_ref, bool accumulate_m_squared = false); + SpAccumulator(const Parameters& parameters_ref, bool accumulate_m_squared = false); void resetAccumulation(); @@ -77,8 +77,8 @@ class SpAccumulator event.block(stream); } - const auto& get_streams() const { - return streams_; + auto get_streams() { + return std::array{&streams_[0], &streams_[1]}; } // Returns the allocated device memory in bytes. @@ -100,8 +100,8 @@ class SpAccumulator }; template -SpAccumulator::SpAccumulator(/*const*/ Parameters& parameters_ref, - const bool accumulate_m_sqr) +SpAccumulator::SpAccumulator(const Parameters& parameters_ref, + const bool accumulate_m_sqr) : BaseClass(parameters_ref, accumulate_m_sqr), streams_(), cached_nfft_obj_{NfftType(parameters_.get_beta(), streams_[0], accumulate_m_sqr), @@ -174,7 +174,8 @@ void SpAccumulator::finalize() { } template -void SpAccumulator::sumTo(SpAccumulator& other) { +void SpAccumulator::sumTo( + SpAccumulator& other) { for (int s = 0; s < 2; ++s) other.cached_nfft_obj_[s] += cached_nfft_obj_[s]; } diff --git a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu.hpp b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu.hpp index 5a25db8b7..37a103087 100644 --- a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu.hpp +++ b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu.hpp @@ -50,8 +50,8 @@ class CachedNdft using Complex = std::complex; using Matrix = linalg::Matrix; - using RMatrix = linalg::ReshapableMatrix>; + using RMatrix = + linalg::ReshapableMatrix>; using MatrixHost = linalg::Matrix; @@ -196,7 +196,7 @@ double CachedNdft:: auto& T_times_M = M_out; bool realloc = T_times_M.resizeNoCopy(std::make_pair(nw / 2 * n_orbitals_, order)); - util::ignoreUnused(realloc); + dca::util::ignoreUnused(realloc); assert(!realloc); T_times_M.setToZero(stream_); diff --git a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp index 523a7b44e..0e536acc2 100644 --- a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp +++ b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator.hpp @@ -23,6 +23,7 @@ #include "dca/linalg/matrix.hpp" #include "dca/linalg/matrix_view.hpp" #include "dca/linalg/matrixop.hpp" +#include "dca/linalg/util/cuda_stream.hpp" #include "dca/math/function_transform/special_transforms/space_transform_2D.hpp" #include "dca/phys/dca_data/dca_data.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_cpu.hpp" @@ -122,6 +123,11 @@ class TpAccumulator { return 0; } + const linalg::util::CudaStream* get_stream() const { + static const dca::linalg::util::CudaStream mock_stream; + return &mock_stream; + } + protected: void initializeG0(); diff --git a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp index 0884d544f..51cd67b03 100644 --- a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp +++ b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_gpu.hpp @@ -87,8 +87,8 @@ class TpAccumulator : public TpAccumulator : public TpAccumulator : public TpAccumulator : public TpAccumulator; std::array queues_; - std::array streams_; linalg::util::CudaEvent event_; std::vector> workspaces_; @@ -208,16 +207,15 @@ TpAccumulator::TpAccumulator( const Parameters& pars, const int thread_id) : BaseClass(G0, pars, thread_id), queues_(), - streams_{queues_[0].getStream(), queues_[1].getStream()}, ndft_objs_{NdftType(queues_[0]), NdftType(queues_[1])}, space_trsf_objs_{DftType(n_pos_frqs_, queues_[0]), DftType(n_pos_frqs_, queues_[1])}, nr_accumulators_(pars.get_accumulators()) { initializeG4Helpers(); // Create shared workspaces. - for (int i = 0; i < n_ndft_streams_; ++i) { + for (int i = 0; i < n_ndft_queues_; ++i) { workspaces_.emplace_back(std::make_shared()); - workspaces_[i]->setStream(streams_[i]); + workspaces_[i]->setStream(queues_[i]); ndft_objs_[i].setWorkspace(workspaces_[i]); space_trsf_objs_[i].setWorkspace(workspaces_[i]); } @@ -225,9 +223,9 @@ TpAccumulator::TpAccumulator( template void TpAccumulator::resetAccumulation(const unsigned int dca_loop) { - static util::OncePerLoopFlag flag; + static dca::util::OncePerLoopFlag flag; - util::callOncePerLoop(flag, dca_loop, [&]() { + dca::util::callOncePerLoop(flag, dca_loop, [&]() { resetG4(); initializeG0(); synchronizeStreams(); @@ -254,7 +252,7 @@ void TpAccumulator::initializeG0() { for (int b1 = 0; b1 < n_bands_; ++b1) G0_host[s](bkw_dmn(b1, k, w), b2) = (*G0_ptr_)(b1, s, b2, s, k, w + sp_index_offset); - G0[s].setAsync(G0_host[s], streams_[s]); + G0[s].setAsync(G0_host[s], queues_[s]); } } @@ -267,10 +265,10 @@ void TpAccumulator::resetG4() { try { typename BaseClass::TpDomain tp_dmn; if (!multiple_accumulators_) { - G4_channel.setStream(streams_[0]); + G4_channel.setStream(queues_[0]); } G4_channel.resizeNoCopy(tp_dmn.get_size()); - G4_channel.setToZeroAsync(streams_[0]); + G4_channel.setToZeroAsync(queues_[0]); } catch (std::bad_alloc& err) { std::cerr << "Failed to allocate G4 on device.\n"; @@ -326,7 +324,7 @@ float TpAccumulator::accumulate( const std::array& configs, const int sign) { std::array, 2> M_dev; for (int s = 0; s < 2; ++s) - M_dev[s].setAsync(M[s], streams_[0]); + M_dev[s].setAsync(M[s], queues_[0]); return accumulate(M_dev, configs, sign); } @@ -336,7 +334,7 @@ template float TpAccumulator::computeM( const std::array, 2>& M_pair, const std::array& configs) { - auto stream_id = [&](const int s) { return n_ndft_streams_ == 1 ? 0 : s; }; + auto stream_id = [&](const int s) { return n_ndft_queues_ == 1 ? 0 : s; }; float flop = 0.; @@ -356,9 +354,9 @@ float TpAccumulator::computeM( template void TpAccumulator::computeG() { - if (n_ndft_streams_ == 1) { - event_.record(streams_[0]); - event_.block(streams_[1]); + if (n_ndft_queues_ == 1) { + event_.record(queues_[0]); + event_.block(queues_[1]); } { Profiler prf("ComputeG: HOST", "tp-accumulation", __LINE__, thread_id_); @@ -370,14 +368,14 @@ void TpAccumulator::computeG() { } } - event_.record(streams_[1]); - event_.block(streams_[0]); + event_.record(queues_[1]); + event_.block(queues_[0]); } template void TpAccumulator::computeGSingleband(const int s) { details::computeGSingleband(G_[s].ptr(), G_[s].leadingDimension(), get_G0()[s].ptr(), - KDmn::dmn_size(), n_pos_frqs_, beta_, streams_[s]); + KDmn::dmn_size(), n_pos_frqs_, beta_, queues_[s]); assert(cudaPeekAtLastError() == cudaSuccess); } @@ -385,7 +383,7 @@ template void TpAccumulator::computeGMultiband(const int s) { details::computeGMultiband(G_[s].ptr(), G_[s].leadingDimension(), get_G0()[s].ptr(), get_G0()[s].leadingDimension(), n_bands_, KDmn::dmn_size(), - n_pos_frqs_, beta_, streams_[s]); + n_pos_frqs_, beta_, queues_[s]); assert(cudaPeekAtLastError() == cudaSuccess); } @@ -399,8 +397,7 @@ float TpAccumulator::updateG4(const std::size_t channel // b2 ------------------------ b4 // TODO: set stream only if this thread gets exclusive access to G4. - // get_G4().setStream(streams_[0]); - + // get_G4().setStream(queues_[0]); const FourPointType channel = channels_[channel_index]; typename BaseClass::TpDomain tp_dmn; uint64_t start = 0; @@ -409,32 +406,32 @@ float TpAccumulator::updateG4(const std::size_t channel case PARTICLE_HOLE_TRANSVERSE: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start, end); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start, end); case PARTICLE_HOLE_MAGNETIC: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start, end); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start, end); case PARTICLE_HOLE_CHARGE: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start, end); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start, end); case PARTICLE_HOLE_LONGITUDINAL_UP_UP: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start, end); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start, end); case PARTICLE_HOLE_LONGITUDINAL_UP_DOWN: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start, end); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start, end); case PARTICLE_PARTICLE_UP_DOWN: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start, end); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start, end); default: throw std::logic_error("Specified four point type not implemented."); @@ -458,7 +455,7 @@ void TpAccumulator::finalize() { template void TpAccumulator::synchronizeStreams() { - for (auto stream : streams_) + for (auto& stream : queues_) cudaStreamSynchronize(stream); } diff --git a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp index f0ffd517f..025ec8410 100644 --- a/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp +++ b/include/dca/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_mpi_gpu.hpp @@ -51,7 +51,7 @@ class TpAccumulator using BaseClass::n_bands_; using BaseClass::sign_; using BaseClass::multiple_accumulators_; - using BaseClass::streams_; + using BaseClass::queues_; using BaseClass::G_; using BaseClass::thread_id_; using BaseClass::nr_accumulators_; @@ -65,7 +65,7 @@ class TpAccumulator using BaseClass::workspaces_; using BaseClass::ndft_objs_; using BaseClass::space_trsf_objs_; - + uint64_t start_; uint64_t end_; @@ -139,9 +139,9 @@ TpAccumulator::TpAccumulator( // I think it unlikely that want multiple streams for dist G4 // since there is a memory cost. - for (int i = 0; i < BaseClass::n_ndft_streams_; ++i) { + for (int i = 0; i < BaseClass::n_ndft_queues_; ++i) { workspaces_.emplace_back(std::make_shared()); - workspaces_[i]->setStream(streams_[i]); + workspaces_[i]->setStream(queues_[i]); ndft_objs_[i].setWorkspace(workspaces_[i]); space_trsf_objs_[i].setWorkspace(workspaces_[i]); } @@ -182,7 +182,7 @@ float TpAccumulator::accumulate( const std::array& configs, const int sign) { std::array, 2> M_dev; for (int s = 0; s < 2; ++s) - M_dev[s].setAsync(M[s], streams_[0]); + M_dev[s].setAsync(M[s], queues_[0]); return accumulate(M_dev, configs, sign); } @@ -229,11 +229,11 @@ void TpAccumulator::resetG4() { for (auto& G4_channel : get_G4()) { try { if (!BaseClass::multiple_accumulators_) { - G4_channel.setStream(BaseClass::streams_[0]); + G4_channel.setStream(BaseClass::queues_[0]); } G4_channel.resizeNoCopy(end_ - start_); - G4_channel.setToZeroAsync(BaseClass::streams_[0]); + G4_channel.setToZeroAsync(BaseClass::queues_[0]); } catch (std::bad_alloc& err) { std::cerr << "Failed to allocate G4 on device.\n"; @@ -252,7 +252,7 @@ float TpAccumulator::updateG4(const std: // b2 ------------------------ b4 // TODO: set stream only if this thread gets exclusive access to G4. - // get_G4().setStream(streams_[0]); + // get_G4().setStream(queues_[0]); const FourPointType channel = BaseClass::channels_[channel_index]; @@ -264,32 +264,32 @@ float TpAccumulator::updateG4(const std: case PARTICLE_HOLE_TRANSVERSE: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start_, end_); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start_, end_); case PARTICLE_HOLE_MAGNETIC: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start_, end_); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start_, end_); case PARTICLE_HOLE_CHARGE: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start_, end_); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start_, end_); case PARTICLE_HOLE_LONGITUDINAL_UP_UP: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start_, end_); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start_, end_); case PARTICLE_HOLE_LONGITUDINAL_UP_DOWN: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start_, end_); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start_, end_); case PARTICLE_PARTICLE_UP_DOWN: return details::updateG4( get_G4()[channel_index].ptr(), G_[0].ptr(), G_[0].leadingDimension(), G_[1].ptr(), - G_[1].leadingDimension(), sign_, multiple_accumulators_, streams_[0], start_, end_); + G_[1].leadingDimension(), sign_, multiple_accumulators_, queues_[0], start_, end_); default: throw std::logic_error("Specified four point type not implemented."); diff --git a/include/dca/phys/dca_step/cluster_solver/shared_tools/util/accumulator.hpp b/include/dca/phys/dca_step/cluster_solver/shared_tools/util/accumulator.hpp index 09776d57a..23127fe0b 100644 --- a/include/dca/phys/dca_step/cluster_solver/shared_tools/util/accumulator.hpp +++ b/include/dca/phys/dca_step/cluster_solver/shared_tools/util/accumulator.hpp @@ -52,15 +52,15 @@ struct MeanType, typename std::enable_if_t; }; -} // details +} // namespace details template class Accumulator { +public: using SampleType = T; using MeanType = typename details::MeanType::type; using CountType = std::size_t; -public: Accumulator() : count_{}, sum_{} {} void addSample(const SampleType& sample) { @@ -68,6 +68,12 @@ class Accumulator { ++count_; } + Accumulator& operator+=(const Accumulator& other) { + count_ += other.count_; + sum_ += other.sum_; + return *this; + } + CountType count() const { return count_; } @@ -90,14 +96,21 @@ class Accumulator { sum_ = {}; } + template + void sumConcurrency(const Concurrency& concurrency) { + // TODO: delay + concurrency.sum(count_); + concurrency.sum(sum_); + } + private: CountType count_; SampleType sum_; }; -} // util -} // solver -} // phys -} // dca +} // namespace util +} // namespace solver +} // namespace phys +} // namespace dca #endif // DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_SHARED_TOOLS_UTIL_ACCUMULATOR_HPP diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/accumulator/sp/sp_accumulator_nfft.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/accumulator/sp/sp_accumulator_nfft.hpp index 68e73f829..ac3d850e2 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/accumulator/sp/sp_accumulator_nfft.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/accumulator/sp/sp_accumulator_nfft.hpp @@ -71,7 +71,7 @@ class SpAccumulatorNfft { void sumTo(SpAccumulatorNfft& other) const; public: - SpAccumulatorNfft(parameters_type& parameters_ref); + SpAccumulatorNfft(const parameters_type& parameters_ref); void initialize(func::function, func::dmn_variadic>& G_r_w, func::function, func::dmn_variadic>& GS_r_w); @@ -89,8 +89,8 @@ class SpAccumulatorNfft { double t_start, int flavor); private: - parameters_type& parameters; - concurrency_type& concurrency; + const parameters_type& parameters; + const concurrency_type& concurrency; int N_spin_orbitals; @@ -102,7 +102,8 @@ class SpAccumulatorNfft { }; template -SpAccumulatorNfft::SpAccumulatorNfft(parameters_type& parameters_ref) +SpAccumulatorNfft::SpAccumulatorNfft( + const parameters_type& parameters_ref) : parameters(parameters_ref), concurrency(parameters.get_concurrency()), @@ -255,9 +256,9 @@ void SpAccumulatorNfft::sumTo( other.cached_nfft_1D_GS_obj += cached_nfft_1D_GS_obj; } -} // cthyb -} // solver -} // phys -} // dca +} // namespace cthyb +} // namespace solver +} // namespace phys +} // namespace dca #endif // DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_SS_CT_HYB_ACCUMULATOR_SP_SP_ACCUMULATOR_NFFT_HPP diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_accumulator.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_accumulator.hpp index 81b685944..0653215e3 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_accumulator.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_accumulator.hpp @@ -74,7 +74,7 @@ class SsCtHybAccumulator : public MC_accumulator_data, typedef func::function M_matrix_type; public: - SsCtHybAccumulator(parameters_type& parameters_ref, Data& data_ref, int id = 0); + SsCtHybAccumulator(const parameters_type& parameters_ref, Data& data_ref, int id = 0); void initialize(int dca_iteration); @@ -142,9 +142,9 @@ class SsCtHybAccumulator : public MC_accumulator_data, using MC_accumulator_data::current_sign; using MC_accumulator_data::accumulated_sign; - parameters_type& parameters_; + const parameters_type& parameters_; Data& data_; - concurrency_type& concurrency; + const concurrency_type& concurrency; int thread_id; @@ -165,7 +165,7 @@ class SsCtHybAccumulator : public MC_accumulator_data, }; template -SsCtHybAccumulator::SsCtHybAccumulator(parameters_type& parameters_ref, +SsCtHybAccumulator::SsCtHybAccumulator(const parameters_type& parameters_ref, Data& data_ref, int id) : ss_hybridization_solver_routines(parameters_ref, data_ref), diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_walker.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_walker.hpp index 3d9ec2fed..520e9a0db 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_walker.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_ct_hyb_walker.hpp @@ -82,7 +82,7 @@ class SsCtHybWalker { * \brief Initializes the configuration and sets \f$\mu_i = \frac12 \sum_j * \frac{U_{ij}+U_{ji}}{2}\f$. */ - void initialize(); // func::function mu_DC); + void initialize(int iteration); // func::function mu_DC); /*! * \brief Returns if the configuration has gone through a warm-up sweep. @@ -168,7 +168,7 @@ class SsCtHybWalker { private: parameters_type& parameters; MOMS_type& MOMS; - concurrency_type& concurrency; + const concurrency_type& concurrency; rng_type& rng; @@ -205,6 +205,8 @@ class SsCtHybWalker { double p_0; double p_1; + + double sweeps_per_measurement_ = 1.; }; template @@ -253,7 +255,7 @@ void SsCtHybWalker::printSummary() const { } template -void SsCtHybWalker::initialize() { +void SsCtHybWalker::initialize(int iteration) { ss_hybridization_solver_routines_obj.initialize_functions(); ss_hybridization_walker_routines_obj.initialize_akima_coefficients(F_r_t); @@ -286,6 +288,8 @@ void SsCtHybWalker::initialize() { M(i).resize(0); } } + + sweeps_per_measurement_ = parameters.get_sweeps_per_measurement().at(iteration); } template @@ -332,9 +336,7 @@ void SsCtHybWalker::test_interpolation() { template void SsCtHybWalker::doSweep() { - double factor = 1.; - if (thermalized) - factor = parameters.get_sweeps_per_measurement(); + const double factor = thermalized ? sweeps_per_measurement_ : 1.; int nr_of_segments = std::max(16, configuration.size()); diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_hybridization_solver_routines.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_hybridization_solver_routines.hpp index 1c46297bb..a842218da 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_hybridization_solver_routines.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/ss_hybridization_solver_routines.hpp @@ -54,7 +54,7 @@ class ss_hybridization_solver_routines { using nu_nu_k_DCA_w = func::dmn_variadic; public: - ss_hybridization_solver_routines(parameters_t& parameters_ref, MOMS_t& MOMS_ref); + ss_hybridization_solver_routines(const parameters_t& parameters_ref, MOMS_t& MOMS_ref); void initialize(); @@ -99,9 +99,9 @@ class ss_hybridization_solver_routines { void compensate_for_moments(func::function, nu_nu_k_DCA_t>& f_source); private: - parameters_type& parameters; + const parameters_type& parameters; MOMS_type& MOMS; - concurrency_type& concurrency; + const concurrency_type& concurrency; std::vector is_interacting_band_vector; @@ -118,7 +118,7 @@ class ss_hybridization_solver_routines { template ss_hybridization_solver_routines::ss_hybridization_solver_routines( - parameters_t& parameters_ref, MOMS_t& MOMS_ref) + const parameters_t& parameters_ref, MOMS_t& MOMS_ref) : parameters(parameters_ref), MOMS(MOMS_ref), concurrency(parameters.get_concurrency()), diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/anti_segment_tools.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/anti_segment_tools.hpp index bf1569caf..292b56db8 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/anti_segment_tools.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/anti_segment_tools.hpp @@ -80,7 +80,7 @@ class anti_segment_tools { private: hybridization_routines_type& hybridization_routines; - parameters_type& parameters; + const parameters_type& parameters; MOMS_type& MOMS; configuration_type& configuration; diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/full_line_tools.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/full_line_tools.hpp index 02c058e90..95d0772b1 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/full_line_tools.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/full_line_tools.hpp @@ -52,7 +52,7 @@ class full_line_tools { private: hybridization_routines_type& hybridization_routines; - parameters_type& parameters; + const parameters_type& parameters; MOMS_type& MOMS; configuration_type& configuration; diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/segment_tools.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/segment_tools.hpp index 0c4ba8e49..bdd0ff15c 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/segment_tools.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/segment_tools.hpp @@ -54,7 +54,7 @@ class segment_tools { private: hybridization_routines_type& hybridization_routines; - parameters_type& parameters; + const parameters_type& parameters; MOMS_type& MOMS; configuration_type& configuration; diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/shift_segment_tools.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/shift_segment_tools.hpp index 3402843ab..0e41c031e 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/shift_segment_tools.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/shift_segment_tools.hpp @@ -54,7 +54,7 @@ class shift_segment_tools { private: hybridization_routines_type& hybridization_routines; - parameters_type& parameters; + const parameters_type& parameters; MOMS_type& MOMS; configuration_type& configuration; diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/ss_hybridization_walker_routines.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/ss_hybridization_walker_routines.hpp index 4ab0ad8ae..f5eb3a009 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/ss_hybridization_walker_routines.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/ss_hybridization_walker_routines.hpp @@ -70,7 +70,7 @@ class ss_hybridization_walker_routines void initialize() {} void initialize_akima_coefficients(func::function& F_r_t); - parameters_t& get_parameters() { + const parameters_t& get_parameters() { return parameters; } MOMS_t& get_MOMS() { @@ -202,9 +202,9 @@ class ss_hybridization_walker_routines vertex_vertex_matrix_type& M); private: - parameters_t& parameters; + const parameters_t& parameters; MOMS_t& MOMS; - concurrency_type& concurrency; + const concurrency_type& concurrency; configuration_t& configuration; rng_t& rng; diff --git a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/swap_segment_tools.hpp b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/swap_segment_tools.hpp index c0912bf3b..5374a48c4 100644 --- a/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/swap_segment_tools.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ss_ct_hyb/walker_tools/swap_segment_tools.hpp @@ -58,7 +58,7 @@ class swap_segment_tools { private: hybridization_routines_type& hybridization_routines; - parameters_type& parameters; + const parameters_type& parameters; MOMS_type& MOMS; configuration_type& configuration; diff --git a/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_accumulator.hpp b/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_accumulator.hpp index e59a778f2..a49fd0463 100644 --- a/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_accumulator.hpp +++ b/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_accumulator.hpp @@ -33,16 +33,15 @@ class StdThreadQmciAccumulator : public QmciAccumulator { using Data = typename QmciAccumulator::DataType; public: - StdThreadQmciAccumulator(Parameters& parameters_ref, Data& data_ref, int id); + StdThreadQmciAccumulator(const Parameters& parameters_ref, Data& data_ref, int id); ~StdThreadQmciAccumulator(); using QmciAccumulator::finalize; using QmciAccumulator::initialize; - using QmciAccumulator::get_configuration; - template - void updateFrom(walker_type& walker); + template + void updateFrom(Walker& walker); void waitForQmciWalker(); @@ -59,15 +58,7 @@ class StdThreadQmciAccumulator : public QmciAccumulator { return done_; } -protected: - using QmciAccumulator::get_Gflop; - using QmciAccumulator::get_number_of_measurements; - using QmciAccumulator::get_accumulated_sign; - private: - using QmciAccumulator::data_; - using QmciAccumulator::parameters_; - int thread_id_; bool measuring_; std::atomic done_; @@ -76,7 +67,7 @@ class StdThreadQmciAccumulator : public QmciAccumulator { }; template -StdThreadQmciAccumulator::StdThreadQmciAccumulator(Parameters& parameters_ref, +StdThreadQmciAccumulator::StdThreadQmciAccumulator(const Parameters& parameters_ref, Data& data_ref, const int id) : QmciAccumulator(parameters_ref, data_ref, id), thread_id_(id), measuring_(false), done_(false) {} @@ -84,8 +75,8 @@ template StdThreadQmciAccumulator::~StdThreadQmciAccumulator() {} template -template -void StdThreadQmciAccumulator::updateFrom(walker_type& walker) { +template +void StdThreadQmciAccumulator::updateFrom(Walker& walker) { { // take a lock and keep it until it goes out of scope std::unique_lock lock(mutex_accumulator_); @@ -129,9 +120,9 @@ void StdThreadQmciAccumulator::notifyDone() { start_measuring_.notify_one(); } -} // stdthreadqmci -} // solver -} // phys -} // dca +} // namespace stdthreadqmci +} // namespace solver +} // namespace phys +} // namespace dca #endif // DCA_PHYS_DCA_STEP_CLUSTER_SOLVER_STDTHREAD_QMCI_STDTHREAD_QMCI_ACCUMULATOR_HPP diff --git a/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp b/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp index b7d5e0a2f..04f187dfa 100644 --- a/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp +++ b/include/dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp @@ -39,6 +39,7 @@ namespace solver { template class StdThreadQmciClusterSolver : public QmciSolver { +public: using BaseClass = QmciSolver; using ThisType = StdThreadQmciClusterSolver; @@ -52,7 +53,6 @@ class StdThreadQmciClusterSolver : public QmciSolver { using typename BaseClass::Walker; using StdThreadAccumulatorType = stdthreadqmci::StdThreadQmciAccumulator; -public: StdThreadQmciClusterSolver(Parameters& parameters_ref, Data& data_ref); void initialize(int dca_iteration); @@ -103,6 +103,8 @@ class StdThreadQmciClusterSolver : public QmciSolver { std::condition_variable queue_insertion_; std::vector config_dump_; + + unsigned measurements_ = 0; }; template @@ -145,6 +147,7 @@ void StdThreadQmciClusterSolver::initialize(int dca_iteration) { BaseClass::initialize(dca_iteration); + measurements_ = parameters_.get_measurements()[dca_iteration]; walk_finished_ = 0; measurements_done_ = 0; } @@ -302,10 +305,10 @@ void StdThreadQmciClusterSolver::initializeAndWarmUp(Walker& walker, // Read previous configuration. if (config_dump_[walker_id].size()) { walker.readConfig(config_dump_[walker_id]); - config_dump_[walker_id].setg(0); // Ready to read again if it is not overwritten. + config_dump_[walker_id].setg(0); // Ready to read again if it is not overwritten. } - walker.initialize(); + walker.initialize(dca_iteration_); if (id == 0 && concurrency_.id() == concurrency_.first()) std::cout << "\n\t\t warm-up starts\n" << std::endl; @@ -329,7 +332,7 @@ template void StdThreadQmciClusterSolver::iterateOverLocalMeasurements( const int walker_id, std::function&& f) { const bool fix_thread_meas = parameters_.fix_meas_per_walker(); - const int total_meas = parallel::util::getWorkload(parameters_.get_measurements(), concurrency_); + const int total_meas = parallel::util::getWorkload(measurements_, concurrency_); const int n_local_meas = fix_thread_meas ? parallel::util::getWorkload(total_meas, parameters_.get_walkers(), walker_id) @@ -503,8 +506,8 @@ template void StdThreadQmciClusterSolver::printIntegrationMetadata() const { if (concurrency_.id() == concurrency_.first()) { std::cout << "Threaded on-node integration has ended: " << dca::util::print_time() - << "\n\nTotal number of measurements: " << parameters_.get_measurements() - << "\nQMC-time\t" << total_time_ << "\n"; + << "\n\nTotal number of measurements: " << measurements_ << "\nQMC-time\t" + << total_time_ << "\n"; if (QmciSolver::device == linalg::GPU) { std::cout << "\nWalker fingerprints [MB]: \n"; for (const auto& x : walker_fingerprints_) diff --git a/include/dca/phys/parameters/mci_parameters.hpp b/include/dca/phys/parameters/mci_parameters.hpp index 48a7bb195..8136d250e 100644 --- a/include/dca/phys/parameters/mci_parameters.hpp +++ b/include/dca/phys/parameters/mci_parameters.hpp @@ -37,8 +37,8 @@ class MciParameters { MciParameters() : seed_(default_seed), warm_up_sweeps_(20), - sweeps_per_measurement_(1.), - measurements_(100), + sweeps_per_measurement_{1.}, + measurements_{100}, walkers_(1), accumulators_(1), shared_walk_and_accumulation_thread_(false), @@ -65,16 +65,19 @@ class MciParameters { int get_warm_up_sweeps() const { return warm_up_sweeps_; } - double get_sweeps_per_measurement() const { + + const std::vector& get_sweeps_per_measurement() const { return sweeps_per_measurement_; } - int get_measurements() const { + + const std::vector& get_measurements() const { return measurements_; } void set_measurements(const int measurements) { assert(measurements >= 0); - measurements_ = measurements; + std::fill(measurements_.begin(), measurements_.end(), measurements); } + int get_walkers() const { return walkers_; } @@ -107,6 +110,10 @@ class MciParameters { return store_configuration_; } +protected: + // Resize vector arguments to have the same size as the number of iterations. + void inline solveDcaIterationConflict(int iterations); + private: void generateRandomSeed() { std::random_device rd; @@ -118,8 +125,8 @@ class MciParameters { int seed_; int warm_up_sweeps_; - double sweeps_per_measurement_; - int measurements_; + std::vector sweeps_per_measurement_; + std::vector measurements_; int walkers_; int accumulators_; bool shared_walk_and_accumulation_thread_; @@ -194,6 +201,16 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { } }; + auto try_to_read_write_vector = [&](const std::string& name, auto& vec) { + try { // read as a vector. + reader_or_writer.execute(name, vec); + } + catch (std::exception&) { // read as a scalar. + vec.resize(1); + try_to_read_write(name, vec[0]); + } + }; + reader_or_writer.open_group("Monte-Carlo-integration"); if (reader_or_writer.is_reader()) { @@ -227,8 +244,8 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { error_computation_type_ = stringToErrorComputationType(error_type); try_to_read_write("warm-up-sweeps", warm_up_sweeps_); - try_to_read_write("sweeps-per-measurement", sweeps_per_measurement_); - try_to_read_write("measurements", measurements_); + try_to_read_write_vector("sweeps-per-measurement", sweeps_per_measurement_); + try_to_read_write_vector("measurements", measurements_); try_to_read_write("store-configuration", store_configuration_); @@ -253,7 +270,7 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { reader_or_writer.close_group(); - // Check parameters requirements. + // Check parameters consistency. if (g4_distribution_ == DistType::MPI) { #ifdef DCA_HAVE_MPI // Check for number of accumulators and walkers consistency. @@ -267,8 +284,8 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { // Check for number of ranks and g4 measurements consistency. int mpi_size; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); - int local_meas = measurements_ / mpi_size; - if (measurements_ % mpi_size != 0 || local_meas % accumulators_ != 0) { + int local_meas = measurements_.back() / mpi_size; + if (measurements_.back() % mpi_size != 0 || local_meas % accumulators_ != 0) { throw std::logic_error( "\n With distributed g4 enabled, 1) local measurements should be same across " "ranks, " @@ -280,6 +297,14 @@ void MciParameters::readWrite(ReaderOrWriter& reader_or_writer) { } } +void MciParameters::solveDcaIterationConflict(int iterations) { + // Solve conflicts between number of iterations and mci parameters. + auto solve_confilct = [&](auto& param) { param.resize(iterations, param.back()); }; + + solve_confilct(measurements_); + solve_confilct(sweeps_per_measurement_); +} + } // namespace params } // namespace phys } // namespace dca diff --git a/include/dca/phys/parameters/parameters.hpp b/include/dca/phys/parameters/parameters.hpp index 519840de5..b6314d6b7 100644 --- a/include/dca/phys/parameters/parameters.hpp +++ b/include/dca/phys/parameters/parameters.hpp @@ -391,6 +391,8 @@ void Parameters::readWrite(reader_or_writer); OutputParameters::readWrite(reader_or_writer); PhysicsParameters::readWrite(reader_or_writer); + + solveDcaIterationConflict(get_dca_iterations()); } template ({1.}) +Number of sweeps per measurement in each Monte Carlo iteration. Can be initialized with a single +scalar, in which case all the iterations will use this value. Automatically resized in case the +length is mismatched with the parameter "iterations". + +`"measurements":` vector\ ({100}) +Number of measurements in each Monte Carlo iteration. Can be initialized with a single scalar, +in which case all the iterations will use this value. Automatically resized in case the length is +mismatched with the parameter "iterations". `"error-computation-type:"` string("NONE")\ Determines the type of error computation that will be performed during the last Monte Carlo iteration. Possible options are: diff --git a/test/integration/cluster_solver/ctint/CMakeLists.txt b/test/integration/cluster_solver/ctint/CMakeLists.txt index 3e45cd149..1bffe4613 100644 --- a/test/integration/cluster_solver/ctint/CMakeLists.txt +++ b/test/integration/cluster_solver/ctint/CMakeLists.txt @@ -1,9 +1,45 @@ set(TEST_INCLUDES ${DCA_INCLUDE_DIRS};${PROJECT_SOURCE_DIR}) set(TEST_LIBS ${DCA_LIBS}) +dca_add_gtest(ctint_square_lattice_test + EXTENSIVE + MPI MPI_NUMPROC 4 + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} + ) + +dca_add_gtest(ctint_square_lattice_test_gpu + EXTENSIVE + CUDA + GTEST_MAIN + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS};mc_kernels;ctint;mc_kernels + ) + +dca_add_gtest(ctint_hund_lattice_test + EXTENSIVE + MPI MPI_NUMPROC 4 + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} + ) + +dca_add_gtest(ctint_fe_as_test + EXTENSIVE + MPI MPI_NUMPROC 4 + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} + ) + dca_add_gtest(ctint_double_update_comparison_test EXTENSIVE GTEST_MAIN INCLUDE_DIRS ${TEST_INCLUDES} LIBS ${TEST_LIBS} ) + +dca_add_gtest(ctint_square_lattice_tp_test + EXTENSIVE + GTEST_MAIN + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} + ) diff --git a/test/integration/cluster_solver/ctint/ctint_double_update_comparison_test.cpp b/test/integration/cluster_solver/ctint/ctint_double_update_comparison_test.cpp index def53a60d..069ca3296 100644 --- a/test/integration/cluster_solver/ctint/ctint_double_update_comparison_test.cpp +++ b/test/integration/cluster_solver/ctint/ctint_double_update_comparison_test.cpp @@ -102,5 +102,15 @@ TEST(CtintDoubleUpdateComparisonTest, Self_Energy) { EXPECT_NEAR(walker1.getAcceptanceProbability(), walker2.getAcceptanceProbability(), 5e-7); EXPECT_NEAR(walker1.get_MC_log_weight(), walker2.get_MC_log_weight(), 5e-7); EXPECT_EQ(walker1.get_sign(), walker2.get_sign()); + + auto check_direct_weight = [] (auto& walker) { + const auto fast_weight = walker.get_MC_log_weight(); + walker.setMFromConfig(); + const auto direct_weight = walker.get_MC_log_weight(); + EXPECT_NEAR(fast_weight, direct_weight, 5e-7); + }; + + check_direct_weight(walker1); + check_direct_weight(walker2); } } diff --git a/test/integration/cluster_solver/ctint/ctint_fe_as_test.cpp b/test/integration/cluster_solver/ctint/ctint_fe_as_test.cpp new file mode 100644 index 000000000..d5d3dfd4e --- /dev/null +++ b/test/integration/cluster_solver/ctint/ctint_fe_as_test.cpp @@ -0,0 +1,118 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE.txt for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// No-change test for CT-INT. +// Bilayer lattice with two band and two sites. + +#include +#include + +#include "gtest/gtest.h" + +#include "dca/function/function.hpp" +#include "dca/function/util/difference.hpp" +#include "dca/io/hdf5/hdf5_reader.hpp" +#include "dca/io/hdf5/hdf5_writer.hpp" +#include "dca/io/json/json_reader.hpp" +#include "dca/math/random/std_random_wrapper.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" +#include "dca/phys/models/analytic_hamiltonians/fe_as_lattice.hpp" +#include "dca/phys/models/tight_binding_model.hpp" +#include "dca/parallel/no_threading/no_threading.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/profiling/null_profiler.hpp" +#include "dca/testing/dca_mpi_test_environment.hpp" +#include "dca/testing/minimalist_printer.hpp" +#include "dca/util/git_version.hpp" +#include "dca/util/modules.hpp" + +constexpr bool update_baseline = false; + +dca::testing::DcaMpiTestEnvironment* dca_test_env; +const std::string input_dir = + DCA_SOURCE_DIR "/test/integration/cluster_solver/ctint/"; + +TEST(squareLattice_Nc4_nn, Self_Energy) { + using RngType = dca::math::random::StdRandomWrapper; + using Lattice = dca::phys::models::FeAsLattice; + using Model = dca::phys::models::TightBindingModel; + using Threading = dca::parallel::NoThreading; + using Parameters = + dca::phys::params::Parameters; + using Data = dca::phys::DcaData; + using QmcSolverType = dca::phys::solver::CtintClusterSolver; + + if (dca_test_env->concurrency.id() == dca_test_env->concurrency.first()) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + } + + Parameters parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + parameters.read_input_and_broadcast(dca_test_env->input_file_name); + parameters.update_model(); + parameters.update_domains(); + + // Initialize data with G0 computation. + Data data(parameters); + data.initialize(); + + // Do one integration step. + QmcSolverType qmc_solver(parameters, data); + qmc_solver.initialize(); + qmc_solver.integrate(); + qmc_solver.finalize(); + + if (not update_baseline) { + // Read and confront with previous run. + if (dca_test_env->concurrency.id() == 0) { + Data::SpGreensFunction G_k_w_check = (data.G_k_w.get_name()); + dca::io::HDF5Reader reader; + reader.open_file(input_dir + "fe_as_lattice_baseline.hdf5"); + reader.open_group("functions"); + reader.execute(G_k_w_check); + reader.close_group(), reader.close_file(); + + const auto diff = dca::func::util::difference(G_k_w_check, data.G_k_w); + EXPECT_GE(5e-7, diff.l_inf); + } + } + else { + // Write results + if (dca_test_env->concurrency.id() == dca_test_env->concurrency.first()) { + dca::io::HDF5Writer writer; + writer.open_file(input_dir + "fe_as_lattice_baseline.hdf5"); + writer.open_group("functions"); + writer.execute(data.G_k_w); + writer.close_group(), writer.close_file(); + } + } +} + +int main(int argc, char** argv) { + int result = 0; + ::testing::InitGoogleTest(&argc, argv); + + dca_test_env = + new dca::testing::DcaMpiTestEnvironment(argc, argv, input_dir + "fe_as_lattice_input.json"); + ::testing::AddGlobalTestEnvironment(dca_test_env); + + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (dca_test_env->concurrency.id() != 0) { + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new dca::testing::MinimalistPrinter); + } + + result = RUN_ALL_TESTS(); + return result; +} diff --git a/test/integration/cluster_solver/ctint/ctint_hund_lattice_test.cpp b/test/integration/cluster_solver/ctint/ctint_hund_lattice_test.cpp new file mode 100644 index 000000000..1f65666e7 --- /dev/null +++ b/test/integration/cluster_solver/ctint/ctint_hund_lattice_test.cpp @@ -0,0 +1,119 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE.txt for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// No-change test for CT-INT. +// Bilayer lattice with two band and two sites. + +#include +#include + +#include "gtest/gtest.h" + +#include "dca/function/function.hpp" +#include "dca/io/hdf5/hdf5_reader.hpp" +#include "dca/io/hdf5/hdf5_writer.hpp" +#include "dca/io/json/json_reader.hpp" +#include "dca/math/random/std_random_wrapper.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" +#include "dca/phys/models/analytic_hamiltonians/hund_lattice.hpp" +#include "dca/phys/models/tight_binding_model.hpp" +#include "dca/parallel/no_threading/no_threading.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/profiling/null_profiler.hpp" +#include "dca/testing/dca_mpi_test_environment.hpp" +#include "dca/testing/minimalist_printer.hpp" +#include "dca/util/git_version.hpp" +#include "dca/util/modules.hpp" + +constexpr bool update_baseline = false; + +dca::testing::DcaMpiTestEnvironment* dca_test_env; +const std::string input_dir = DCA_SOURCE_DIR "/test/integration/cluster_solver/ctint/"; + +TEST(CtintHundLatticeTest, Self_Energy) { + using RngType = dca::math::random::StdRandomWrapper; + using Lattice = dca::phys::models::HundLattice; + using Model = dca::phys::models::TightBindingModel; + using Threading = dca::parallel::NoThreading; + using Parameters = + dca::phys::params::Parameters; + using Data = dca::phys::DcaData; + using QmcSolverType = dca::phys::solver::CtintClusterSolver; + + if (dca_test_env->concurrency.id() == dca_test_env->concurrency.first()) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + } + + Parameters parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + parameters.read_input_and_broadcast(dca_test_env->input_file_name); + parameters.update_model(); + parameters.update_domains(); + + // Initialize data with G0 computation. + Data data(parameters); + data.initialize(); + + // Do one integration step. + QmcSolverType qmc_solver(parameters, data); + qmc_solver.initialize(); + qmc_solver.integrate(); + qmc_solver.finalize(); + + EXPECT_NEAR(2., qmc_solver.computeDensity(), 1e-2); + + if (!update_baseline) { + // Read and confront with previous run. + if (dca_test_env->concurrency.id() == 0) { + Data::SpGreensFunction G_k_w_check(data.G_k_w.get_name()); + dca::io::HDF5Reader reader; + reader.open_file(input_dir + "hund_lattice_baseline.hdf5"); + reader.open_group("functions"); + reader.execute(G_k_w_check); + reader.close_group(), reader.close_file(); + + for (int i = 0; i < G_k_w_check.size(); i++) { + EXPECT_NEAR(G_k_w_check(i).real(), data.G_k_w(i).real(), 5e-7); + EXPECT_NEAR(G_k_w_check(i).imag(), data.G_k_w(i).imag(), 5e-7); + } + } + } + else { + // Write results + if (dca_test_env->concurrency.id() == dca_test_env->concurrency.first()) { + dca::io::HDF5Writer writer; + writer.open_file(input_dir + "hund_lattice_baseline.hdf5"); + writer.open_group("functions"); + writer.execute(data.G_k_w); + writer.close_group(), writer.close_file(); + } + } +} + +int main(int argc, char** argv) { + int result = 0; + ::testing::InitGoogleTest(&argc, argv); + + dca_test_env = + new dca::testing::DcaMpiTestEnvironment(argc, argv, input_dir + "hund_lattice_input.json"); + ::testing::AddGlobalTestEnvironment(dca_test_env); + + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (dca_test_env->concurrency.id() != 0) { + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new dca::testing::MinimalistPrinter); + } + + result = RUN_ALL_TESTS(); + return result; +} diff --git a/test/integration/cluster_solver/ctint/ctint_square_lattice_test.cpp b/test/integration/cluster_solver/ctint/ctint_square_lattice_test.cpp new file mode 100644 index 000000000..3da3bf269 --- /dev/null +++ b/test/integration/cluster_solver/ctint/ctint_square_lattice_test.cpp @@ -0,0 +1,118 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE.txt for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// No-change test for CT-INT. +// Square lattice with single band and double occupancy repulsion U. + +#include +#include + +#include "gtest/gtest.h" + +#include "dca/function/function.hpp" +#include "dca/function/util/difference.hpp" +#include "dca/io/hdf5/hdf5_reader.hpp" +#include "dca/io/hdf5/hdf5_writer.hpp" +#include "dca/io/json/json_reader.hpp" +#include "dca/math/random/std_random_wrapper.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" +#include "dca/phys/models/analytic_hamiltonians/square_lattice.hpp" +#include "dca/phys/models/tight_binding_model.hpp" +#include "dca/parallel/no_threading/no_threading.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/profiling/null_profiler.hpp" +#include "dca/testing/dca_mpi_test_environment.hpp" +#include "dca/testing/minimalist_printer.hpp" +#include "dca/util/git_version.hpp" +#include "dca/util/modules.hpp" + +dca::testing::DcaMpiTestEnvironment* dca_test_env; +const std::string input_dir = DCA_SOURCE_DIR "/test/integration/cluster_solver/ctint/"; + +constexpr bool update_baseline = false; + +TEST(squareLattice_Nc4_nn, Self_Energy) { + using RngType = dca::math::random::StdRandomWrapper; + using Lattice = dca::phys::models::square_lattice; + using Model = dca::phys::models::TightBindingModel; + using Threading = dca::parallel::NoThreading; + using Parameters = + dca::phys::params::Parameters; + using Data = dca::phys::DcaData; + using QmcSolverType = dca::phys::solver::CtintClusterSolver; + + if (dca_test_env->concurrency.id() == dca_test_env->concurrency.first()) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + } + + Parameters parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + parameters.read_input_and_broadcast(dca_test_env->input_file_name); + parameters.update_model(); + parameters.update_domains(); + + // Initialize data with G0 computation. + Data data(parameters); + data.initialize(); + + // Do one integration step. + QmcSolverType qmc_solver(parameters, data); + qmc_solver.initialize(0); + qmc_solver.integrate(); + qmc_solver.finalize(); + + EXPECT_NEAR(1.0, qmc_solver.computeDensity(), 1e-5); + + if (not update_baseline) { + // Read and confront with previous run + if (dca_test_env->concurrency.id() == 0) { + Data::SpGreensFunction G_k_w_check(data.G_k_w.get_name()); + dca::io::HDF5Reader reader; + reader.open_file(input_dir + "square_lattice_baseline.hdf5"); + reader.open_group("functions"); + reader.execute(G_k_w_check); + reader.close_group(), reader.close_file(); + + auto diff = dca::func::util::difference(G_k_w_check, data.G_k_w); + EXPECT_GE(1e-6, diff.l2); + } + } + else { + // Write results + if (dca_test_env->concurrency.id() == dca_test_env->concurrency.first()) { + dca::io::HDF5Writer writer; + writer.open_file(input_dir + "square_lattice_baseline.hdf5"); + writer.open_group("functions"); + writer.execute(data.G_k_w); + writer.close_group(), writer.close_file(); + } + } +} + +int main(int argc, char** argv) { + int result = 0; + ::testing::InitGoogleTest(&argc, argv); + + dca_test_env = + new dca::testing::DcaMpiTestEnvironment(argc, argv, input_dir + "square_lattice_input.json"); + ::testing::AddGlobalTestEnvironment(dca_test_env); + + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (dca_test_env->concurrency.id() != 0) { + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new dca::testing::MinimalistPrinter); + } + + result = RUN_ALL_TESTS(); + return result; +} diff --git a/test/integration/cluster_solver/ctint/ctint_square_lattice_test_gpu.cpp b/test/integration/cluster_solver/ctint/ctint_square_lattice_test_gpu.cpp new file mode 100644 index 000000000..19b7f65c9 --- /dev/null +++ b/test/integration/cluster_solver/ctint/ctint_square_lattice_test_gpu.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE.txt for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// Confront GPU and CPU runs with CT-INT. +// Model: square lattice with single band and double occupancy repulsion U. + +#include +#include +#include + +#include "gtest/gtest.h" + +#include "dca/function/function.hpp" +#include "dca/function/util/difference.hpp" +#include "dca/io/hdf5/hdf5_reader.hpp" +#include "dca/io/hdf5/hdf5_writer.hpp" +#include "dca/io/json/json_reader.hpp" +#include "dca/math/random/std_random_wrapper.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" +#include "dca/phys/models/analytic_hamiltonians/square_lattice.hpp" +#include "dca/phys/models/tight_binding_model.hpp" +#include "dca/parallel/no_threading/no_threading.hpp" +#include "dca/parallel/no_concurrency/no_concurrency.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/profiling/null_profiler.hpp" +#include "dca/util/git_version.hpp" +#include "dca/util/modules.hpp" + +const std::string input_dir = DCA_SOURCE_DIR "/test/integration/cluster_solver/ctint/"; + +using RngType = dca::math::random::StdRandomWrapper; +using Lattice = dca::phys::models::square_lattice; +using Model = dca::phys::models::TightBindingModel; +using Threading = dca::parallel::NoThreading; +using Concurrency = dca::parallel::NoConcurrency; +using Parameters = dca::phys::params::Parameters; +using Data = dca::phys::DcaData; + +TEST(SquareLatticeTest, GpuSolver) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + + Concurrency concurrency(1, NULL); + Parameters parameters(dca::util::GitVersion::string(), concurrency); + parameters.read_input_and_broadcast(input_dir + + "square_lattice_gpu_input.json"); + parameters.update_model(); + parameters.update_domains(); + + Data data_gpu(parameters); + data_gpu.initialize(); + dca::phys::solver::CtintClusterSolver qmc_solver_gpu( + parameters, data_gpu); + qmc_solver_gpu.initialize(0); + cudaProfilerStart(); + qmc_solver_gpu.integrate(); + cudaProfilerStop(); + qmc_solver_gpu.finalize(); + EXPECT_NEAR(1.0, qmc_solver_gpu.computeDensity(), 1e-3); + + // Confront with CPU run. + Data data_cpu(parameters); + data_cpu.initialize(); + RngType::resetCounter(); // Use the same random numbers. + using GpuSolver = dca::phys::solver::CtintClusterSolver; + GpuSolver qmc_solver_cpu(parameters, data_cpu); + qmc_solver_cpu.initialize(); + qmc_solver_cpu.integrate(); + qmc_solver_cpu.finalize(); + + auto diff = dca::func::util::difference(data_cpu.G_k_w, data_gpu.G_k_w); + auto tolerance = std::max(1e-10, 100 * std::numeric_limits::epsilon()); + EXPECT_GE(tolerance, diff.l_inf); +} diff --git a/test/integration/cluster_solver/ctint/ctint_square_lattice_tp_test.cpp b/test/integration/cluster_solver/ctint/ctint_square_lattice_tp_test.cpp new file mode 100644 index 000000000..83a194983 --- /dev/null +++ b/test/integration/cluster_solver/ctint/ctint_square_lattice_tp_test.cpp @@ -0,0 +1,101 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE.txt for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// No-change test for CT-INT. +// Square lattice with single band and double occupancy repulsion U. +#include +#include + +#include "gtest/gtest.h" + +#include "dca/function/function.hpp" +#include "dca/function/util/difference.hpp" +#include "dca/io/hdf5/hdf5_reader.hpp" +#include "dca/io/hdf5/hdf5_writer.hpp" +#include "dca/io/json/json_reader.hpp" +#include "dca/math/random/std_random_wrapper.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp" +#include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" +#include "dca/phys/models/analytic_hamiltonians/square_lattice.hpp" +#include "dca/phys/models/tight_binding_model.hpp" +#include "dca/parallel/no_concurrency/no_concurrency.hpp" +#include "dca/parallel/stdthread/stdthread.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/profiling/null_profiler.hpp" +#include "dca/util/git_version.hpp" +#include "dca/util/modules.hpp" + +const std::string input_dir = + DCA_SOURCE_DIR "/test/integration/cluster_solver/ctint/"; + +constexpr bool update_baseline = false; + +TEST(CtintSquareLatticeTpTest, Self_Energy) { + using RngType = dca::math::random::StdRandomWrapper; + using Lattice = dca::phys::models::square_lattice; + using Model = dca::phys::models::TightBindingModel; + using Concurrency = dca::parallel::NoConcurrency; + using Threading = dca::parallel::stdthread; + using Parameters = + dca::phys::params::Parameters; + using Data = dca::phys::DcaData; + using BaseSolverType = dca::phys::solver::CtintClusterSolver; + using QmcSolverType = dca::phys::solver::StdThreadQmciClusterSolver; + + Concurrency concurrency(0, nullptr); + dca::util::GitVersion::print(); + dca::util::Modules::print(); + + Parameters parameters(dca::util::GitVersion::string(), concurrency); + parameters.read_input_and_broadcast(input_dir + + "square_lattice_tp_input.json"); + parameters.update_model(); + parameters.update_domains(); + + // Initialize data with G0 computation. + Data data(parameters); + data.initialize(); + + // Do one integration step. + QmcSolverType qmc_solver(parameters, data); + qmc_solver.initialize(0); + qmc_solver.integrate(); + + dca::phys::DcaLoopData loop_data; + qmc_solver.finalize(loop_data); + + if (!update_baseline) { + // Read and confront with previous run + Data::TpGreensFunction G4_check(data.get_G4()[0].get_name()); + Data::SpGreensFunction G_check(data.G_k_w.get_name()); + dca::io::HDF5Reader reader; + reader.open_file(input_dir + "square_lattice_tp_baseline.hdf5"); + reader.open_group("functions"); + reader.execute("G4", G4_check); + reader.execute(G_check); + reader.close_group(), reader.close_file(); + + const auto diff_g = dca::func::util::difference(G_check, data.G_k_w); + const auto diff_g4 = dca::func::util::difference(G4_check, data.get_G4()[0]); + EXPECT_GE(5e-7, diff_g.l_inf); + EXPECT_GE(5e-7, diff_g4.l_inf); + } + else { + // Write results + dca::io::HDF5Writer writer; + writer.open_file(input_dir + "square_lattice_tp_baseline.hdf5"); + writer.open_group("functions"); + writer.execute("G4", data.get_G4()[0]); + writer.execute(data.G_k_w); + writer.close_group(), writer.close_file(); + } +} diff --git a/test/integration/cluster_solver/ctint/fe_as_lattice_input.json b/test/integration/cluster_solver/ctint/fe_as_lattice_input.json new file mode 100644 index 000000000..e0f50f135 --- /dev/null +++ b/test/integration/cluster_solver/ctint/fe_as_lattice_input.json @@ -0,0 +1,58 @@ +{ + "output" : + { + "output-format" : "HDF5", + "output-ED" : "ed_results.hdf5", + "output-QMC" : "ctint_results.hdf5" + }, + + "physics": { + "beta" : 1, + "chemical-potential" : 0 + }, + + "FeAs-model" : + { + "t" : 0, + "tz" : 0, + "U" : 3, + "V" : -3, + "V-prime" : 3, + "Jh" : 5 + }, + + "CT-INT" : + { + "initial-configuration-size" :5, + "alpha-dd-pos" : 0.51, + "alpha-dd-neg" : 0.51, + "alpha-ndd" : 0.0001, + "double-update-probability" : 1 + }, + +"domains": { + "real-space-grids": { + "cluster": [[2, 0], + [0, 2]] + }, + "imaginary-time": { + "sp-time-intervals": 512 + }, + "imaginary-frequency": { + "sp-fermionic-frequencies": 512 + } +}, + + "DCA" : { + "interacting-orbitals" : [0,1] + }, + + "Monte-Carlo-integration" : + { + "warm-up-sweeps" : 1000, + "sweeps-per-measurement" : 1, + "measurements" : 50000, + + "seed" : 0 + } +} diff --git a/test/integration/cluster_solver/ctint/hund_lattice_input.json b/test/integration/cluster_solver/ctint/hund_lattice_input.json new file mode 100644 index 000000000..dba82fe38 --- /dev/null +++ b/test/integration/cluster_solver/ctint/hund_lattice_input.json @@ -0,0 +1,58 @@ +{ + "output" : + { + "output-format" : "HDF5", + "output-ED" : "ed_results.hdf5", + "output-QMC" : "ctint_results.hdf5" + }, + + "physics": { + "beta" : 1, + "chemical-potential" : 0 + }, + + "Hund-model" : + { + "t" : 0, + "tz" : 0, + "U" : 3, + "V" : -3, + "V-prime" : 3, + "Jh" : 5 + }, + + "CT-INT" : + { + "double-update-probability" : 0, + "initial-configuration-size" :5, + "alpha-dd-pos" : 0.51, + "alpha-dd-neg" : 0.51, + "alpha-ndd" : 0.0001 + }, + +"domains": { + "real-space-grids": { + "cluster": [[1, 0], + [0, 1]] + }, + "imaginary-time": { + "sp-time-intervals": 512 + }, + "imaginary-frequency": { + "sp-fermionic-frequencies": 512 + } +}, + + "DCA" : { + "interacting-orbitals" : [0,1] + }, + + "Monte-Carlo-integration" : + { + "warm-up-sweeps" : 1000, + "sweeps-per-measurement" : 1, + "measurements" : 100000, + + "seed" : 42 + } +} diff --git a/test/integration/cluster_solver/ctint/square_lattice_baseline.hdf5 b/test/integration/cluster_solver/ctint/square_lattice_baseline.hdf5 new file mode 100644 index 000000000..927ef2ce9 Binary files /dev/null and b/test/integration/cluster_solver/ctint/square_lattice_baseline.hdf5 differ diff --git a/test/integration/cluster_solver/ctint/square_lattice_gpu_input.json b/test/integration/cluster_solver/ctint/square_lattice_gpu_input.json new file mode 100644 index 000000000..cef0cc082 --- /dev/null +++ b/test/integration/cluster_solver/ctint/square_lattice_gpu_input.json @@ -0,0 +1,60 @@ +{ + "output" : + { + "output-format" : "HDF5", + "output-ED" : "ed_results.hdf5", + "output-QMC" : "ctint_results.hdf5" + }, + + "physics": { + "beta" : 8, + "chemical-potential" : 0 + }, + + "single-band-Hubbard-model": + { + "t" : 1, + "U" : 5 + }, + + "domains": { + "real-space-grids": { + "cluster": [[2, 0], + [0, 2]] + }, + + "imaginary-time": { + "sp-time-intervals": 512 + }, + + "imaginary-frequency": { + "sp-fermionic-frequencies": 512 + } + }, + + "CT-INT" : + { + "initial-configuration-size" :64, + "alpha-dd-pos" : 0.501, + "max-submatrix-size" : 16 + }, + + "ED": { + "eigenvalue-cut-off": 1.e-8 + }, + + "DCA": { + "iterations": 1, + "self-energy-mixing-factor": 1., + "interacting-orbitals": [0] + }, + + "Monte-Carlo-integration" : + { + "warm-up-sweeps" : 20, + "sweeps-per-measurement" : 1, + "measurements" : 500, + + "seed" : 0 + } +} diff --git a/test/integration/cluster_solver/ctint/square_lattice_input.json b/test/integration/cluster_solver/ctint/square_lattice_input.json new file mode 100644 index 000000000..0a9af69dd --- /dev/null +++ b/test/integration/cluster_solver/ctint/square_lattice_input.json @@ -0,0 +1,65 @@ +{ + "output" : + { + "output-format" : "HDF5", + "output-ED" : "ed_results.hdf5", + "output-QMC" : "ctint_results.hdf5" + }, + + "physics": { + "beta" : 2, + "chemical-potential" : 0 + }, + + "single-band-Hubbard-model": + { + "t" : 1, + "U" : 5 + }, + + "domains": { + "real-space-grids": { + "cluster": [[2, 0], + [0, 2]] + }, + + "imaginary-time": { + "sp-time-intervals": 512 + }, + + "imaginary-frequency": { + "sp-fermionic-frequencies": 512 + } + }, + + "CT-INT" : + { + "initial-configuration-size" :5, + "alpha-dd-pos" : 0.501 + }, + + "CT-AUX" : { + "expansion-parameter-K": 5., + "initial-configuration-size": 16, + "submatrix-size": 1 + }, + + "ED": { + "eigenvalue-cut-off": 1.e-8 + }, + + "DCA": { + "iterations": 1, + "self-energy-mixing-factor": 1., + "interacting-orbitals": [0] + }, + + "Monte-Carlo-integration" : + { + "warm-up-sweeps" : 100, + "sweeps-per-measurement" : 1, + "measurements" : 10000, + + "seed" : 0 + } +} diff --git a/test/integration/cluster_solver/ctint/square_lattice_result_gpu.hdf5 b/test/integration/cluster_solver/ctint/square_lattice_result_gpu.hdf5 new file mode 100644 index 000000000..c033acb77 Binary files /dev/null and b/test/integration/cluster_solver/ctint/square_lattice_result_gpu.hdf5 differ diff --git a/test/integration/cluster_solver/ctint/square_lattice_tp_baseline.hdf5 b/test/integration/cluster_solver/ctint/square_lattice_tp_baseline.hdf5 new file mode 100644 index 000000000..22ae1618f Binary files /dev/null and b/test/integration/cluster_solver/ctint/square_lattice_tp_baseline.hdf5 differ diff --git a/test/integration/cluster_solver/ctint/square_lattice_tp_input.json b/test/integration/cluster_solver/ctint/square_lattice_tp_input.json new file mode 100644 index 000000000..f4afad5d0 --- /dev/null +++ b/test/integration/cluster_solver/ctint/square_lattice_tp_input.json @@ -0,0 +1,70 @@ +{ + "output" : + { + "output-format" : "HDF5", + "output-ED" : "ed_results.hdf5", + "output-QMC" : "ctint_results.hdf5" + }, + + "physics": { + "beta" : 1, + "chemical-potential" : 0 + }, + + "single-band-Hubbard-model": + { + "t" : 1, + "U" : 1, + "V" : 1 + }, + + "domains": { + "real-space-grids": { + "cluster": [[2, 0], + [0, 2]] + }, + + "imaginary-time": { + "sp-time-intervals": 256 + }, + + "imaginary-frequency": { + "sp-fermionic-frequencies": 256, + "four-point-fermionic-frequencies" : 16 + } + }, + + "CT-INT" : + { + "initial-matrix-size" :5, + "alpha-dd-pos" : 0.501 + }, + + "four-point": { + "type": "PARTICLE_PARTICLE_UP_DOWN", + "momentum-transfer": [0., 3.1415], + "frequency-transfer": 2 + }, + + + "DCA": { + "iterations": 1, + "self-energy-mixing-factor": 1., + "interacting-orbitals": [0] + }, + + "Monte-Carlo-integration" : + { + "warm-up-sweeps" : 100, + "sweeps-per-measurement" : 1, + "measurements" : 600, + + "threaded-solver" : { + "walkers": 1, + "accumulators": 3, + "shared-walk-and-accumulation-thread": false + }, + + "seed" : 0 + } +} diff --git a/test/integration/cluster_solver/stdthread_qmci/CMakeLists.txt b/test/integration/cluster_solver/stdthread_qmci/CMakeLists.txt index 4d588d84b..5b638589b 100644 --- a/test/integration/cluster_solver/stdthread_qmci/CMakeLists.txt +++ b/test/integration/cluster_solver/stdthread_qmci/CMakeLists.txt @@ -1,8 +1,15 @@ dca_add_gtest(stdthread_ctaux_tp_test - EXTENSIVE - GTEST_MAIN - INCLUDE_DIRS ${DCA_INCLUDE_DIRS} - LIBS ${DCA_LIBS} - ) + EXTENSIVE + GTEST_MAIN + INCLUDE_DIRS ${DCA_INCLUDE_DIRS} + LIBS ${DCA_LIBS} + ) + +dca_add_gtest(stdthread_ctint_cluster_solver_test + EXTENSIVE + GTEST_MAIN + INCLUDE_DIRS ${DCA_INCLUDE_DIRS} + LIBS ${DCA_LIBS} + ) add_subdirectory(gpu) diff --git a/test/integration/cluster_solver/stdthread_qmci/gpu/CMakeLists.txt b/test/integration/cluster_solver/stdthread_qmci/gpu/CMakeLists.txt index 1b8a94bba..16ec26334 100644 --- a/test/integration/cluster_solver/stdthread_qmci/gpu/CMakeLists.txt +++ b/test/integration/cluster_solver/stdthread_qmci/gpu/CMakeLists.txt @@ -1,9 +1,16 @@ dca_add_gtest(stdthread_ctaux_gpu_tp_test - EXTENSIVE - CUDA - GTEST_MAIN - INCLUDE_DIRS ${DCA_INCLUDE_DIRS} - LIBS ${DCA_LIBS} - ) + EXTENSIVE + CUDA + GTEST_MAIN + INCLUDE_DIRS ${DCA_INCLUDE_DIRS} + LIBS ${DCA_LIBS} + ) +dca_add_gtest(stdthread_ctint_gpu_tp_test + EXTENSIVE + CUDA + GTEST_MAIN + INCLUDE_DIRS ${DCA_INCLUDE_DIRS} + LIBS ${DCA_LIBS} + ) diff --git a/test/integration/cluster_solver/stdthread_qmci/gpu/stdthread_ctaux_gpu_tp_test.cpp b/test/integration/cluster_solver/stdthread_qmci/gpu/stdthread_ctaux_gpu_tp_test.cpp index 2fffd8675..63ea99e88 100644 --- a/test/integration/cluster_solver/stdthread_qmci/gpu/stdthread_ctaux_gpu_tp_test.cpp +++ b/test/integration/cluster_solver/stdthread_qmci/gpu/stdthread_ctaux_gpu_tp_test.cpp @@ -64,7 +64,7 @@ TEST(PosixCtauxClusterSolverTest, G_k_w) { Parameters parameters(dca::util::GitVersion::string(), concurrency); parameters.read_input_and_broadcast( - input_dir + "stdthread_ctaux_gpu_tp_test_input.json"); + input_dir + "threaded_input.json"); parameters.update_model(); parameters.update_domains(); @@ -73,10 +73,6 @@ TEST(PosixCtauxClusterSolverTest, G_k_w) { data_cpu.initialize(); data_gpu.initialize(); - QmcSolverCpu qmc_solver_cpu(parameters, data_cpu); - RngType::resetCounter(); // Use the same seed for both solvers. - QmcSolverGpu qmc_solver_gpu(parameters, data_gpu); - // Do one integration step. auto perform_integration = [&](auto& solver) { solver.initialize(0); @@ -84,7 +80,11 @@ TEST(PosixCtauxClusterSolverTest, G_k_w) { dca::phys::DcaLoopData loop_data; solver.finalize(loop_data); }; + QmcSolverCpu qmc_solver_cpu(parameters, data_cpu); perform_integration(qmc_solver_cpu); + + RngType::resetCounter(); // Use the same seed for both solvers. + QmcSolverGpu qmc_solver_gpu(parameters, data_gpu); perform_integration(qmc_solver_gpu); const auto err_g = dca::func::util::difference(data_cpu.G_k_w, data_gpu.G_k_w); diff --git a/test/integration/cluster_solver/stdthread_qmci/gpu/stdthread_ctint_gpu_tp_test.cpp b/test/integration/cluster_solver/stdthread_qmci/gpu/stdthread_ctint_gpu_tp_test.cpp new file mode 100644 index 000000000..81bc9b472 --- /dev/null +++ b/test/integration/cluster_solver/stdthread_qmci/gpu/stdthread_ctint_gpu_tp_test.cpp @@ -0,0 +1,89 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE.txt for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// No-change test for CT-INT posix wrapper. + +#include +#include + +#include "gtest/gtest.h" + +#include "dca/function/function.hpp" +#include "dca/function/util/difference.hpp" +#include "dca/io/hdf5/hdf5_reader.hpp" +#include "dca/io/hdf5/hdf5_writer.hpp" +#include "dca/io/json/json_reader.hpp" +#include "dca/math/random/std_random_wrapper.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp" +#include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" +#include "dca/phys/models/analytic_hamiltonians/square_lattice.hpp" +#include "dca/phys/models/tight_binding_model.hpp" +#include "dca/parallel/no_concurrency/no_concurrency.hpp" +#include "dca/parallel/stdthread/stdthread.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/profiling/null_profiler.hpp" +#include "dca/testing/dca_mpi_test_environment.hpp" +#include "dca/testing/minimalist_printer.hpp" +#include "dca/util/git_version.hpp" +#include "dca/util/modules.hpp" + +constexpr bool UPDATE_RESULTS = false; + +const std::string input_dir = DCA_SOURCE_DIR "/test/integration/cluster_solver/stdthread_qmci/gpu/"; + +using Concurrency = dca::parallel::NoConcurrency; +using RngType = dca::math::random::StdRandomWrapper; +using Lattice = dca::phys::models::square_lattice; +using Model = dca::phys::models::TightBindingModel; +using Threading = dca::parallel::stdthread; +using Parameters = dca::phys::params::Parameters; +using Data = dca::phys::DcaData; +template +using BaseSolver = dca::phys::solver::CtintClusterSolver; +template +using QmcSolver = dca::phys::solver::StdThreadQmciClusterSolver>; + +TEST(StdthreadCtintGpuTest, GpuVsCpu) { + Concurrency concurrency(0, nullptr); + dca::linalg::util::initializeMagma(); + + Parameters parameters("", concurrency); + parameters.read_input_and_broadcast(input_dir + "threaded_input.json"); + parameters.update_model(); + parameters.update_domains(); + + // Initialize data with G0 computation. + Data data_cpu(parameters), data_gpu(parameters); + data_cpu.initialize(); + data_gpu.initialize(); + + // Do one integration step. + auto perform_integration = [&](auto& solver) { + solver.initialize(0); + solver.integrate(); + dca::phys::DcaLoopData loop_data; + solver.finalize(loop_data); + }; + + QmcSolver qmc_solver_gpu(parameters, data_gpu); + perform_integration(qmc_solver_gpu); + + RngType::resetCounter(); // Use the same seed for both solvers. + QmcSolver qmc_solver_cpu(parameters, data_cpu); + perform_integration(qmc_solver_cpu); + + const auto err_g = dca::func::util::difference(data_cpu.G_k_w, data_gpu.G_k_w); + const auto err_g4 = dca::func::util::difference(data_cpu.get_G4()[0], data_gpu.get_G4()[0]); + + EXPECT_GE(5e-7, err_g.l_inf); + EXPECT_GE(5e-7, err_g4.l_inf); +} diff --git a/test/integration/cluster_solver/stdthread_qmci/gpu/threaded_input.json b/test/integration/cluster_solver/stdthread_qmci/gpu/threaded_input.json new file mode 100644 index 000000000..e7db372bb --- /dev/null +++ b/test/integration/cluster_solver/stdthread_qmci/gpu/threaded_input.json @@ -0,0 +1,76 @@ +{ + "output" : + { + "output-format" : "HDF5", + "output-ED" : "ed_results.hdf5", + "output-QMC" : "ctint_results.hdf5" + }, + + "physics": { + "beta" : 2, + "chemical-potential" : 0 + }, + + "single-band-Hubbard-model": + { + "t" : 1, + "U" : 5 + }, + + "domains": { + "real-space-grids": { + "cluster": [[2, 0], + [0, 2]] + }, + + "imaginary-time": { + "sp-time-intervals": 512 + }, + + "imaginary-frequency": { + "sp-fermionic-frequencies": 512, + "four-point-fermionic-frequencies": 4 + } + }, + + "four-point": { + "type": "PARTICLE_PARTICLE_UP_DOWN", + "momentum-transfer": [0., 0.], + "frequency-transfer": -1 + }, + + + "CT-AUX" : { + "expansion-parameter-K": 5., + "initial-configuration-size": 8, + "max-submatrix-size": 4 + }, + + "CT-INT" : { + "initial-configuration-size" :5, + "alpha-dd-pos" : 0.501, + "max-submatrix-size": 4 + }, + + "DCA": { + "iterations": 1, + "self-energy-mixing-factor": 1., + "interacting-orbitals": [0] + }, + + "Monte-Carlo-integration" : + { + "warm-up-sweeps" : 100, + "sweeps-per-measurement" : 1, + "measurements" : 1000, + + "threaded-solver" : { + "walkers": 2, + "accumulators": 2, + "shared-walk-and-accumulation-thread": false, + "fix-meas-per-walker" : true + }, + + "seed" : 0 + } +} diff --git a/test/integration/cluster_solver/stdthread_qmci/stdthread_ctaux_tp_test.cpp b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctaux_tp_test.cpp index 0d379ef37..6e642fb90 100644 --- a/test/integration/cluster_solver/stdthread_qmci/stdthread_ctaux_tp_test.cpp +++ b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctaux_tp_test.cpp @@ -116,7 +116,7 @@ void performTest(const std::string& input, const std::string& baseline) { writer.open_file(input_dir + baseline); writer.open_group("functions"); writer.execute(data.G_k_w); - writer.execute(data.get_G4()[0]); + writer.execute("G4", data.get_G4()[0]); writer.close_group(), writer.close_file(); } } diff --git a/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_cluster_solver_test.cpp b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_cluster_solver_test.cpp new file mode 100644 index 000000000..6bb60b2ae --- /dev/null +++ b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_cluster_solver_test.cpp @@ -0,0 +1,115 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE.txt for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// No-change test for CT-INT posix wrapper. + +#include +#include + +#include "gtest/gtest.h" + +#include "dca/function/function.hpp" +#include "dca/function/util/difference.hpp" +#include "dca/io/hdf5/hdf5_reader.hpp" +#include "dca/io/hdf5/hdf5_writer.hpp" +#include "dca/io/json/json_reader.hpp" +#include "dca/math/random/std_random_wrapper.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp" +#include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" +#include "dca/phys/models/analytic_hamiltonians/square_lattice.hpp" +#include "dca/phys/models/tight_binding_model.hpp" +#include "dca/parallel/no_concurrency/no_concurrency.hpp" +#include "dca/parallel/stdthread/stdthread.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/profiling/null_profiler.hpp" +#include "dca/testing/dca_mpi_test_environment.hpp" +#include "dca/testing/minimalist_printer.hpp" +#include "dca/util/git_version.hpp" +#include "dca/util/modules.hpp" + +constexpr bool update_results = false; + +const std::string input_dir = DCA_SOURCE_DIR "/test/integration/cluster_solver/stdthread_qmci/"; + +using Concurrency = dca::parallel::NoConcurrency; +using RngType = dca::math::random::StdRandomWrapper; +using Lattice = dca::phys::models::square_lattice; +using Model = dca::phys::models::TightBindingModel; +using Threading = dca::parallel::stdthread; +using Parameters = dca::phys::params::Parameters; +using Data = dca::phys::DcaData; +using BaseSolver = dca::phys::solver::CtintClusterSolver; +using QmcSolver = dca::phys::solver::StdThreadQmciClusterSolver; + +void performTest(const std::string& input, const std::string& baseline) { + static bool update_model = true; + + Concurrency concurrency(0, nullptr); + + Parameters parameters(dca::util::GitVersion::string(), concurrency); + parameters.read_input_and_broadcast(input_dir + input); + if (update_model) { + parameters.update_model(); + parameters.update_domains(); + } + update_model = false; + + // Initialize data with G0 computation. + Data data(parameters); + data.initialize(); + + // Do one integration step. + QmcSolver qmc_solver(parameters, data); + qmc_solver.initialize(0); + qmc_solver.integrate(); + + dca::phys::DcaLoopData loop_data; + qmc_solver.finalize(loop_data); + + EXPECT_NEAR(1., qmc_solver.computeDensity(), 1e-2); + + if (not update_results) { + // Read and confront with previous run. + if (concurrency.id() == 0) { + Data::SpGreensFunction G_k_w_check(data.G_k_w.get_name()); + dca::io::HDF5Reader reader; + reader.open_file(input_dir + baseline); + reader.open_group("functions"); + reader.execute(G_k_w_check); + reader.close_group(), reader.close_file(); + + const auto err_g = dca::func::util::difference(G_k_w_check, data.G_k_w); + + EXPECT_GE(5e-7, err_g.l_inf); + } + } + else { + // Write results + if (concurrency.id() == concurrency.first()) { + dca::io::HDF5Writer writer; + writer.open_file(input_dir + baseline); + writer.open_group("functions"); + writer.execute(data.G_k_w); + writer.close_group(), writer.close_file(); + } + } +} + +TEST(PosixCtintClusterSolverTest, NonShared) { + performTest("stdthread_ctint_test_nonshared_input.json", + "stdthread_ctint_test_nonshared_baseline.hdf5"); +} + +TEST(PosixCtintClusterSolverTest, Shared) { + performTest("stdthread_ctint_test_shared_input.json", + "stdthread_ctint_test_shared_baseline.hdf5"); +} diff --git a/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_nonshared_baseline.hdf5 b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_nonshared_baseline.hdf5 new file mode 100644 index 000000000..7cb7180dd Binary files /dev/null and b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_nonshared_baseline.hdf5 differ diff --git a/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_nonshared_input.json b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_nonshared_input.json new file mode 100644 index 000000000..83ff1f46e --- /dev/null +++ b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_nonshared_input.json @@ -0,0 +1,67 @@ +{ + "output" : + { + "output-format" : "HDF5", + "output-ED" : "ed_results.hdf5", + "output-QMC" : "ctint_results.hdf5" + }, + + "physics": { + "beta" : 2, + "chemical-potential" : 0 + }, + + "single-band-Hubbard-model": + { + "t" : 1, + "U" : 5 + }, + + "domains": { + "real-space-grids": { + "cluster": [[2, 0], + [0, 2]] + }, + + "imaginary-time": { + "sp-time-intervals": 512 + }, + + "imaginary-frequency": { + "sp-fermionic-frequencies": 512, + "four-point-fermionic-frequencies": 8 + } + }, + + "four-point": { + "type": "NONE", + "momentum-transfer": [0, 3.1415], + "frequency-transfer": -1 + }, + + "CT-INT" : { + "max-submatrix-size" : 4, + "alpha-dd-pos" : 0.501 + }, + + "DCA": { + "iterations": 1, + "self-energy-mixing-factor": 1., + "interacting-orbitals": [0] + }, + + "Monte-Carlo-integration" : + { + "warm-up-sweeps" : 100, + "sweeps-per-measurement" : 1, + "measurements" : 400, + + "threaded-solver" : { + "walkers": 1, + "accumulators": 3, + "shared-walk-and-accumulation-thread": false + }, + + "seed" : 0 + } +} diff --git a/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_shared_baseline.hdf5 b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_shared_baseline.hdf5 new file mode 100644 index 000000000..eb28ee0c7 Binary files /dev/null and b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_shared_baseline.hdf5 differ diff --git a/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_shared_input.json b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_shared_input.json new file mode 100644 index 000000000..f71300b45 --- /dev/null +++ b/test/integration/cluster_solver/stdthread_qmci/stdthread_ctint_test_shared_input.json @@ -0,0 +1,62 @@ +{ + "output" : + { + "output-format" : "HDF5", + "output-ED" : "ed_results.hdf5", + "output-QMC" : "ctint_results.hdf5" + }, + + "physics": { + "beta" : 2, + "chemical-potential" : 0 + }, + + "single-band-Hubbard-model": + { + "t" : 1, + "U" : 5 + }, + + "domains": { + "real-space-grids": { + "cluster": [[2, 0], + [0, 2]] + }, + + "imaginary-time": { + "sp-time-intervals": 512 + }, + + "imaginary-frequency": { + "sp-fermionic-frequencies": 512, + "four-point-fermionic-frequencies": 8 + } + }, + + "CT-INT" : { + "initial-configuration-size" :5, + "alpha-dd-pos" : 0.501 + }, + + "DCA": { + "iterations": 1, + "self-energy-mixing-factor": 1., + "interacting-orbitals": [0] + }, + + "Monte-Carlo-integration" : + { + "warm-up-sweeps" : 100, + "sweeps-per-measurement" : 1, + "measurements" : 400, + + "threaded-solver" : { + "walkers": 4, + "accumulators": 4, + "shared-walk-and-accumulation-thread": true, + "fix-meas-per-walker": true + }, + + "seed" : 0 + } +} diff --git a/test/integration/statistical_tests/CMakeLists.txt b/test/integration/statistical_tests/CMakeLists.txt index bbf929f61..569437645 100644 --- a/test/integration/statistical_tests/CMakeLists.txt +++ b/test/integration/statistical_tests/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(bilayer_lattice) +add_subdirectory(FeAs) add_subdirectory(real_materials) add_subdirectory(square_lattice) diff --git a/test/integration/statistical_tests/FeAs/CMakeLists.txt b/test/integration/statistical_tests/FeAs/CMakeLists.txt new file mode 100644 index 000000000..9a2e08962 --- /dev/null +++ b/test/integration/statistical_tests/FeAs/CMakeLists.txt @@ -0,0 +1,14 @@ +################################################################################ +# test/integration/cluster-solver/square_lattice/ +# CMakeLists.txt +################################################################################ +set(TEST_INCLUDES ${DCA_INCLUDE_DIRS};${PROJECT_SOURCE_DIR}) +set(TEST_LIBS ${DCA_LIBS};statistical_testing) + +dca_add_gtest(ctint_fe_as_validation_stattest + STOCHASTIC + # Run with more ranks for better error detection. + MPI MPI_NUMPROC 32 + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} +) diff --git a/test/integration/statistical_tests/FeAs/ctint_fe_as_validation_stattest.cpp b/test/integration/statistical_tests/FeAs/ctint_fe_as_validation_stattest.cpp new file mode 100644 index 000000000..0fce7014d --- /dev/null +++ b/test/integration/statistical_tests/FeAs/ctint_fe_as_validation_stattest.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// Verification test of CT-AUX against a reference run + +#include +#include +#include + +#include "gtest/gtest.h" + +#include "dca/math/statistical_testing/statistical_testing.hpp" +#include "test/integration/statistical_tests/FeAs/fe_as_setup.hpp" + +dca::testing::DcaMpiTestEnvironment* dca_test_env; + +TEST(CtintFeAsValidationTest, GreensFunction) { + // dca::linalg::util::initializeMagma(); + + using namespace dca::testing; + const std::string ed_data_name = dca::testing::test_directory + "/ed_check.hdf5"; + + const int id = dca_test_env->concurrency.id(); + const int number_of_samples = dca_test_env->concurrency.number_of_processors(); + + if (id == dca_test_env->concurrency.first()) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + } + + ParametersType parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + parameters.read_input_and_broadcast(dca_test_env->input_file_name); + parameters.update_model(); + parameters.update_domains(); + + parameters.set_measurements(parameters.get_measurements().back() * number_of_samples); + + DcaData data(parameters); + data.initialize(); + + // Do one QMC iteration + QuantumClusterSolver qmc_solver(parameters, data); + qmc_solver.initialize(0); + qmc_solver.integrate(); + + // stores quantities from integration. + using dca::func::function; + function G_k_w_sample = + cutFrequency(qmc_solver.local_G_k_w(), n_frequencies); + G_k_w_sample.set_name("G_k_w"); + + // read the expected result + function G_k_w_expected; + + if (id == dca_test_env->concurrency.first()) { + function, SigmaDomain> G_k_w_full("cluster_greens_function_G_k_w"); + dca::io::HDF5Reader reader; + reader.open_file(ed_data_name); + reader.open_group("functions"); + reader.execute(G_k_w_full); + reader.close_group(); + reader.close_file(); + G_k_w_expected = cutFrequency(G_k_w_full, n_frequencies); + } + dca_test_env->concurrency.broadcast(G_k_w_expected); + + // compute covariance and average ctin result. + function G_k_w_covariance("G_k_w_covariance"); + dca_test_env->concurrency.computeCovarianceAndAvg(G_k_w_sample, G_k_w_covariance); + + // compute p-value + if (id == dca_test_env->concurrency.first()) { + // read the stored reference data + dca::math::StatisticalTesting test(G_k_w_sample, G_k_w_expected, G_k_w_covariance, 1); + double p_value = test.computePValue(false, number_of_samples); + test.printInfo("FeAsCu_testinfo.out", true); + double p_value_default = 0.05; + std::cout << "\n***\nThe p-value is " << p_value << "\n***\n"; + EXPECT_LT(p_value_default, p_value); + } + + // Uncomment to write integrator output. + dca::phys::DcaLoopData loop_data; + qmc_solver.finalize(loop_data); + if (id == 0) { + std::cout << "\nProcessor " << id << " is writing data " << std::endl; + dca::io::HDF5Writer writer; + writer.open_file("FeAs_results.hdf5"); + writer.open_group("functions"); + writer.execute(data.G_k_w); + writer.close_group(); + writer.close_file(); + } +} + +int main(int argc, char** argv) { + int result = 0; + + ::testing::InitGoogleTest(&argc, argv); + + dca_test_env = new dca::testing::DcaMpiTestEnvironment( + argc, argv, dca::testing::test_directory + "fe_as_input.json"); + ::testing::AddGlobalTestEnvironment(dca_test_env); + + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (dca_test_env->concurrency.id() != 0) { + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new dca::testing::MinimalistPrinter); + } + + result = RUN_ALL_TESTS(); + + return result; +} diff --git a/test/integration/statistical_tests/FeAs/ed_check.hdf5 b/test/integration/statistical_tests/FeAs/ed_check.hdf5 new file mode 100644 index 000000000..a4b05edc2 Binary files /dev/null and b/test/integration/statistical_tests/FeAs/ed_check.hdf5 differ diff --git a/test/integration/statistical_tests/FeAs/fe_as_input.json b/test/integration/statistical_tests/FeAs/fe_as_input.json new file mode 100644 index 000000000..f454f687d --- /dev/null +++ b/test/integration/statistical_tests/FeAs/fe_as_input.json @@ -0,0 +1,69 @@ +{ + "output" : + { + "filename-qmc" : "qmc_FeAs.hdf5", + "dump-cluster-Greens-functions" : true + }, + + "physics": { + "beta" : 5, + "chemical-potential" : 1.45, + "adjust-chemichal-potential" : false + }, + + "FeAs-model" : { + "t1" : -1, + "t2" : 1.3, + "t3" : -0.85, + "t4" : -0.85, + "U" : 4, + "V" : 1, + "J" : 1, + "Jp" : 1 + }, + + "CT-INT" : + { + "double-update-probability" : 1, + "all-sites-partnership" : true, + "max-submatrix-size" : 16, + "initial-configuration-size" :5, + "alpha-dd-pos" : 0.51, + "alpha-dd-neg" : 0.51, + "alpha-ndd" : 0.0001 + }, + + "domains": { + "real-space-grids": { + "cluster": [[2, 0], + [0, 2]] + }, + "imaginary-time": { + "sp-time-intervals": 512 + }, + "imaginary-frequency": { + "sp-fermionic-frequencies": 512 + } + }, + + "DCA" : { + "interacting-orbitals" : [0,1] + }, + + "Monte-Carlo-integration" : + { + "warm-up-sweeps" : 500, + "sweeps-per-measurement" : 1, + "measurements" : 10000, + "error-computation-type" : "NONE", + + "threaded-solver": { + "walkers": 1, + "accumulators": 1, + "shared-walk-and-accumulation-thread": true + }, + + + "seed" : "random" + } +} diff --git a/test/integration/statistical_tests/FeAs/fe_as_setup.hpp b/test/integration/statistical_tests/FeAs/fe_as_setup.hpp new file mode 100644 index 000000000..fb3aa0999 --- /dev/null +++ b/test/integration/statistical_tests/FeAs/fe_as_setup.hpp @@ -0,0 +1,84 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// Type definitions for statistical tests on a square lattice. + +#ifndef DCA_TEST_INTEGRATION_STATISTICAL_TESTS_FEAS_FE_AS_SETUP_HPP +#define DCA_TEST_INTEGRATION_STATISTICAL_TESTS_FEAS_FE_AS_SETUP_HPP + +#include +#include +#include + +#include "gtest/gtest.h" + +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_loop/dca_loop_data.hpp" +#include "dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp" +#include "dca/phys/models/analytic_hamiltonians/fe_as_lattice.hpp" +#include "dca/math/random/random.hpp" +#include "dca/math/statistical_testing/function_cut.hpp" +#include "dca/math/statistical_testing/statistical_testing.hpp" +#include "dca/parallel/stdthread/stdthread.hpp" +#include "dca/phys/models/analytic_hamiltonians/twoband_Cu.hpp" +#include "dca/phys/models/tight_binding_model.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/profiling/null_profiler.hpp" +#include "dca/util/git_version.hpp" +#include "dca/util/modules.hpp" +#include "dca/testing/dca_mpi_test_environment.hpp" +#include "dca/testing/minimalist_printer.hpp" + +namespace dca { +namespace testing { +// dca::testing:: + +constexpr int n_frequencies = 10; +using dca::linalg::DeviceType; +using dca::linalg::GPU; +using dca::linalg::CPU; + +#ifdef DCA_HAVE_CUDA +constexpr DeviceType default_device = GPU; +#else +constexpr DeviceType default_device = CPU; +#endif // DCA_HAVE_CUDA + +const std::string test_directory = DCA_SOURCE_DIR "/test/integration/statistical_tests/FeAs/"; + +using Model = + dca::phys::models::TightBindingModel>; +using RandomNumberGenerator = dca::math::random::StdRandomWrapper; + +using dca::phys::solver::CT_INT; + +using ParametersType = + dca::phys::params::Parameters; + +using DcaData = dca::phys::DcaData; + +template +using QuantumClusterSolver = dca::phys::solver::CtintClusterSolver; + +template +using ThreadedSolver = dca::phys::solver::StdThreadQmciClusterSolver>; + +using SigmaCutDomain = dca::math::util::SigmaCutDomain; +using SigmaDomain = dca::math::util::SigmaDomain; +using CovarianceDomain = dca::math::util::CovarianceDomain; +using dca::math::util::cutFrequency; + +} // namespace testing +} // namespace dca + +#endif // DCA_TEST_INTEGRATION_STATISTICAL_TESTS_FEAS_FE_AS_SETUP_HPP diff --git a/test/integration/statistical_tests/bilayer_lattice/CMakeLists.txt b/test/integration/statistical_tests/bilayer_lattice/CMakeLists.txt index b5d4c7aac..1ac9b46f5 100644 --- a/test/integration/statistical_tests/bilayer_lattice/CMakeLists.txt +++ b/test/integration/statistical_tests/bilayer_lattice/CMakeLists.txt @@ -5,6 +5,22 @@ set(TEST_INCLUDES ${DCA_INCLUDE_DIRS};${PROJECT_SOURCE_DIR}) set(TEST_LIBS ${DCA_LIBS};statistical_testing) +dca_add_gtest(ctint_bilayer_validation_stattest + STOCHASTIC + # Run with more ranks for better error detection. + MPI MPI_NUMPROC 32 + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} + ) + +dca_add_gtest(ctint_hund_validation_stattest + STOCHASTIC + # Run with more ranks for better error detection. + MPI MPI_NUMPROC 32 + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} + ) + dca_add_gtest(ctaux_bilayer_validation_stattest STOCHASTIC # Run with more ranks for better error detection. @@ -20,3 +36,11 @@ dca_add_gtest(ctaux_bilayer_verification_stattest INCLUDE_DIRS ${TEST_INCLUDES} LIBS ${TEST_LIBS} ) + +dca_add_gtest(ctint_bilayer_verification_stattest + STOCHASTIC + # Run with more ranks for better error detection. + MPI MPI_NUMPROC 1 + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} + ) diff --git a/test/integration/statistical_tests/bilayer_lattice/bilayer_lattice_setup.hpp b/test/integration/statistical_tests/bilayer_lattice/bilayer_lattice_setup.hpp index abbd72b80..af677c24e 100644 --- a/test/integration/statistical_tests/bilayer_lattice/bilayer_lattice_setup.hpp +++ b/test/integration/statistical_tests/bilayer_lattice/bilayer_lattice_setup.hpp @@ -22,7 +22,7 @@ #include "dca/phys/dca_data/dca_data.hpp" #include "dca/phys/dca_loop/dca_loop_data.hpp" #include "dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp" -//#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" #include "dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp" #include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" #include "dca/math/random/random.hpp" @@ -62,7 +62,7 @@ using RandomNumberGenerator = dca::math::random::StdRandomWrapper using ParametersType = @@ -80,10 +80,10 @@ template struct ClusterSolverSelector { using type = dca::phys::solver::CtauxClusterSolver, DcaData>; }; -//template -//struct ClusterSolverSelector { -// using type = dca::phys::solver::CtintClusterSolver, true>; -//}; +template +struct ClusterSolverSelector { + using type = dca::phys::solver::CtintClusterSolver, true>; +}; template using QuantumClusterSolver = typename ClusterSolverSelector::type; diff --git a/test/integration/statistical_tests/bilayer_lattice/ctaux_bilayer_validation_stattest.cpp b/test/integration/statistical_tests/bilayer_lattice/ctaux_bilayer_validation_stattest.cpp index 20c45ea6e..da5015334 100644 --- a/test/integration/statistical_tests/bilayer_lattice/ctaux_bilayer_validation_stattest.cpp +++ b/test/integration/statistical_tests/bilayer_lattice/ctaux_bilayer_validation_stattest.cpp @@ -39,7 +39,7 @@ TEST(CtauxBilayerValidationTest, GreensFunction) { parameters.update_model(); parameters.update_domains(); - parameters.set_measurements(parameters.get_measurements() * number_of_samples); + parameters.set_measurements(parameters.get_measurements().back() * number_of_samples); DcaData data(parameters); data.initialize(); diff --git a/test/integration/statistical_tests/bilayer_lattice/ctaux_bilayer_verification_stattest.cpp b/test/integration/statistical_tests/bilayer_lattice/ctaux_bilayer_verification_stattest.cpp index a1cdd7289..a461a8d39 100644 --- a/test/integration/statistical_tests/bilayer_lattice/ctaux_bilayer_verification_stattest.cpp +++ b/test/integration/statistical_tests/bilayer_lattice/ctaux_bilayer_verification_stattest.cpp @@ -31,7 +31,7 @@ TEST(CtauxBilayerLatticeVerificationTest, GreensFunction) { parameters.read_input_and_broadcast(dca_test_env->input_file_name); - const int meas_per_node = parameters.get_measurements(); + const int meas_per_node = parameters.get_measurements().back(); parameters.set_measurements(meas_per_node * dca_test_env->concurrency.get_size()); parameters.update_model(); diff --git a/test/integration/statistical_tests/bilayer_lattice/ctint_bilayer_validation_stattest.cpp b/test/integration/statistical_tests/bilayer_lattice/ctint_bilayer_validation_stattest.cpp new file mode 100644 index 000000000..f34ee1800 --- /dev/null +++ b/test/integration/statistical_tests/bilayer_lattice/ctint_bilayer_validation_stattest.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// Verification test of CT-INT against a reference run + +#include +#include +#include + +#include "gtest/gtest.h" + +#include "dca/math/statistical_testing/statistical_testing.hpp" +#include "test/integration/statistical_tests/bilayer_lattice/bilayer_lattice_setup.hpp" + +dca::testing::DcaMpiTestEnvironment* dca_test_env; + +TEST(CtintBilayerValidationTest, GreensFunction) { + // dca::linalg::util::initializeMagma(); + + using namespace dca::testing; + const std::string ed_data_name = dca::testing::test_directory + "/data.ed.hdf5"; + + const int id = dca_test_env->concurrency.id(); + const int number_of_samples = dca_test_env->concurrency.number_of_processors(); + + if (id == dca_test_env->concurrency.first()) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + } + + ParametersType parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + parameters.read_input_and_broadcast(dca_test_env->input_file_name); + parameters.update_model(); + parameters.update_domains(); + + parameters.set_measurements(parameters.get_measurements().back() * number_of_samples); + + DcaData data(parameters); + data.initialize(); + + // Do one QMC iteration + QuantumClusterSolver qmc_solver(parameters, data); + qmc_solver.initialize(0); + qmc_solver.integrate(); + + // stores quantities from integration. + using dca::func::function; + function G_k_w_sample = + cutFrequency(qmc_solver.local_G_k_w(), n_frequencies); + G_k_w_sample.set_name("G_k_w"); + + // read the expected result + function G_k_w_expected; + + if (id == dca_test_env->concurrency.first()) { + function, SigmaDomain> G_k_w_full("cluster_greens_function_G_k_w"); + dca::io::HDF5Reader reader; + reader.open_file(ed_data_name); + reader.open_group("functions"); + reader.execute(G_k_w_full); + reader.close_group(); + reader.close_file(); + G_k_w_expected = cutFrequency(G_k_w_full, n_frequencies); + } + dca_test_env->concurrency.broadcast(G_k_w_expected); + + // compute covariance and average ctin result. + function G_k_w_covariance("G_k_w_covariance"); + dca_test_env->concurrency.computeCovarianceAndAvg(G_k_w_sample, G_k_w_covariance); + + // compute p-value + if (id == dca_test_env->concurrency.first()) { + // read the stored reference data + dca::math::StatisticalTesting test(G_k_w_sample, G_k_w_expected, G_k_w_covariance, 1); + double p_value = test.computePValue(false, number_of_samples); + test.printInfo("ctint_bilayer_testinfo.out", true); + double p_value_default = 0.05; + std::cout << "\n***\nThe p-value is " << p_value << "\n***\n"; + EXPECT_LT(p_value_default, p_value); + } + + // Uncomment to write integrator output. + // dca::phys::DcaLoopData loop_data; + // qmc_solver.finalize(loop_data); + // if (id == 0) { + // std::cout << "\nProcessor " << id << " is writing data " << std::endl; + // dca::io::HDF5Writer writer; + // writer.open_file("ctint_square_results.hdf5"); + // writer.open_group("functions"); + // writer.execute(data.G_k_w); + // writer.close_group(); + // writer.close_file(); + // } +} + +int main(int argc, char** argv) { + int result = 0; + + ::testing::InitGoogleTest(&argc, argv); + + dca_test_env = new dca::testing::DcaMpiTestEnvironment( + argc, argv, dca::testing::test_directory + "bilayer_lattice_input.json"); + ::testing::AddGlobalTestEnvironment(dca_test_env); + + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (dca_test_env->concurrency.id() != 0) { + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new dca::testing::MinimalistPrinter); + } + + result = RUN_ALL_TESTS(); + + return result; +} diff --git a/test/integration/statistical_tests/bilayer_lattice/ctint_bilayer_verification_stattest.cpp b/test/integration/statistical_tests/bilayer_lattice/ctint_bilayer_verification_stattest.cpp new file mode 100644 index 000000000..55428f5f5 --- /dev/null +++ b/test/integration/statistical_tests/bilayer_lattice/ctint_bilayer_verification_stattest.cpp @@ -0,0 +1,134 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// Verification test of CT-INT against a reference run + +#include "dca/math/statistical_testing/statistical_testing.hpp" +#include "test/integration/statistical_tests/bilayer_lattice/bilayer_lattice_setup.hpp" + +dca::testing::DcaMpiTestEnvironment* dca_test_env; + +TEST(CtintBilayerLatticeVerificationTest, GreensFunction) { + dca::linalg::util::initializeMagma(); + + using namespace dca::testing; + + const int id = dca_test_env->concurrency.id(); + const int number_of_samples = dca_test_env->concurrency.number_of_processors(); + + if (id == dca_test_env->concurrency.first()) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + } + + ParametersType parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + parameters.read_input_and_broadcast(dca_test_env->input_file_name); + + + const int meas_per_node = parameters.get_measurements().back(); + parameters.set_measurements(meas_per_node * dca_test_env->concurrency.get_size()); + + parameters.update_model(); + parameters.update_domains(); + + DcaData data(parameters); + data.initialize(); + + // Do one QMC iteration. + ThreadedSolver qmc_solver(parameters, data); + qmc_solver.initialize(0); + qmc_solver.integrate(); + + // Read the portion of the Greens's function to be tested. + using dca::func::function; + function G_on_node = cutFrequency(qmc_solver.local_G_k_w(), n_frequencies); + + function G_k_w_measured(G_on_node, "G_k_w"); + dca_test_env->concurrency.sum_and_average(G_k_w_measured); + + // End of concurrent section. + if (id == dca_test_env->concurrency.first()) { + // read the stored reference data + function G_k_w_covariance("G_k_w_covariance"); + function G_k_w_expected("G_k_w"); + dca::io::HDF5Reader reader; + reader.open_file(test_directory + "verification_covariance_input.hdf5"); + reader.open_group("functions"); + reader.execute(G_k_w_covariance); + reader.execute(G_k_w_expected); + reader.close_group(); + reader.open_group("parameters"); + int reference_n_meas; + reader.execute("measurements_per_node", reference_n_meas); + EXPECT_EQ(reference_n_meas, meas_per_node); + reader.close_file(); + + dca::math::StatisticalTesting test(G_k_w_measured, G_k_w_expected, G_k_w_covariance, 1); + double p_value = test.computePValue(true, number_of_samples); + test.printInfo("ctint_verification_testinfo.out", true); + double p_value_default = 0.05; + std::cout << "\n***\nThe p-value is " << p_value << "\n***\n"; + EXPECT_LT(p_value_default, p_value); + } + + // If many MPI ranks where used store covariance and mean for future testing. + if (number_of_samples > G_k_w_measured.size()) { + function covariance_measured("G_k_w_covariance"); + dca_test_env->concurrency.computeCovariance(G_on_node, G_k_w_measured, covariance_measured); + if (id == dca_test_env->concurrency.last()) { + std::cout << "\nProcessor " << id << " is writing the covariance" << std::endl; + dca::io::HDF5Writer writer; + writer.open_file(test_directory + "verification_covariance_output.hdf5"); + writer.open_group("functions"); + writer.execute(covariance_measured); + writer.execute(G_k_w_measured); + writer.close_group(); + // store the number of used measurements + writer.open_group("parameters"); + writer.execute("measurements_per_node", meas_per_node); + writer.execute("nodes", number_of_samples); + writer.close_group(); + writer.close_file(); + } + } + + // Uncomment to write integrator output. + // dca::phys::DcaLoopData loop_data; + // qmc_solver.finalize(loop_data); + // if (id == 0) { + // std::cout << "\nProcessor " << id << " is writing data " << std::endl; + // dca::io::HDF5Writer writer; + // writer.open_file("verification_solver_output.hdf5"); + // writer.open_group("functions"); + // qmc_solver.write(writer); + // writer.close_group(); + // writer.close_file(); + // } +} + +int main(int argc, char** argv) { + int result = 0; + + ::testing::InitGoogleTest(&argc, argv); + + dca_test_env = new dca::testing::DcaMpiTestEnvironment( + argc, argv, dca::testing::test_directory + "bilayer_lattice_input.json"); + ::testing::AddGlobalTestEnvironment(dca_test_env); + + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (dca_test_env->concurrency.id() != 0) { + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new dca::testing::MinimalistPrinter); + } + + result = RUN_ALL_TESTS(); + + return result; +} diff --git a/test/integration/statistical_tests/bilayer_lattice/ctint_hund_validation_stattest.cpp b/test/integration/statistical_tests/bilayer_lattice/ctint_hund_validation_stattest.cpp new file mode 100644 index 000000000..a03409891 --- /dev/null +++ b/test/integration/statistical_tests/bilayer_lattice/ctint_hund_validation_stattest.cpp @@ -0,0 +1,152 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// Validation test of CT-INT against ED with a non density-density coupling. + +#include +#include +#include + +#include "gtest/gtest.h" + +#include "dca/math/statistical_testing/statistical_testing.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/phys/dca_loop/dca_loop_data.hpp" +#include "dca/math/random/random.hpp" +#include "dca/math/statistical_testing/function_cut.hpp" +#include "dca/parallel/stdthread/stdthread.hpp" +#include "dca/phys/models/analytic_hamiltonians/hund_lattice.hpp" +#include "dca/phys/domains/cluster/symmetries/point_groups/no_symmetry.hpp" +#include "dca/phys/parameters/parameters.hpp" +#include "dca/profiling/null_profiler.hpp" +#include "dca/util/git_version.hpp" +#include "dca/util/modules.hpp" +#include "dca/testing/dca_mpi_test_environment.hpp" +#include "dca/testing/minimalist_printer.hpp" + +using Model = dca::phys::models::TightBindingModel< + dca::phys::models::HundLattice>>; +using RandomNumberGenerator = dca::math::random::StdRandomWrapper; + +using ParametersType = + dca::phys::params::Parameters; + +using DcaData = dca::phys::DcaData; +using Solver = dca::phys::solver::CtintClusterSolver; + +using SigmaCutDomain = dca::math::util::SigmaCutDomain; +using SigmaDomain = dca::math::util::SigmaDomain; +using CovarianceDomain = dca::math::util::CovarianceDomain; +using dca::math::util::cutFrequency; + +dca::testing::DcaMpiTestEnvironment* dca_test_env; +const std::string test_directory = + DCA_SOURCE_DIR "/test/integration/statistical_tests/bilayer_lattice/"; +const int n_frequencies = 5; + +TEST(CtintHundValidationTest, GreensFunction) { + using namespace dca::testing; + const std::string ed_data_name = test_directory + "/hund_data.ed.hdf5"; + + const int id = dca_test_env->concurrency.id(); + const int number_of_samples = dca_test_env->concurrency.number_of_processors(); + + if (id == dca_test_env->concurrency.first()) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + } + + ParametersType parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + parameters.read_input_and_broadcast(test_directory + + "hund_lattice_input.json"); + parameters.update_model(); + parameters.update_domains(); + + parameters.set_measurements(parameters.get_measurements().back() * number_of_samples); + + DcaData data(parameters); + data.initialize(); + + // Do one QMC iteration + Solver qmc_solver(parameters, data); + qmc_solver.initialize(0); + qmc_solver.integrate(); + + // stores quantities from integration. + using dca::func::function; + function G_k_w_sample = + cutFrequency(qmc_solver.local_G_k_w(), n_frequencies); + G_k_w_sample.set_name("G_k_w"); + + // read the expected result + function G_k_w_expected; + + if (id == dca_test_env->concurrency.first()) { + function, SigmaDomain> G_k_w_full("cluster_greens_function_G_k_w"); + dca::io::HDF5Reader reader; + reader.open_file(ed_data_name); + reader.open_group("functions"); + reader.execute(G_k_w_full); + reader.close_group(); + reader.close_file(); + G_k_w_expected = cutFrequency(G_k_w_full, n_frequencies); + } + dca_test_env->concurrency.broadcast(G_k_w_expected); + + // compute covariance and average ctin result. + function G_k_w_covariance("G_k_w_covariance"); + dca_test_env->concurrency.computeCovarianceAndAvg(G_k_w_sample, G_k_w_covariance); + + // compute p-value + if (id == dca_test_env->concurrency.first()) { + // read the stored reference data + dca::math::StatisticalTesting test(G_k_w_sample, G_k_w_expected, G_k_w_covariance, 1); + double p_value = test.computePValue(false, number_of_samples); + test.printInfo("ctint_hund_testinfo.out", true); + double p_value_default = 0.05; + std::cout << "\n***\nThe p-value is " << p_value << "\n***\n"; + EXPECT_LT(p_value_default, p_value); + } + + // Uncomment to write integrator output. + // dca::phys::DcaLoopData loop_data; + // qmc_solver.finalize(loop_data); + // if (id == 0) { + // std::cout << "\nProcessor " << id << " is writing data " << std::endl; + // dca::io::HDF5Writer writer; + // writer.open_file("ctint_square_results.hdf5"); + // writer.open_group("functions"); + // writer.execute(data.G_k_w); + // writer.close_group(); + // writer.close_file(); + // } +} + +int main(int argc, char** argv) { + int result = 0; + + ::testing::InitGoogleTest(&argc, argv); + + dca_test_env = new dca::testing::DcaMpiTestEnvironment(argc, argv, ""); + ::testing::AddGlobalTestEnvironment(dca_test_env); + + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (dca_test_env->concurrency.id() != 0) { + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new dca::testing::MinimalistPrinter); + } + + result = RUN_ALL_TESTS(); + + return result; +} diff --git a/test/integration/statistical_tests/bilayer_lattice/data.ed.hdf5 b/test/integration/statistical_tests/bilayer_lattice/data.ed.hdf5 index 43a956e37..94d718903 100644 Binary files a/test/integration/statistical_tests/bilayer_lattice/data.ed.hdf5 and b/test/integration/statistical_tests/bilayer_lattice/data.ed.hdf5 differ diff --git a/test/integration/statistical_tests/bilayer_lattice/hund_data.ed.hdf5 b/test/integration/statistical_tests/bilayer_lattice/hund_data.ed.hdf5 new file mode 100644 index 000000000..ccd19f61a Binary files /dev/null and b/test/integration/statistical_tests/bilayer_lattice/hund_data.ed.hdf5 differ diff --git a/test/integration/statistical_tests/bilayer_lattice/hund_lattice_input.json b/test/integration/statistical_tests/bilayer_lattice/hund_lattice_input.json new file mode 100644 index 000000000..47bc004bd --- /dev/null +++ b/test/integration/statistical_tests/bilayer_lattice/hund_lattice_input.json @@ -0,0 +1,62 @@ +{ + "output" : { + "dump-cluster-Greens-functions" : true + }, + "physics": { + "beta" : 1., + "chemical-potential" : 0. + }, + + "Hund-model": + { + "t" : 1., + "U" : 2., + "V" : 2, + "V-prime" : 2, + "Jh": 4 + }, + + "domains": { + "real-space-grids": { + "cluster": [[2, 0], + [0, 1]] + }, + + "imaginary-time": { + "sp-time-intervals": 512 + }, + + "imaginary-frequency": { + "sp-fermionic-frequencies": 512 + } + }, + + "CT-INT" : + { + "max-submatrix-size": 8, + "initial-configuration-size": 5, + "alpha-dd-pos" : 0.501, + "alpha-ndd" : 0.0001, + "double-update-probability" : 1, + "all-sites-partnership" : true + }, + + "DCA": { + "iterations": 1, + "self-energy-mixing-factor": 0., + "interacting-orbitals": [0, 1] + }, + + "Monte-Carlo-integration" : { + "warm-up-sweeps": 1000, + "sweeps-per-measurement": 1, + "measurements": 20000, + "seed": "random", + "threaded-solver": { + "walkers": 1, + "accumulators": 1, + "shared-walk-and-accumulation-thread" : true, + "fix-meas-per-walker" : true + } + } +} diff --git a/test/integration/statistical_tests/bilayer_lattice/verification_covariance_input.hdf5 b/test/integration/statistical_tests/bilayer_lattice/verification_covariance_input.hdf5 index 1cdd2eb48..4474deee5 100644 Binary files a/test/integration/statistical_tests/bilayer_lattice/verification_covariance_input.hdf5 and b/test/integration/statistical_tests/bilayer_lattice/verification_covariance_input.hdf5 differ diff --git a/test/integration/statistical_tests/real_materials/NiO_stattest.cpp b/test/integration/statistical_tests/real_materials/NiO_stattest.cpp index 68859bce2..d8883b24b 100644 --- a/test/integration/statistical_tests/real_materials/NiO_stattest.cpp +++ b/test/integration/statistical_tests/real_materials/NiO_stattest.cpp @@ -79,7 +79,7 @@ TEST(Ni0, GS) { parameters.update_domains(); // Perform the same number of measurements per rank. - const int meas_per_process = parameters.get_measurements(); + const int meas_per_process = parameters.get_measurements().back(); parameters.set_measurements(meas_per_process * dca_test_env->concurrency.number_of_processors()); Data data(parameters); diff --git a/test/integration/statistical_tests/square_lattice/CMakeLists.txt b/test/integration/statistical_tests/square_lattice/CMakeLists.txt index 4ae8168eb..2321e4702 100644 --- a/test/integration/statistical_tests/square_lattice/CMakeLists.txt +++ b/test/integration/statistical_tests/square_lattice/CMakeLists.txt @@ -21,5 +21,22 @@ dca_add_gtest(ctaux_square_lattice_validation_stattest LIBS ${TEST_LIBS} ) +dca_add_gtest(ctint_square_lattice_verification_stattest + STOCHASTIC + # Run with more ranks for better error detection. + MPI MPI_NUMPROC 1 + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} + ) + +dca_add_gtest(ctint_square_lattice_validation_stattest + STOCHASTIC + # Run with more ranks for better error detection. + MPI MPI_NUMPROC 32 + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} + ) + + configure_file(data.ed.hdf5 ${CMAKE_CURRENT_BINARY_DIR}/data.ed.hdf5 COPYONLY) configure_file(verification_covariance_input.hdf5 ${CMAKE_CURRENT_BINARY_DIR}/verification_covariance_input.hdf5 COPYONLY) diff --git a/test/integration/statistical_tests/square_lattice/ctaux_square_lattice_validation_stattest.cpp b/test/integration/statistical_tests/square_lattice/ctaux_square_lattice_validation_stattest.cpp index 023404cd3..1ac974ac6 100644 --- a/test/integration/statistical_tests/square_lattice/ctaux_square_lattice_validation_stattest.cpp +++ b/test/integration/statistical_tests/square_lattice/ctaux_square_lattice_validation_stattest.cpp @@ -21,6 +21,8 @@ dca::testing::DcaMpiTestEnvironment* dca_test_env; TEST(CtauxSquareLatticeValidationTest, GreensFunction) { + dca::linalg::util::initializeMagma(); + using namespace dca::testing; const std::string ed_data_name = "data.ed.hdf5"; @@ -32,16 +34,18 @@ TEST(CtauxSquareLatticeValidationTest, GreensFunction) { dca::util::Modules::print(); } - ParametersType parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + ParametersType parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); parameters.read_input_and_broadcast(dca_test_env->input_file_name); parameters.update_model(); parameters.update_domains(); - DcaData data(parameters); + parameters.set_measurements(parameters.get_measurements().back() * number_of_samples / 50); + + DcaData data(parameters); data.initialize(); // Do one QMC iteration - QuantumClusterSolver qmc_solver(parameters, data); + QuantumClusterSolver qmc_solver(parameters, data); qmc_solver.initialize(0); qmc_solver.integrate(); diff --git a/test/integration/statistical_tests/square_lattice/ctaux_square_lattice_verification_stattest.cpp b/test/integration/statistical_tests/square_lattice/ctaux_square_lattice_verification_stattest.cpp index 40877b779..d52ed9188 100644 --- a/test/integration/statistical_tests/square_lattice/ctaux_square_lattice_verification_stattest.cpp +++ b/test/integration/statistical_tests/square_lattice/ctaux_square_lattice_verification_stattest.cpp @@ -16,6 +16,7 @@ dca::testing::DcaMpiTestEnvironment* dca_test_env; TEST(CtauxSquareLatticeVerificationTest, GreensFunction) { + dca::linalg::util::initializeMagma(); using namespace dca::testing; const int id = dca_test_env->concurrency.id(); @@ -26,20 +27,16 @@ TEST(CtauxSquareLatticeVerificationTest, GreensFunction) { dca::util::Modules::print(); } - ParametersType parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + ParametersType parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); parameters.read_input_and_broadcast(dca_test_env->input_file_name); parameters.update_model(); parameters.update_domains(); - // Perform the same number of measurements per rank. - const int meas_per_process = parameters.get_measurements(); - parameters.set_measurements(meas_per_process * dca_test_env->concurrency.number_of_processors()); - - DcaData data(parameters); + DcaData data(parameters); data.initialize(); - // Do one QMC iteration - ThreadedSolver qmc_solver(parameters, data); + // Do one QMC iteration. + ThreadedSolver qmc_solver(parameters, data); qmc_solver.initialize(0); qmc_solver.integrate(); @@ -64,7 +61,7 @@ TEST(CtauxSquareLatticeVerificationTest, GreensFunction) { reader.open_group("parameters"); int reference_n_meas; reader.execute("measurements_per_node", reference_n_meas); - EXPECT_EQ(reference_n_meas, meas_per_process); + EXPECT_EQ(reference_n_meas, parameters.get_measurements().back()); reader.close_file(); dca::math::StatisticalTesting test(G_k_w_measured, G_k_w_expected, G_k_w_covariance, 1); @@ -89,7 +86,7 @@ TEST(CtauxSquareLatticeVerificationTest, GreensFunction) { writer.close_group(); // store the number of used measurements writer.open_group("parameters"); - writer.execute("measurements_per_node", meas_per_process); + writer.execute("measurements_per_node", parameters.get_measurements().back()); writer.execute("nodes", number_of_samples); writer.close_group(); writer.close_file(); diff --git a/test/integration/statistical_tests/square_lattice/ctint_square_lattice_validation_stattest.cpp b/test/integration/statistical_tests/square_lattice/ctint_square_lattice_validation_stattest.cpp new file mode 100644 index 000000000..366df27e7 --- /dev/null +++ b/test/integration/statistical_tests/square_lattice/ctint_square_lattice_validation_stattest.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// Verification test of CT-INT against a reference run + +#include +#include +#include + +#include "gtest/gtest.h" + +#include "dca/math/statistical_testing/statistical_testing.hpp" +#include "test/integration/statistical_tests/square_lattice/square_lattice_setup.hpp" + +dca::testing::DcaMpiTestEnvironment* dca_test_env; + +TEST(CtauxSquareLatticeValidationTest, GreensFunction) { + dca::linalg::util::initializeMagma(); + + using namespace dca::testing; + const std::string ed_data_name = "data.ed.hdf5"; + + const int id = dca_test_env->concurrency.id(); + const int number_of_samples = dca_test_env->concurrency.number_of_processors(); + + if (id == dca_test_env->concurrency.first()) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + } + + ParametersType parameters(dca::util::GitVersion::string(), dca_test_env->concurrency); + parameters.read_input_and_broadcast(dca_test_env->input_file_name); + parameters.update_model(); + parameters.update_domains(); + + parameters.set_measurements(parameters.get_measurements().back() * number_of_samples / 50); + + DcaData data(parameters); + data.initialize(); + + // Do one QMC iteration + QuantumClusterSolver qmc_solver(parameters, data); + qmc_solver.initialize(0); + qmc_solver.integrate(); + + // stores quantities from integration. + using dca::func::function; + function G_k_w_sample = + cutFrequency(qmc_solver.local_G_k_w(), n_frequencies); + G_k_w_sample.set_name("G_k_w"); + + // read the expected result + function G_k_w_expected; + + if (id == dca_test_env->concurrency.first()) { + function, SigmaDomain> G_k_w_full("cluster_greens_function_G_k_w"); + dca::io::HDF5Reader reader; + reader.open_file(ed_data_name); + reader.open_group("functions"); + reader.execute(G_k_w_full); + reader.close_group(); + reader.close_file(); + G_k_w_expected = cutFrequency(G_k_w_full, n_frequencies); + } + dca_test_env->concurrency.broadcast(G_k_w_expected); + + // compute covariance and average ctin result. + function G_k_w_covariance("G_k_w_covariance"); + dca_test_env->concurrency.computeCovarianceAndAvg(G_k_w_sample, G_k_w_covariance); + + // compute p-value + if (id == dca_test_env->concurrency.first()) { + // read the stored reference data + dca::math::StatisticalTesting test(G_k_w_sample, G_k_w_expected, G_k_w_covariance, 1); + double p_value = test.computePValue(false, number_of_samples); + test.printInfo("ctint_square_testinfo.out", true); + double p_value_default = 0.05; + std::cout << "\n***\nThe p-value is " << p_value << "\n***\n"; + EXPECT_LT(p_value_default, p_value); + } + + // Uncomment to write integrator output. + // dca::phys::DcaLoopData loop_data; + // qmc_solver.finalize(loop_data); + // if (id == 0) { + // std::cout << "\nProcessor " << id << " is writing data " << std::endl; + // dca::io::HDF5Writer writer; + // writer.open_file("ctint_square_results.hdf5"); + // writer.open_group("functions"); + // writer.execute(data.G_k_w); + // writer.close_group(); + // writer.close_file(); + // } +} + +int main(int argc, char** argv) { + int result = 0; + + ::testing::InitGoogleTest(&argc, argv); + + dca_test_env = new dca::testing::DcaMpiTestEnvironment( + argc, argv, dca::testing::test_directory + "square_lattice_input.json"); + ::testing::AddGlobalTestEnvironment(dca_test_env); + + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (dca_test_env->concurrency.id() != 0) { + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new dca::testing::MinimalistPrinter); + } + + result = RUN_ALL_TESTS(); + + return result; +} diff --git a/test/integration/statistical_tests/square_lattice/ctint_square_lattice_verification_stattest.cpp b/test/integration/statistical_tests/square_lattice/ctint_square_lattice_verification_stattest.cpp new file mode 100644 index 000000000..0af00953e --- /dev/null +++ b/test/integration/statistical_tests/square_lattice/ctint_square_lattice_verification_stattest.cpp @@ -0,0 +1,129 @@ +// Copyright (C) 2018 ETH Zurich +// Copyright (C) 2018 UT-Battelle, LLC +// All rights reserved. +// +// See LICENSE.txt for terms of usage. +// See CITATION.md for citation guidelines, if DCA++ is used for scientific publications. +// +// Author: Andrei Plamada (plamada@phys.ethz.ch) +// Giovanni Balduzzi (gbalduzz@itp.phys.ethz.ch) +// +// Verification test of CT-AUX against a reference run + +#include "dca/math/statistical_testing/statistical_testing.hpp" +#include "test/integration/statistical_tests/square_lattice/square_lattice_setup.hpp" + +dca::testing::DcaMpiTestEnvironment* dca_test_env; + +TEST(CtintVerificationTest, GreensFunction) { + using namespace dca::testing; + + const int id = dca_test_env->concurrency.id(); + const int number_of_samples = dca_test_env->concurrency.number_of_processors(); + + if (id == dca_test_env->concurrency.first()) { + dca::util::GitVersion::print(); + dca::util::Modules::print(); + } + + ParametersType parameters(dca::util::GitVersion::string(), + dca_test_env->concurrency); + parameters.read_input_and_broadcast(dca_test_env->input_file_name); + parameters.update_model(); + parameters.update_domains(); + + DcaData data(parameters); + data.initialize(); + + // Do one QMC iteration + ThreadedSolver qmc_solver(parameters, data); + qmc_solver.initialize(0); + qmc_solver.integrate(); + + // Read the portion of the Greens's function to be tested. + using dca::func::function; + function G_on_node = cutFrequency(qmc_solver.local_G_k_w(), n_frequencies); + + function G_k_w_measured(G_on_node, "G_k_w"); + dca_test_env->concurrency.sum_and_average(G_k_w_measured); + + // End of concurrent section. + if (id == dca_test_env->concurrency.first()) { + // read the stored reference data + function G_k_w_covariance("G_k_w_covariance"); + function G_k_w_expected("G_k_w"); + dca::io::HDF5Reader reader; + reader.open_file("verification_covariance_input.hdf5"); + reader.open_group("functions"); + reader.execute(G_k_w_covariance); + reader.execute(G_k_w_expected); + reader.close_group(); + reader.open_group("parameters"); + int reference_n_meas; + reader.execute("measurements_per_node", reference_n_meas); + EXPECT_EQ(reference_n_meas, parameters.get_measurements().back()); + reader.close_file(); + + dca::math::StatisticalTesting test(G_k_w_measured, G_k_w_expected, G_k_w_covariance, 1); + double p_value = test.computePValue(true, number_of_samples); + test.printInfo("ctint_verification_testinfo.out", true); + double p_value_default = 0.05; + std::cout << "\n***\nThe p-value is " << p_value << "\n***\n"; + EXPECT_LT(p_value_default, p_value); + } + + // If many MPI ranks where used store covariance and mean for future testing. + if (number_of_samples > G_k_w_measured.size()) { + function covariance_measured("G_k_w_covariance"); + dca_test_env->concurrency.computeCovariance(G_on_node, G_k_w_measured, covariance_measured); + if (id == dca_test_env->concurrency.last()) { + std::cout << "\nProcessor " << id << " is writing the covariance" << std::endl; + dca::io::HDF5Writer writer; + writer.open_file("verification_covariance_output.hdf5"); + writer.open_group("functions"); + writer.execute(covariance_measured); + writer.execute(G_k_w_measured); + writer.close_group(); + // store the number of used measurements + writer.open_group("parameters"); + writer.execute("measurements_per_node", parameters.get_measurements().back()); + writer.execute("nodes", number_of_samples); + writer.close_group(); + writer.close_file(); + } + } + + // write integrator output + // INTERNAL: do we need it? + // qmc_solver.finalize(); + // if (id == 0) { + // std::cout << "\nProcessor " << id << " is writing data " << std::endl; + // dca::io::HDF5Writer writer; + // writer.open_file("ctint_verification_solver_output.hdf5"); + // writer.open_group("functions"); + // qmc_solver.write(writer); + // writer.close_group(); + // writer.close_file(); + // } +} + +int main(int argc, char** argv) { + int result = 0; + + ::testing::InitGoogleTest(&argc, argv); + + dca_test_env = new dca::testing::DcaMpiTestEnvironment( + argc, argv, dca::testing::test_directory + "square_lattice_input.json"); + ::testing::AddGlobalTestEnvironment(dca_test_env); + + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); + + if (dca_test_env->concurrency.id() != 0) { + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new dca::testing::MinimalistPrinter); + } + + result = RUN_ALL_TESTS(); + + return result; +} diff --git a/test/integration/statistical_tests/square_lattice/data.ed.hdf5 b/test/integration/statistical_tests/square_lattice/data.ed.hdf5 index ebb7d2356..f3dbb1a4d 100644 Binary files a/test/integration/statistical_tests/square_lattice/data.ed.hdf5 and b/test/integration/statistical_tests/square_lattice/data.ed.hdf5 differ diff --git a/test/integration/statistical_tests/square_lattice/square_lattice_input.json b/test/integration/statistical_tests/square_lattice/square_lattice_input.json index 9013370f6..503df3c2c 100644 --- a/test/integration/statistical_tests/square_lattice/square_lattice_input.json +++ b/test/integration/statistical_tests/square_lattice/square_lattice_input.json @@ -31,6 +31,14 @@ "max-submatrix-size": 16 }, + "CT-INT" : + { + "use-submatrix" : true, + "max-submatrix-size": 4, + "initial-configuration-size": 5, + "alpha-dd-pos" : 0.501 + }, + "DCA": { "iterations": 1, "self-energy-mixing-factor": 0., diff --git a/test/integration/statistical_tests/square_lattice/square_lattice_setup.hpp b/test/integration/statistical_tests/square_lattice/square_lattice_setup.hpp index c2504e359..19424f9ad 100644 --- a/test/integration/statistical_tests/square_lattice/square_lattice_setup.hpp +++ b/test/integration/statistical_tests/square_lattice/square_lattice_setup.hpp @@ -19,8 +19,10 @@ #include "gtest/gtest.h" #include "dca/phys/dca_data/dca_data.hpp" +#include "dca/linalg/util/util_cublas.hpp" #include "dca/phys/dca_loop/dca_loop_data.hpp" #include "dca/phys/dca_step/cluster_solver/ctaux/ctaux_cluster_solver.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" #include "dca/phys/dca_step/cluster_solver/stdthread_qmci/stdthread_qmci_cluster_solver.hpp" #include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" #include "dca/math/random/random.hpp" @@ -40,7 +42,13 @@ namespace dca { namespace testing { // dca::testing:: -const int n_frequencies = 10; +constexpr int n_frequencies = 10; + +#ifdef DCA_HAVE_CUDA +constexpr dca::linalg::DeviceType device = dca::linalg::GPU; +#else +constexpr dca::linalg::DeviceType device = dca::linalg::CPU; +#endif // DCA_HAVE_CUDA const std::string test_directory = DCA_SOURCE_DIR "/test/integration/statistical_tests/square_lattice/"; @@ -48,14 +56,36 @@ const std::string test_directory = using Model = dca::phys::models::TightBindingModel>; using RandomNumberGenerator = dca::math::random::StdRandomWrapper; + +using dca::phys::solver::ClusterSolverName; +using dca::phys::solver::CT_AUX; +using dca::phys::solver::CT_INT; + +template using ParametersType = dca::phys::params::Parameters; -using DcaData = dca::phys::DcaData; -using QuantumClusterSolver = - dca::phys::solver::CtauxClusterSolver; -using ThreadedSolver = dca::phys::solver::StdThreadQmciClusterSolver; + RandomNumberGenerator, name>; + +template +using DcaData = dca::phys::DcaData>; + +template +struct ClusterSolverSelector; +template <> +struct ClusterSolverSelector { + using type = dca::phys::solver::CtauxClusterSolver, + DcaData>; +}; +template <> +struct ClusterSolverSelector { + using type = dca::phys::solver::CtintClusterSolver, true>; +}; +template +using QuantumClusterSolver = typename ClusterSolverSelector::type; + +template +using ThreadedSolver = dca::phys::solver::StdThreadQmciClusterSolver>; using SigmaCutDomain = dca::math::util::SigmaCutDomain; using SigmaDomain = dca::math::util::SigmaDomain; diff --git a/test/integration/statistical_tests/square_lattice/verification_covariance_input.hdf5 b/test/integration/statistical_tests/square_lattice/verification_covariance_input.hdf5 index 4ffd4f6b5..909a4bba7 100644 Binary files a/test/integration/statistical_tests/square_lattice/verification_covariance_input.hdf5 and b/test/integration/statistical_tests/square_lattice/verification_covariance_input.hdf5 differ diff --git a/test/performance/phys/accumulation/sp_accumulation_performance_test.cpp b/test/performance/phys/accumulation/sp_accumulation_performance_test.cpp index 51d30b36c..3625033d9 100644 --- a/test/performance/phys/accumulation/sp_accumulation_performance_test.cpp +++ b/test/performance/phys/accumulation/sp_accumulation_performance_test.cpp @@ -22,6 +22,7 @@ #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/sp/sp_accumulator_gpu.hpp" #endif // DCA_HAVE_CUDA +#include "dca/config/mc_options.hpp" #include "dca/io/json/json_reader.hpp" #include "dca/math/random/std_random_wrapper.hpp" #include "dca/linalg/util/cuda_event.hpp" @@ -71,7 +72,7 @@ using Parameters = dca::phys::params::Parameters; using Data = dca::phys::DcaData; -using Real = double; +using Real = typename dca::config::McOptions::MCScalar; template using MatrixPair = std::array, 2>; @@ -101,7 +102,8 @@ int main(int argc, char** argv) { Configuration config; prepareRandomConfig(config, M, n); - dca::phys::solver::accumulator::SpAccumulator accumulator(parameters); + dca::phys::solver::accumulator::SpAccumulator accumulator( + parameters); accumulator.resetAccumulation(); // Allows memory to be assigned. @@ -122,7 +124,7 @@ int main(int argc, char** argv) { const double time = duration(end_time, start_time); std::string precision("double"); - if (std::is_same::Scalar>::value) + if (std::is_same_v) precision = "single"; std::cout << "\nExpansion order:\t" << n; @@ -136,26 +138,26 @@ int main(int argc, char** argv) { dca::linalg::util::CudaEvent start_event; dca::linalg::util::CudaEvent stop_event; - dca::phys::solver::accumulator::SpAccumulator gpu_accumulator( + dca::phys::solver::accumulator::SpAccumulator gpu_accumulator( parameters); MatrixPair M_dev{M[0], M[1]}; // Allows memory to be assigned. gpu_accumulator.resetAccumulation(); gpu_accumulator.accumulate(M_dev, config, sign); - cudaStreamSynchronize(gpu_accumulator.get_streams()[0]); - cudaStreamSynchronize(gpu_accumulator.get_streams()[1]); + gpu_accumulator.get_streams()[0]->sync(); + gpu_accumulator.get_streams()[1]->sync(); gpu_accumulator.resetAccumulation(); Profiler::start(); cudaProfilerStart(); // Profile Single invocation. - start_event.record(gpu_accumulator.get_streams()[0]); + start_event.record(*gpu_accumulator.get_streams()[0]); dca::profiling::WallTime host_start_time; gpu_accumulator.accumulate(M_dev, config, sign); dca::profiling::WallTime host_end_time; - stop_event.record(gpu_accumulator.get_streams()[1]); + stop_event.record(*gpu_accumulator.get_streams()[1]); const double host_time = duration(host_end_time, host_start_time); const double dev_time = dca::linalg::util::elapsedTime(stop_event, start_event); diff --git a/test/performance/phys/accumulation/tp_accumulation_performance_test.cpp b/test/performance/phys/accumulation/tp_accumulation_performance_test.cpp index 571aa8aee..c386f5d29 100644 --- a/test/performance/phys/accumulation/tp_accumulation_performance_test.cpp +++ b/test/performance/phys/accumulation/tp_accumulation_performance_test.cpp @@ -146,18 +146,18 @@ int main(int argc, char** argv) { // Allow memory to be assigned. gpu_accumulator.accumulate(M_dev, config, sign); - cudaStreamSynchronize(gpu_accumulator.get_stream()); + cudaDeviceSynchronize(); gpu_accumulator.resetAccumulation(1); Profiler::start(); cudaProfilerStart(); // Time a single execution. - start_event.record(gpu_accumulator.get_stream()); + start_event.record(*gpu_accumulator.get_stream()); dca::profiling::WallTime host_start_time; gpu_accumulator.accumulate(M_dev, config, sign); dca::profiling::WallTime host_end_time; - stop_event.record(gpu_accumulator.get_stream()); + stop_event.record(*gpu_accumulator.get_stream()); const double host_time = duration(host_end_time, host_start_time); const double dev_time = dca::linalg::util::elapsedTime(stop_event, start_event); diff --git a/test/performance/phys/ctaux/ctaux_walker_performance_test.cpp b/test/performance/phys/ctaux/ctaux_walker_performance_test.cpp index 4c7005281..d92875772 100644 --- a/test/performance/phys/ctaux/ctaux_walker_performance_test.cpp +++ b/test/performance/phys/ctaux/ctaux_walker_performance_test.cpp @@ -106,7 +106,7 @@ int main(int argc, char** argv) { // Do one integration step. RngType rng(0, 1, 0); Walker walker(parameters, data, rng, 0); - walker.initialize(); + walker.initialize(0); do_sweeps(walker, n_warmup); std::cout << "\n Warmed up.\n" << std::endl; @@ -135,7 +135,7 @@ int main(int argc, char** argv) { RngType::resetCounter(); RngType rng(0, 1, 0); Walker walker_gpu(parameters, data, rng, 0); - walker_gpu.initialize(); + walker_gpu.initialize(0); do_sweeps(walker_gpu, n_warmup); std::cout << "\n Warmed up.\n" << std::endl; diff --git a/test/performance/phys/ctint/ctint_walker_performance_test.cpp b/test/performance/phys/ctint/ctint_walker_performance_test.cpp index 40d3434ff..d3fd8ebf1 100644 --- a/test/performance/phys/ctint/ctint_walker_performance_test.cpp +++ b/test/performance/phys/ctint/ctint_walker_performance_test.cpp @@ -129,7 +129,7 @@ int main(int argc, char** argv) { // Do one integration step. RngType rng(0, 1, 0); Walker walker(parameters, data, rng); - walker.initialize(); + walker.initialize(0); // Timed section. dca::profiling::WallTime start_t; @@ -153,7 +153,7 @@ int main(int argc, char** argv) { RngType::resetCounter(); RngType rng(0, 1, 0); Walker walker_gpu(parameters, data, rng, 0); - walker_gpu.initialize(); + walker_gpu.initialize(0); // Timed section. cudaProfilerStart(); diff --git a/test/unit/math/function_transform/space_transform_2D_gpu_test.cpp b/test/unit/math/function_transform/space_transform_2D_gpu_test.cpp index 976920d26..cf5ffe233 100644 --- a/test/unit/math/function_transform/space_transform_2D_gpu_test.cpp +++ b/test/unit/math/function_transform/space_transform_2D_gpu_test.cpp @@ -99,13 +99,15 @@ TYPED_TEST(SpaceTransform2DGpuTest, Execute) { dca::math::transform::SpaceTransform2D::execute(f_in, f_out); // Transform on the GPU. - RMatrix M_dev(M_in); - magma_queue_t queue; - magma_queue_create(&queue); + dca::linalg::ReshapableMatrix> + M_dev(M_in); + + dca::linalg::util::MagmaQueue queue; + dca::math::transform::SpaceTransform2DGpu transform_obj(nw, queue); transform_obj.execute(M_dev); - cudaStreamSynchronize(transform_obj.get_stream()); - magma_queue_destroy(queue); + + queue.getStream().sync(); constexpr Real tolerance = std::numeric_limits::epsilon() * 500; diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/CMakeLists.txt b/test/unit/phys/dca_step/cluster_solver/ctint/CMakeLists.txt index cf7ea3e62..81456f70d 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/CMakeLists.txt +++ b/test/unit/phys/dca_step/cluster_solver/ctint/CMakeLists.txt @@ -1,2 +1,12 @@ +set(TEST_INCLUDES ${DCA_INCLUDE_DIRS};${PROJECT_SOURCE_DIR}) +set(TEST_LIBS ${DCA_LIBS}) + +dca_add_gtest(ct_int_solver_methods_test + FAST + GTEST_MAIN + INCLUDE_DIRS ${TEST_INCLUDES} + LIBS ${TEST_LIBS} +) + add_subdirectory(structs) add_subdirectory(walker) diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/ct_int_solver_methods_test.cpp b/test/unit/phys/dca_step/cluster_solver/ctint/ct_int_solver_methods_test.cpp new file mode 100644 index 000000000..d9a2765bb --- /dev/null +++ b/test/unit/phys/dca_step/cluster_solver/ctint/ct_int_solver_methods_test.cpp @@ -0,0 +1,83 @@ +#include "dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp" + +#include "gtest/gtest.h" + +#include "dca/phys/parameters/parameters.hpp" +#include "test/unit/phys/dca_step/cluster_solver/stub_rng.hpp" +#include "dca/phys/domains/cluster/symmetries/point_groups/2d/2d_square.hpp" +#include "dca/phys/models/analytic_hamiltonians/square_lattice.hpp" +#include "dca/parallel/no_concurrency/no_concurrency.hpp" +#include "dca/parallel/no_threading/no_threading.hpp" +#include "dca/phys/dca_step/cluster_solver/ctint/domains/common_domains.hpp" +#include "dca/phys/dca_data/dca_data.hpp" +#include "dca/profiling/null_profiler.hpp" + +using RngType = dca::testing::StubRng; +using LatticeType = dca::phys::models::square_lattice; +using ModelType = dca::phys::models::TightBindingModel; +using Concurrency = dca::parallel::NoConcurrency; +using Parameters = + dca::phys::params::Parameters; +using Data = dca::phys::DcaData; + +struct SolverWrapper + : public dca::phys::solver::CtintClusterSolver { + using BaseClass = dca::phys::solver::CtintClusterSolver; + + SolverWrapper(Parameters& parameters_ref, typename BaseClass::Data& data_ref) + : BaseClass(parameters_ref, data_ref) {} + + using BaseClass::computeSigma; + using BaseClass::computeG_k_w; + using Nu = dca::func::dmn_variadic, + dca::func::dmn_0>; + using Rdmn = typename Parameters::RClusterDmn; + using Kdmn = typename Parameters::KClusterDmn; + using Wdmn = typename Parameters::WDmn; + + using F_k_w = + dca::func::function, dca::func::dmn_variadic>; + void setGandMAnalytic(F_k_w& G, F_k_w& G0, F_k_w& M) const; +}; + +TEST(CtintClusterSolvertest, ComputeSigma) { + Concurrency concurrency(1, NULL); + Parameters parameters("", concurrency); + parameters.read_input_and_broadcast( + DCA_SOURCE_DIR "/test/unit/phys/dca_step/cluster_solver/ctint/inputs/input_single_site.json"); + parameters.update_model(); + parameters.update_domains(); + + Data data(parameters); + + SolverWrapper solver(parameters, data); + SolverWrapper::F_k_w G0, G, M, Sigma; + solver.setGandMAnalytic(G, G0, M); + + solver.computeSigma(G, G0, Sigma); + + const double U = parameters.get_U(); + const double tollerance = 1e-10; + for (int s = 0; s < 2; s++) + for (int w_idx = 0; w_idx < SolverWrapper::Wdmn::dmn_size(); w_idx++) { + const double w = SolverWrapper::Wdmn::get_elements()[w_idx]; + EXPECT_NEAR(-U * U / (4. * w), Sigma(s, s, 0, w_idx).imag(), tollerance); + EXPECT_NEAR(0, Sigma(s, s, 0, w_idx).real(), tollerance); + } +} + +void SolverWrapper::setGandMAnalytic(F_k_w& G, F_k_w& G0, F_k_w& M) const { + const double U = parameters_.get_U(); + + for (int i = 0; i < Wdmn::dmn_size(); i++) { + for (int spin = 0; spin < 2; spin++) { + const double w = Wdmn::get_elements()[i]; + const std::complex iw(0, w); + + G0(spin, spin, 0, i) = 1. / iw; + G(spin, spin, 0, i) = 1. / (iw - U * U / (4. * iw)); + M(spin, spin, 0, i) = iw + w * w / (iw - U * U / (4. * iw)); + } + } +} diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/inputs/input_single_site.json b/test/unit/phys/dca_step/cluster_solver/ctint/inputs/input_single_site.json new file mode 100644 index 000000000..44161e99e --- /dev/null +++ b/test/unit/phys/dca_step/cluster_solver/ctint/inputs/input_single_site.json @@ -0,0 +1,38 @@ +{ + "physics" : + { + "beta" : 10, + "chemical-potential" : 0 + }, + + "single-band-Hubbard-model": + { + "t" : 1, + "U" : 2 + }, + + "domains": { + "real-space-grids": { + "cluster": [[1, 0], + [0, 1]] + }, + + "imaginary-time": { + "sp-time-intervals": 128 + }, + + "imaginary-frequency": { + "sp-fermionic-frequencies": 128 + } + } +} + + + + + + + + + + diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_test.cpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_test.cpp index 8c4c2e64a..ad4f3d540 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_test.cpp @@ -67,13 +67,12 @@ TYPED_TEST(CtintWalkerTest, InsertAndRemoveVertex) { Walker::setInteractionVertices(data, parameters); Walker walker(parameters, rng); - walker.initialize(); // ******************************* // Test vertex removal *********** // ******************************* - // Set rng value to select: last vertex, accept - rng.setNewValues(std::vector{0.95, 0.01}); + // Set rng value to select: last vertex, unused, unused, accept + rng.setNewValues(std::vector{0.95, -1, -1, 0.01}); MatrixPair old_M(walker.getM()); bool result = walker.tryVertexRemoval(); MatrixPair new_M(walker.getM()); diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper.hpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper.hpp index d99af2bb1..7fa33d22a 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper.hpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper.hpp @@ -30,7 +30,7 @@ class WalkerWrapper : public CtintWalker { WalkerWrapper(Parameters& parameters_ref, Rng& rng_ref) : BaseClass(parameters_ref, dca::phys::DcaData(parameters_ref), rng_ref, 0) { - BaseClass::initialize(); + BaseClass::initialize(0); } using BaseClass::doStep; diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp index a1a72c808..978bcab38 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp @@ -56,7 +56,7 @@ struct WalkerWrapperSubmatrix : public WalkerSelector(parameters_ref), rng_ref, 0), streams_(3) { - BaseClass::initialize(); + BaseClass::initialize(0); } void doStep(const int n_steps_to_delay) { diff --git a/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu_test.cpp b/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu_test.cpp index f5421f6b9..5e424cdc4 100644 --- a/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/ndft/cached_ndft_gpu_test.cpp @@ -16,7 +16,6 @@ #include "gtest/gtest.h" -//#include "dca/config/mc_options.hpp" #include "dca/function/util/difference.hpp" #include "dca/linalg/matrix.hpp" #include "dca/linalg/reshapable_matrix.hpp" diff --git a/test/unit/phys/parameters/mci_parameters/input_read_all.json b/test/unit/phys/parameters/mci_parameters/input_read_all.json index 16dd7f649..32cd9c00f 100644 --- a/test/unit/phys/parameters/mci_parameters/input_read_all.json +++ b/test/unit/phys/parameters/mci_parameters/input_read_all.json @@ -2,8 +2,8 @@ "Monte-Carlo-integration": { "seed": 42, "warm-up-sweeps": 40, - "sweeps-per-measurement": 4., - "measurements": 200, + "sweeps-per-measurement": [1.5, 1.5, 4.], + "measurements": [100, 150, 200], "error-computation-type" : "JACK_KNIFE", "store-configuration" : false, "g4-distribution": "MPI", diff --git a/test/unit/phys/parameters/mci_parameters/input_scalar_for_vec.json b/test/unit/phys/parameters/mci_parameters/input_scalar_for_vec.json new file mode 100644 index 000000000..125ee44f3 --- /dev/null +++ b/test/unit/phys/parameters/mci_parameters/input_scalar_for_vec.json @@ -0,0 +1,6 @@ +{ + "Monte-Carlo-integration": { + "sweeps-per-measurement": 4., + "measurements": 200 + } +} diff --git a/test/unit/phys/parameters/mci_parameters/mci_parameters_test.cpp b/test/unit/phys/parameters/mci_parameters/mci_parameters_test.cpp index 10a8c9e5b..7b0a736b8 100644 --- a/test/unit/phys/parameters/mci_parameters/mci_parameters_test.cpp +++ b/test/unit/phys/parameters/mci_parameters/mci_parameters_test.cpp @@ -28,8 +28,8 @@ TEST(MciParametersTest, DefaultValues) { EXPECT_EQ(985456376, pars.get_seed()); EXPECT_EQ(20, pars.get_warm_up_sweeps()); - EXPECT_EQ(1., pars.get_sweeps_per_measurement()); - EXPECT_EQ(100, pars.get_measurements()); + EXPECT_EQ(std::vector{1.}, pars.get_sweeps_per_measurement()); + EXPECT_EQ(std::vector{100}, pars.get_measurements()); EXPECT_EQ(dca::phys::ErrorComputationType::NONE, pars.get_error_computation_type()); EXPECT_EQ(1, pars.get_walkers()); EXPECT_EQ(1, pars.get_accumulators()); @@ -48,8 +48,10 @@ TEST(MciParametersTest, ReadAll) { EXPECT_EQ(42, pars.get_seed()); EXPECT_EQ(40, pars.get_warm_up_sweeps()); - EXPECT_EQ(4., pars.get_sweeps_per_measurement()); - EXPECT_EQ(200, pars.get_measurements()); + const auto expected_sweeps = std::vector{1.5, 1.5, 4}; + EXPECT_EQ(expected_sweeps, pars.get_sweeps_per_measurement()); + const auto expected_meas = std::vector{100, 150, 200}; + EXPECT_EQ(expected_meas, pars.get_measurements()); EXPECT_EQ(dca::phys::ErrorComputationType::JACK_KNIFE, pars.get_error_computation_type()); EXPECT_EQ(3, pars.get_walkers()); EXPECT_EQ(5, pars.get_accumulators()); @@ -81,51 +83,18 @@ TEST(MciParametersTest, ReadNegativeIntegerSeed) { EXPECT_EQ(-1, pars.get_seed()); } -// Revise these tests when the JSON reader has been refactored. -// TEST(MciParametersTest, ReadTooLargeSeed) { -// // Generate an input file that contains a number that is larger than the maximum value of int. -// const int max = std::numeric_limits::max(); -// std::ofstream input; -// input.open("input_too_large_seed.json"); -// input << "{\n" -// << " \"Monte-Carlo-integration\": {\n" -// << " \"seed\": " << max << "0\n" // Writes max * 10. -// << " }\n" -// << "}\n"; -// input.close(); - -// dca::io::JSONReader reader; -// dca::phys::params::MciParameters pars; - -// reader.open_file("input_too_large_seed.json"); -// pars.readWrite(reader); -// reader.close_file(); - -// EXPECT_EQ(max, pars.get_seed()); -// } - -// TEST(MciParametersTest, ReadTooSmallSeed) { -// // Generate an input file that contains a number that is smaller than the minimum value of int. -// int min = std::numeric_limits::min(); -// long long llmin = min - 10; -// std::ofstream input; -// input.open("input_too_small_seed.json"); -// input << "{\n" -// << " \"Monte-Carlo-integration\": {\n" -// << " \"seed\": " << llmin << "\n" -// << " }\n" -// << "}\n"; -// input.close(); - -// dca::io::JSONReader reader; -// dca::phys::params::MciParameters pars; - -// reader.open_file("input_too_small_seed.json"); -// pars.readWrite(reader); -// reader.close_file(); - -// EXPECT_EQ(min, pars.get_seed()); -// } +TEST(MciParametersTest, ReadScalarForVector) { + dca::io::JSONReader reader; + dca::phys::params::MciParameters pars; + + reader.open_file(DCA_SOURCE_DIR + "/test/unit/phys/parameters/mci_parameters/input_scalar_for_vec.json"); + pars.readWrite(reader); + reader.close_file(); + + EXPECT_EQ(std::vector{4.}, pars.get_sweeps_per_measurement()); + EXPECT_EQ(std::vector{200}, pars.get_measurements()); +} TEST(MciParametersTest, RandomSeed) { // The input file contains the seeding option "random" instead of a number.