# Patch summary (descriptive notes only; the patch text below is unchanged).
#
# benchmarks/dataframe_performance.cc
#   - ALIGNMENT changes from 256 to 64, a SIZE constant (100000000) is added,
#     and MyDataFrame is re-typedef'd from StdDataFrame256 to StdDataFrame64.
#   - The "Starting ... " message is removed.
#   - The index is built with gen_sequence_index(0, SIZE, 1) instead of
#     gen_datetime_index(...); each generated column is sized by SIZE instead
#     of by the index length.
#   - The three time(nullptr) timestamps become high_resolution_clock::now();
#     each elapsed value is printed as a duration count divided by 1000000.0.
#   - The three MeanVisitor runs become: MeanVisitor on "normal", VarVisitor on
#     "log_normal", and CorrVisitor on ("exponential", "log_normal").
#
# benchmarks/polars_performance.py (new file)
#   - Builds a polars DataFrame with three numpy-generated random columns of
#     SIZE elements, then computes mean of "normal", var of "log_normal", and
#     pl.corr("exponential", "log_normal"), printing timings taken with
#     datetime.datetime.now().
#
# NOTE(review): `#include` appears with no header name, `duration_cast(` with
#   no template argument, and the typedef/visitor types with no parameters --
#   presumably angle-bracketed text was lost when this patch was extracted.
#   TODO confirm against the repository before applying.
# NOTE(review): the Python timing output formats elapsed time as
#   f"{d.seconds}.{d.microseconds}"; microseconds is not zero-padded there, so
#   e.g. 1 s + 5000 us prints as "1.5000". Consider total_seconds().
# NOTE(review): `m3: float` is assigned the result of df.select(...), which is
#   presumably a DataFrame rather than a float -- verify.
# NOTE(review): both status messages still say "Calculating means" although a
#   variance and a correlation are now computed as well.
# NOTE(review): the final Python print(...) statement ends with a stray `;`.
#
diff --git a/benchmarks/dataframe_performance.cc b/benchmarks/dataframe_performance.cc index d6e9a227..7321b3f4 100644 --- a/benchmarks/dataframe_performance.cc +++ b/benchmarks/dataframe_performance.cc @@ -29,53 +29,54 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include using namespace hmdf; -constexpr std::size_t ALIGNMENT = 256; +constexpr std::size_t ALIGNMENT = 64; +constexpr std::size_t SIZE = 100000000; -typedef StdDataFrame256 MyDataFrame; +typedef StdDataFrame64 MyDataFrame; // ----------------------------------------------------------------------------- -int main(int, char *[]) { +using namespace std::chrono; - std::cout << "Starting ... " << std::endl; +int main(int, char *[]) { - const auto first = time(nullptr); - auto index_vec = - MyDataFrame::gen_datetime_index("01/01/1970", "08/15/2019", - time_frequency::secondly, 1); - const auto index_sz = index_vec.size(); + const auto first = high_resolution_clock::now(); MyDataFrame df; df.load_data( - std::move(index_vec), - std::make_pair("normal", gen_normal_dist(index_sz)), - std::make_pair("log_normal", gen_lognormal_dist(index_sz)), - std::make_pair("exponential", gen_exponential_dist(index_sz))); + MyDataFrame::gen_sequence_index(0, SIZE, 1), + std::make_pair("normal", gen_normal_dist(SIZE)), + std::make_pair("log_normal", gen_lognormal_dist(SIZE)), + std::make_pair("exponential", gen_exponential_dist(SIZE))); - const auto second = time(nullptr); + const auto second = high_resolution_clock::now(); std::cout << "All data loadings are done. Calculating means ... 
" - << second - first << std::endl; + << double(duration_cast(second - first).count()) / 1000000.0 + << std::endl; MeanVisitor n_mv; - MeanVisitor ln_mv; - MeanVisitor e_mv; + VarVisitor ln_vv; + CorrVisitor e_ln_cv; auto fut1 = df.visit_async("normal", n_mv); - auto fut2 = df.visit_async("log_normal", ln_mv); - auto fut3 = df.visit_async("exponential", e_mv); + auto fut2 = df.visit_async("log_normal", ln_vv); + auto fut3 = df.visit_async("exponential", "log_normal", e_ln_cv); std::cout << fut1.get().get_result() << ", " << fut2.get().get_result() << ", " << fut3.get().get_result() << std::endl; - const auto third = time(nullptr); + const auto third = high_resolution_clock::now(); - std::cout << third - second << ", " << third - first + std::cout << double(duration_cast(third - second).count()) / 1000000.0 + << ", " + << double(duration_cast(third - first).count()) / 1000000.0 << " All done" << std::endl; return (0); } diff --git a/benchmarks/polars_performance.py b/benchmarks/polars_performance.py new file mode 100644 index 00000000..ce0407e4 --- /dev/null +++ b/benchmarks/polars_performance.py @@ -0,0 +1,35 @@ +import datetime +import numpy as np +import polars as pl + +# ------------------------------------------------------------------------------ + +SIZE: int = 100000000 + +first = datetime.datetime.now() +df = pl.DataFrame({"normal": np.random.normal(size=SIZE), + "log_normal": np.random.lognormal(size=SIZE), + "exponential": np.random.exponential(size=SIZE), + }) +second = datetime.datetime.now() +print(f"All data loadings are done. Calculating means ... 
" + f"{(second - first).seconds}.{(second - first).microseconds}") + +m1: float = df["normal"].mean() +m2: float = df["log_normal"].var() +m3: float = df.select(pl.corr("exponential", "log_normal")) + +print(f"{m1}, {m2}, {m3}") +third = datetime.datetime.now() + + +print(f"{(third - second).seconds}.{(third - second).microseconds}, " + f"{(third - first).seconds}.{(third - first).microseconds} All done"); + +# ------------------------------------------------------------------------------ + +# Local Variables: +# mode:Python +# tab-width:4 +# c-basic-offset:4 +# End: