From 47bca3205b1c1a31f65071c1c79cc194546cf909 Mon Sep 17 00:00:00 2001 From: Hossein Moein Date: Fri, 1 Nov 2024 14:42:42 -0400 Subject: [PATCH] Made assign() behave correctly in all situations --- docs/HTML/DataFrame.html | 4 + docs/HTML/assign.html | 132 ++++++++++++++++++++++ include/DataFrame/DataFrame.h | 2 + include/DataFrame/Internals/DataFrame.tcc | 82 ++++++++++++-- test/dataframe_tester_4.cc | 58 ++++++++++ 5 files changed, 271 insertions(+), 7 deletions(-) create mode 100644 docs/HTML/assign.html diff --git a/docs/HTML/DataFrame.html b/docs/HTML/DataFrame.html index 137c3ab6..3e590ede 100644 --- a/docs/HTML/DataFrame.html +++ b/docs/HTML/DataFrame.html @@ -629,6 +629,10 @@

API Reference with code samples &# apply( 3 ) + + assign() + + multi_visit() diff --git a/docs/HTML/assign.html b/docs/HTML/assign.html new file mode 100644 index 00000000..5649d0b3 --- /dev/null +++ b/docs/HTML/assign.html @@ -0,0 +1,132 @@ + + + + + + + + + + Back to Documentations

+ + + + + + + + + + + + +
Signature Description Parameters
+

+template<typename OTHER, typename ... Ts>
+DataFrame &
+assign(const OTHER &rhs);
+
+template<typename OTHER, typename ... Ts>
+DataFrame &
+assign(OTHER &rhs);
+        
+
+ The purpose of assign() is to make it possible to copy any kind of DataFrame into any other kind. For example, you can assign a View to a DataFrame, a DataFrame to a View, or a PtrView to a View. assign() copies the data from rhs to self and returns a reference to self.
+
+ OTHER: DataFrame type of rhs
+ Ts: The list of the types of all data columns. Each type should be specified in the list only once.
+ rhs: The DataFrame instance to copy from
+
+ +
static void test_view_assign()  {
+    // Sample/test: uses assign() between plain DataFrames, Views and PtrViews and checks sizes and spot values.
+    std::cout << "\nTesting view_assign( ) ..." << std::endl;
+    // StdDataFrame64 instantiated with std::string (presumably the index type -- confirm against StdDataFrame64).
+    typedef StdDataFrame64<std::string> StrDataFrame;
+
+    StrDataFrame    df;
+    // Load the sample CSV. A DataFrameError is only printed, so execution still reaches the asserts below.
+    try  {
+        df.read("SHORT_IBM.csv", io_format::csv2);
+    }
+    catch (const DataFrameError &ex)  {
+        std::cout << ex.what() << std::endl;
+    }
+    // lbd returns true unconditionally; it is the selector handed to get_view_by_sel() for "IBM_Open".
+    auto    lbd = [](const std::string &, const double &) -> bool { return (true); };
+    auto    ptr_view = df.get_view_by_sel<double, decltype(lbd), double, long>("IBM_Open", lbd);
+    auto    view = df.get_view_by_loc<double, long>(Index2D<long>{ 100, 500 });
+    // Case 1: assign the two views obtained above to plain StrDataFrame objects.
+    StrDataFrame    df2;
+    StrDataFrame    df3;
+
+    df2.assign<decltype(ptr_view), double, long>(ptr_view);
+    df3.assign<decltype(view), double, long>(view);
+    // Expected index sizes: 1721 for the selection view, 400 for the { 100, 500 } location view.
+    assert(df2.get_index().size() == 1721);
+    assert(df3.get_index().size() == 400);
+    assert(std::fabs(df2.get_column<double>("IBM_Open")[100] - 184.48) < 0.001);
+    assert(df3.get_column<long>("IBM_Volume")[100] == 4350200);
+    // Case 2: assign the plain DataFrames from case 1 to a View and a PtrView; same expectations as above.
+    StrDataFrame::View      dfv;
+    StrDataFrame::PtrView   dfpv;
+
+    dfv.assign<StrDataFrame, double, long>(df2);
+    dfpv.assign<StrDataFrame, double, long>(df3);
+    assert(dfv.get_index().size() == 1721);
+    assert(dfpv.get_index().size() == 400);
+    assert(std::fabs(dfv.get_column<double>("IBM_Open")[100] - 184.48) < 0.001);
+    assert(dfpv.get_column<long>("IBM_Volume")[100] == 4350200);
+    // Case 3: cross-assign the view kinds -- a PtrView into a View and a View into a PtrView.
+    StrDataFrame::View      dfv2;
+    StrDataFrame::PtrView   dfpv2;
+    // dfpv was assigned from df3 (400 entries) and dfv from df2 (1721 entries), hence the swapped sizes below.
+    dfv2.assign<decltype(dfpv), double, long>(dfpv);
+    dfpv2.assign<decltype(dfv), double, long>(dfv);
+    assert(dfv2.get_index().size() == 400);
+    assert(dfpv2.get_index().size() == 1721);
+    assert(std::fabs(dfv2.get_column<double>("IBM_Open")[100] - 181.24) < 0.001);
+    assert(dfpv2.get_column<long>("IBM_Volume")[100] == 3721600);
+}
+
+ +
C++ DataFrame + + + + + diff --git a/include/DataFrame/DataFrame.h b/include/DataFrame/DataFrame.h index b9249ea3..6f2ce71e 100644 --- a/include/DataFrame/DataFrame.h +++ b/include/DataFrame/DataFrame.h @@ -126,6 +126,8 @@ class DataFrame : public ThreadGranularity { // template DataFrame &assign(const OTHER &rhs); + template + DataFrame &assign(OTHER &rhs); public: // Load/append/remove interfaces diff --git a/include/DataFrame/Internals/DataFrame.tcc b/include/DataFrame/Internals/DataFrame.tcc index c7353d22..abfcc05d 100644 --- a/include/DataFrame/Internals/DataFrame.tcc +++ b/include/DataFrame/Internals/DataFrame.tcc @@ -60,6 +60,53 @@ DataFrame::operator= (const DataFrame &that) { return (*this); } +// ---------------------------------------------------------------------------- +template +template +DataFrame & +DataFrame::assign(OTHER &rhs) { + + indices_.clear(); + indices_.reserve(rhs.indices_.size()); + if constexpr (std::is_base_of, H>::value) { + indices_.set_begin_end_special(&(rhs.indices_.front()), + &(rhs.indices_.back())); + } + else if constexpr (std::is_base_of, H>::value) { + for (auto &val : rhs.indices_) + indices_.push_back(&val); + } + else { + for (const auto &val : rhs.indices_) indices_.push_back(val); + } + + column_tb_.clear(); + column_list_.clear(); + + const SpinGuard guard(lock_); + + data_.clear(); + if constexpr (std::is_base_of, H>::value || + std::is_base_of, H>::value) { + for (const auto &[name, idx] : rhs.column_list_) [[likely]] { + view_setup_functor_ functor( + name.c_str(), 0, indices_.size(), *this); + + rhs.data_[idx].change(functor); + } + } + else { + for (const auto &[rhs_name, rhs_idx] : rhs.column_list_) { + load_all_functor_ functor( + rhs_name.c_str(), *this); + + rhs.data_[rhs_idx].change(functor); + } + } + + return (*this); +} + // ---------------------------------------------------------------------------- template @@ -69,7 +116,17 @@ DataFrame::assign(const OTHER &rhs) { indices_.clear(); 
indices_.reserve(rhs.indices_.size()); - for (const auto &val : rhs.indices_) indices_.push_back(val); + if constexpr (std::is_base_of, H>::value) { + indices_.set_begin_end_special(&(rhs.indices_.front()), + &(rhs.indices_.back())); + } + else if constexpr (std::is_base_of, H>::value) { + for (auto &val : rhs.indices_) + indices_.push_back(&val); + } + else { + for (const auto &val : rhs.indices_) indices_.push_back(val); + } column_tb_.clear(); column_list_.clear(); @@ -77,11 +134,22 @@ DataFrame::assign(const OTHER &rhs) { const SpinGuard guard(lock_); data_.clear(); - for (const auto &[rhs_name, rhs_idx] : rhs.column_list_) { - load_all_functor_ functor (rhs_name.c_str(), - *this); + if constexpr (std::is_base_of, H>::value || + std::is_base_of, H>::value) { + for (const auto &[name, idx] : rhs.column_list_) [[likely]] { + view_setup_functor_ functor( + name.c_str(), 0, indices_.size(), *this); + + rhs.data_[idx].change(functor); + } + } + else { + for (const auto &[rhs_name, rhs_idx] : rhs.column_list_) { + load_all_functor_ functor( + rhs_name.c_str(), *this); - rhs.data_[rhs_idx].change(functor); + rhs.data_[rhs_idx].change(functor); + } } return (*this); @@ -1085,7 +1153,7 @@ DataFrame::peaks(const char *col_name, size_type n) const { }; if (thread_level > 2) { - std::vector> futures; + std::vector> futures; if (n == 1) futures = thr_pool_.parallel_loop(n, col_s - n, std::move(lbd1)); @@ -1174,7 +1242,7 @@ DataFrame::valleys(const char *col_name, size_type n) const { }; if (thread_level > 2) { - std::vector> futures; + std::vector> futures; if (n == 1) futures = thr_pool_.parallel_loop(n, col_s - n, std::move(lbd1)); diff --git a/test/dataframe_tester_4.cc b/test/dataframe_tester_4.cc index 1f84c58b..bbe60b7e 100644 --- a/test/dataframe_tester_4.cc +++ b/test/dataframe_tester_4.cc @@ -1872,6 +1872,63 @@ void test_get_data_by_mshift() { // ---------------------------------------------------------------------------- +static void test_view_assign() { + + 
std::cout << "\nTesting view_assign( ) ..." << std::endl; + + typedef StdDataFrame64 StrDataFrame; + + StrDataFrame df; + + try { + df.read("SHORT_IBM.csv", io_format::csv2); + } + catch (const DataFrameError &ex) { + std::cout << ex.what() << std::endl; + } + + auto lbd = + [](const std::string &, const double &) -> bool { return (true); }; + auto ptr_view = + df.get_view_by_sel + ("IBM_Open", lbd); + auto view = df.get_view_by_loc(Index2D{ 100, 500 }); + + StrDataFrame df2; + StrDataFrame df3; + + df2.assign(ptr_view); + df3.assign(view); + + assert(df2.get_index().size() == 1721); + assert(df3.get_index().size() == 400); + assert(std::fabs(df2.get_column("IBM_Open")[100] - 184.48) < 0.001); + assert(df3.get_column("IBM_Volume")[100] == 4350200); + + StrDataFrame::View dfv; + StrDataFrame::PtrView dfpv; + + dfv.assign(df2); + dfpv.assign(df3); + assert(dfv.get_index().size() == 1721); + assert(dfpv.get_index().size() == 400); + assert(std::fabs(dfv.get_column("IBM_Open")[100] - 184.48) < 0.001); + assert(dfpv.get_column("IBM_Volume")[100] == 4350200); + + StrDataFrame::View dfv2; + StrDataFrame::PtrView dfpv2; + + dfv2.assign(dfpv); + dfpv2.assign(dfv); + assert(dfv2.get_index().size() == 400); + assert(dfpv2.get_index().size() == 1721); + assert(( + std::fabs(dfv2.get_column("IBM_Open")[100] - 181.24) < 0.001)); + assert(dfpv2.get_column("IBM_Volume")[100] == 3721600); +} + +// ---------------------------------------------------------------------------- + int main(int, char *[]) { test_starts_with(); @@ -1905,6 +1962,7 @@ int main(int, char *[]) { test_MeanShiftVisitor(); test_get_data_by_dbscan(); test_get_data_by_mshift(); + test_view_assign(); return (0); }