From a4ca9046f3c43ceacf43ddbfe43df4e6d8f980fe Mon Sep 17 00:00:00 2001 From: Hossein Moein Date: Fri, 6 Sep 2024 11:18:04 -0400 Subject: [PATCH] Implemented get_[data|view]_after_times() --- docs/HTML/DataFrame.html | 4 + docs/HTML/get_data_after_times.html | 161 ++++++++++++++++++ include/DataFrame/DataFrame.h | 43 +++++ .../DataFrame/Internals/DataFrame_slice.tcc | 127 ++++++++++++++ test/dataframe_tester_4.cc | 35 ++++ 5 files changed, 370 insertions(+) create mode 100644 docs/HTML/get_data_after_times.html diff --git a/docs/HTML/DataFrame.html b/docs/HTML/DataFrame.html index 023b3f45..fb54d190 100644 --- a/docs/HTML/DataFrame.html +++ b/docs/HTML/DataFrame.html @@ -358,6 +358,10 @@

API Reference with code samples &# get_data()
get_view()
+ + get_data_after_times()
get_view_after_times()
+ + get_data_at_times()
get_view_at_times()
diff --git a/docs/HTML/get_data_after_times.html b/docs/HTML/get_data_after_times.html new file mode 100644 index 00000000..c4918a7c --- /dev/null +++ b/docs/HTML/get_data_after_times.html @@ -0,0 +1,161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Signature Description Parameters
+

+template<typename ... Ts>
+DataFrame<DateTime, H>
+get_data_after_times(DateTime::HourType hour,  // 24 hour
+                     DateTime::MinuteType minute = 0,
+                     DateTime::SecondType second = 0,
+                     DateTime::MillisecondType msec = 0) const;
+        
+
+ This selects the rows whose index time of day, on any date, is later than the specified time. It returns another DataFrame with the selected data, indexed by DateTime. Rows that fall exactly on the specified time are excluded. Self is unchanged.

+ NOTE: The index column type must be DateTime; otherwise the call does not compile.
+
+ Ts: List all the types of all data columns. A type should be specified in the list only once.
+ hour: Specified hour, in 24-hour notation
+ minute: Specified minute
+ second: Specified second
+ msec: Specified millisecond
+
+

+template<typename ... Ts>
+PtrView
+get_view_after_times(DateTime::HourType hour,  // 24 hour
+                     DateTime::MinuteType minute = 0,
+                     DateTime::SecondType second = 0,
+                     DateTime::MillisecondType msec = 0);
+        
+
+ It behaves like get_data_after_times(), but it returns a View. A view is a DataFrame that refers to the data of the original DataFrame, so if you modify anything through the view, the original DataFrame is modified as well.

+ NOTE: There are certain operations that you cannot do with a view. For example, you cannot add/delete columns, etc.
+
+ Ts: List all the types of all data columns. A type should be specified in the list only once.
+ hour: Specified hour, in 24-hour notation
+ minute: Specified minute
+ second: Specified second
+ msec: Specified millisecond
+
+

+template<typename ... Ts>
+ConstPtrView
+get_view_after_times(DateTime::HourType hour,  // 24 hour
+                     DateTime::MinuteType minute = 0,
+                     DateTime::SecondType second = 0,
+                     DateTime::MillisecondType msec = 0) const;
+        
+
+ Same as the view overload above, but it returns a const view. Data cannot be changed through a const view, but changes made in the original DataFrame or through another view are reflected in it. + + Ts: List all the types of all data columns. A type should be specified in the list only once.
+ hour: Specified hour, in 24-hour notation
+ minute: Specified minute
+ second: Specified second
+ msec: Specified millisecond
+
+ +
static void test_get_data_after_times()  {  // Sample/test: selects rows after 23:40 from DT_Intraday.csv and checks the result
+
+    std::cout << "\nTesting load_get_data_after_times( ) ..." << std::endl;
+
+    DTDataFrame df;
+
+    try  {
+        df.read("DT_Intraday.csv", io_format::csv2);
+    }
+    catch (const DataFrameError &ex)  {
+        std::cout << ex.what() << std::endl;  // A read failure is only printed; the asserts below still run
+    }
+
+    const auto  result = df.get_view_after_times<double, long>(23, 40);  // View variant is exercised; <double, long> lists the column types
+
+    assert(result.get_index().size() == 137);  // 137 rows expected; index and both columns are sampled at 0, 10, 100 and 136 (last)
+    assert(result.get_index()[0].date() == 19861118);
+    assert(result.get_index()[10].date() == 19861203);
+    assert(result.get_index()[100].date() == 19870423);
+    assert(result.get_index()[136].date() == 19870623);  // Selected rows span several dates: only the time of day is filtered
+    assert(result.get_column<double>("dbl value").size() == 137);
+    assert(result.get_column<double>("dbl value")[0] == 78.0);
+    assert(result.get_column<double>("dbl value")[10] == 415.0);
+    assert(result.get_column<double>("dbl value")[100] == 3601.5);
+    assert(result.get_column<double>("dbl value")[136] == 4995.0);
+    assert(result.get_column<long>("lng value").size() == 137);
+    assert(result.get_column<long>("lng value")[0] == 1560);
+    assert(result.get_column<long>("lng value")[10] == 8300);
+    assert(result.get_column<long>("lng value")[100] == 72030);
+    assert(result.get_column<long>("lng value")[136] == 99900);
+}
+
+ +
C++ DataFrame + + + + + diff --git a/include/DataFrame/DataFrame.h b/include/DataFrame/DataFrame.h index c1883385..956ae7dc 100644 --- a/include/DataFrame/DataFrame.h +++ b/include/DataFrame/DataFrame.h @@ -3759,6 +3759,49 @@ class DataFrame : public ThreadGranularity { DateTime::SecondType sc = 0, DateTime::MillisecondType msc = 0) const; + // This selects the rows using the index column that happen after the + // specified time. It returns another DataFrame with selected data indexed + // by DateTime. The specified times are excluded. Self is unchanged. + // + // NOTE: The index column type must be DateTime or it won’t compile + // + // Ts: + // List all the types of all data columns. A type should be specified in + // the list only once. + // hr: + // Specified hour + // mn: + // Specified minute + // sc: + // Specified second + // msc: + // Specified milli-second + // + template + [[nodiscard]] DataFrame> + get_data_after_times(DateTime::HourType hr, // 24 hour notation + DateTime::MinuteType mn = 0, + DateTime::SecondType sc = 0, + DateTime::MillisecondType msc = 0) const; + + // Same as get_data_after_times() above, but it returns a view + // + template + [[nodiscard]] PtrView + get_view_after_times(DateTime::HourType hr, // 24 hour notation + DateTime::MinuteType mn = 0, + DateTime::SecondType sc = 0, + DateTime::MillisecondType msc = 0); + + // Same as get_view_after_times() above, but it returns a const view + // + template + [[nodiscard]] ConstPtrView + get_view_after_times(DateTime::HourType hr, // 24 hour notation + DateTime::MinuteType mn = 0, + DateTime::SecondType sc = 0, + DateTime::MillisecondType msc = 0) const; + public: // Visitors // apply is a shortcut for a simple visit. 
It applies the func to every diff --git a/include/DataFrame/Internals/DataFrame_slice.tcc b/include/DataFrame/Internals/DataFrame_slice.tcc index 16b03dae..40a2b032 100644 --- a/include/DataFrame/Internals/DataFrame_slice.tcc +++ b/include/DataFrame/Internals/DataFrame_slice.tcc @@ -2647,6 +2647,133 @@ get_view_before_times(DateTime::HourType hr, return (view_by_sel_common_(col_indices, idx_s)); } +// ---------------------------------------------------------------------------- + +// Returns a new DataFrame with the rows whose index time of day is strictly later than hr:mn:sc.msc; the date part of the index is not compared. +template +template +DataFrame> +DataFrame:: +get_data_after_times(DateTime::HourType hr, + DateTime::MinuteType mn, + DateTime::SecondType sc, + DateTime::MillisecondType msc) const { + + static_assert( + std::is_base_of::value, + "Index type must be DateTime to call get_data_after_times()"); + + const size_type idx_s = indices_.size(); + StlVecType col_indices; + + col_indices.reserve(idx_s / 5); + // Lexicographic test on (hour, minute, second, millisecond): a row is kept only if it is strictly later than the given time + for (size_type i = 0; i < idx_s; ++i) { + if (indices_[i].hour() > hr) { + col_indices.push_back(i); + } + else if (indices_[i].hour() == hr) { + if (indices_[i].minute() > mn) { + col_indices.push_back(i); + } + else if (indices_[i].minute() == mn) { + if (indices_[i].sec() > sc) { + col_indices.push_back(i); + } + else if (indices_[i].sec() == sc) { + if (indices_[i].msec() > msc) { + col_indices.push_back(i); + } + } + } + } + } + + return (data_by_sel_common_(col_indices, idx_s)); +} + +// ---------------------------------------------------------------------------- + +// Same row selection as get_data_after_times(), but the result is built by view_by_sel_common_() and returned as a PtrView. +template +template +typename DataFrame::PtrView DataFrame:: +get_view_after_times(DateTime::HourType hr, + DateTime::MinuteType mn, + DateTime::SecondType sc, + DateTime::MillisecondType msc) { + + static_assert( + std::is_base_of::value, + "Index type must be DateTime to call get_view_after_times()"); + + const size_type idx_s = indices_.size(); + StlVecType col_indices; + + col_indices.reserve(idx_s / 5); + for (size_type i = 0; i < idx_s; ++i) { + if (indices_[i].hour() > hr) { + col_indices.push_back(i); + } + else if 
(indices_[i].hour() == hr) { + if (indices_[i].minute() > mn) { + col_indices.push_back(i); + } + else if (indices_[i].minute() == mn) { + if (indices_[i].sec() > sc) { + col_indices.push_back(i); + } + else if (indices_[i].sec() == sc) { + if (indices_[i].msec() > msc) { + col_indices.push_back(i); + } + } + } + } + } + + return (view_by_sel_common_(col_indices, idx_s)); +} + +// ---------------------------------------------------------------------------- + +// Const overload of get_view_after_times(): same row selection, returned as a ConstPtrView. +template +template +typename DataFrame::ConstPtrView DataFrame:: +get_view_after_times(DateTime::HourType hr, + DateTime::MinuteType mn, + DateTime::SecondType sc, + DateTime::MillisecondType msc) const { + + static_assert( + std::is_base_of::value, + "Index type must be DateTime to call get_view_after_times()"); + + const size_type idx_s = indices_.size(); + StlVecType col_indices; + + col_indices.reserve(idx_s / 5); + for (size_type i = 0; i < idx_s; ++i) { + if (indices_[i].hour() > hr) { + col_indices.push_back(i); + } + else if (indices_[i].hour() == hr) { + if (indices_[i].minute() > mn) { + col_indices.push_back(i); + } + else if (indices_[i].minute() == mn) { + if (indices_[i].sec() > sc) { + col_indices.push_back(i); + } + else if (indices_[i].sec() == sc) { + if (indices_[i].msec() > msc) { + col_indices.push_back(i); + } + } + } + } + } + + return (view_by_sel_common_(col_indices, idx_s)); +} + } // namespace hmdf // ---------------------------------------------------------------------------- diff --git a/test/dataframe_tester_4.cc b/test/dataframe_tester_4.cc index a1ce9dc7..3af92ca5 100644 --- a/test/dataframe_tester_4.cc +++ b/test/dataframe_tester_4.cc @@ -998,6 +998,40 @@ static void test_get_data_before_times() { // ---------------------------------------------------------------------------- +// Exercises get_view_after_times(23, 40) on DT_Intraday.csv and checks the row count plus sampled index and column values. +static void test_get_data_after_times() { + + std::cout << "\nTesting load_get_data_after_times( ) ..." 
<< std::endl; + + DTDataFrame df; + + try { + df.read("DT_Intraday.csv", io_format::csv2); + } + catch (const DataFrameError &ex) { + std::cout << ex.what() << std::endl; + } + + const auto result = df.get_view_after_times(23, 40); + + assert(result.get_index().size() == 137); + assert(result.get_index()[0].date() == 19861118); + assert(result.get_index()[10].date() == 19861203); + assert(result.get_index()[100].date() == 19870423); + assert(result.get_index()[136].date() == 19870623); + assert(result.get_column("dbl value").size() == 137); + assert(result.get_column("dbl value")[0] == 78.0); + assert(result.get_column("dbl value")[10] == 415.0); + assert(result.get_column("dbl value")[100] == 3601.5); + assert(result.get_column("dbl value")[136] == 4995.0); + assert(result.get_column("lng value").size() == 137); + assert(result.get_column("lng value")[0] == 1560); + assert(result.get_column("lng value")[10] == 8300); + assert(result.get_column("lng value")[100] == 72030); + assert(result.get_column("lng value")[136] == 99900); +} + +// ---------------------------------------------------------------------------- + int main(int, char *[]) { test_starts_with(); @@ -1013,6 +1047,7 @@ int main(int, char *[]) { test_difference(); test_get_data_at_times(); test_get_data_before_times(); + test_get_data_after_times(); return (0); }