diff --git a/docs/HTML/DataFrame.html b/docs/HTML/DataFrame.html index 6d267b6e..742115e7 100644 --- a/docs/HTML/DataFrame.html +++ b/docs/HTML/DataFrame.html @@ -439,7 +439,7 @@

API Reference with code samples &# - Modifying Data    🚜 + Altering Data    @@ -470,6 +470,10 @@

API Reference with code samples &# modify_by_idx() + + remove_bottom_n_data() + + remove_column( 2 ) diff --git a/docs/HTML/remove_top_n_data.html b/docs/HTML/remove_top_n_data.html index 407f419b..30ddebae 100644 --- a/docs/HTML/remove_top_n_data.html +++ b/docs/HTML/remove_top_n_data.html @@ -38,8 +38,8 @@ Signature Description Parameters - +

@@ -60,6 +60,26 @@
       
     
 
+    
+       
+        

+template<comparable T, typename ... Ts>
+void
+remove_bottom_n_data(const char *col_name, size_type n);
+        
+ + + It removes the data rows corresponding to the n smallest (bottom) values of the named column.

+ NOTE Comparison operators (<, >, ==) must be well defined for type T.
+ + + T: Type of the named column
+ Ts: The list of types for all columns. A type should be specified only once
+ col_name: Name of the data column
+ n: Number of bottom rows
+ + +
static void test_remove_top_n_data()  {
@@ -100,6 +120,47 @@
     assert(df.get_column<int>("col_4")[2] == 24);
     assert(view.get_column<int>("col_4")[2] == 24);
 }
+
+
// -----------------------------------------------------------------------------
+
+static void test_remove_bottom_n_data()  {
+    // Calls remove_bottom_n_data() on both a DataFrame and a view, then checks sizes and a few surviving values.
+    std::cout << "\nTesting remove_bottom_n_data( ) ..." << std::endl;
+    // Fixture: 14 index values and three 14-element double columns.
+    StlVecType<unsigned long>   idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123463 };
+    StlVecType<double>          d1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
+    StlVecType<double>          d2 = { 8, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 1.89 };
+    StlVecType<double>          d3 = { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0, 10 };
+    StlVecType<int>             i1 = { 22, 23, 24, 25, 99 };  // only 5 values, shorter than the index
+    MyDataFrame                 df;
+    // idx is moved in as the index; each column vector is copied into its (name, data) pair.
+    df.load_data(std::move(idx),
+                 std::make_pair("col_1", d1),
+                 std::make_pair("col_2", d2),
+                 std::make_pair("col_3", d3),
+                 std::make_pair("col_4", i1));
+    // df2 is a copy of df; the view below is taken over df2, so df and the view are exercised separately.
+    MyDataFrame df2 = df;
+    // The predicate is true for every col_1 value (all are < 100), so the view is expected to cover all rows.
+    auto    lbd = [](const unsigned long &, const double &val) -> bool { return (val < 100.0); };
+    auto    view = df2.get_view_by_sel<double, decltype(lbd), double, int, std::string>("col_1", lbd);
+    // col_3's four smallest values are 0.34 (three times) and 1.56, at positions 7, 8, 9 and 11.
+    df.remove_bottom_n_data<double, int, double, std::string>("col_3", 4);
+    view.remove_bottom_n_data<double, int, double, std::string>("col_3", 4);
+    // 14 rows minus the 4 removed leaves 10, checked on the index and on columns for both df and view.
+    assert(df.get_index().size() == 10);
+    assert(view.get_index().size() == 10);
+    assert(df.get_column<double>("col_2").size() == 10);
+    assert(view.get_column<double>("col_2").size() == 10);
+    assert(df.get_column<int>("col_4").size() == 10);  // NOTE(review): col_4 was loaded with 5 values; a size of 10 implies short columns are padded on load -- confirm
+    assert(view.get_column<int>("col_4").size() == 10);
+    assert(df.get_index()[4] == 123454);  // rows before position 7 are expected to be untouched
+    assert(view.get_index()[4] == 123454);
+    assert(df.get_column<double>("col_1")[6] == 7);
+    assert(view.get_column<double>("col_1")[6] == 7);
+    assert(df.get_column<int>("col_4")[2] == 24);
+    assert(view.get_column<int>("col_4")[2] == 24);
+}
 

C++ DataFrame, ==) must be well defined for type T. + // + // T: + // Type of column name + // Ts: + // List all the types of all data columns. A type should be specified in + // the list only once. + // col_name: + // Name of the given column + // n: + // Number of bottom rows + // + template + void + remove_bottom_n_data(const char *col_name, size_type n); + // It removes duplicate rows and returns a new DataFrame. Duplication is // determined by the given column. remove_dup_spec determines which // of the duplicated rows to keep. diff --git a/include/DataFrame/Internals/DataFrame_remove.tcc b/include/DataFrame/Internals/DataFrame_remove.tcc index 27110b39..69de9995 100644 --- a/include/DataFrame/Internals/DataFrame_remove.tcc +++ b/include/DataFrame/Internals/DataFrame_remove.tcc @@ -376,6 +376,36 @@ void DataFrame::remove_top_n_data(const char *col_name, size_type n) { // ---------------------------------------------------------------------------- +template +template +void DataFrame:: +remove_bottom_n_data(const char *col_name, size_type n) { + + static_assert(std::is_base_of, H>::value || + std::is_base_of, H>::value, + "Only a StdDataFrame or a PtrView can call " + "remove_bottom_n_data()"); + + const ColumnVecType &vec = get_column(col_name); + NSmallestVisitor nsv { n }; + + nsv.pre(); + nsv(indices_.begin(), indices_.end(), vec.begin(), vec.end()); + nsv.post(); + nsv.sort_by_index_idx(); + + StlVecType col_indices; + + col_indices.reserve(n); + for (const auto &res : nsv.get_result()) + col_indices.push_back(res.index_idx); + + remove_data_by_sel_common_(col_indices); + return; +} + +// ---------------------------------------------------------------------------- + template template DataFrame> DataFrame:: diff --git a/test/dataframe_tester_4.cc b/test/dataframe_tester_4.cc index a5bb27d8..213fa7da 100644 --- a/test/dataframe_tester_4.cc +++ b/test/dataframe_tester_4.cc @@ -1224,6 +1224,57 @@ static void test_remove_top_n_data() { // 
----------------------------------------------------------------------------- +static void test_remove_bottom_n_data() { + + std::cout << "\nTesting remove_bottom_n_data( ) ..." << std::endl; + + StlVecType idx = + { 123450, 123451, 123452, 123453, 123454, 123455, 123456, + 123457, 123458, 123459, 123460, 123461, 123462, 123463 }; + StlVecType d1 = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; + StlVecType d2 = + { 8, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 1.89 }; + StlVecType d3 = + { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0, 10 }; + StlVecType i1 = { 22, 23, 24, 25, 99 }; + MyDataFrame df; + + df.load_data(std::move(idx), + std::make_pair("col_1", d1), + std::make_pair("col_2", d2), + std::make_pair("col_3", d3), + std::make_pair("col_4", i1)); + + MyDataFrame df2 = df; + + auto lbd = + [](const unsigned long &, const double &val) -> bool { + return (val < 100.0); + }; + auto view = + df2.get_view_by_sel + ("col_1", lbd); + + df.remove_bottom_n_data("col_3", 4); + view.remove_bottom_n_data("col_3", 4); + + assert(df.get_index().size() == 10); + assert(view.get_index().size() == 10); + assert(df.get_column("col_2").size() == 10); + assert(view.get_column("col_2").size() == 10); + assert(df.get_column("col_4").size() == 10); + assert(view.get_column("col_4").size() == 10); + assert(df.get_index()[4] == 123454); + assert(view.get_index()[4] == 123454); + assert(df.get_column("col_1")[6] == 7); + assert(view.get_column("col_1")[6] == 7); + assert(df.get_column("col_4")[2] == 24); + assert(view.get_column("col_4")[2] == 24); +} + +// ----------------------------------------------------------------------------- + int main(int, char *[]) { test_starts_with(); @@ -1245,6 +1296,7 @@ int main(int, char *[]) { test_get_data_on_days_in_month(); test_get_data_between_times(); test_remove_top_n_data(); + test_remove_bottom_n_data(); return (0); }