From 0db4655af4c2d624d307d08659f7c7ae9d112b51 Mon Sep 17 00:00:00 2001 From: Hossein Moein Date: Tue, 27 Aug 2024 14:23:12 -0400 Subject: [PATCH] Implemented to read/write std:pairs columns from/to files --- data/sample_data_2.csv | 3 + docs/HTML/DataFrame.html | 24 +- docs/HTML/read.html | 22 +- docs/HTML/write.html | 51 ++- .../DataFrame/Internals/DataFrame_misc.tcc | 29 +- .../DataFrame/Internals/DataFrame_read.tcc | 148 +++++++ .../Internals/DataFrame_standalone.tcc | 362 +++++++++++++++++- test/dataframe_tester_2.cc | 3 + test/dataframe_tester_4.cc | 200 ++++++++++ test/dataframe_tester_output.txt | 100 ++--- 10 files changed, 840 insertions(+), 102 deletions(-) diff --git a/data/sample_data_2.csv b/data/sample_data_2.csv index ac928cc2d..339c6d02a 100644 --- a/data/sample_data_2.csv +++ b/data/sample_data_2.csv @@ -8,3 +8,6 @@ str_col_2:28::XXXX10,XXXX11,XXXX01,XXXX02,XXXX03,XXXX6,XXXX7,Running fas dbl_col_3:28::2.009,3.111,10,4.2222,5.3333,12,6.25,10,0.9999,1.2345,4.2345,3,8,3.3333,2.2345,4.25,3.2345,0.009,1.111,5.25,11,5.2345,2.2222,1.009,2.111,9,3.2222,4.3333, dbl_col_2_2:28::0.87865,-0.6999,0.4111,0.1902,-0.4888,0.2,0.1056,0.1,0.06743,0.998,0.15678,0.923,0.0111,-0.8888,0.3456,0.0056,0.056,0.07865,-0.9999,0.0456,0.14,0.00345,0.1002,0.078654,-0.8999,0.01119,0.8002,-0.9888, bool_col_2:28::0,1,1,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0, +str_str_pair_col:5::,<:>,<:Second String 13>,,, +dbl_dbl_pair_col:5::<1234:456.7>,<:>,<:456.8>,<456.9:>,<789.1:789.2>, +str_dbl_pair_col:5::,<:>,<:456.8>,,, diff --git a/docs/HTML/DataFrame.html b/docs/HTML/DataFrame.html index 3f1c1bfc8..2b8cdc089 100644 --- a/docs/HTML/DataFrame.html +++ b/docs/HTML/DataFrame.html @@ -185,7 +185,7 @@

API Reference with code samples &# - Loading Data + Loading Data    🚚 @@ -225,7 +225,7 @@

API Reference with code samples &# - Getting Data + Getting Data    🛒 @@ -249,7 +249,7 @@

API Reference with code samples &# - Getting Information + Getting Information    @@ -337,7 +337,7 @@

API Reference with code samples &# - Slicing Data + Slicing Data    🔪 @@ -381,7 +381,7 @@

API Reference with code samples &# - Mixing Data + Mixing Data    👩 @@ -409,7 +409,7 @@

API Reference with code samples &# - Modifying Data + Modifying Data    🧰 @@ -501,7 +501,7 @@

API Reference with code samples &# - Input/Output + Input/Output    🔌 @@ -529,7 +529,7 @@

API Reference with code samples &# - Miscellaneous + Miscellaneous    @@ -810,7 +810,7 @@

API Reference with code samples &# - Transformers &
Filters + Transformers &
Filters @@ -1380,7 +1380,7 @@

API Reference with code samples &# - Arithmetic
Operators + Arithmetic
Operators @@ -1416,7 +1416,7 @@

API Reference with code samples &# - Exceptions + Exceptions @@ -1541,7 +1541,7 @@

API Reference with code samples &# - Multithreading
Static Functions + Multithreading
Static Functions diff --git a/docs/HTML/read.html b/docs/HTML/read.html index 952feca01..916cdb23b 100644 --- a/docs/HTML/read.html +++ b/docs/HTML/read.html @@ -125,29 +125,37 @@ In case of io_format::csv2, io_format::csv, and io_format::binary the following additional types are also supported:
-dbl_vec        -- A vector of double precision values,
+str_dbl_pair   -- std::pair<std::string, double>.
+                  The pair is printed as "<s:d>,<s:d>, ..."
+                  where s's are strings and d's are doubles.
+str_str_pair   -- std::pair<std::string, std::string>.
+                  The pair is printed as "<s1:s2>,<s1:s2>, ..."
+                  where s's are strings.
+dbl_dbl_pair   -- std::pair<double, double>.
+                  The pair is printed as "<d1:d2>,<d1:d2>, ..."
+                  where d's are doubles.
+dbl_vec        -- std::vector<double>.
                   The vector is printed as "s[d1|d2|...]"
                   where s is the size of the vector and
                   d's are the double values.
-str_vec        -- A vector of std::string values,
+str_vec        -- std::vector<std::string>.
                   The vector is printed as "s[str1|str2|...]"
                   where s is the size of the vector
                   and str's are the strings.
-dbl_set        -- A set of double precision values,
+dbl_set        -- std::set<double>.
                   The set is printed as "s[d1|d2|...]"
                   where s is the size of the set
                   and d's are the double values.
-str_set        -- A set of std::string values,
+str_set        -- std::set<std::string>.
                   The set is printed as "s[str1|str2|...]"
                   where s is the size of the set
                   and str's are the strings.
-str_dbl_map    -- A map of string keys to double
+str_dbl_map    -- std::map<std::string, double> of string keys to double
                   precision values, The map is printed
                   as "s{k1:v1|k2:v2|...}"
                   where s is the size of the map
                   and k's and v's are keys and values.
-str_dbl_unomap -- An unordered map of string keys
-                  to double precision values,
+str_dbl_unomap -- std::unordered_map<std::string, double>.
                   The map is printed as "s{k1:v1|k2:v2|...}"
                   where s is the size of the map and k's
                   and v's are keys and values.
diff --git a/docs/HTML/write.html b/docs/HTML/write.html
index 92bfd85db..a40c9035e 100644
--- a/docs/HTML/write.html
+++ b/docs/HTML/write.html
@@ -123,24 +123,39 @@
         
In case of io_format::csv2, io_format::csv, and io_format::binary the following additional types are also supported:
-dbl_vec        -- A vector of double precision values, The vector is printed
-                  as "s[d1|d2|...]" where s is the size of the vector and d's
-                  are the double values.
-str_vec        -- A vector of std::string values, The vector is printed as
-                  "s[str1|str2|...]" where s is the size of the vector and
-                  str's are the strings.
-dbl_set        -- A set of double precision values, The set is printed as
-                  "s[d1|d2|...]" where s is the size of the set and d's
-                  are the double values.
-str_set        -- A set of std::string values, The set is printed as
-                  "s[str1|str2|...]" where s is the size of the set and
-                  str's are the strings.
-str_dbl_map    -- A map of string keys to double precision values, The map is
-                  printed as "s{k1:v1|k2:v2|...}" where s is the size of
-                  the map and k's and v's are keys and values.
-str_dbl_unomap -- An unordered map of string keys to double precision values,
-                  The map is printed as "s{k1:v1|k2:v2|...}" where s is the
-                  size of the map and k's and v's are keys and values.
+str_dbl_pair   -- std::pair<std::string, double>.
+                  The pair is printed as "<s:d>,<s:d>, ..."
+                  where s's are strings and d's are doubles.
+str_str_pair   -- std::pair<std::string, std::string>.
+                  The pair is printed as "<s1:s2>,<s1:s2>, ..."
+                  where s's are strings.
+dbl_dbl_pair   -- std::pair<double, double>.
+                  The pair is printed as "<d1:d2>,<d1:d2>, ..."
+                  where d's are doubles.
+dbl_vec        -- std::vector<double>.
+                  The vector is printed as "s[d1|d2|...]"
+                  where s is the size of the vector and
+                  d's are the double values.
+str_vec        -- std::vector<std::string>.
+                  The vector is printed as "s[str1|str2|...]"
+                  where s is the size of the vector
+                  and str's are the strings.
+dbl_set        -- std::set<double>.
+                  The set is printed as "s[d1|d2|...]"
+                  where s is the size of the set
+                  and d's are the double values.
+str_set        -- std::set<std::string>.
+                  The set is printed as "s[str1|str2|...]"
+                  where s is the size of the set
+                  and str's are the strings.
+str_dbl_map    -- std::map<std::string, double>.
+                  The map is printed
+                  as "s{k1:v1|k2:v2|...}"
+                  where s is the size of the map
+                  and k's and v's are keys and values.
+str_dbl_unomap -- std::unordered_map<std::string, double>.
+                  The map is printed as "s{k1:v1|k2:v2|...}"
+                  where s is the size of the map and k's and v's are keys and values.
         
In case of io_format::csv2 the following additional types are also supported: diff --git a/include/DataFrame/Internals/DataFrame_misc.tcc b/include/DataFrame/Internals/DataFrame_misc.tcc index 39c55fc18..1d7d90c30 100644 --- a/include/DataFrame/Internals/DataFrame_misc.tcc +++ b/include/DataFrame/Internals/DataFrame_misc.tcc @@ -240,26 +240,39 @@ DataFrame::print_binary_functor_::operator() (const T &vec) { std::strncpy(col_name, name, sizeof(col_name)); os.write(col_name, sizeof(col_name)); + + const long local_start_row = std::min (long(vec.size()), start_row); + const long local_end_row = std::min (long(vec.size()), end_row); + if constexpr (std::is_same_v) - _write_binary_string_(os, vec, start_row, end_row); + _write_binary_string_(os, vec, local_start_row, local_end_row); else if constexpr (std::is_same_v) - _write_binary_datetime_(os, vec, start_row, end_row); + _write_binary_datetime_(os, vec, local_start_row, local_end_row); else if constexpr (std::is_same_v>) - _write_binary_dbl_vec_(os, vec, start_row, end_row); + _write_binary_dbl_vec_(os, vec, local_start_row, local_end_row); else if constexpr (std::is_same_v> || std::is_same_v>) - _write_binary_str_vec_(os, vec, start_row, end_row); + _write_binary_str_vec_(os, vec, local_start_row, local_end_row); else if constexpr (std::is_same_v>) - _write_binary_dbl_set_(os, vec, start_row, end_row); + _write_binary_dbl_set_(os, vec, local_start_row, local_end_row); else if constexpr (std::is_same_v> || std::is_same_v>) - _write_binary_str_set_(os, vec, start_row, end_row); + _write_binary_str_set_(os, vec, local_start_row, local_end_row); else if constexpr ( std::is_same_v> || std::is_same_v>) - _write_binary_str_dbl_map_(os, vec, start_row, end_row); + _write_binary_str_dbl_map_(os, vec, local_start_row, local_end_row); + else if constexpr ( + std::is_same_v>) + _write_binary_str_dbl_pair_(os, vec, local_start_row, local_end_row); + else if constexpr ( + std::is_same_v>) + _write_binary_str_str_pair_(os, vec, 
local_start_row, local_end_row); + else if constexpr ( + std::is_same_v>) + _write_binary_dbl_dbl_pair_(os, vec, local_start_row, local_end_row); else - _write_binary_data_(os, vec, start_row, end_row); + _write_binary_data_(os, vec, local_start_row, local_end_row); return; } diff --git a/include/DataFrame/Internals/DataFrame_read.tcc b/include/DataFrame/Internals/DataFrame_read.tcc index a85f158ae..74acdf569 100644 --- a/include/DataFrame/Internals/DataFrame_read.tcc +++ b/include/DataFrame/Internals/DataFrame_read.tcc @@ -529,6 +529,39 @@ void DataFrame::read_csv_(std::istream &stream, bool columns_only) { col_vector_push_back_func_(vec, stream, &::strtol); } + // Pairs + // + else if (type_str == "str_dbl_pair") { + using val_t = std::pair; + + StlVecType &vec = + create_column(col_name.c_str(), false); + + vec.reserve(::atoi(value.c_str())); + col_vector_push_back_cont_func_( + vec, stream, &_get_str_dbl_pair_from_value_); + } + else if (type_str == "str_str_pair") { + using val_t = std::pair; + + StlVecType &vec = + create_column(col_name.c_str(), false); + + vec.reserve(::atoi(value.c_str())); + col_vector_push_back_cont_func_( + vec, stream, &_get_str_str_pair_from_value_); + } + else if (type_str == "dbl_dbl_pair") { + using val_t = std::pair; + + StlVecType &vec = + create_column(col_name.c_str(), false); + + vec.reserve(::atoi(value.c_str())); + col_vector_push_back_cont_func_( + vec, stream, &_get_dbl_dbl_pair_from_value_); + } + // Containers // else if (type_str == "dbl_vec") { @@ -755,6 +788,28 @@ read_csv2_(std::FILE *stream, type_str.c_str(), col_name.c_str(), nrows); + + // Pairs + // + else if (type_str == "str_dbl_pair") + spec_vec.emplace_back( + StlVecType>{ }, + type_str.c_str(), + col_name.c_str(), + nrows); + else if (type_str == "str_str_pair") + spec_vec.emplace_back( + StlVecType>{ }, + type_str.c_str(), + col_name.c_str(), + nrows); + else if (type_str == "dbl_dbl_pair") + spec_vec.emplace_back( + StlVecType>{ }, + type_str.c_str(), + 
col_name.c_str(), + nrows); + // Containers // else if (type_str == "dbl_vec") @@ -1030,6 +1085,36 @@ read_csv2_(std::FILE *stream, } } + // Pairs + // + else if (col_spec.type_spec == "str_dbl_pair") { + using val_t = std::pair; + + StlVecType &vec = + std::any_cast &>(col_spec.col_vec); + + vec.push_back(std::move(_get_str_dbl_pair_from_value_( + value.c_str()))); + } + else if (col_spec.type_spec == "str_str_pair") { + using val_t = std::pair; + + StlVecType &vec = + std::any_cast &>(col_spec.col_vec); + + vec.push_back(std::move(_get_str_str_pair_from_value_( + value.c_str()))); + } + else if (col_spec.type_spec == "dbl_dbl_pair") { + using val_t = std::pair; + + StlVecType &vec = + std::any_cast &>(col_spec.col_vec); + + vec.push_back(std::move(_get_dbl_dbl_pair_from_value_( + value.c_str()))); + } + // Containers // else if (col_spec.type_spec == "dbl_vec") { @@ -1234,6 +1319,33 @@ read_csv2_(std::FILE *stream, (col_spec.col_vec)), nan_policy::dont_pad_with_nans); + // Pairs + // + else if (col_spec.type_spec == "str_dbl_pair") { + using val_t = std::pair; + + load_column(col_spec.col_name.c_str(), + std::move(std::any_cast &> + (col_spec.col_vec)), + nan_policy::dont_pad_with_nans); + } + else if (col_spec.type_spec == "str_str_pair") { + using val_t = std::pair; + + load_column(col_spec.col_name.c_str(), + std::move(std::any_cast &> + (col_spec.col_vec)), + nan_policy::dont_pad_with_nans); + } + else if (col_spec.type_spec == "dbl_dbl_pair") { + using val_t = std::pair; + + load_column(col_spec.col_name.c_str(), + std::move(std::any_cast &> + (col_spec.col_vec)), + nan_policy::dont_pad_with_nans); + } + // Containers // else if (col_spec.type_spec == "dbl_vec") @@ -1464,6 +1576,42 @@ read_binary_(std::istream &stream, load_column(col_name, std::move(vec), nan_policy::dont_pad_with_nans); } + + // Pairs + // + else if ( ! 
std::strcmp(col_type, "str_dbl_pair")) { + using val_t = std::pair; + + ColumnVecType vec; + + _read_binary_str_dbl_pair_(stream, vec, needs_flipping, + starting_row, num_rows); + load_column(col_name, std::move(vec), + nan_policy::dont_pad_with_nans); + } + else if ( ! std::strcmp(col_type, "str_str_pair")) { + using val_t = std::pair; + + ColumnVecType vec; + + _read_binary_str_str_pair_(stream, vec, needs_flipping, + starting_row, num_rows); + load_column(col_name, std::move(vec), + nan_policy::dont_pad_with_nans); + } + else if ( ! std::strcmp(col_type, "dbl_dbl_pair")) { + using val_t = std::pair; + + ColumnVecType vec; + + _read_binary_dbl_dbl_pair_(stream, vec, needs_flipping, + starting_row, num_rows); + load_column(col_name, std::move(vec), + nan_policy::dont_pad_with_nans); + } + + // Containers + // else if ( ! std::strcmp(col_type, "dbl_vec")) { ColumnVecType> vec; diff --git a/include/DataFrame/Internals/DataFrame_standalone.tcc b/include/DataFrame/Internals/DataFrame_standalone.tcc index 17ba140ab..33e39f7bf 100644 --- a/include/DataFrame/Internals/DataFrame_standalone.tcc +++ b/include/DataFrame/Internals/DataFrame_standalone.tcc @@ -39,6 +39,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include #include #include @@ -79,6 +80,8 @@ std::unordered_map<_TypeInfoRef_, _TypeinfoHasher_, _TypeinfoEqualTo_> _typeinfo_name_ { + // Fundamentals + // { typeid(float), "float" }, { typeid(double), "double" }, { typeid(long double), "longdouble" }, @@ -96,8 +99,15 @@ _typeinfo_name_ { { typeid(const char *), "string" }, { typeid(char *), "string" }, { typeid(bool), "bool" }, + { typeid(DateTime), "DateTime" }, + // Pairs + // + { typeid(std::pair), "str_dbl_pair" }, + { typeid(std::pair), "str_str_pair" }, + { typeid(std::pair), "dbl_dbl_pair" }, + // Containers // { typeid(std::vector), "dbl_vec" }, @@ -202,7 +212,8 @@ _create_column_from_triple_(DF &df, T &triple) { // ---------------------------------------------------------------------------- -template +template static inline void _load_groupby_data_1_( const SRC_DF &source, @@ -345,8 +356,10 @@ _load_groupby_data_2_( idx_visitor(src_idx[sort_v[j]], src_idx[sort_v[j]]); idx_visitor.post(); dst_idx.push_back(idx_visitor.get_result()); - if (col_vec1) col_vec1->push_back(input_col1[sort_v[vec_size - 1]]); - if (col_vec2) col_vec2->push_back(input_col2[sort_v[vec_size - 1]]); + if (col_vec1) + col_vec1->push_back(input_col1[sort_v[vec_size - 1]]); + if (col_vec2) + col_vec2->push_back(input_col2[sort_v[vec_size - 1]]); } } @@ -454,9 +467,12 @@ _load_groupby_data_3_( idx_visitor(src_idx[sort_v[j]], src_idx[sort_v[j]]); idx_visitor.post(); dst_idx.push_back(idx_visitor.get_result()); - if (col_vec1) col_vec1->push_back(input_col1[sort_v[vec_size - 1]]); - if (col_vec2) col_vec2->push_back(input_col2[sort_v[vec_size - 1]]); - if (col_vec3) col_vec3->push_back(input_col3[sort_v[vec_size - 1]]); + if (col_vec1) + col_vec1->push_back(input_col1[sort_v[vec_size - 1]]); + if (col_vec2) + col_vec2->push_back(input_col2[sort_v[vec_size - 1]]); + if (col_vec3) + col_vec3->push_back(input_col3[sort_v[vec_size - 1]]); } } @@ -704,6 +720,116 @@ _get_dbl_vec_from_value_(const char 
*value) { // ---------------------------------------------------------------------------- +inline static std::pair +_get_str_dbl_pair_from_value_(const char *value) { + + using val_t = std::pair; + + std::size_t vcnt { 0 }; + val_t data ("", std::numeric_limits::quiet_NaN()); + + while (value[vcnt] && value[vcnt] != '<') ++vcnt; + if (! value[vcnt]) return (data); + vcnt += 1; // skip < + + char buffer[2048]; + std::size_t bcnt { 0 }; + + buffer[0] = '\0'; + while (value[vcnt] && value[vcnt] != ':') + buffer[bcnt++] = value[vcnt++]; + if (! value[vcnt]) return (data); + buffer[bcnt] = '\0'; + data.first = buffer; + vcnt += 1; // skip : + + bcnt = 0; + buffer[0] = '\0'; + while (value[vcnt] && value[vcnt] != '>') + buffer[bcnt++] = value[vcnt++]; + if (! value[vcnt] || buffer[0] == '\0') return (data); + buffer[bcnt] = '\0'; + data.second = std::strtod(buffer, nullptr); + + return (data); +} + +// ---------------------------------------------------------------------------- + +inline static std::pair +_get_dbl_dbl_pair_from_value_(const char *value) { + + using val_t = std::pair; + + std::size_t vcnt { 0 }; + val_t data (std::numeric_limits::quiet_NaN(), + std::numeric_limits::quiet_NaN()); + + while (value[vcnt] && value[vcnt] != '<') ++vcnt; + if (! value[vcnt]) return (data); + vcnt += 1; // skip < + + char buffer[2048]; + std::size_t bcnt { 0 }; + + buffer[0] = '\0'; + while (value[vcnt] && value[vcnt] != ':') + buffer[bcnt++] = value[vcnt++]; + if (! value[vcnt]) return (data); + buffer[bcnt] = '\0'; + if (buffer[0]) + data.first = std::strtod(buffer, nullptr); + vcnt += 1; // skip : + + bcnt = 0; + buffer[0] = '\0'; + while (value[vcnt] && value[vcnt] != '>') + buffer[bcnt++] = value[vcnt++]; + if (! 
value[vcnt] || buffer[0] == '\0') return (data); + buffer[bcnt] = '\0'; + data.second = std::strtod(buffer, nullptr); + + return (data); +} + +// ---------------------------------------------------------------------------- + +inline static std::pair +_get_str_str_pair_from_value_(const char *value) { + + using val_t = std::pair; + + std::size_t vcnt { 0 }; + val_t data ("", ""); + + while (value[vcnt] && value[vcnt] != '<') ++vcnt; + if (! value[vcnt]) return (data); + vcnt += 1; // skip < + + char buffer[2048]; + std::size_t bcnt { 0 }; + + buffer[0] = '\0'; + while (value[vcnt] && value[vcnt] != ':') + buffer[bcnt++] = value[vcnt++]; + if (! value[vcnt]) return (data); + buffer[bcnt] = '\0'; + data.first = buffer; + vcnt += 1; // skip : + + bcnt = 0; + buffer[0] = '\0'; + while (value[vcnt] && value[vcnt] != '>') + buffer[bcnt++] = value[vcnt++]; + if (! value[vcnt] || buffer[0] == '\0') return (data); + buffer[bcnt] = '\0'; + data.second = buffer; + + return (data); +} + +// ---------------------------------------------------------------------------- + inline static std::vector _get_str_vec_from_value_(const char *value) { @@ -1023,6 +1149,87 @@ _write_binary_datetime_(STRM &strm, const V &dt_vec, // ---------------------------------------------------------------------------- +// Vector of std::pair +// +template +inline static STRM & +_write_binary_str_dbl_pair_(STRM &strm, const V &p_vec, + std::size_t start_row, + std::size_t end_row) { + + _write_binary_common_(strm, p_vec, start_row, end_row); + + for (uint64_t i = start_row; i < end_row; ++i) { + const uint16_t str_sz = static_cast(p_vec[i].first.size()); + + strm.write(reinterpret_cast(&str_sz), sizeof(str_sz)); + } + for (uint64_t i = start_row; i < end_row; ++i) { + const auto &str = p_vec[i].first; + + strm.write(str.data(), str.size() * sizeof(char)); + strm.write(reinterpret_cast(&(p_vec[i].second)), + sizeof(double)); + } + + return (strm); +} + +// 
---------------------------------------------------------------------------- + +// Vector of std::pair +// +template +inline static STRM & +_write_binary_str_str_pair_(STRM &strm, const V &p_vec, + std::size_t start_row, + std::size_t end_row) { + + _write_binary_common_(strm, p_vec, start_row, end_row); + + for (uint64_t i = start_row; i < end_row; ++i) { + const uint16_t str_sz1 = static_cast(p_vec[i].first.size()); + const uint16_t str_sz2 = + static_cast(p_vec[i].second.size()); + + strm.write(reinterpret_cast(&str_sz1), sizeof(str_sz1)); + strm.write(reinterpret_cast(&str_sz2), sizeof(str_sz2)); + } + for (uint64_t i = start_row; i < end_row; ++i) { + const auto &str1 = p_vec[i].first; + const auto &str2 = p_vec[i].second; + + strm.write(str1.data(), str1.size() * sizeof(char)); + strm.write(str2.data(), str2.size() * sizeof(char)); + } + + return (strm); +} + +// ---------------------------------------------------------------------------- + +// Vector of std::pair +// +template +inline static STRM & +_write_binary_dbl_dbl_pair_(STRM &strm, const V &p_vec, + std::size_t start_row, + std::size_t end_row) { + + _write_binary_common_(strm, p_vec, start_row, end_row); + + for (uint64_t i = start_row; i < end_row; ++i) { + strm.write(reinterpret_cast(&(p_vec[i].first)), + sizeof(double)); + strm.write(reinterpret_cast(&(p_vec[i].second)), + sizeof(double)); + } + + return (strm); +} + +// ---------------------------------------------------------------------------- + // Vector of double vectors // template @@ -1286,7 +1493,134 @@ _read_binary_datetime_(STRM &strm, V &dt_vec, bool needs_flipping, // ---------------------------------------------------------------------------- -// Vector of double vectors +// Vector of std::pair +// +template +inline static STRM & +_read_binary_str_dbl_pair_(STRM &strm, V &p_vec, bool needs_flipping, + std::size_t start_row, std::size_t num_rows) { + + const uint64_t vec_size = + _read_binary_common_(strm, needs_flipping, start_row); + 
std::vector str_sizes (vec_size, 0); + + strm.read(reinterpret_cast(str_sizes.data()), + vec_size * sizeof(uint16_t)); + if (needs_flipping) { + SwapBytes swaper { }; + + for (auto &s : str_sizes) + s = swaper(s); + } + + const uint64_t read_end = + (num_rows == std::numeric_limits::max() || + (start_row + num_rows) > vec_size) + ? vec_size : uint64_t(start_row + num_rows); + + p_vec.reserve(read_end - start_row); + for (uint64_t i = 0; i < vec_size; ++i) { + if (i >= start_row && i < read_end) [[likely]] { + std::string str (std::size_t(str_sizes[i]), 0); + double val { 0 }; + + strm.read(str.data(), str_sizes[i] * sizeof(char)); + strm.read(reinterpret_cast(&val), sizeof(val)); + if (needs_flipping) + val = SwapBytes { }(val); + p_vec.emplace_back(std::move(str), std::move(val)); + } + else + strm.seekg(str_sizes[i] + sizeof(double), std::ios_base::cur); + } + + return (strm); +} + +// ---------------------------------------------------------------------------- + +// Vector of std::pair +// +template +inline static STRM & +_read_binary_str_str_pair_(STRM &strm, V &p_vec, bool needs_flipping, + std::size_t start_row, std::size_t num_rows) { + + const uint64_t vec_size = + _read_binary_common_(strm, needs_flipping, start_row); + std::vector str_sizes (vec_size * 2, 0); + + strm.read(reinterpret_cast(str_sizes.data()), + vec_size * 2 * sizeof(uint16_t)); + if (needs_flipping) { + SwapBytes swaper { }; + + for (auto &s : str_sizes) + s = swaper(s); + } + + const uint64_t read_end = + (num_rows == std::numeric_limits::max() || + (start_row + num_rows) > vec_size) + ? 
vec_size : uint64_t(start_row + num_rows); + std::size_t sizes_idx { 0 }; + + p_vec.reserve(read_end - start_row); + for (uint64_t i = 0; i < vec_size; ++i, sizes_idx += 2) { + if (i >= start_row && i < read_end) [[likely]] { + std::string str1 (std::size_t(str_sizes[sizes_idx]), 0); + std::string str2 (std::size_t(str_sizes[sizes_idx + 1]), 0); + + strm.read(str1.data(), str_sizes[sizes_idx] * sizeof(char)); + strm.read(str2.data(), str_sizes[sizes_idx + 1] * sizeof(char)); + p_vec.emplace_back(std::move(str1), std::move(str2)); + } + else + strm.seekg(str_sizes[sizes_idx] + str_sizes[sizes_idx + 1], + std::ios_base::cur); + } + + return (strm); +} + +// ---------------------------------------------------------------------------- + +// Vector of std::pair +// +template +inline static STRM & +_read_binary_dbl_dbl_pair_(STRM &strm, V &p_vec, bool needs_flipping, + std::size_t start_row, std::size_t num_rows) { + + const uint64_t vec_size = + _read_binary_common_(strm, needs_flipping, start_row); + const uint64_t read_end = + (num_rows == std::numeric_limits::max() || + (start_row + num_rows) > vec_size) + ? 
vec_size : uint64_t(start_row + num_rows); + + p_vec.reserve(read_end - start_row); + for (uint64_t i = 0; i < vec_size; ++i) { + if (i >= start_row && i < read_end) [[likely]] { + double val[2]; + + strm.read(reinterpret_cast(val), sizeof(double) * 2); + if (needs_flipping) { + val[0] = SwapBytes { }(val[0]); + val[1] = SwapBytes { }(val[1]); + } + p_vec.emplace_back(val[0], val[1]); + } + else + strm.seekg(sizeof(double) * 2, std::ios_base::cur); + } + + return (strm); +} + +// ---------------------------------------------------------------------------- + +// Vector of std::vector // template inline static STRM & @@ -1328,7 +1662,7 @@ _read_binary_dbl_vec_(STRM &strm, V &vec, bool needs_flipping, // ---------------------------------------------------------------------------- -// Vector of string vectors +// Vector of std::vector // template inline static STRM & @@ -1363,7 +1697,7 @@ _read_binary_str_vec_(STRM &strm, V &vec, bool needs_flipping, // ---------------------------------------------------------------------------- -// Vector of double sets +// Vector of std::set // template inline static STRM & @@ -1410,7 +1744,7 @@ _read_binary_dbl_set_(STRM &strm, V &set_vec, bool needs_flipping, // ---------------------------------------------------------------------------- -// Vector of string sets +// Vector of std::set // template inline static STRM & @@ -1734,7 +2068,8 @@ _inv_merge_sort_(Con &original, } else { inv_count += - _inv_merge_sort_(original, temp, left, mid, comp, thread_level); + _inv_merge_sort_(original, temp, left, mid, comp, + thread_level); inv_count += _inv_merge_sort_(original, temp, mid + 1, right, comp, thread_level); @@ -1770,8 +2105,9 @@ struct _LikeClauseUtil_ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 
103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, + 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, + 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, diff --git a/test/dataframe_tester_2.cc b/test/dataframe_tester_2.cc index 6a42e52ee..e364a3d20 100644 --- a/test/dataframe_tester_2.cc +++ b/test/dataframe_tester_2.cc @@ -3304,6 +3304,9 @@ static void test_no_index_reads() { std::set, std::set, std::vector, + std::pair, + std::pair, + std::pair, std::string>(std::cout, io_format::csv2); std::cout << '\n' << std::endl; diff --git a/test/dataframe_tester_4.cc b/test/dataframe_tester_4.cc index d7557b283..5b469d6cd 100644 --- a/test/dataframe_tester_4.cc +++ b/test/dataframe_tester_4.cc @@ -672,6 +672,205 @@ static void test_explode() { // ---------------------------------------------------------------------------- +static void test_read_write_pairs() { + + std::cout << "\nTesting read_write_pairs ..." 
<< std::endl; + + MyDataFrame df; + + try { + df.read("sample_data_2.csv", io_format::csv); + } + catch (const DataFrameError &ex) { + std::cout << ex.what() << std::endl; + } + + df.write, + std::pair, + std::pair> + (std::cout, io_format::csv); + + df.write, + std::pair, + std::pair> + ("./tmp_sample_data_2.csv", io_format::csv2); + df.write, + std::pair, + std::pair> + ("./tmp_sample_data_2.dat", io_format::binary); + + MyDataFrame df2; + + df2.read("./tmp_sample_data_2.csv", io_format::csv2); + assert(df.get_index() == df2.get_index()); + assert((df.get_column("xint_col_2") == + df2.get_column("xint_col_2"))); + assert((df.get_column("str_col_2") == + df2.get_column("str_col_2"))); + assert((df.get_column("ul_col_2") == + df2.get_column("ul_col_2"))); + assert(( + df.get_column>("str_str_pair_col")[0] == + df2.get_column>("str_str_pair_col")[0])); + assert(( + df.get_column>("str_str_pair_col")[1] == + df2.get_column>("str_str_pair_col")[1])); + assert(( + df.get_column>("str_str_pair_col")[2] == + df2.get_column>("str_str_pair_col")[2])); + assert(( + df.get_column>("str_str_pair_col")[3] == + df2.get_column>("str_str_pair_col")[3])); + assert(( + df.get_column>("str_str_pair_col")[4] == + df2.get_column>("str_str_pair_col")[4])); + assert(( + df.get_column>("dbl_dbl_pair_col")[0] == + df2.get_column>("dbl_dbl_pair_col")[0])); + assert((std::isnan(df.get_column> + ("dbl_dbl_pair_col")[1].first))); + assert((std::isnan(df.get_column> + ("dbl_dbl_pair_col")[1].second))); + assert((std::isnan(df2.get_column> + ("dbl_dbl_pair_col")[1].first))); + assert((std::isnan(df2.get_column> + ("dbl_dbl_pair_col")[1].second))); + assert((std::isnan(df.get_column> + ("dbl_dbl_pair_col")[2].first))); + assert((std::isnan(df2.get_column> + ("dbl_dbl_pair_col")[2].first))); + assert(( + df.get_column>("dbl_dbl_pair_col")[2].second == + df2.get_column>("dbl_dbl_pair_col")[2].second)); + assert((std::isnan(df.get_column> + ("dbl_dbl_pair_col")[3].second))); + 
assert((std::isnan(df2.get_column> + ("dbl_dbl_pair_col")[3].second))); + assert(( + df.get_column>("dbl_dbl_pair_col")[3].first == + df2.get_column>("dbl_dbl_pair_col")[3].first)); + assert(( + df.get_column>("dbl_dbl_pair_col")[4] == + df2.get_column>("dbl_dbl_pair_col")[4])); + + assert(( + df.get_column>("str_dbl_pair_col")[0] == + df2.get_column>("str_dbl_pair_col")[0])); + assert(( + df.get_column>("str_dbl_pair_col")[1].first == + df2.get_column>("str_dbl_pair_col")[1].first)); + assert((std::isnan(df.get_column> + ("str_dbl_pair_col")[1].second))); + assert((std::isnan(df2.get_column> + ("str_dbl_pair_col")[1].second))); + assert(( + df.get_column>("str_dbl_pair_col")[2].first == + df2.get_column>("str_dbl_pair_col")[2].first)); + assert(( + df.get_column>("str_dbl_pair_col")[2].second == + df2.get_column>("str_dbl_pair_col")[2].second)); + assert((std::isnan(df.get_column> + ("str_dbl_pair_col")[3].second))); + assert((std::isnan(df2.get_column> + ("str_dbl_pair_col")[3].second))); + assert(( + df.get_column>("str_dbl_pair_col")[3].first == + df2.get_column>("str_dbl_pair_col")[3].first)); + assert(( + df.get_column>("str_dbl_pair_col")[4] == + df2.get_column>("str_dbl_pair_col")[4])); + + MyDataFrame df3; + + df3.read("./tmp_sample_data_2.dat", io_format::binary); + assert(df.get_index() == df3.get_index()); + assert((df.get_column("xint_col_2") == + df3.get_column("xint_col_2"))); + assert((df.get_column("str_col_2") == + df3.get_column("str_col_2"))); + assert((df.get_column("ul_col_2") == + df3.get_column("ul_col_2"))); + assert(( + df.get_column>("str_str_pair_col") == + df3.get_column>("str_str_pair_col"))); + assert(( + df.get_column>("dbl_dbl_pair_col")[0] == + df3.get_column>("dbl_dbl_pair_col")[0])); + assert((std::isnan(df.get_column> + ("dbl_dbl_pair_col")[1].first))); + assert((std::isnan(df.get_column> + ("dbl_dbl_pair_col")[1].second))); + assert((std::isnan(df3.get_column> + ("dbl_dbl_pair_col")[1].first))); + 
assert((std::isnan(df3.get_column> + ("dbl_dbl_pair_col")[1].second))); + assert((std::isnan(df.get_column> + ("dbl_dbl_pair_col")[2].first))); + assert((std::isnan(df3.get_column> + ("dbl_dbl_pair_col")[2].first))); + assert(( + df.get_column>("dbl_dbl_pair_col")[2].second == + df3.get_column>("dbl_dbl_pair_col")[2].second)); + assert((std::isnan(df.get_column> + ("dbl_dbl_pair_col")[3].second))); + assert((std::isnan(df3.get_column> + ("dbl_dbl_pair_col")[3].second))); + assert(( + df.get_column>("dbl_dbl_pair_col")[3].first == + df3.get_column>("dbl_dbl_pair_col")[3].first)); + assert(( + df.get_column>("dbl_dbl_pair_col")[4] == + df3.get_column>("dbl_dbl_pair_col")[4])); + + assert(( + df.get_column>("str_dbl_pair_col")[0] == + df3.get_column>("str_dbl_pair_col")[0])); + assert(( + df.get_column>("str_dbl_pair_col")[1].first == + df3.get_column>("str_dbl_pair_col")[1].first)); + assert((std::isnan(df.get_column> + ("str_dbl_pair_col")[1].second))); + assert((std::isnan(df3.get_column> + ("str_dbl_pair_col")[1].second))); + assert(( + df.get_column>("str_dbl_pair_col")[2].first == + df3.get_column>("str_dbl_pair_col")[2].first)); + assert(( + df.get_column>("str_dbl_pair_col")[2].second == + df3.get_column>("str_dbl_pair_col")[2].second)); + assert((std::isnan(df.get_column> + ("str_dbl_pair_col")[3].second))); + assert((std::isnan(df3.get_column> + ("str_dbl_pair_col")[3].second))); + assert(( + df.get_column>("str_dbl_pair_col")[3].first == + df3.get_column>("str_dbl_pair_col")[3].first)); + assert(( + df.get_column>("str_dbl_pair_col")[4] == + df3.get_column>("str_dbl_pair_col")[4])); + + std::remove("./tmp_sample_data_2.csv"); + std::remove("./tmp_sample_data_2.dat"); +} + +// ---------------------------------------------------------------------------- + int main(int, char *[]) { test_starts_with(); @@ -683,6 +882,7 @@ int main(int, char *[]) { test_truncate(); test_load_column(); test_explode(); + test_read_write_pairs(); return (0); } diff --git 
a/test/dataframe_tester_output.txt b/test/dataframe_tester_output.txt index e926a693c..1b84c63d9 100644 --- a/test/dataframe_tester_output.txt +++ b/test/dataframe_tester_output.txt @@ -714,18 +714,18 @@ col_2:1::8, col_3:1::15, col_str:1::11, col_4:1::22, -INDEX:1::123450, -col_1:1::1, -col_2:1::8, -col_3:1::15, -col_str:1::11, -col_4:1::22, -INDEX:1::123450, -col_1:1::1, -col_2:1::8, -col_3:1::15, -col_str:1::11, -col_4:1::22, +INDEX:1::123451, +col_1:1::2, +col_2:1::9, +col_3:1::16, +col_str:1::22, +col_4:1::23, +INDEX:1::123452, +col_1:1::3, +col_2:1::10, +col_3:1::17, +col_str:1::33, +col_4:1::24, Testing write(json) ... Writing in JSON: @@ -1324,35 +1324,35 @@ INDEX:28:,ul_col:28:,dbl_col_2:28:,bool_col:28:,str_ 123450,123450,nan,0,,2,nan,C,0,123450,nan,0,,2,0,123450,nan,0,,2,0 -INDEX:28:,ul_col:28:,xint_col:28:,str_col:28:,dbl_col:28:,dbl_col_2:28:,bool_col:28:,Map 1:4:,Unordered Map:4:,Str Vec:4:,Double Set:4:,Str Set:4:,Z Score:4:,ul_col_2:28:,xint_col_2:28:,str_col_2:28:,dbl_col_3:28:,dbl_col_2_2:28:,bool_col_2:28:,ul_col_no_idx:28:,xint_col_no_idx:28:,str_col_no_idx:28:,dbl_col_no_idx:28:,dbl_col_2_no_idx:28:,bool_col_no_idx:28:,char_col:11: -123432,123450,35,XXXX10,2.009,0.87865,0,3{label one 1:123|label one 2:-782.5|label one 3:444.44},3{Key one 3:444.44|Key one 2:-782.5|Key one 1:123},4[bbb|aaa|zzz|ddd],3[-782.5|123|444.44],3[-782.5|123.0|444.44],10[1.95474040557|0.552535091086|0.775388936446|-0.561817339812|0.106794118727|-0.153218675013|-0.896114748672|-1.72258101434|-0.301804546072|0.246077772077],123450,35,XXXX10,2.009,0.87865,0,123450,35,XXXX10,2.009,0.87865,0,C -123433,123451,36,XXXX11,3.111,-0.6999,1,3{label two 1:123|label two 2:-782.5|label two 3:444.44},3{Key two 3:444.44|Key two 2:-782.5|Key two 
1:123},4[aaa|bbb|ccc|www],3[-782.5|0|1],3[-782.5|1:123.0|:444.44],10[-0.985180680575|-0.338649566179|1.37000434149|0.831246802651|-0.415610988193|1.06213106869|0.554158098662|0.507981245453|-1.55472278822|-1.03135753378],123451,36,XXXX11,3.111,-0.6999,1,123451,36,XXXX11,3.111,-0.6999,1,% -123434,123452,40,XXXX01,10,0.4111,1,3{label three 1:123|label three 2:-782.5|label three 3:444.44},3{Key three 3:444.44|Key three 2:-782.5|Key three 1:123},4[123|abc|345|list],3[-782.5|123|444.44],3[-782.5|123.0|444.44],10[-1.39575784008|-1.62506351709|-0.907239380237|-0.159508137551|0.807541881212|0.937157205458|0.578236204203|0.717820563726|-0.0398797142361|1.0866927346],123452,40,XXXX01,10,0.4111,1,123452,40,XXXX01,10,0.4111,1,x -123435,123450,45,XXXX02,4.2222,0.1902,1,3{label four 1:123|label four 2:-782.5|label four 3:444.44},3{Key four 2:-782.5|Key four 3:444.44|Key four 1:123},3[bbb|aaa|zzz],4[-782.5|100.5|123|444.44],4[-782.5|100.5|123.0|444.44],10[1.94246107491|-0.062340594565|0.246115232403|-1.24462409799|-0.190844664632|0.8115331407|0.381021476571|-1.60448155299|-0.422151990754|0.143311976349],123450,45,XXXX02,4.2222,0.1902,1,123450,45,XXXX02,4.2222,0.1902,1,0 -123436,123455,46,XXXX03,5.3333,-0.4888,0,,,,,,,123455,46,XXXX03,5.3333,-0.4888,0,123455,46,XXXX03,5.3333,-0.4888,0,A -123441,123450,33,XXXX6,12,0.2,0,,,,,,,123450,33,XXXX6,12,0.2,0,123450,33,XXXX6,12,0.2,0,0 -123442,123449,34,XXXX7,6.25,0.1056,1,,,,,,,123449,34,XXXX7,6.25,0.1056,1,123449,34,XXXX7,6.25,0.1056,1,% -123448,123448,8,Running fast,10,0.1,0,,,,,,,123448,8,Running fast,10,0.1,0,123448,8,Running fast,10,0.1,0,-6 -123449,123451,7,$15 increase,0.9999,0.06743,0,,,,,,,123451,7,$15 increase,0.9999,0.06743,0,123451,7,$15 increase,0.9999,0.06743,0,0 -123450,123452,1,4% of something,1.2345,0.998,0,,,,,,,123452,1,4% of something,1.2345,0.998,0,123452,1,4% of something,1.2345,0.998,0,A -123450,123452,4,3.4% of GDP,4.2345,0.15678,1,,,,,,,123452,4,3.4% of GDP,4.2345,0.15678,1,123452,4,3.4% of GDP,4.2345,0.15678,1,0 
-123450,123450,6,Market pulls back,3,0.923,0,,,,,,,123450,6,Market pulls back,3,0.923,0,123450,6,Market pulls back,3,0.923,0, -123450,123455,12,Bonds vs. Equities,8,0.0111,1,,,,,,,123455,12,Bonds vs. Equities,8,0.0111,1,123455,12,Bonds vs. Equities,8,0.0111,1, -123450,123450,14,Here comes the sun,3.3333,-0.8888,0,,,,,,,123450,14,Here comes the sun,3.3333,-0.8888,0,123450,14,Here comes the sun,3.3333,-0.8888,0, -123451,123454,2,Description 4/5,2.2345,0.3456,0,,,,,,,123454,2,Description 4/5,2.2345,0.3456,0,123454,2,Description 4/5,2.2345,0.3456,0, -123451,123453,9,C++14 development,4.25,0.0056,0,,,,,,,123453,9,C++14 development,4.25,0.0056,0,123453,9,C++14 development,4.25,0.0056,0, -123452,123456,3,This is bad,3.2345,0.056,0,,,,,,,123456,3,This is bad,3.2345,0.056,0,123456,3,This is bad,3.2345,0.056,0, -123452,123457,10,Some explanation,0.009,0.07865,0,,,,,,,123457,10,Some explanation,0.009,0.07865,0,123457,10,Some explanation,0.009,0.07865,0, -123452,123458,11,More strings,1.111,-0.9999,0,,,,,,,123458,11,More strings,1.111,-0.9999,0,123458,11,More strings,1.111,-0.9999,0, -123453,123459,20,XXXX04,5.25,0.0456,0,,,,,,,123459,20,XXXX04,5.25,0.0456,0,123459,20,XXXX04,5.25,0.0456,0, -123454,123460,15,XXXX1,11,0.14,1,,,,,,,123460,15,XXXX1,11,0.14,1,123460,15,XXXX1,11,0.14,1, -123455,123441,5,Market drops,5.2345,0.00345,0,,,,,,,123441,5,Market drops,5.2345,0.00345,0,123441,5,Market drops,5.2345,0.00345,0, -123455,123442,13,Almost done,2.2222,0.1002,1,,,,,,,123442,13,Almost done,2.2222,0.1002,1,123442,13,Almost done,2.2222,0.1002,1, -123456,123432,22,XXXX2,1.009,0.078654,0,,,,,,,123432,22,XXXX2,1.009,0.078654,0,123432,22,XXXX2,1.009,0.078654,0, -123457,123433,23,XXXX3,2.111,-0.8999,0,,,,,,,123433,23,XXXX3,2.111,-0.8999,0,123433,23,XXXX3,2.111,-0.8999,0, -123458,123434,24,XXXX4,9,0.01119,1,,,,,,,123434,24,XXXX4,9,0.01119,1,123434,24,XXXX4,9,0.01119,1, -123459,123435,25,XXXX4,3.2222,0.8002,0,,,,,,,123435,25,XXXX4,3.2222,0.8002,0,123435,25,XXXX4,3.2222,0.8002,0, 
-123460,123436,30,XXXX5,4.3333,-0.9888,0,,,,,,,123436,30,XXXX5,4.3333,-0.9888,0,123436,30,XXXX5,4.3333,-0.9888,0, +INDEX:28:,ul_col:28:,xint_col:28:,str_col:28:,dbl_col:28:,dbl_col_2:28:,bool_col:28:,Map 1:4:,Unordered Map:4:,Str Vec:4:,Double Set:4:,Str Set:4:,Z Score:4:,ul_col_2:28:,xint_col_2:28:,str_col_2:28:,dbl_col_3:28:,dbl_col_2_2:28:,bool_col_2:28:,str_str_pair_col:5:,dbl_dbl_pair_col:5:,str_dbl_pair_col:5:,ul_col_no_idx:28:,xint_col_no_idx:28:,str_col_no_idx:28:,dbl_col_no_idx:28:,dbl_col_2_no_idx:28:,bool_col_no_idx:28:,char_col:11: +123432,123450,35,XXXX10,2.009,0.87865,0,3{label one 1:123|label one 2:-782.5|label one 3:444.44},3{Key one 3:444.44|Key one 2:-782.5|Key one 1:123},4[bbb|aaa|zzz|ddd],3[-782.5|123|444.44],3[-782.5|123.0|444.44],10[1.95474040557|0.552535091086|0.775388936446|-0.561817339812|0.106794118727|-0.153218675013|-0.896114748672|-1.72258101434|-0.301804546072|0.246077772077],123450,35,XXXX10,2.009,0.87865,0,,<1234:456.7>,,123450,35,XXXX10,2.009,0.87865,0,C +123433,123451,36,XXXX11,3.111,-0.6999,1,3{label two 1:123|label two 2:-782.5|label two 3:444.44},3{Key two 3:444.44|Key two 2:-782.5|Key two 1:123},4[aaa|bbb|ccc|www],3[-782.5|0|1],3[-782.5|1:123.0|:444.44],10[-0.985180680575|-0.338649566179|1.37000434149|0.831246802651|-0.415610988193|1.06213106869|0.554158098662|0.507981245453|-1.55472278822|-1.03135753378],123451,36,XXXX11,3.111,-0.6999,1,<:>,,<:nan>,123451,36,XXXX11,3.111,-0.6999,1,% +123434,123452,40,XXXX01,10,0.4111,1,3{label three 1:123|label three 2:-782.5|label three 3:444.44},3{Key three 3:444.44|Key three 2:-782.5|Key three 1:123},4[123|abc|345|list],3[-782.5|123|444.44],3[-782.5|123.0|444.44],10[-1.39575784008|-1.62506351709|-0.907239380237|-0.159508137551|0.807541881212|0.937157205458|0.578236204203|0.717820563726|-0.0398797142361|1.0866927346],123452,40,XXXX01,10,0.4111,1,<:Second String 13>,,<:456.8>,123452,40,XXXX01,10,0.4111,1,x +123435,123450,45,XXXX02,4.2222,0.1902,1,3{label four 1:123|label four 2:-782.5|label 
four 3:444.44},3{Key four 2:-782.5|Key four 3:444.44|Key four 1:123},3[bbb|aaa|zzz],4[-782.5|100.5|123|444.44],4[-782.5|100.5|123.0|444.44],10[1.94246107491|-0.062340594565|0.246115232403|-1.24462409799|-0.190844664632|0.8115331407|0.381021476571|-1.60448155299|-0.422151990754|0.143311976349],123450,45,XXXX02,4.2222,0.1902,1,,<456.9:nan>,,123450,45,XXXX02,4.2222,0.1902,1,0 +123436,123455,46,XXXX03,5.3333,-0.4888,0,,,,,,,123455,46,XXXX03,5.3333,-0.4888,0,,<789.1:789.2>,,123455,46,XXXX03,5.3333,-0.4888,0,A +123441,123450,33,XXXX6,12,0.2,0,,,,,,,123450,33,XXXX6,12,0.2,0,,,,123450,33,XXXX6,12,0.2,0,0 +123442,123449,34,XXXX7,6.25,0.1056,1,,,,,,,123449,34,XXXX7,6.25,0.1056,1,,,,123449,34,XXXX7,6.25,0.1056,1,% +123448,123448,8,Running fast,10,0.1,0,,,,,,,123448,8,Running fast,10,0.1,0,,,,123448,8,Running fast,10,0.1,0,-6 +123449,123451,7,$15 increase,0.9999,0.06743,0,,,,,,,123451,7,$15 increase,0.9999,0.06743,0,,,,123451,7,$15 increase,0.9999,0.06743,0,0 +123450,123452,1,4% of something,1.2345,0.998,0,,,,,,,123452,1,4% of something,1.2345,0.998,0,,,,123452,1,4% of something,1.2345,0.998,0,A +123450,123452,4,3.4% of GDP,4.2345,0.15678,1,,,,,,,123452,4,3.4% of GDP,4.2345,0.15678,1,,,,123452,4,3.4% of GDP,4.2345,0.15678,1,0 +123450,123450,6,Market pulls back,3,0.923,0,,,,,,,123450,6,Market pulls back,3,0.923,0,,,,123450,6,Market pulls back,3,0.923,0, +123450,123455,12,Bonds vs. Equities,8,0.0111,1,,,,,,,123455,12,Bonds vs. Equities,8,0.0111,1,,,,123455,12,Bonds vs. 
Equities,8,0.0111,1, +123450,123450,14,Here comes the sun,3.3333,-0.8888,0,,,,,,,123450,14,Here comes the sun,3.3333,-0.8888,0,,,,123450,14,Here comes the sun,3.3333,-0.8888,0, +123451,123454,2,Description 4/5,2.2345,0.3456,0,,,,,,,123454,2,Description 4/5,2.2345,0.3456,0,,,,123454,2,Description 4/5,2.2345,0.3456,0, +123451,123453,9,C++14 development,4.25,0.0056,0,,,,,,,123453,9,C++14 development,4.25,0.0056,0,,,,123453,9,C++14 development,4.25,0.0056,0, +123452,123456,3,This is bad,3.2345,0.056,0,,,,,,,123456,3,This is bad,3.2345,0.056,0,,,,123456,3,This is bad,3.2345,0.056,0, +123452,123457,10,Some explanation,0.009,0.07865,0,,,,,,,123457,10,Some explanation,0.009,0.07865,0,,,,123457,10,Some explanation,0.009,0.07865,0, +123452,123458,11,More strings,1.111,-0.9999,0,,,,,,,123458,11,More strings,1.111,-0.9999,0,,,,123458,11,More strings,1.111,-0.9999,0, +123453,123459,20,XXXX04,5.25,0.0456,0,,,,,,,123459,20,XXXX04,5.25,0.0456,0,,,,123459,20,XXXX04,5.25,0.0456,0, +123454,123460,15,XXXX1,11,0.14,1,,,,,,,123460,15,XXXX1,11,0.14,1,,,,123460,15,XXXX1,11,0.14,1, +123455,123441,5,Market drops,5.2345,0.00345,0,,,,,,,123441,5,Market drops,5.2345,0.00345,0,,,,123441,5,Market drops,5.2345,0.00345,0, +123455,123442,13,Almost done,2.2222,0.1002,1,,,,,,,123442,13,Almost done,2.2222,0.1002,1,,,,123442,13,Almost done,2.2222,0.1002,1, +123456,123432,22,XXXX2,1.009,0.078654,0,,,,,,,123432,22,XXXX2,1.009,0.078654,0,,,,123432,22,XXXX2,1.009,0.078654,0, +123457,123433,23,XXXX3,2.111,-0.8999,0,,,,,,,123433,23,XXXX3,2.111,-0.8999,0,,,,123433,23,XXXX3,2.111,-0.8999,0, +123458,123434,24,XXXX4,9,0.01119,1,,,,,,,123434,24,XXXX4,9,0.01119,1,,,,123434,24,XXXX4,9,0.01119,1, +123459,123435,25,XXXX4,3.2222,0.8002,0,,,,,,,123435,25,XXXX4,3.2222,0.8002,0,,,,123435,25,XXXX4,3.2222,0.8002,0, +123460,123436,30,XXXX5,4.3333,-0.9888,0,,,,,,,123436,30,XXXX5,4.3333,-0.9888,0,,,,123436,30,XXXX5,4.3333,-0.9888,0, 
INDEX:12:,col_3:12:,col_4:6:,col_str:12:,col_2:12:,col_1:12:,col_char:12:,col_uchar:12:,col_3_2:12:,col_4_2:6:,col_str_2:12:,col_2_2:12:,col_1_2:12:,col_3_no_idx:12:,col_4_no_idx:6:,col_str_no_idx:12:,col_2_no_idx:12:,col_1_no_idx:12:,col_char_no_idx:12:,col_uchar_no_idx:12: @@ -2074,7 +2074,7 @@ INDEX:4:,Open:4:,High:4:,Low:4:,Close:4:,Unordered Map:12:,Open:12:,High:12:,Low:12:,Close:12:,Mean:12:,Median:12:,25% Quantile:12:,Std:12:,MAD:12:,Map 1:12:,Str Vec:12:,Double Set:12:,Str Set:12:,Volume:12: +INDEX:12:,Unordered Map:12:,Open:12:,High:12:,Low:12:,Close:12:,Mean:12:,Median:12:,25% Quantile:12:,Std:12:,MAD:12:,Map 1:12:,Str Vec:12:,Double Set:12:,Str Set:12:,Volume:12: 01/14/2000 00:00:00.000,,0.999442,0.999442,0.77846,0.896763,0.8819754,0.8805805,0.8560265,0.060093197,0.0436942,3{label one 1:123|label one 2:-782.5|label one 3:444.44},4[bbb|aaa|zzz|ddd],3[-782.5|123|444.44],3[-782.5|123.0|444.44],6400945600 01/14/2000 00:00:00.000,,0.999442,0.999442,0.77846,0.896763,0.8819754,0.8805805,0.8560265,0.060093197,0.0436942,3{label one 1:123|label one 2:-782.5|label one 3:444.44},4[bbb|aaa|zzz|ddd],3[-782.5|123|444.44],3[-782.5|123.0|444.44],6400945600 01/14/2000 00:00:00.000,,0.999442,0.999442,0.77846,0.896763,0.8819754,0.8805805,0.8560265,0.060093197,0.0436942,3{label one 1:123|label one 2:-782.5|label one 3:444.44},4[bbb|aaa|zzz|ddd],3[-782.5|123|444.44],3[-782.5|123.0|444.44],6400945600 @@ -2089,7 +2089,7 @@ INDEX:12:,Unordered Map:12:,Open:12:,High:12:,Low 02/29/2000 00:00:00.000,,1.0625,1.0625,0.985491,1.023438,1.0203265,1.0212055,1.013672,0.021711375,0.0153041,3{label four 1:123|label four 2:-782.5|label four 3:444.44},3[bbb|aaa|zzz],4[-782.5|100.5|123|444.44],4[-782.5|100.5|123.0|444.44],3605190400 -INDEX:12:,Map 1:12:,Open:12:,High:12:,Low:12:,Close:12:,Mean:12:,Median:12:,25% Quantile:12:,Std:12:,MAD:12:,Unordered Map:12:,Str Vec:12:,Double Set:12:,Str Set:12:,Volume:12: +INDEX:12:,Map 1:12:,Open:12:,High:12:,Low:12:,Close:12:,Mean:12:,Median:12:,25% 
Quantile:12:,Std:12:,MAD:12:,Unordered Map:12:,Str Vec:12:,Double Set:12:,Str Set:12:,Volume:12: 01/14/2000 00:00:00.000,