Skip to content

Commit

Permalink
Added str_vec, dbl_set, and str_set to read/write
Browse files Browse the repository at this point in the history
  • Loading branch information
hosseinmoein committed Oct 9, 2023
1 parent d9d82b3 commit c7d1c7b
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 9 deletions.
11 changes: 5 additions & 6 deletions data/AAPL_10dBucketWithMaps.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
INDEX:4:<DateTimeAME>,Open:4:<double>,High:4:<double>,Low:4:<double>,Close:4:<double>,Mean:4:<double>,Median:4:<double>,25% Quantile:4:<double>,Std:4:<double>,MAD:4:<double>,Map 1:4:<str_dbl_map>,Unordered Map:4:<str_dbl_unomap>,Volume:4:<long>
01/14/2000 00:00:00.000,0.999442,0.999442,0.77846,0.896763,0.8819754,0.8805805,0.8560265,0.0600931968588,0.0436942,3{label one 1:123.0|label one 2:-782.5|label one 3:444.44},3{Key one 1:123.0|Key one 2:-782.5|Key one 3:444.44},6400945600
01/31/2000 00:00:00.000,0.928013,1.013393,0.907366,0.926339,0.9637277,0.966797,0.938337,0.0362519289143,0.0313617,3{label two 1:123.0|label two 2:-782.5|label two 3:444.44},3{Key two 1:123.0|Key two 2:-782.5|Key two 3:444.44},6154232000
02/14/2000 00:00:00.000,0.895089,1.03404,0.882254,1.03404,0.9732142,0.988281,0.9433595,0.0559733198384,0.0462054,3{label three 1:123.0|label three 2:-782.5|label three 3:444.44},3{Key three 1:123.0|Key three 2:-782.5|Key three 3:444.44},3714592000
02/29/2000 00:00:00.000,1.0625,1.0625,0.985491,1.023438,1.0203265,1.0212055,1.013672,0.0217113745778,0.0153041,3{label four 1:123.0|label four 2:-782.5|label four 3:444.44},3{Key four 1:123.0|Key four 2:-782.5|Key four 3:444.44},3605190400

INDEX:4:<DateTimeAME>,Open:4:<double>,High:4:<double>,Low:4:<double>,Close:4:<double>,Mean:4:<double>,Median:4:<double>,25% Quantile:4:<double>,Std:4:<double>,MAD:4:<double>,Map 1:4:<str_dbl_map>,Unordered Map:4:<str_dbl_unomap>,Str Vec:4:<str_vec>,Double Set:4:<dbl_set>,Str Set:4:<str_set>,Volume:4:<long>
01/14/2000 00:00:00.000,0.999442,0.999442,0.77846,0.896763,0.8819754,0.8805805,0.8560265,0.060093197,0.0436942,3{label one 1:123.0|label one 2:-782.5|label one 3:444.44},3{Key one 1:123.0|Key one 2:-782.5|Key one 3:444.44},4[bbb|aaa|zzz|ddd],3[123.0|-782.5|444.44],3[123.0|-782.5|444.44],6400945600
01/31/2000 00:00:00.000,0.928013,1.013393,0.907366,0.926339,0.9637277,0.966797,0.938337,0.036251929,0.0313617,3{label two 1:123.0|label two 2:-782.5|label two 3:444.44},3{Key two 1:123.0|Key two 2:-782.5|Key two 3:444.44},4[aaa|bbb|ccc|www],3[1:123.0|-782.5|:444.44],3[1:123.0|-782.5|:444.44],6154232000
02/14/2000 00:00:00.000,0.895089,1.03404,0.882254,1.03404,0.9732142,0.988281,0.9433595,0.05597332,0.0462054,3{label three 1:123.0|label three 2:-782.5|label three 3:444.44},3{Key three 1:123.0|Key three 2:-782.5|Key three 3:444.44},4[123|abc|345|list],3[123.0|-782.5|444.44],3[123.0|-782.5|444.44],3714592000
02/29/2000 00:00:00.000,1.0625,1.0625,0.985491,1.023438,1.0203265,1.0212055,1.013672,0.021711375,0.0153041,3{label four 1:123.0|label four 2:-782.5|label four 3:444.44},3{Key four 1:123.0|Key four 2:-782.5|Key four 3:444.44},3[bbb|aaa|zzz],4[123.0|-782.5|444.44|100.5],4[123.0|-782.5|444.44|100.5],3605190400
8 changes: 7 additions & 1 deletion docs/HTML/read.html
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,14 @@
DateTimeAME -- DateTime string printed in American style (MM/DD/YYYY HH:MM:SS.mmm)
DateTimeEUR -- DateTime string printed in European style (YYYY/MM/DD HH:MM:SS.mmm)
DateTimeISO -- DateTime string printed in ISO style (YYYY-MM-DD HH:MM:SS.mmm)
dbl_vector -- A vector of double precision values, The vector is printed as "s[d1|d2|...]"
dbl_vec -- A vector of double precision values. The vector is printed as "s[d1|d2|...]"
where s is the size of the vector and d's are the double values.
str_vec -- A vector of std::string values. The vector is printed as "s[str1|str2|...]"
where s is the size of the vector and str's are the strings.
dbl_set -- A set of double precision values. The set is printed as "s[d1|d2|...]"
where s is the size of the set and d's are the double values.
str_set -- A set of std::string values. The set is printed as "s[str1|str2|...]"
where s is the size of the set and str's are the strings.
str_dbl_map -- A map of string keys to double precision values. The map is printed as "s{k1:v1|k2:v2|...}"
where s is the size of the map and k's and v's are keys and values.
str_dbl_unomap -- An unordered map of string keys to double precision values. The map is printed as "s{k1:v1|k2:v2|...}"
Expand Down
8 changes: 7 additions & 1 deletion docs/HTML/write.html
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,14 @@
DateTimeAME -- DateTime string printed in American style (MM/DD/YYYY HH:MM:SS.mmm)
DateTimeEUR -- DateTime string printed in European style (YYYY/MM/DD HH:MM:SS.mmm)
DateTimeISO -- DateTime string printed in ISO style (YYYY-MM-DD HH:MM:SS.mmm)
dbl_vector -- A vector of double precision values, The vector is printed as "s[d1|d2|...]"
dbl_vec -- A vector of double precision values. The vector is printed as "s[d1|d2|...]"
where s is the size of the vector and d's are the double values.
str_vec -- A vector of std::string values. The vector is printed as "s[str1|str2|...]"
where s is the size of the vector and str's are the strings.
dbl_set -- A set of double precision values. The set is printed as "s[d1|d2|...]"
where s is the size of the set and d's are the double values.
str_set -- A set of std::string values. The set is printed as "s[str1|str2|...]"
where s is the size of the set and str's are the strings.
str_dbl_map -- A map of string keys to double precision values. The map is printed as "s{k1:v1|k2:v2|...}"
where s is the size of the map and k's and v's are keys and values.
str_dbl_unomap -- An unordered map of string keys to double precision values. The map is printed as "s{k1:v1|k2:v2|...}"
Expand Down
2 changes: 1 addition & 1 deletion examples/hello_world.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ using StrDataFrame = StdDataFrame<std::string>;
//
using DTDataFrame = StdDataFrame<DateTime>;

// This is just some arbitrary type to show how any type could be in DataFrame
// This is just some arbitrary type to show how any type, including the DataFrame itself, could be in DataFrame
//
struct MyData {
int i { 10 };
Expand Down
76 changes: 76 additions & 0 deletions include/DataFrame/Internals/DataFrame_read.tcc
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ read_csv2_(std::istream &stream,
stream.unget();

// First get the header which is column names, sizes and types
//
if (! header_read) [[unlikely]] {
col_name.clear();
type_str.clear();
Expand All @@ -532,6 +533,7 @@ read_csv2_(std::istream &stream,
size_type row_cnt = 0;

// Jump to the starting row
//
while (row_cnt < starting_row && stream.get(c))
if (c == '\r' || c == '\n')
row_cnt += 1;
Expand Down Expand Up @@ -592,6 +594,7 @@ read_csv2_(std::istream &stream,
col_name.c_str(),
nrows);
// This includes DateTime, DateTimeAME, DateTimeEUR, DateTimeISO
//
else if (! ::strncmp(type_str.c_str(), "DateTime", 8))
spec_vec.emplace_back(StlVecType<DateTime>(),
type_str.c_str(),
Expand All @@ -607,6 +610,21 @@ read_csv2_(std::istream &stream,
type_str.c_str(),
col_name.c_str(),
nrows);
else if (type_str == "str_vec")
spec_vec.emplace_back(StlVecType<StlVecType<std::string>>{ },
type_str.c_str(),
col_name.c_str(),
nrows);
else if (type_str == "dbl_set")
spec_vec.emplace_back(StlVecType<std::set<double>>{ },
type_str.c_str(),
col_name.c_str(),
nrows);
else if (type_str == "str_set")
spec_vec.emplace_back(StlVecType<std::set<std::string>>{ },
type_str.c_str(),
col_name.c_str(),
nrows);
else if (type_str == "str_dbl_map")
spec_vec.emplace_back(
StlVecType<std::map<std::string, double>>{ },
Expand Down Expand Up @@ -743,6 +761,39 @@ read_csv2_(std::istream &stream,
value.c_str())));
}
}
else if (col_spec.type_spec == "str_vec") {
if (! value.empty()) {
StlVecType<StlVecType<std::string>> &vec =
std::any_cast<StlVecType<StlVecType<std::string>> &>
(col_spec.col_vec);

vec.push_back(
std::move(_get_str_vec_from_value_<DataFrame<I, H>>(
value.c_str())));
}
}
else if (col_spec.type_spec == "dbl_set") {
using set_t = std::set<double>;

if (! value.empty()) {
StlVecType<set_t> &vec =
std::any_cast<StlVecType<set_t> &>(col_spec.col_vec);

vec.push_back(std::move(_get_dbl_set_from_value_(
value.c_str())));
}
}
else if (col_spec.type_spec == "str_set") {
using set_t = std::set<std::string>;

if (! value.empty()) {
StlVecType<set_t> &vec =
std::any_cast<StlVecType<set_t> &>(col_spec.col_vec);

vec.push_back(std::move(_get_str_set_from_value_(
value.c_str())));
}
}
else if (col_spec.type_spec == "str_dbl_map") {
using map_t = std::map<std::string, double>;

Expand Down Expand Up @@ -864,6 +915,31 @@ read_csv2_(std::istream &stream,
std::move(std::any_cast<StlVecType<StlVecType<double>> &>
(col_spec.col_vec)),
nan_policy::dont_pad_with_nans);
else if (col_spec.type_spec == "str_vec")
load_column<StlVecType<std::string>>(
col_spec.col_name.c_str(),
std::move(
std::any_cast<StlVecType<StlVecType<std::string>> &>
(col_spec.col_vec)),
nan_policy::dont_pad_with_nans);
else if (col_spec.type_spec == "dbl_set") {
using set_t = std::set<double>;

load_column<set_t>(
col_spec.col_name.c_str(),
std::move(std::any_cast<StlVecType<set_t> &>
(col_spec.col_vec)),
nan_policy::dont_pad_with_nans);
}
else if (col_spec.type_spec == "str_set") {
using set_t = std::set<std::string>;

load_column<set_t>(
col_spec.col_name.c_str(),
std::move(std::any_cast<StlVecType<set_t> &>
(col_spec.col_vec)),
nan_policy::dont_pad_with_nans);
}
else if (col_spec.type_spec == "str_dbl_map") {
using map_t = std::map<std::string, double>;

Expand Down
109 changes: 109 additions & 0 deletions include/DataFrame/Internals/DataFrame_standalone.tcc
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,20 @@ static S &operator << (S &stream, const std::vector<T> &data) {

// ----------------------------------------------------------------------------

// Streams a std::set as "s[e1|e2|...]", where s is the number of elements and
// the e's are the elements in the set's iteration order.
// An empty set writes nothing at all to the stream.
//
template<typename S, typename T>
static S &operator << (S &stream, const std::set<T> &data) {

    if (data.empty())
        return (stream);

    auto        iter = data.cbegin();
    const auto  last = data.cend();

    // The first element follows the opening bracket directly; every later
    // element is preceded by the '|' separator.
    //
    stream << data.size() << '[' << *iter;
    while (++iter != last)
        stream << '|' << *iter;
    stream << ']';

    return (stream);
}

// ----------------------------------------------------------------------------

template<typename S, typename T, std::size_t N>
static S &operator << (S &stream, const std::array<T, N> &data) {

Expand Down Expand Up @@ -596,6 +610,101 @@ _get_dbl_vec_from_value_(const char *value) {

// ----------------------------------------------------------------------------

// Parses one cell of the form "s[str1|str2|...]" into a vector of strings.
//
// value: NUL-terminated cell text. The integer in front of '[' is used only
//        as a capacity hint; the items actually found between '[' and ']'
//        decide the size of the result.
//
// Returns the items in the order they appear in the text.
//   - An empty item between two separators is kept, but a separator that is
//     directly followed by ']' does not add a trailing empty item.
//   - A null pointer, or text without '[', yields an empty vector.
//   - If ']' is missing, the list ends at the end of the text.
//   - Anything after the first ']' is ignored.
//
// Items are built directly from the input text, so there is no limit on the
// length of a single item and no scan ever runs past the terminating NUL.
//
template<typename DF>
inline static typename DF::template StlVecType<std::string>
_get_str_vec_from_value_(const char *value) {

    using vec_t = typename DF::template StlVecType<std::string>;

    vec_t   data;

    if (value == nullptr)
        return (data);

    // Locate the opening bracket; give up on malformed text
    //
    const char  *cur { value };

    while (*cur != '\0' && *cur != '[')
        ++cur;
    if (*cur == '\0')
        return (data);
    ++cur;  // skip [

    // Locate the end of the list: the closing bracket or the end of text
    //
    const char  *list_end { cur };

    while (*list_end != '\0' && *list_end != ']')
        ++list_end;

    // The leading count is only a hint. It is ignored when it is not
    // positive, and it is clamped to the largest number of items the text
    // could possibly hold, so a bogus count cannot trigger a huge allocation.
    //
    const long          hint { std::strtol(value, nullptr, 10) };
    const std::size_t   max_items {
        static_cast<std::size_t>(list_end - cur) + 1
    };

    if (hint > 0) {
        const std::size_t   wanted { static_cast<std::size_t>(hint) };

        data.reserve(wanted < max_items ? wanted : max_items);
    }

    while (cur != list_end) {
        const char  *tok_end { cur };

        while (tok_end != list_end && *tok_end != '|')
            ++tok_end;
        data.emplace_back(cur, tok_end);
        if (tok_end == list_end)
            break;
        cur = tok_end + 1;  // skip separator
    }
    return (data);
}

// ----------------------------------------------------------------------------

// Parses one cell of the form "s[d1|d2|...]" into a set of doubles.
//
// value: NUL-terminated cell text. The count in front of '[' is ignored
//        because a set determines its own size.
//
// Returns the set of parsed values.
//   - Each item is converted with std::strtod, so an item that does not start
//     with a number contributes 0.0 and trailing non-numeric characters in an
//     item are ignored.
//   - A separator that is directly followed by ']' does not add an item.
//   - A null pointer, or text without '[', yields an empty set.
//   - If ']' is missing, the list ends at the end of the text.
//   - Anything after the first ']' is ignored.
//
// Numbers are parsed in place. std::strtod cannot consume '|' or ']', so the
// conversion never crosses an item boundary, and no scan runs past the
// terminating NUL.
//
inline static std::set<double>
_get_dbl_set_from_value_(const char *value) {

    std::set<double>    data;

    if (value == nullptr)
        return (data);

    // Locate the opening bracket; give up on malformed text
    //
    const char  *cur { value };

    while (*cur != '\0' && *cur != '[')
        ++cur;
    if (*cur == '\0')
        return (data);
    ++cur;  // skip [

    while (*cur != '\0' && *cur != ']') {
        data.insert(std::strtod(cur, nullptr));

        // Advance to the end of this item
        //
        while (*cur != '\0' && *cur != '|' && *cur != ']')
            ++cur;
        if (*cur != '|')  // closing bracket or end of text
            break;
        ++cur;  // skip separator
    }
    return (data);
}

// ----------------------------------------------------------------------------

// Parses one cell of the form "s[str1|str2|...]" into a set of strings.
//
// value: NUL-terminated cell text. The count in front of '[' is ignored
//        because a set determines its own size.
//
// Returns the set of items found between '[' and ']'.
//   - An empty item between two separators is kept, but a separator that is
//     directly followed by ']' does not add a trailing empty item.
//   - A null pointer, or text without '[', yields an empty set.
//   - If ']' is missing, the list ends at the end of the text.
//   - Anything after the first ']' is ignored.
//
// Items are built directly from the input text, so there is no limit on the
// length of a single item and no scan ever runs past the terminating NUL.
//
inline static std::set<std::string>
_get_str_set_from_value_(const char *value) {

    std::set<std::string>   data;

    if (value == nullptr)
        return (data);

    // Locate the opening bracket; give up on malformed text
    //
    const char  *cur { value };

    while (*cur != '\0' && *cur != '[')
        ++cur;
    if (*cur == '\0')
        return (data);
    ++cur;  // skip [

    while (*cur != '\0' && *cur != ']') {
        const char  *tok_end { cur };

        while (*tok_end != '\0' && *tok_end != '|' && *tok_end != ']')
            ++tok_end;
        data.emplace(cur, tok_end);
        if (*tok_end != '|')  // closing bracket or end of text
            break;
        cur = tok_end + 1;  // skip separator
    }
    return (data);
}

// ----------------------------------------------------------------------------

template<typename MAP>
inline static MAP
_get_str_dbl_map_from_value_(const char *value) {
Expand Down
20 changes: 20 additions & 0 deletions test/dataframe_tester_3.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2199,6 +2199,9 @@ static void test_read_csv_with_maps() {
using DT_DataFrame = StdDataFrame<DateTime>;
using map_t = std::map<std::string, double>;
using unomap_t = std::unordered_map<std::string, double>;
using str_vec_t = std::vector<std::string>;
using str_set_t = std::set<std::string>;
using dbl_set_t = std::set<double>;

DT_DataFrame df;

Expand All @@ -2221,6 +2224,23 @@ static void test_read_csv_with_maps() {
assert((std::fabs(
df.get_column<unomap_t>
("Unordered Map")[0]["Key one 2"] - -782.5) < 0.001));

assert((df.get_column<str_vec_t>("Str Vec").size() == 4));
assert((df.get_column<str_vec_t>("Str Vec")[1].size() == 4));
assert((df.get_column<str_vec_t>("Str Vec")[3].size() == 3));
assert((df.get_column<str_vec_t>("Str Vec")[2][2] == "345"));

assert((df.get_column<dbl_set_t>("Double Set").size() == 4));
assert((df.get_column<dbl_set_t>("Double Set")[1].size() == 3));
assert((df.get_column<dbl_set_t>("Double Set")[3].size() == 4));
assert((*(df.get_column<dbl_set_t>("Double Set")[2].find(444.44)) ==
444.44));

assert((df.get_column<str_set_t>("Str Set").size() == 4));
assert((df.get_column<str_set_t>("Str Set")[1].size() == 3));
assert((df.get_column<str_set_t>("Str Set")[3].size() == 4));
assert((*(df.get_column<str_set_t>("Str Set")[0].find("123.0")) ==
"123.0"));
}
catch (const DataFrameError &ex) {
std::cout << ex.what() << std::endl;
Expand Down

0 comments on commit c7d1c7b

Please sign in to comment.