Skip to content

Commit

Permalink
Made reading large files in csv format faster
Browse files Browse the repository at this point in the history
  • Loading branch information
hosseinmoein committed Feb 3, 2025
1 parent 4a9e568 commit 2fd7e51
Show file tree
Hide file tree
Showing 3 changed files with 425 additions and 289 deletions.
2 changes: 1 addition & 1 deletion include/DataFrame/DataFrameTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@ struct StationaryTestParams {

// ----------------------------------------------------------------------------

// Column data types that can come from a file
// These are enumerations for data types that can be read/written from/to files
//
enum class file_dtypes : std::size_t {

Expand Down
38 changes: 32 additions & 6 deletions include/DataFrame/Internals/DataFrame_private_decl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,30 @@ replace_vector_vals_(V &data_vec,

// ----------------------------------------------------------------------------

// Reads successive tokens from `file`, converts each one with `converter`
// and appends the result to `vec`.
//
// converter: function taking the token's C-string and its length and
//            returning a V::value_type
// file:      input stream positioned at the first token to read
// vec:       container receiving the converted values
// file_type: for csv, reading stops when the next character is '\n';
//            for json, reading stops when the next character is ']'.
//            For any other format, reading only stops when the stream
//            can no longer supply a character.
//
template<typename V>
inline static void
col_vector_push_back_func_(
    typename V::value_type(*converter)(const char *, int),
    std::istream &file,
    V &vec,
    io_format file_type = io_format::csv) {

    // The format does not change while reading, so evaluate it only once
    //
    const bool  is_csv = (file_type == io_format::csv);
    const bool  is_json = (file_type == io_format::json);

    std::string token;
    char        next_ch { 0 };

    while (file.get(next_ch)) [[likely]] {
        const bool  end_of_column =
            (is_csv && next_ch == '\n') || (is_json && next_ch == ']');

        if (end_of_column)  break;

        // The character just read was only a look-ahead. Put it back so
        // that the token reader below starts from it.
        //
        file.unget();

        token.clear();
        // NOTE(review): presumably this fills token with the text up to the
        // ',' delimiter (or the alternate terminator passed last) -- confirm
        // against the definition of _get_token_from_file_
        //
        _get_token_from_file_(file, ',', token, is_json ? ']' : '\0');
        vec.push_back(converter(token.c_str(),
                                static_cast<int>(token.size())));
    }
}

// ----------------------------------------------------------------------------

template<typename T, typename V>
inline static void
col_vector_push_back_func_(V &vec,
Expand Down Expand Up @@ -1483,7 +1507,7 @@ struct IdxParserFunctor_<int, Dummy> {
std::istream &file,
io_format file_type = io_format::csv) const {

col_vector_push_back_func_(vec, file, &::strtol, file_type);
col_vector_push_back_func_(&_atoi_<int>, file, vec, file_type);
}
};

Expand All @@ -1496,7 +1520,7 @@ struct IdxParserFunctor_<long, Dummy> {
std::istream &file,
io_format file_type = io_format::csv) const {

col_vector_push_back_func_(vec, file, &::strtol, file_type);
col_vector_push_back_func_(&_atoi_<long>, file, vec, file_type);
}
};

Expand All @@ -1509,7 +1533,7 @@ struct IdxParserFunctor_<long long, Dummy> {
std::istream &file,
io_format file_type = io_format::csv) const {

col_vector_push_back_func_(vec, file, &::strtoll, file_type);
col_vector_push_back_func_(&_atoi_<long long>, file, vec, file_type);
}
};

Expand All @@ -1522,7 +1546,7 @@ struct IdxParserFunctor_<unsigned int, Dummy> {
std::istream &file,
io_format file_type = io_format::csv) const {

col_vector_push_back_func_(vec, file, &::strtoul, file_type);
col_vector_push_back_func_(&_atoi_<unsigned int>, file, vec, file_type);
}
};

Expand All @@ -1535,7 +1559,8 @@ struct IdxParserFunctor_<unsigned long, Dummy> {
std::istream &file,
io_format file_type = io_format::csv) const {

col_vector_push_back_func_(vec, file, &::strtoul, file_type);
col_vector_push_back_func_(&_atoi_<unsigned long>,
file, vec, file_type);
}
};

Expand All @@ -1548,7 +1573,8 @@ struct IdxParserFunctor_<unsigned long long, Dummy> {
std::istream &file,
io_format file_type = io_format::csv) const {

col_vector_push_back_func_(vec, file, &::strtoull, file_type);
col_vector_push_back_func_(&_atoi_<unsigned long long>,
file, vec, file_type);
}
};

Expand Down
Loading

0 comments on commit 2fd7e51

Please sign in to comment.