Skip to content

Commit 8ae02e4

Browse files
committed
converted read_csv2() to fopen
1 parent 4aeff58 commit 8ae02e4

File tree

3 files changed

+48
-16
lines changed

3 files changed

+48
-16
lines changed

include/DataFrame/Internals/DataFrame_private_decl.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929

3030
#pragma once
3131

32+
#include <cstdio>
3233
#include <ranges>
3334

3435
// ----------------------------------------------------------------------------
@@ -66,7 +67,7 @@ void read_binary_(std::istream &file,
6667
size_type starting_row,
6768
size_type num_rows);
6869
void read_csv_(std::istream &file, bool columns_only);
69-
void read_csv2_(std::istream &file,
70+
void read_csv2_(std::FILE *stream,
7071
bool columns_only,
7172
size_type starting_row,
7273
size_type num_rows);

include/DataFrame/Internals/DataFrame_read.tcc

+25-15
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3434

3535
#include <cstring>
3636
#include <sstream>
37+
#include <string_view>
3738

3839
// ----------------------------------------------------------------------------
3940

@@ -625,28 +626,28 @@ struct _col_data_spec_ {
625626

626627
template<typename I, typename H>
627628
void DataFrame<I, H>::
628-
read_csv2_(std::istream &stream,
629+
read_csv2_(std::FILE *stream,
629630
bool columns_only,
630631
size_type starting_row,
631632
size_type num_rows) {
632633

633634
using SpecVec = StlVecType<_col_data_spec_>;
634635

635-
std::string line;
636+
char line[64 * 1024];
636637
std::string value;
637638
SpecVec spec_vec;
638639
bool header_read { false };
639640
size_type col_count { 0 };
640641
size_type data_rows_read { 0 };
641642
size_type row_cnt { 0 };
642643

643-
line.reserve(1024);
644644
value.reserve(64);
645645
spec_vec.reserve(32);
646-
while (! stream.eof()) {
647-
std::getline(stream, line);
646+
while (! std::feof(stream)) {
647+
line[0] = '\0';
648+
std::fgets(line, sizeof(line) - 1, stream);
648649

649-
if (line.size() < 2 || line.empty() || line[0] == '#') continue;
650+
if (line[0] == '\0' || line[0] == '#') [[unlikely]] continue;
650651

651652
std::stringstream sstream { line };
652653

@@ -1531,17 +1532,25 @@ read (const char *file_name,
15311532
size_type starting_row,
15321533
size_type num_rows) {
15331534

1534-
std::ifstream stream;
1535-
const IOStreamOpti io_opti(stream, file_name, iof == io_format::binary);
1535+
if (iof == io_format::csv2) {
1536+
IOFileOpti io_opti(file_name);
15361537

1537-
if (stream.fail()) [[unlikely]] {
1538-
String1K err;
1539-
1540-
err.printf("read(): ERROR: Unable to open file '%s'", file_name);
1541-
throw DataFrameError(err.c_str());
1538+
read_csv2_(io_opti.file, columns_only, starting_row, num_rows);
15421539
}
1540+
else {
1541+
std::ifstream stream;
1542+
const IOStreamOpti io_opti(stream,
1543+
file_name, iof == io_format::binary);
1544+
1545+
if (stream.fail()) [[unlikely]] {
1546+
String1K err;
15431547

1544-
read<std::istream>(stream, iof, columns_only, starting_row, num_rows);
1548+
err.printf("read(): ERROR: Unable to open file '%s'", file_name);
1549+
throw DataFrameError(err.c_str());
1550+
}
1551+
1552+
read<std::istream>(stream, iof, columns_only, starting_row, num_rows);
1553+
}
15451554
return (true);
15461555
}
15471556

@@ -1568,7 +1577,8 @@ read (S &in_s,
15681577
read_csv_ (in_s, columns_only);
15691578
}
15701579
else if (iof == io_format::csv2) {
1571-
read_csv2_ (in_s, columns_only, starting_row, num_rows);
1580+
throw NotImplemented("read(): You can read a file in io_format::csv2 "
1581+
"format only by calling read() with file name");
15721582
}
15731583
else if (iof == io_format::json) {
15741584
if (starting_row != 0 ||

include/DataFrame/Utils/Utils.h

+21
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3030
#pragma once
3131

3232
#include <cmath>
33+
#include <cstdio>
3334
#include <cstdlib>
3435
#include <iostream>
3536
#include <iterator>
@@ -383,6 +384,26 @@ struct IOStreamOpti {
383384
const bool sync_;
384385
};
385386

387+
// ----------------------------------------------------------------------------
388+
389+
// Owns a C stdio stream opened for reading and closes it on destruction.
//
// The constructor opens file_name in "r" mode. When the open succeeds, it
// requests a fully buffered stream (_IOFBF) with a SIZ-byte buffer allocated
// by the C library. When the open fails, `file` is left as nullptr and no
// buffering call is made; callers must check `file` before handing it to any
// stdio function.
//
// The type is neither default-constructible nor copyable, so there is never
// more than one owner of the underlying stream.
//
template<std::size_t SIZ = 256 * 1024>
struct  IOFileOpti  {

    // The opened stream, or nullptr if fopen() failed.
    std::FILE   *file;

    explicit
    IOFileOpti(const char *file_name) : file(std::fopen(file_name, "r"))  {

        // Passing a null stream to setvbuf() is undefined behavior, so only
        // adjust buffering when the file was actually opened. The return
        // value is ignored on purpose: a refused buffer request leaves the
        // stream usable with its default buffering.
        if (file)
            std::setvbuf(file, nullptr, _IOFBF, SIZ);
    }

    // Close the stream only if one was opened.
    ~IOFileOpti ()  { if (file) std::fclose(file); }

    IOFileOpti () = delete;
    IOFileOpti (const IOFileOpti &) = delete;
    IOFileOpti &operator = (const IOFileOpti &) = delete;
};
406+
386407
} // namespace hmdf
387408

388409
// ----------------------------------------------------------------------------

0 commit comments

Comments
 (0)