Skip to content

Commit 420b4bf

Browse files
committed Apr 25, 2020
Updated benchmarks after some small optimizations to reduce branch misprediction
1 parent a9745ab commit 420b4bf

File tree

4 files changed

+20
-36
lines changed

4 files changed

+20
-36
lines changed
 

‎README.md

+11-11
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,17 @@ Operating System: macOS Catalina version 10.15.3
4747

4848
| Dataset | File Size | Rows | Cols | Time |
4949
|:--- | ---:| ---:| ---:| ---:|
50-
| [Denver Crime Data](https://www.kaggle.com/paultimothymooney/denver-crime-data) | 111 MB | 479,100 | 19 | 0.198s |
51-
| [AirBnb Paris Listings](https://www.kaggle.com/juliatb/airbnb-paris) | 196 MB | 141,730 | 96 | 0.345s |
52-
| [2015 Flight Delays and Cancellations](https://www.kaggle.com/usdot/flight-delays) | 574 MB | 5,819,079 | 31 | 0.994s |
53-
| [StackLite: Stack Overflow questions](https://www.kaggle.com/stackoverflow/stacklite) | 870 MB | 17,203,824 | 7 | 1.547s |
54-
| [Used Cars Dataset](https://www.kaggle.com/austinreese/craigslist-carstrucks-data) | 1.4 GB | 539,768 | 25 | 2.381s |
55-
| [Title-Based Semantic Subject Indexing](https://www.kaggle.com/hsrobo/titlebased-semantic-subject-indexing) | 3.7 GB | 12,834,026 | 4 | 6.965s |
56-
| [Bitcoin tweets - 16M tweets](https://www.kaggle.com/alaix14/bitcoin-tweets-20160101-to-20190329) | 4 GB | 47,478,748 | 9 | 7.945s |
57-
| [DDoS Balanced Dataset](https://www.kaggle.com/devendra416/ddos-datasets) | 6.3 GB | 12,794,627 | 85 | 13.578s |
58-
| [Seattle Checkouts by Title](https://www.kaggle.com/city-of-seattle/seattle-checkouts-by-title) | 7.1 GB | 34,892,623 | 11 | 15.350s |
59-
| [SHA-1 password hash dump](https://www.kaggle.com/urvishramaiya/have-i-been-pwnd) | 11 GB | 262,974,241 | 2 | 22.069s |
60-
| [DOHUI NOH scaled_data](https://www.kaggle.com/seaa0612/scaled-data) | 16 GB | 496,782 | 3213 | 34.923s |
50+
| [Denver Crime Data](https://www.kaggle.com/paultimothymooney/denver-crime-data) | 111 MB | 479,100 | 19 | 0.174s |
51+
| [AirBnb Paris Listings](https://www.kaggle.com/juliatb/airbnb-paris) | 196 MB | 141,730 | 96 | 0.289s |
52+
| [2015 Flight Delays and Cancellations](https://www.kaggle.com/usdot/flight-delays) | 574 MB | 5,819,079 | 31 | 1.047s |
53+
| [StackLite: Stack Overflow questions](https://www.kaggle.com/stackoverflow/stacklite) | 870 MB | 17,203,824 | 7 | 1.505s |
54+
| [Used Cars Dataset](https://www.kaggle.com/austinreese/craigslist-carstrucks-data) | 1.4 GB | 539,768 | 25 | 1.979s |
55+
| [Title-Based Semantic Subject Indexing](https://www.kaggle.com/hsrobo/titlebased-semantic-subject-indexing) | 3.7 GB | 12,834,026 | 4 | 5.929s |
56+
| [Bitcoin tweets - 16M tweets](https://www.kaggle.com/alaix14/bitcoin-tweets-20160101-to-20190329) | 4 GB | 47,478,748 | 9 | 7.040s |
57+
| [DDoS Balanced Dataset](https://www.kaggle.com/devendra416/ddos-datasets) | 6.3 GB | 12,794,627 | 85 | 12.648s |
58+
| [Seattle Checkouts by Title](https://www.kaggle.com/city-of-seattle/seattle-checkouts-by-title) | 7.1 GB | 34,892,623 | 11 | 12.883s |
59+
| [SHA-1 password hash dump](https://www.kaggle.com/urvishramaiya/have-i-been-pwnd) | 11 GB | 262,974,241 | 2 | 19.505s |
60+
| [DOHUI NOH scaled_data](https://www.kaggle.com/seaa0612/scaled-data) | 16 GB | 496,782 | 3213 | 32.780s |
6161

6262
## API
6363

‎benchmark/csv-game/README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,5 @@ Operating System: macOS Catalina version 10.15.3
3636

3737
| Test | Time |
3838
| --- | --- |
39-
| fieldcount | 47.2 ms ± 1.0 ms |
40-
| csv_count | 98.7 ms ± 1.9 ms |
39+
| fieldcount | 36.2 ms ± 3.1 ms |
40+
| csv_count | 92.5 ms ± 1.8 ms |

‎benchmark/csv-game/csv_count.cpp

+2-5
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,11 @@ int main(int argc, char **argv) {
1818
for (const auto row : csv) {
1919
size_t col{0};
2020
for (const auto cell : row) {
21-
cell_value.clear();
22-
cell.read_raw_value(cell_value);
23-
std::cout << "Cell value: " << cell_value << "\n";
2421
col += 1;
2522
if (col == column_index) {
26-
// cell.read_raw_value(cell_value);
23+
cell.read_raw_value(cell_value);
2724
sum += std::stoi(cell_value);
28-
// cell_value.clear();
25+
cell_value.clear();
2926
}
3027
}
3128
}

‎include/csv2/reader.hpp

+5-18
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,10 @@ class Reader {
165165
size_t last_quote_location = 0;
166166
bool quote_opened = false;
167167
for (auto i = current_; i < end_; i++) {
168+
current_ = i;
168169
if (buffer_[i] == delimiter::value && !quote_opened) {
169170
// actual delimiter
170171
// end of cell
171-
current_ = i;
172172
cell.end_ = current_;
173173
cell.escaped_ = escaped;
174174
return cell;
@@ -179,22 +179,10 @@ class Reader {
179179
quote_opened = true;
180180
last_quote_location = i;
181181
} else {
182-
// quote previously opened for this cell
183-
// check last quote location
184-
if (last_quote_location == i - 1) {
185-
// previous character was quote too!
186-
escaped = true;
187-
} else {
188-
last_quote_location = i;
189-
if (i + 1 < end_ && buffer_[i + 1] == delimiter::value) {
190-
quote_opened = false;
191-
}
192-
}
182+
escaped = (last_quote_location == i - 1);
183+
last_quote_location += (i - last_quote_location) * size_t(!escaped);
184+
quote_opened = escaped || (buffer_[i + 1] != delimiter::value);
193185
}
194-
current_ = i;
195-
} else {
196-
// Not delimiter or quote
197-
current_ = i;
198186
}
199187
}
200188
}
@@ -236,8 +224,7 @@ class Reader {
236224
static_cast<const char *>(memchr(&buffer_[start_], '\n', (buffer_size_ - start_)))) {
237225
end_ = start_ + (ptr - &buffer_[start_]);
238226
result.end_ = end_;
239-
if (end_ + 1 < buffer_size_)
240-
start_ = end_ + 1;
227+
start_ = end_ + 1;
241228
} else {
242229
// last row
243230
end_ = buffer_size_;

0 commit comments

Comments
 (0)
Please sign in to comment.