Skip to content

Commit b721632

Browse files
authored
Merge pull request #270 from hosseinmoein/Hossein/Cpp23
Using C++23 to reimplement sort by using zip
2 parents 6e382c3 + 0abd9e4 commit b721632

13 files changed

+517
-383
lines changed

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ add_library(DataFrame::DataFrame ALIAS DataFrame)
3434

3535
target_sources(DataFrame PRIVATE src/Utils/DateTime.cc)
3636

37-
target_compile_features(DataFrame PUBLIC cxx_std_20)
37+
target_compile_features(DataFrame PUBLIC cxx_std_23)
3838
target_compile_definitions(
3939
DataFrame
4040
PRIVATE $<$<BOOL:${HMDF_HAVE_CLOCK_GETTIME}>:HMDF_HAVE_CLOCK_GETTIME>

README.md

+2-4
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,13 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2424
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
2525
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
-->
27+
[![C++23](https://img.shields.io/badge/C%2B%2B-23-blue.svg)](https://isocpp.org/std/the-standard )
2728
[![Build status](https://ci.appveyor.com/api/projects/status/hjw01qui3bvxs8yi?svg=true)](https://ci.appveyor.com/project/hosseinmoein/dataframe)
29+
<BR>
2830
![GitHub](https://img.shields.io/github/license/hosseinmoein/DataFrame.svg?color=red&style=popout)
29-
[![C++20](https://img.shields.io/badge/C%2B%2B-20-blue.svg)](https://isocpp.org/std/the-standard )
3031
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/db646376a4014c3788c7224e670fe451)](https://app.codacy.com/manual/hosseinmoein/DataFrame?utm_source=github.com&utm_medium=referral&utm_content=hosseinmoein/DataFrame&utm_campaign=Badge_Grade_Dashboard)
3132
<BR>
32-
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/hosseinmoein/DataFrame/master)
33-
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/hosseinmoein/DataFrame/graphs/commit-activity)
3433
![GitHub tag (latest by date)](https://img.shields.io/github/tag-date/hosseinmoein/DataFrame.svg?color=blue&label=Official%20Release&style=popout)
35-
<BR>
3634
![Conan Center](https://img.shields.io/conan/v/dataframe)
3735
[![VCPKG package](https://repology.org/badge/version-for-repo/vcpkg/dataframe.svg)](https://repology.org/project/dataframe/versions)
3836

benchmarks/dataframe_performance.cc

+17-5
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ using namespace hmdf;
3636
using namespace std::chrono;
3737

3838
constexpr std::size_t ALIGNMENT = 64;
39-
constexpr std::size_t SIZE = 300000000;
39+
// constexpr std::size_t SIZE = 300000000;
40+
constexpr std::size_t SIZE = 10000000;
4041

4142
typedef StdDataFrame64<time_t> MyDataFrame;
4243

@@ -57,7 +58,7 @@ int main(int, char *[]) {
5758

5859
std::cout << "Data generation/load time: "
5960
<< double(duration_cast<microseconds>(second - first).count()) / 1000000.0
60-
<< std::endl;
61+
<< " secs" << std::endl;
6162

6263
MeanVisitor<double, time_t> n_mv;
6364
VarVisitor<double, time_t> ln_vv;
@@ -81,14 +82,25 @@ int main(int, char *[]) {
8182

8283
const auto fourth = high_resolution_clock::now();
8384

85+
// df.sort<double, double, double>("log_normal", sort_spec::ascen,
86+
// "exponential", sort_spec::ascen);
87+
// std::cout << "1001th value in normal column: "
88+
// << df.get_column<double>("normal")[1001] << std::endl;
89+
90+
const auto fifth = high_resolution_clock::now();
91+
8492
std::cout << "Calculation time: "
8593
<< double(duration_cast<microseconds>(third - second).count()) / 1000000.0
86-
<< '\n'
94+
<< " secs\n"
8795
<< "Selection time: "
8896
<< double(duration_cast<microseconds>(fourth - third).count()) / 1000000.0
89-
<< '\n'
97+
<< " secs\n"
98+
// << "Sorting time: "
99+
// << double(duration_cast<microseconds>(fifth - fourth).count()) / 1000000.0
100+
// << " secs\n"
90101
<< "Overall time: "
91-
<< double(duration_cast<microseconds>(fourth - first).count()) / 1000000.0
102+
<< double(duration_cast<microseconds>(fifth - first).count()) / 1000000.0
103+
<< " secs"
92104
<< std::endl;
93105
return (0);
94106
}

benchmarks/polars_performance.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
# ------------------------------------------------------------------------------
66

7-
SIZE: int = 300000000
7+
# SIZE: int = 300000000
8+
SIZE: int = 10000000
89

910
first = datetime.datetime.now()
1011
df = pl.DataFrame({"normal": np.random.normal(size=SIZE),
@@ -13,7 +14,7 @@
1314
})
1415
second = datetime.datetime.now()
1516
print(f"Data generation/load time: "
16-
f"{(second - first).seconds}.{(second - first).microseconds}")
17+
f"{(second - first).seconds}.{(second - first).microseconds} secs")
1718

1819
df2 = df.select(
1920
mean = pl.col("normal").mean(),
@@ -32,9 +33,14 @@
3233
print(f"Number of rows after select: {df3.select(pl.count()).item()}")
3334
fourth = datetime.datetime.now()
3435

35-
print(f"Calculation time: {(third - second).seconds}.{(third - second).microseconds}")
36-
print(f"Selection time: {(fourth - third).seconds}.{(fourth - third).microseconds}")
37-
print(f"Overall time: {(fourth - first).seconds}.{(fourth - first).microseconds}")
36+
# df4 = df.sort(["log_normal", "exponential"]);
37+
# print(f"1001th value in normal column: {df4['normal'][1001]}")
38+
fifth = datetime.datetime.now()
39+
40+
print(f"Calculation time: {(third - second).seconds}.{(third - second).microseconds} secs")
41+
print(f"Selection time: {(fourth - third).seconds}.{(fourth - third).microseconds} secs")
42+
# print(f"Sorting time: {(fifth - fourth).seconds}.{(fifth - fourth).microseconds} secs")
43+
print(f"Overall time: {(fifth - first).seconds}.{(fifth - first).microseconds} secs")
3844

3945
# ------------------------------------------------------------------------------
4046

include/DataFrame/DataFrameStatsVisitors.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -4842,6 +4842,9 @@ struct LowessVisitor {
48424842
const Y &y_begin, const Y &y_end, // dependent variable
48434843
const X &x_begin, const X &x_end) { // independent variable
48444844

4845+
using bool_vec_t =
4846+
std::vector<bool, typename allocator_declare<bool, A>::type>;
4847+
48454848
assert(frac_ >= 0 && frac_ <= 1);
48464849
assert(loop_n_ > 2);
48474850

@@ -4862,7 +4865,10 @@ struct LowessVisitor {
48624865
[] (auto lhs, auto rhs) -> bool {
48634866
return (lhs < rhs);
48644867
});
4865-
_sort_by_sorted_index_(yvals, sorting_idxs, col_s);
4868+
4869+
bool_vec_t done_vec (col_s);
4870+
4871+
_sort_by_sorted_index_(yvals, sorting_idxs, done_vec, col_s);
48664872
lowess_(idx_begin, idx_end,
48674873
yvals.begin(), yvals.end(),
48684874
xvals.begin(), xvals.end());

include/DataFrame/DataFrameTypes.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -579,10 +579,10 @@ struct RandGenParams {
579579
std::size_t t_dist { 1 };
580580
// The μ distribution parameter (the mean of the distribution)
581581
//
582-
double mean { 1.0 };
582+
double mean { 0 };
583583
// The σ distribution parameter (standard deviation)
584584
//
585-
double std { 0 };
585+
double std { 1 };
586586
// The λ distribution parameter (the rate parameter)
587587
//
588588
double lambda { 1.0 };

0 commit comments

Comments
 (0)