Skip to content

Commit

Permalink
parallelizing sorting, pass 1
Browse files Browse the repository at this point in the history
  • Loading branch information
bhaller committed Aug 12, 2023
1 parent f04c311 commit b310254
Show file tree
Hide file tree
Showing 13 changed files with 483 additions and 218 deletions.
1 change: 1 addition & 0 deletions EidosScribe/EidosTextView.mm
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "eidos_property_signature.h"
#include "eidos_type_table.h"
#include "eidos_type_interpreter.h"
#include "eidos_sorting.h"

#include <stdexcept>
#include <memory>
Expand Down
20 changes: 20 additions & 0 deletions SLiM.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,14 @@
986D73E81B07E89E007FBB70 /* eidos_call_signature.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 986D73E61B07E89E007FBB70 /* eidos_call_signature.cpp */; };
986D73E91B07E89E007FBB70 /* eidos_call_signature.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 986D73E61B07E89E007FBB70 /* eidos_call_signature.cpp */; };
986D73EA1B07E89E007FBB70 /* eidos_call_signature.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 986D73E61B07E89E007FBB70 /* eidos_call_signature.cpp */; };
98729ACF2A87AAB700E81662 /* eidos_sorting.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */; };
98729AD02A87AAB700E81662 /* eidos_sorting.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */; };
98729AD12A87AAB700E81662 /* eidos_sorting.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */; };
98729AD22A87AAB700E81662 /* eidos_sorting.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */; };
98729AD32A87AAB700E81662 /* eidos_sorting.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */; };
98729AD42A87AAB700E81662 /* eidos_sorting.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */; };
98729AD52A87AAB700E81662 /* eidos_sorting.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */; };
98729AD62A87AAB700E81662 /* eidos_sorting.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */; };
98760EDD28CE5E7600CEBC40 /* libomp.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 98760EDC28CE5E7600CEBC40 /* libomp.dylib */; };
98760EDE28CE5E8200CEBC40 /* libomp.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 98760EDC28CE5E7600CEBC40 /* libomp.dylib */; };
9876E5F51ED5572C00FF9762 /* tdist.c in Sources */ = {isa = PBXBuildFile; fileRef = 9876E5F41ED5572C00FF9762 /* tdist.c */; };
Expand Down Expand Up @@ -1618,6 +1626,8 @@
986926EA1AA6B7480000E138 /* GraphView_PopulationVisualization.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = GraphView_PopulationVisualization.mm; sourceTree = "<group>"; };
986D73E61B07E89E007FBB70 /* eidos_call_signature.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = eidos_call_signature.cpp; sourceTree = "<group>"; };
986D73E71B07E89E007FBB70 /* eidos_call_signature.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = eidos_call_signature.h; sourceTree = "<group>"; };
98729ACD2A87A93500E81662 /* eidos_sorting.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = eidos_sorting.h; sourceTree = "<group>"; };
98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = eidos_sorting.cpp; sourceTree = "<group>"; };
9873B12328CE26CD00582D83 /* eidos_openmp.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = eidos_openmp.h; sourceTree = "<group>"; };
98760EDC28CE5E7600CEBC40 /* libomp.dylib */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libomp.dylib; path = /usr/local/lib/libomp.dylib; sourceTree = "<group>"; };
9876E5F31ED5572C00FF9762 /* gsl_cdf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gsl_cdf.h; path = cdf/gsl_cdf.h; sourceTree = "<group>"; };
Expand Down Expand Up @@ -2009,6 +2019,8 @@
9857E33A1BB58DAE00F1C8A9 /* eidos_intrusive_ptr.h */,
985A11861BBA07CB009EE1FF /* eidos_object_pool.h */,
9809F8BD24F32B3E00D312E4 /* eidos_tinycolormap.h */,
98729ACD2A87A93500E81662 /* eidos_sorting.h */,
98729ACE2A87AAB700E81662 /* eidos_sorting.cpp */,
9893C7DC1CDA2D650029AC94 /* eidos_beep.h */,
9893C7DB1CDA2D650029AC94 /* eidos_beep.cpp */,
98B8AF8725A2BE7200C95D66 /* json_fwd.hpp */,
Expand Down Expand Up @@ -3300,6 +3312,7 @@
982556B81BA8EF8C0054CB3F /* eidos_property_signature.cpp in Sources */,
98C8217F1C7A983800548839 /* inline.c in Sources */,
985F3F0E24BA31D900E712E0 /* eidos_test_functions_statistics.cpp in Sources */,
98729AD42A87AAB700E81662 /* eidos_sorting.cpp in Sources */,
98C821AC1C7A9B1600548839 /* discrete.c in Sources */,
9807663924493E0B00F6CBB4 /* gzwrite.c in Sources */,
98C8218C1C7A983800548839 /* poisson.c in Sources */,
Expand Down Expand Up @@ -3371,6 +3384,7 @@
98332ADC1FDBC0D000274FF0 /* dgemv.c in Sources */,
98332B111FDBD09800274FF0 /* init.c in Sources */,
98C8212C1C7A980000548839 /* gamma.c in Sources */,
98729ACF2A87AAB700E81662 /* eidos_sorting.cpp in Sources */,
98C8212D1C7A980000548839 /* gauss.c in Sources */,
985F3F0124BA307200E712E0 /* eidos_test_operators_arithmetic.cpp in Sources */,
985F3F0B24BA31D900E712E0 /* eidos_test_functions_statistics.cpp in Sources */,
Expand Down Expand Up @@ -3613,6 +3627,7 @@
98332AEE1FDBC29500274FF0 /* rowcol.c in Sources */,
9890D1EF27136BB7001EAE98 /* eidos_class_DataFrame.cpp in Sources */,
987AD8871B2CC0C10035D6C8 /* EidosVariableBrowserController.mm in Sources */,
98729AD22A87AAB700E81662 /* eidos_sorting.cpp in Sources */,
9807662B24493E0B00F6CBB4 /* crc32.c in Sources */,
98C821661C7A983800548839 /* beta.c in Sources */,
985F3EFE24BA2F1500E712E0 /* eidos_test_functions_vector.cpp in Sources */,
Expand Down Expand Up @@ -3691,6 +3706,7 @@
98CF5215294A3FC900557BBA /* binomial_tpe.c in Sources */,
98CF5216294A3FC900557BBA /* tables.c in Sources */,
98CF5217294A3FC900557BBA /* species_eidos.cpp in Sources */,
98729AD12A87AAB700E81662 /* eidos_sorting.cpp in Sources */,
98CF5218294A3FC900557BBA /* slim_functions.cpp in Sources */,
98CF5219294A3FC900557BBA /* deflate.c in Sources */,
98CF521A294A3FC900557BBA /* FindRecipeController.mm in Sources */,
Expand Down Expand Up @@ -3918,6 +3934,7 @@
98CF53E8294A714200557BBA /* rowcol.c in Sources */,
98CF53E9294A714200557BBA /* eidos_class_DataFrame.cpp in Sources */,
98CF53EA294A714200557BBA /* EidosVariableBrowserController.mm in Sources */,
98729AD32A87AAB700E81662 /* eidos_sorting.cpp in Sources */,
98CF53EB294A714200557BBA /* crc32.c in Sources */,
98CF53EC294A714200557BBA /* beta.c in Sources */,
98CF53ED294A714200557BBA /* eidos_test_functions_vector.cpp in Sources */,
Expand Down Expand Up @@ -3996,6 +4013,7 @@
98C821491C7A983700548839 /* binomial_tpe.c in Sources */,
9850D8E9206309A0006BFD2E /* tables.c in Sources */,
98AC617B24BA34ED0001914C /* species_eidos.cpp in Sources */,
98729AD02A87AAB700E81662 /* eidos_sorting.cpp in Sources */,
98DDAED7221765480038C133 /* slim_functions.cpp in Sources */,
98076618244934A800F6CBB4 /* deflate.c in Sources */,
984252C3216FA9930019696A /* FindRecipeController.mm in Sources */,
Expand Down Expand Up @@ -4192,6 +4210,7 @@
98D7EBB328CE557C00DEAAC4 /* poisson.c in Sources */,
98D7EBB428CE557C00DEAAC4 /* eidos_type_table.cpp in Sources */,
98D7EBB528CE557C00DEAAC4 /* exp.c in Sources */,
98729AD52A87AAB700E81662 /* eidos_sorting.cpp in Sources */,
98D7EBB628CE557C00DEAAC4 /* eidos_test_functions_other.cpp in Sources */,
98D7EBB728CE557C00DEAAC4 /* eidos_script.cpp in Sources */,
981DC37B28E27300000ABE91 /* eidos_functions_distributions.cpp in Sources */,
Expand Down Expand Up @@ -4263,6 +4282,7 @@
98D7ECA428CE58FC00DEAAC4 /* dgemv.c in Sources */,
98D7ECA528CE58FC00DEAAC4 /* init.c in Sources */,
98D7ECA628CE58FC00DEAAC4 /* gamma.c in Sources */,
98729AD62A87AAB700E81662 /* eidos_sorting.cpp in Sources */,
98D7ECA728CE58FC00DEAAC4 /* gauss.c in Sources */,
98D7ECA828CE58FC00DEAAC4 /* eidos_test_operators_arithmetic.cpp in Sources */,
981DC38528E27301000ABE91 /* eidos_functions_files.cpp in Sources */,
Expand Down
1 change: 1 addition & 0 deletions VERSIONS
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ PARALLEL changes (now in the master branch):
add recipe 22.7, Parallelizing nonWF reproduction and spatial interactions
add internal EidosBenchmark facility for timing of internal loops, for benchmarking of parallelization scaling
add a command-line option, -perTaskThreads, to let the user control how many threads to use for different tasks (if not overridden)
parallelize sorting, where possible/efficient

development head (in the master branch):
fix a minor bug with autofix when opening multiple .slim documents at once in SLiMgui
Expand Down
1 change: 1 addition & 0 deletions core/genome.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "species.h"
#include "polymorphism.h"
#include "subpopulation.h"
#include "eidos_sorting.h"

#include <algorithm>
#include <string>
Expand Down
1 change: 1 addition & 0 deletions eidos/eidos_functions_stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@


#include "eidos_functions.h"
#include "eidos_sorting.h"

#include <utility>
#include <limits>
Expand Down
1 change: 1 addition & 0 deletions eidos/eidos_functions_values.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "eidos_functions.h"
#include "eidos_interpreter.h"
#include "eidos_rng.h"
#include "eidos_sorting.h"

#include <unordered_map>
#include <vector>
Expand Down
147 changes: 0 additions & 147 deletions eidos/eidos_globals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3331,153 +3331,6 @@ bool Eidos_RegexWorks(void)
return regex_works;
}

// Here are some early explorations into parallelizing sorting. The speedups
// here are not particularly impressive. Parallel sorting is a very deep and
// complex rabbit hole to go down; see, e.g., Victor Duvanenko's work at
// https://github.com/DragonSpit/ParallelAlgorithms. But those algorithms
// use TBB instead of OpenMP, and require C++17, so they're not easily usable.
// Wikipedia at https://en.wikipedia.org/wiki/Merge_sort#Parallel_merge_sort
// also has some very interesting commentary about parallelization of sorting.
// The code here is also very primitive - integer only, no templates, no
// client-suppliable comparator, etc. – so it would be hard to integrate into
// all the ways Eidos presently uses std::sort. Improving this looks like a
// good project for somebody in CS.

// This parallel quicksort code is thanks to Ruud van der Pas, modified from
// https://www.openmp.org/wp-content/uploads/sc16-openmp-booth-tasking-ruud.pdf
#ifdef _OPENMP
static void _Eidos_ParallelQuicksort_I(int64_t *values, int64_t lo, int64_t hi)
{
if (lo >= hi)
return;

if (hi - lo + 1 <= 1000) {
// fall over to using std::sort when below a threshold interval size
// the larger the threshold, the less time we spend thrashing tasks on small
// intervals, which is good; but it also sets a limit on how many threads we
// we bring to bear on relatively small sorts, which is bad; 1000 seems fine
std::sort(values + lo, values + hi + 1);
} else {
// choose the middle of three pivots, in an attempt to avoid really bad pivots
int64_t pivot1 = *(values + lo);
int64_t pivot2 = *(values + hi);
int64_t pivot3 = *(values + ((lo + hi) >> 1));
int64_t pivot;

if (pivot1 > pivot2)
{
if (pivot2 > pivot3) pivot = pivot2;
else if (pivot1 > pivot3) pivot = pivot3;
else pivot = pivot1;
}
else
{
if (pivot1 > pivot3) pivot = pivot1;
else if (pivot2 > pivot3) pivot = pivot3;
else pivot = pivot2;
}

// note that std::partition is not guaranteed to leave the pivot value in position
// we do a second partition to exclude all duplicate pivot values, which seems to be one standard strategy
// this works particularly well when duplicate values are very common; it helps avoid O(n^2) performance
// note the partition is not parallelized; that is apparently a difficult problem for parallel quicksort
int64_t *middle1 = std::partition(values + lo, values + hi + 1, [pivot](const int64_t& em) { return em < pivot; });
int64_t *middle2 = std::partition(middle1, values + hi + 1, [pivot](const int64_t& em) { return !(pivot < em); });
int64_t mid1 = middle1 - values;
int64_t mid2 = middle2 - values;
#pragma omp task default(none) firstprivate(values, lo, mid1)
{ _Eidos_ParallelQuicksort_I(values, lo, mid1 - 1); } // Left branch
#pragma omp task default(none) firstprivate(values, hi, mid2)
{ _Eidos_ParallelQuicksort_I(values, mid2, hi); } // Right branch
}
}
#endif

void Eidos_ParallelQuicksort_I(int64_t *values, int64_t nelements)
{
#ifdef _OPENMP
if (nelements > 1000) {
#pragma omp parallel default(none) shared(values, nelements)
{
#pragma omp single nowait
{
_Eidos_ParallelQuicksort_I(values, 0, nelements - 1);
}
} // End of parallel region
}
else
{
// Use std::sort for small vectors
std::sort(values, values + nelements);
}
#else
// Use std::sort when not running parallel
std::sort(values, values + nelements);
#endif
}

// This parallel mergesort code is thanks to Libor Bukata and Jan Dvořák, modified from
// https://cw.fel.cvut.cz/old/_media/courses/b4m35pag/lab6_slides_advanced_openmp.pdf
#ifdef _OPENMP
static void _Eidos_ParallelMergesort_I(int64_t *values, int64_t left, int64_t right)
{
if (left >= right)
return;

if (right - left <= 1000)
{
// fall over to using std::sort when below a threshold interval size
// the larger the threshold, the less time we spend thrashing tasks on small
// intervals, which is good; but it also sets a limit on how many threads we
// we bring to bear on relatively small sorts, which is bad; 1000 seems fine
std::sort(values + left, values + right + 1);
}
else
{
int64_t mid = (left + right) / 2;
#pragma omp taskgroup
{
// the original code had if() limits on task subdivision here, but that
// doesn't make sense to me, because we also have the threshold above,
// which serves the same purpose but avoids using std::sort on subdivided
// regions and then merging them with inplace_merge; if we assume that
// std::sort is faster than mergesort when running on one thread, then
// merging subdivided std::sort calls only seems like a good strategy
// when the std::sort calls happen on separate threads
#pragma omp task default(none) firstprivate(values, left, mid) untied
_Eidos_ParallelMergesort_I(values, left, mid);
#pragma omp task default(none) firstprivate(values, mid, right) untied
_Eidos_ParallelMergesort_I(values, mid + 1, right);
#pragma omp taskyield
}
std::inplace_merge(values + left, values + mid + 1, values + right + 1);
}
}
#endif

void Eidos_ParallelMergesort_I(int64_t *values, int64_t nelements)
{
#ifdef _OPENMP
if (nelements > 1000) {
#pragma omp parallel default(none) shared(values, nelements)
{
#pragma omp single
{
_Eidos_ParallelMergesort_I(values, 0, nelements - 1);
}
} // End of parallel region
}
else
{
// Use std::sort for small vectors
std::sort(values, values + nelements);
}
#else
// Use std::sort when not running parallel
std::sort(values, values + nelements);
#endif
}


// *******************************************************************************************************************
//
Expand Down
71 changes: 0 additions & 71 deletions eidos/eidos_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -590,73 +590,6 @@ std::string Eidos_string_escaped_CSV(const std::string &unescapedString);
// BCH 13 December 2017: no longer used, commenting this out
//std::string Eidos_Exec(const char *p_cmd);

// Get indexes that would result in sorted ordering of a vector. This rather nice code is adapted from http://stackoverflow.com/a/12399290/2752221
template <typename T>
std::vector<int64_t> EidosSortIndexes(const std::vector<T> &p_v, bool p_ascending = true)
{
// initialize original index locations
std::vector<int64_t> idx(p_v.size());
std::iota(idx.begin(), idx.end(), 0);

// sort indexes based on comparing values in v
if (p_ascending)
std::sort(idx.begin(), idx.end(), [&p_v](int64_t i1, int64_t i2) {return p_v[i1] < p_v[i2];});
else
std::sort(idx.begin(), idx.end(), [&p_v](int64_t i1, int64_t i2) {return p_v[i1] > p_v[i2];});

return idx;
}

template <>
inline std::vector<int64_t> EidosSortIndexes<double>(const std::vector<double> &p_v, bool p_ascending)
{
// initialize original index locations
std::vector<int64_t> idx(p_v.size());
std::iota(idx.begin(), idx.end(), 0);

// sort indexes based on comparing values in v
// this specialization for type double sorts NaNs to the end
if (p_ascending)
std::sort(idx.begin(), idx.end(), [&p_v](int64_t i1, int64_t i2) {return std::isnan(p_v[i2]) || (p_v[i1] < p_v[i2]);});
else
std::sort(idx.begin(), idx.end(), [&p_v](int64_t i1, int64_t i2) {return std::isnan(p_v[i2]) || (p_v[i1] > p_v[i2]);});

return idx;
}

template <typename T>
std::vector<int64_t> EidosSortIndexes(const T *p_v, size_t p_size, bool p_ascending = true)
{
// initialize original index locations
std::vector<int64_t> idx(p_size);
std::iota(idx.begin(), idx.end(), 0);

// sort indexes based on comparing values in v
if (p_ascending)
std::sort(idx.begin(), idx.end(), [p_v](int64_t i1, int64_t i2) {return p_v[i1] < p_v[i2];});
else
std::sort(idx.begin(), idx.end(), [p_v](int64_t i1, int64_t i2) {return p_v[i1] > p_v[i2];});

return idx;
}

template <>
inline std::vector<int64_t> EidosSortIndexes<double>(const double *p_v, size_t p_size, bool p_ascending)
{
// initialize original index locations
std::vector<int64_t> idx(p_size);
std::iota(idx.begin(), idx.end(), 0);

// sort indexes based on comparing values in v
// this specialization for type double sorts NaNs to the end
if (p_ascending)
std::sort(idx.begin(), idx.end(), [p_v](int64_t i1, int64_t i2) {return std::isnan(p_v[i2]) || (p_v[i1] < p_v[i2]);});
else
std::sort(idx.begin(), idx.end(), [p_v](int64_t i1, int64_t i2) {return std::isnan(p_v[i2]) || (p_v[i1] > p_v[i2]);});

return idx;
}

extern int gEidosFloatOutputPrecision; // precision used for output of float values in Eidos; not user-visible at present

std::string EidosStringForFloat(double p_value);
Expand Down Expand Up @@ -689,10 +622,6 @@ BidiIter Eidos_random_unique(BidiIter begin, BidiIter end, size_t num_random)
// The <regex> library does not work on Ubuntu 18.04, annoyingly; probably a very old compiler or something. So we have to check.
bool Eidos_RegexWorks(void);

// Parallel sorting; these use std::sort when we are not running parallel, or for small jobs
void Eidos_ParallelQuicksort_I(int64_t *values, int64_t nelements);
void Eidos_ParallelMergesort_I(int64_t *values, int64_t nelements);


// *******************************************************************************************************************
//
Expand Down
3 changes: 3 additions & 0 deletions eidos/eidos_openmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@

#include <signal.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include <iostream>


// This is the largest number of threads we allow the user to set. There is no hard limit in the code;
Expand Down
Loading

0 comments on commit b310254

Please sign in to comment.