diff --git a/EidosScribe/EidosHelpFunctions.rtf b/EidosScribe/EidosHelpFunctions.rtf index 663995267..5f2817ad3 100644 --- a/EidosScribe/EidosHelpFunctions.rtf +++ b/EidosScribe/EidosHelpFunctions.rtf @@ -5788,43 +5788,216 @@ Returns a \pard\pardeftab397\li547\ri720\sb60\sa60\partightenfactor0 \f0\b\fs20 \cf2 Gets the number of threads -\f3\b0 that will be used in subsequent parallel (i.e., multithreaded) regions, as set with +\f3\b0 that is requested to be used in subsequent parallel (i.e., multithreaded) regions, as set with \f1\fs18 parallelSetNumThreads() \f3\fs20 . If Eidos is not configured to run multithreaded, this function will return \f1\fs18 1 \f3\fs20 . See also \f1\fs18 parallelGetMaxThreads() -\f3\fs20 , which returns the maximum number of threads that can be used.\ +\f3\fs20 , which returns the maximum number of threads that can be used. Note that if this function returns the maximum number of threads, as returned by +\f1\fs18 parallelGetMaxThreads() +\f3\fs20 , then there are +\f7\i two possible semantic meanings +\f3\i0 of that return value, which cannot be distinguished using this function; see +\f1\fs18 parallelSetNumThreads() +\f3\fs20 for discussion.\ \pard\pardeftab397\li720\fi-446\ri720\sb180\sa60\partightenfactor0 \f1\fs18 \cf2 (integer$)parallelGetMaxThreads(void)\ \pard\pardeftab397\li547\ri720\sb60\sa60\partightenfactor0 \f0\b\fs20 \cf2 Gets the maximum number of threads -\f3\b0 that can be used in parallel (i.e., multithreaded) regions. This is configured externally; it may be OpenMP\'92s default number of threads for the hardware platform being used, or may be set by an environment variable or command-line option). If Eidos is not configured to run multithreaded, this function will return +\f3\b0 that can be used in parallel (i.e., multithreaded) regions. This is configured externally; it may be OpenMP\'92s default number of threads for the hardware platform being used, or may be set by an environment variable or command-line option. If Eidos is not configured to run multithreaded, this function will return \f1\fs18 1 \f3\fs20 .\ \pard\pardeftab397\li720\fi-446\ri720\sb180\sa60\partightenfactor0 +\f1\fs18 \cf2 (object$)parallelGetTaskThreadCounts(void)\ +\pard\pardeftab397\li547\ri720\sb60\sa60\partightenfactor0 + +\f0\b\fs20 \cf2 Gets the number of threads +\f3\b0 that is requested to be used for specific tasks in Eidos and SLiM. Returns a new +\f1\fs18 Dictionary +\f3\fs20 containing values for all of the tasks for which a number of threads can be specified; see +\f1\fs18 parallelSetTaskThreadCounts() +\f3\fs20 for a list of all such tasks. Note that the specified number of threads will not necessarily be used in practice; in particular, a thread count set by +\f1\fs18 parallelSetNumThreads() +\f3\fs20 will override these per-task counts. Also, if the task size is below a certain task-specific threshold, the task will not be executed in parallel regardless of these settings.\ +\pard\pardeftab397\li720\fi-446\ri720\sb180\sa60\partightenfactor0 + \f1\fs18 \cf2 (void)parallelSetNumThreads([Ni$\'a0numThreads\'a0=\'a0NULL])\ \pard\pardeftab397\li547\ri720\sb60\sa60\partightenfactor0 \f0\b\fs20 \cf2 Sets the number of threads -\f3\b0 that will be used in subsequent parallel (i.e., multithreaded) regions. If Eidos is not configured to run multithreaded, this function will have no effect. The requested number of threads will be clamped to the interval [ +\f3\b0 that is requested to be used in subsequent parallel (i.e., multithreaded) regions. 
If Eidos is not configured to run multithreaded, this function will have no effect. The requested number of threads will be clamped to the interval [ \f1\fs18 1 \f3\fs20 , \f1\fs18 maxThreads \f3\fs20 ], where \f1\fs18 maxThreads -\f3\fs20 is the maximum number of threads configured externally (either by OpenMP\'92s default, or by an environment variable or command-line option). The maximum number of threads (the value of +\f3\fs20 is the maximum number of threads configured externally (either by OpenMP\'92s default, or by an environment variable or command-line option). That maximum number of threads (the value of \f1\fs18 maxThreads \f3\fs20 ) can be obtained from \f1\fs18 parallelGetMaxThreads() -\f3\fs20 . Passing +\f3\fs20 .\ +There is an important wrinkle in the semantics of this function that must be explained. Passing \f1\fs18 NULL -\f3\fs20 (the default) is equivalent to passing +\f3\fs20 (the default) resets Eidos to the default number of threads for which it is configured to run. In this configuration, +\f1\fs18 parallelGetNumThreads() +\f3\fs20 will return +\f1\fs18 maxThreads +\f3\fs20 , but the number of threads used for any given parallel operation might not, in fact, be equal to +\f1\fs18 maxThreads +\f3\fs20 ; Eidos might use fewer threads if it determines that doing so would improve performance. Passing the value of +\f1\fs18 maxThreads +\f3\fs20 explicitly, on the other hand, sets Eidos to always use +\f1\fs18 maxThreads +\f3\fs20 threads, even if doing so may result in lower performance; but in this configuration, too, +\f1\fs18 parallelGetNumThreads() +\f3\fs20 will return \f1\fs18 maxThreads -\f3\fs20 , and thus can be used to easily reset Eidos to the number of threads for which it is configured to run.\ +\f3\fs20 . For example, suppose +\f1\fs18 maxThreads +\f3\fs20 is +\f1\fs18 16 +\f3\fs20 . Passing +\f1\fs18 NULL +\f3\fs20 requests that Eidos use +\f7\i up to +\f3\i0 +\f1\fs18 16 +\f3\fs20 threads, as it sees fit; in contrast, explicitly passing +\f1\fs18 16 +\f3\fs20 requests that Eidos use +\f7\i exactly +\f3\i0 16 threads. In both cases, however, +\f1\fs18 parallelGetNumThreads() +\f3\fs20 will return +\f1\fs18 16 +\f3\fs20 .\ +If you wish to temporarily change the number of threads used, the standard pattern is to call +\f1\fs18 parallelSetNumThreads() +\f3\fs20 with the number of threads you want to use, do the operation you wish to control, and then call +\f1\fs18 parallelSetNumThreads(NULL) +\f3\fs20 to return to the default behavior of Eidos.\ +Note that the number of threads requested here overrides any per-task request set with +\f1\fs18 parallelSetTaskThreadCounts() +\f3\fs20 . Also, if the task size is below a certain task-specific threshold, the task will not be executed in parallel regardless of these settings.\ +\pard\pardeftab397\li720\fi-446\ri720\sb180\sa60\partightenfactor0 + +\f1\fs18 \cf2 (void)parallelSetTaskThreadCounts(object$\'a0dict)\ \pard\pardeftab397\li547\ri720\sb60\sa60\partightenfactor0 + +\f0\b\fs20 \cf2 Sets the number of threads +\f3\b0 that is requested to be used for specific tasks in Eidos and SLiM. The dictionary +\f1\fs18 dict +\f3\fs20 should contain +\f1\fs18 string +\f3\fs20 keys that identify tasks, and +\f1\fs18 integer +\f3\fs20 values that provide the number of threads to be used when performing those tasks. 
For example, a key of +\f1\fs18 "LOG10_FLOAT" +\f3\fs20 identifies the task of performing the +\f1\fs18 log10() +\f3\fs20 function on a +\f1\fs18 float +\f3\fs20 vector, and a value of +\f1\fs18 8 +\f3\fs20 for that key would tell Eidos to use eight threads when performing that task. The number of threads actually used will never be greater than the maximum thread count as returned by +\f1\fs18 parallelGetMaxThreads() +\f3\fs20 . Furthermore, a thread count set with +\f1\fs18 parallelSetNumThreads() +\f3\fs20 overrides the per-task setting, so if you wish to set specific per-task thread counts you should not set an overall thread count with +\f1\fs18 parallelSetNumThreads() +\f3\fs20 . If +\f1\fs18 dict +\f3\fs20 is +\f1\fs18 NULL +\f3\fs20 , all task thread counts will be reset to their default values.\ +The currently requested thread counts for all tasks can be obtained with +\f1\fs18 parallelGetTaskThreadCounts() +\f3\fs20 . Note that the counts returned by that function may not match the counts requested with +\f1\fs18 parallelSetTaskThreadCounts() +\f3\fs20 ; in particular, they may be clipped to the maximum number of threads as returned by +\f1\fs18 parallelGetMaxThreads() +\f3\fs20 .\ +The task keys recognized, and the tasks they govern, are:\ +\pard\tx4320\pardeftab720\li1080\sa180\partightenfactor0 + +\f1\fs18 \cf2 "ABS_FLOAT" abs(float x)\uc0\u8232 "CEIL" ceil()\u8232 "EXP_FLOAT" exp(float x)\u8232 "FLOOR" floor()\u8232 "LOG_FLOAT" log(float x)\u8232 "LOG10_FLOAT" log10(float x)\u8232 "LOG2_FLOAT" log2(float x)\u8232 "ROUND" round()\u8232 "SQRT_FLOAT" sqrt(float x)\u8232 "SUM_INTEGER" sum(integer x)\u8232 "SUM_FLOAT" sum(float x)\u8232 "SUM_LOGICAL" sum(logical x)\u8232 "TRUNC" trunc()\ +"MAX_INT" max(integer x)\uc0\u8232 "MAX_FLOAT" max(float x)\u8232 "MIN_INT" min(integer x)\u8232 "MIN_FLOAT" min(float x)\u8232 "PMAX_INT_1" pmax(i$ x, i y) / pmax(i x, i$ y)\u8232 "PMAX_INT_2" pmax(integer x, integer y)\u8232 "PMAX_FLOAT_1" pmax(f$ x, f y) / pmax(f x, f$ y)\u8232 "PMAX_FLOAT_2" pmax(float x, float y)\u8232 "PMIN_INT_1" pmin(i$ x, i y) / pmin(i x, i$ y)\u8232 "PMIN_INT_2" pmin(integer x, integer y)\u8232 "PMIN_FLOAT_1" pmin(f$ x, f y) / pmin(f x, f$ y)\u8232 "PMIN_FLOAT_2" pmin(float x, float y)\ +"MATCH_INT" match(integer x, integer table)\uc0\u8232 "MATCH_FLOAT" match(float x, float table)\u8232 "MATCH_STRING" match(string x, string table)\u8232 "MATCH_OBJECT" match(object x, object table)\u8232 "SAMPLE_INDEX" sample() +\f3\fs20 index buffer generation (internal) +\f1\fs18 \uc0\u8232 "SAMPLE_R_INT" sample(integer x, weights=NULL)\u8232 "SAMPLE_R_FLOAT" sample(float x, weights=NULL)\u8232 "SAMPLE_R_OBJECT" sample(object x, weights=NULL)\u8232 "SAMPLE_WR_INT" sample(integer x, if weights)\u8232 "SAMPLE_WR_FLOAT" sample(float x, if weights)\u8232 "SAMPLE_WR_OBJECT" sample(object x, if weights)\u8232 "TABULATE" tabulate()\ +"DNORM_1" dnorm(numeric$ mean, numeric$ sd)\uc0\u8232 "DNORM_2" dnorm() +\f3\fs20 other cases +\f1\fs18 \uc0\u8232 "RBINOM_1" rbinom(i$ size = 1, f$ prob = 0.5)\u8232 "RBINOM_2" rbinom(i$ size, f$ prob) +\f3\fs20 other cases +\f1\fs18 \uc0\u8232 "RBINOM_3" rbinom() +\f3\fs20 other cases +\f1\fs18 \uc0\u8232 "RDUNIF_1" rdunif(i$ min = 0, i$ max = 1) +\f3\fs20 and similar +\f1\fs18 \uc0\u8232 "RDUNIF_2" rdunif(i$ min, i$ max) +\f3\fs20 other cases +\f1\fs18 \uc0\u8232 "RDUNIF_3" rdunif() +\f3\fs20 other cases +\f1\fs18 \uc0\u8232 "REXP_1" rexp(numeric$ mu)\u8232 "REXP_2" rexp() +\f3\fs20 other cases +\f1\fs18 \uc0\u8232 "RNORM_1" rnorm(numeric$ mean, numeric$ 
sd)\u8232 "RNORM_2" rnorm(numeric$ sigma)\u8232 "RNORM_3" rnorm() +\f3\fs20 other cases +\f1\fs18 \uc0\u8232 "RPOIS_1" rpois(numeric$ lambda)\u8232 "RPOIS_2" rpois() +\f3\fs20 other cases +\f1\fs18 \uc0\u8232 "RUNIF_1" runif(numeric$ min = 0, numeric$ max = 1)\u8232 "RUNIF_2" runif(numeric$ min, numeric$ max) +\f3\fs20 other cases +\f1\fs18 \uc0\u8232 "RUNIF_3" runif() +\f3\fs20 other cases +\f1\fs18 \ +"CLIPPEDINTEGRAL_1" clippedIntegral() "x"\uc0\u8232 "CLIPPEDINTEGRAL_2" clippedIntegral() "y"\u8232 "CLIPPEDINTEGRAL_3" clippedIntegral() "z"\u8232 "CLIPPEDINTEGRAL_4" clippedIntegral() "xy"\u8232 "CLIPPEDINTEGRAL_5" clippedIntegral() "xz"\u8232 "CLIPPEDINTEGRAL_6" clippedIntegral() "yz"\u8232 "DRAWBYSTRENGTH" drawByStrength(returnDict=T)\u8232 "INTNEIGHCOUNT" interactingNeighborSount()\u8232 "LOCALPOPDENSITY" localPopulationDensity()\u8232 "NEARESTINTNEIGH" nearestInteractingNeighbors(returnDict=T)\u8232 "NEARESTNEIGH" nearestNeighbors(returnDict=T)\u8232 "NEIGHCOUNT" neighborCount()\u8232 "TOTNEIGHSTRENGTH" totalOfNeighborsStrengths()\ +"POINT_IN_BOUNDS" pointInBounds()\uc0\u8232 "POINT_PERIODIC" pointPeriodic()\u8232 "POINT_REFLECTED" pointReflected()\u8232 "POINT_STOPPED" pointStopped()\u8232 "POINT_UNIFORM" pointUniform()\u8232 "SET_SPATIAL_POS_1" setSpatialPosition() +\f3\fs20 with one point +\f1\fs18 \uc0\u8232 "SET_SPATIAL_POS_2" setSpatialPosition() +\f3\fs20 with +\f7\i N +\f3\i0 points +\f1\fs18 \uc0\u8232 "SPATIAL_MAP_VALUE" spatialMapValue()\ +"CONTAINS_MARKER_MUT" containsMarkerMutation(returnMutation = F)\uc0\u8232 "I_COUNT_OF_MUTS_OF_TYPE" countOfMutationsOfType() (Individual)\u8232 "G_COUNT_OF_MUTS_OF_TYPE" countOfMutationsOfType() (Genome)\u8232 "INDS_W_PEDIGREE_IDS" individualsWithPedigreeIDs()\u8232 "RELATEDNESS" relatedness()\u8232 "SAMPLE_INDIVIDUALS_1" sampleIndividuals() +\f3\fs20 simple case with replace=T +\f1\fs18 \uc0\u8232 "SAMPLE_INDIVIDUALS_2" sampleIndividuals() +\f3\fs20 base case with replace=T +\f1\fs18 \uc0\u8232 "SET_FITNESS_SCALE_1" Individual.fitness = +\f3\fs20 one value +\f1\fs18 \uc0\u8232 "SET_FITNESS_SCALE_2" Individual.fitness = +\f7\i\fs20 N +\f3\i0 values +\f1\fs18 \uc0\u8232 "SUM_OF_MUTS_OF_TYPE" sumOfMutationsOfType()\ +"AGE_INCR" +\f3\fs20 incrementing +\f1\fs18 Individual age +\f3\fs20 values +\f1\fs18 \uc0\u8232 "DEFERRED_REPRO" +\f3\fs20 deferred nonWF reproduction +\f1\fs18 \uc0\u8232 "FITNESS_ASEX_1" +\f3\fs20 fitness eval, asexual, with individual +\f1\fs18 fitnessScaling\uc0\u8232 "FITNESS_ASEX_2" +\f3\fs20 fitness eval, asexual, without individual +\f1\fs18 fitnessScaling\uc0\u8232 "FITNESS_SEX_F_1" +\f3\fs20 fitness eval, female, with individual +\f1\fs18 fitnessScaling\uc0\u8232 "FITNESS_SEX_F_2" +\f3\fs20 fitness eval, female, without individual +\f1\fs18 fitnessScaling\uc0\u8232 "FITNESS_SEX_M_1" +\f3\fs20 fitness eval, male, with individual +\f1\fs18 fitnessScaling\uc0\u8232 "FITNESS_SEX_M_2" +\f3\fs20 fitness eval, male, without individual +\f1\fs18 fitnessScaling\uc0\u8232 "MIGRANT_CLEAR" +\f3\fs20 clearing the +\f1\fs18 migrant +\f3\fs20 property at tick end +\f1\fs18 \uc0\u8232 "SURVIVAL" +\f3\fs20 survival evaluation (no callbacks) +\f1\fs18 \ +\pard\pardeftab397\li547\ri720\sb60\sa60\partightenfactor0 + +\f3\fs20 \cf2 Typically, a dictionary of task keys and thread counts is read from a file and set up with this function at initialization time, but it is also possible to change new task thread counts dynamically. 
If Eidos is not configured to run multithreaded, this function has no effect.\ \pard\pardeftab397\li720\fi-446\ri720\sb180\sa60\partightenfactor0 \f1\fs18 \cf2 (void)rm([Ns\'a0variableNames\'a0=\'a0NULL])\ diff --git a/QtSLiM/help/EidosHelpFunctions.html b/QtSLiM/help/EidosHelpFunctions.html index 6f2f5ee80..a280d922b 100644 --- a/QtSLiM/help/EidosHelpFunctions.html +++ b/QtSLiM/help/EidosHelpFunctions.html @@ -15,9 +15,11 @@ p.p7 {margin: 3.0px 0.0px 3.0px 27.4px; font: 9.0px Menlo} p.p8 {margin: 3.0px 0.0px 3.0px 27.4px; font: 10.0px Optima; color: #d50005} p.p9 {margin: 3.0px 0.0px 3.0px 27.4px; font: 10.0px Optima; color: #969696} - p.p10 {margin: 3.0px 0.0px 3.0px 27.4px; font: 9.0px Menlo; color: #000000} - p.p11 {margin: 0.0px 0.0px 3.0px 27.4px; text-indent: 18.0px; font: 10.0px Optima; color: #000000} - p.p12 {margin: 9.0px 0.0px 9.0px 45.0px; font: 9.0px Menlo} + p.p10 {margin: 0.0px 0.0px 9.0px 54.0px; font: 9.0px Menlo; color: #000000} + p.p11 {margin: 0.0px 0.0px 9.0px 54.0px; font: 10.0px Optima; color: #000000} + p.p12 {margin: 3.0px 0.0px 3.0px 27.4px; font: 9.0px Menlo; color: #000000} + p.p13 {margin: 0.0px 0.0px 3.0px 27.4px; text-indent: 18.0px; font: 10.0px Optima; color: #000000} + p.p14 {margin: 9.0px 0.0px 9.0px 45.0px; font: 9.0px Menlo} li.li6 {margin: 3.0px 0.0px 3.0px 0.0px; font: 10.0px Optima} span.s1 {font: 9.0px 'Times New Roman'} span.s2 {font: 9.0px Menlo} @@ -479,11 +481,117 @@

Prints all currently defined variables to Eidos’s output stream.

Beginning in Eidos 2.5 (SLiM 3.5), the showSymbolTables optional argument can be set to T to request full information on the current symbol table chain.  This will show which symbol table a given symbol is defined in, as well as revealing whether there are other symbols with the same name that have been masked by a local definition.  This is mostly useful for debugging.

(integer$)parallelGetNumThreads(void)

-

Gets the number of threads that will be used in subsequent parallel (i.e., multithreaded) regions, as set with parallelSetNumThreads().  If Eidos is not configured to run multithreaded, this function will return 1.  See also parallelGetMaxThreads(), which returns the maximum number of threads that can be used.

+

Gets the number of threads that is requested to be used in subsequent parallel (i.e., multithreaded) regions, as set with parallelSetNumThreads().  If Eidos is not configured to run multithreaded, this function will return 1.  See also parallelGetMaxThreads(), which returns the maximum number of threads that can be used.  Note that if this function returns the maximum number of threads, as returned by parallelGetMaxThreads(), then there are two possible semantic meanings of that return value, which cannot be distinguished using this function; see parallelSetNumThreads() for discussion.

(integer$)parallelGetMaxThreads(void)

-

Gets the maximum number of threads that can be used in parallel (i.e., multithreaded) regions.  This is configured externally; it may be OpenMP’s default number of threads for the hardware platform being used, or may be set by an environment variable or command-line option).  If Eidos is not configured to run multithreaded, this function will return 1.

+

Gets the maximum number of threads that can be used in parallel (i.e., multithreaded) regions.  This is configured externally; it may be OpenMP’s default number of threads for the hardware platform being used, or may be set by an environment variable or command-line option.  If Eidos is not configured to run multithreaded, this function will return 1.

+

(object<Dictionary>$)parallelGetTaskThreadCounts(void)

+

Gets the number of threads that is requested to be used for specific tasks in Eidos and SLiM.  Returns a new Dictionary containing values for all of the tasks for which a number of threads can be specified; see parallelSetTaskThreadCounts() for a list of all such tasks.  Note that the specified number of threads will not necessarily be used in practice; in particular, a thread count set by parallelSetNumThreads() will override these per-task counts.  Also, if the task size is below a certain task-specific threshold, the task will not be executed in parallel regardless of these settings.
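For example, a minimal sketch of reading one entry back (the "LOG10_FLOAT" key is one of the task keys listed under parallelSetTaskThreadCounts()):

counts = parallelGetTaskThreadCounts();
catn(counts.getValue("LOG10_FLOAT"));   // requested thread count for log10(float x)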

(void)parallelSetNumThreads([Ni$ numThreads = NULL])

-

Sets the number of threads that will be used in subsequent parallel (i.e., multithreaded) regions.  If Eidos is not configured to run multithreaded, this function will have no effect.  The requested number of threads will be clamped to the interval [1, maxThreads], where maxThreads is the maximum number of threads configured externally (either by OpenMP’s default, or by an environment variable or command-line option).  The maximum number of threads (the value of maxThreads) can be obtained from parallelGetMaxThreads().  Passing NULL (the default) is equivalent to passing maxThreads, and thus can be used to easily reset Eidos to the number of threads for which it is configured to run.

+

Sets the number of threads that is requested to be used in subsequent parallel (i.e., multithreaded) regions.  If Eidos is not configured to run multithreaded, this function will have no effect.  The requested number of threads will be clamped to the interval [1, maxThreads], where maxThreads is the maximum number of threads configured externally (either by OpenMP’s default, or by an environment variable or command-line option).  That maximum number of threads (the value of maxThreads) can be obtained from parallelGetMaxThreads().

+

There is an important wrinkle in the semantics of this function that must be explained.  Passing NULL (the default) resets Eidos to the default number of threads for which it is configured to run.  In this configuration, parallelGetNumThreads() will return maxThreads, but the number of threads used for any given parallel operation might not, in fact, be equal to maxThreads; Eidos might use fewer threads if it determines that doing so would improve performance.  Passing the value of maxThreads explicitly, on the other hand, sets Eidos to always use maxThreads threads, even if doing so may result in lower performance; but in this configuration, too, parallelGetNumThreads() will return maxThreads.  For example, suppose maxThreads is 16.  Passing NULL requests that Eidos use up to 16 threads, as it sees fit; in contrast, explicitly passing 16 requests that Eidos use exactly 16 threads.  In both cases, however, parallelGetNumThreads() will return 16.
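To illustrate, a sketch of a hypothetical session on a machine where parallelGetMaxThreads() returns 16:

parallelSetNumThreads(NULL);   // use up to 16 threads, as Eidos sees fit
parallelGetNumThreads();       // 16
parallelSetNumThreads(16);     // always use exactly 16 threads
parallelGetNumThreads();       // 16; indistinguishable from the NULL case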

+

If you wish to temporarily change the number of threads used, the standard pattern is to call parallelSetNumThreads() with the number of threads you want to use, do the operation you wish to control, and then call parallelSetNumThreads(NULL) to return to the default behavior of Eidos.
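For example, a sketch of that pattern (the thread count and workload here are arbitrary):

parallelSetNumThreads(4);        // request four threads
x = sum(runif(100000000));       // the operation to be controlled
parallelSetNumThreads(NULL);     // return to the default behavior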

+

Note that the number of threads requested here overrides any per-task request set with parallelSetTaskThreadCounts().  Also, if the task size is below a certain task-specific threshold, the task will not be executed in parallel regardless of these settings.

+

(void)parallelSetTaskThreadCounts(object$ dict)

+

Sets the number of threads that is requested to be used for specific tasks in Eidos and SLiM.  The dictionary dict should contain string keys that identify tasks, and integer values that provide the number of threads to be used when performing those tasks.  For example, a key of "LOG10_FLOAT" identifies the task of performing the log10() function on a float vector, and a value of 8 for that key would tell Eidos to use eight threads when performing that task.  The number of threads actually used will never be greater than the maximum thread count as returned by parallelGetMaxThreads().  Furthermore, a thread count set with parallelSetNumThreads() overrides the per-task setting, so if you wish to set specific per-task thread counts you should not set an overall thread count with parallelSetNumThreads().  If dict is NULL, all task thread counts will be reset to their default values.
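For example, a minimal sketch using hypothetical per-task counts:

parallelSetTaskThreadCounts(Dictionary("LOG10_FLOAT", 8, "SUM_FLOAT", 4));

This requests eight threads for log10() on a float vector and four threads for sum() on a float vector.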

+

The currently requested thread counts for all tasks can be obtained with parallelGetTaskThreadCounts().  Note that the counts returned by that function may not match the counts requested with parallelSetTaskThreadCounts(); in particular, they may be clipped to the maximum number of threads as returned by parallelGetMaxThreads().

+

The task keys recognized, and the tasks they govern, are:

+

"ABS_FLOAT" abs(float x)
+"CEIL" ceil()
+"EXP_FLOAT" exp(float x)
+"FLOOR" floor()
+"LOG_FLOAT" log(float x)
+"LOG10_FLOAT" log10(float x)
+"LOG2_FLOAT" log2(float x)
+"ROUND" round()
+"SQRT_FLOAT" sqrt(float x)
+"SUM_INTEGER" sum(integer x)
+"SUM_FLOAT" sum(float x)
+"SUM_LOGICAL" sum(logical x)
+"TRUNC" trunc()

+

"MAX_INT" max(integer x)
+"MAX_FLOAT" max(float x)
+"MIN_INT" min(integer x)
+"MIN_FLOAT" min(float x)
+"PMAX_INT_1" pmax(i$ x, i y) / pmax(i x, i$ y)
+"PMAX_INT_2" pmax(integer x, integer y)
+"PMAX_FLOAT_1" pmax(f$ x, f y) / pmax(f x, f$ y)
+"PMAX_FLOAT_2" pmax(float x, float y)
+"PMIN_INT_1" pmin(i$ x, i y) / pmax(i x, i$ y)
+"PMIN_INT_2" pmin(integer x, integer y)
+"PMIN_FLOAT_1" pmin(f$ x, f y) / pmin(f x, f$ y)
+"PMIN_FLOAT_2" pmin(float x, float y)

+

"MATCH_INT" match(integer x, integer table)
+"MATCH_FLOAT" match(float x, float table)
+"MATCH_STRING" match(string x, string table)
+"MATCH_OBJECT" match(object x, object table)
+"SAMPLE_INDEX" sample() index buffer generation (internal)
+"SAMPLE_R_INT" sample(integer x, weights=NULL)
+"SAMPLE_R_FLOAT" sample(float x, weights=NULL)
+"SAMPLE_R_OBJECT" sample(object x, weights=NULL)
+"SAMPLE_WR_INT" sample(integer x, if weights)
+"SAMPLE_WR_FLOAT" sample(float x, if weights)
+"SAMPLE_WR_OBJECT" sample(object x, if weights)
+"TABULATE" tabulate()

+

"DNORM_1" dnorm(numeric$ mean, numeric$ sd)
+"DNORM_2" dnorm() other cases
+"RBINOM_1" rbinom(i$ size = 1, f$ prob = 0.5)
+"RBINOM_2" rbinom(i$ size, f$ prob) other cases
+"RBINOM_3" rbinom() other cases
+"RDUNIF_1" rdunif(i$ min = 0, i$ max = 1) and similar
+"RDUNIF_2" rdunif(i$ min, i$ max) other cases
+"RDUNIF_3" rdunif() other cases
+"REXP_1" rexp(numeric$ mu)
+"REXP_2" rexp() other cases
+"RNORM_1" rnorm(numeric$ mean, numeric$ sd)
+"RNORM_2" rnorm(numeric$ sigma)
+"RNORM_3" rnorm() other cases
+"RPOIS_1" rpois(numeric$ lambda)
+"RPOIS_2" rpois() other cases
+"RUNIF_1" runif(numeric$ min = 0, numeric$ max = 1)
+"RUNIF_2" runif(numeric$ min, numeric$ max) other cases
+"RUNIF_3" runif() other cases

+

"CLIPPEDINTEGRAL_1" clippedIntegral() "x"
+"CLIPPEDINTEGRAL_2" clippedIntegral() "y"
+"CLIPPEDINTEGRAL_3" clippedIntegral() "z"
+"CLIPPEDINTEGRAL_4" clippedIntegral() "xy"
+"CLIPPEDINTEGRAL_5" clippedIntegral() "xz"
+"CLIPPEDINTEGRAL_6" clippedIntegral() "yz"
+"DRAWBYSTRENGTH" drawByStrength(returnDict=T)
+"INTNEIGHCOUNT" interactingNeighborSount()
+"LOCALPOPDENSITY" localPopulationDensity()
+"NEARESTINTNEIGH" nearestInteractingNeighbors(returnDict=T)
+"NEARESTNEIGH" nearestNeighbors(returnDict=T)
+"NEIGHCOUNT" neighborCount()
+"TOTNEIGHSTRENGTH" totalOfNeighborsStrengths()

+

"POINT_IN_BOUNDS" pointInBounds()
+"POINT_PERIODIC" pointPeriodic()
+"POINT_REFLECTED" pointReflected()
+"POINT_STOPPED" pointStopped()
+"POINT_UNIFORM" pointUniform()
+"SET_SPATIAL_POS_1" setSpatialPosition() with one point
+"SET_SPATIAL_POS_2" setSpatialPosition() with N points
+"SPATIAL_MAP_VALUE" spatialMapValue()

+

"CONTAINS_MARKER_MUT" containsMarkerMutation(returnMutation = F)
+"I_COUNT_OF_MUTS_OF_TYPE" countOfMutationsOfType() (Individual)
+"G_COUNT_OF_MUTS_OF_TYPE" countOfMutationsOfType() (Genome)
+"INDS_W_PEDIGREE_IDS" individualsWithPedigreeIDs()
+"RELATEDNESS" relatedness()
+"SAMPLE_INDIVIDUALS_1" sampleIndividuals() simple case with replace=T
+"SAMPLE_INDIVIDUALS_2" sampleIndividuals() base case with replace=T
+"SET_FITNESS_SCALE_1" Individual.fitness = one value
+"SET_FITNESS_SCALE_2" Individual.fitness = N values
+"SUM_OF_MUTS_OF_TYPE" sumOfMutationsOfType()

+

"AGE_INCR" incrementing Individual age values
+"DEFERRED_REPRO"
deferred nonWF reproduction
+"FITNESS_ASEX_1"
fitness eval, asexual, with individual fitnessScaling
+"FITNESS_ASEX_2"
fitness eval, asexual, without individual fitnessScaling
+"FITNESS_SEX_F_1"
fitness eval, female, with individual fitnessScaling
+"FITNESS_SEX_F_2"
fitness eval, female, without individual fitnessScaling
+"FITNESS_SEX_M_1"
fitness eval, male, with individual fitnessScaling
+"FITNESS_SEX_M_2"
fitness eval, male, without individual fitnessScaling
+"MIGRANT_CLEAR"
clearing the migrant property at tick end
+"SURVIVAL"
survival evaluation (no callbacks)

+

Typically, a dictionary of task keys and thread counts is read from a file and set up with this function at initialization time, but it is also possible to set new task thread counts dynamically.  If Eidos is not configured to run multithreaded, this function has no effect.
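For example, a sketch of that usage (the file name and its JSON contents, such as {"LOG10_FLOAT": 8, "SUM_FLOAT": 4}, are hypothetical, and the Dictionary constructor is assumed to accept a singleton JSON string as described in its documentation):

dict = Dictionary(paste(readFile("task_threads.json"), sep=""));
parallelSetTaskThreadCounts(dict);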

(void)rm([Ns variableNames = NULL])

Removes variables from the Eidos namespace; in other words, it causes the variables to become undefined.  Variables are specified by their string name in the variableNames parameter.  If the optional variableNames parameter is NULL (the default), all variables will be removed (be careful!).
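For example:

x = 10;
rm("x");
exists("x");   // F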

In SLiM 3, there was an optional parameter removeConstants that, if T, allowed you to remove defined constants (and then potentially redefine them to have a different value).  The removeConstants parameter was removed in SLiM 4, since the defineGlobal() function now provides the ability to define (and redefine) global variables that are not constant.

@@ -491,7 +599,7 @@

Named apply() prior to Eidos 1.6 / SLiM 2.6

Applies a block of Eidos code to the elements of x.  This function is sort of a hybrid between c() and executeLambda(); it might be useful to consult the documentation for both of those functions to better understand what sapply() does.  For each element in x, the lambda defined by lambdaSource will be called.  For the duration of that callout, a variable named applyValue will be defined to have as its value the element of x currently being processed.  The expectation is that the lambda will use applyValue in some way, and will return either NULL or a new value (which need not be a singleton, and need not be of the same type as x).  The return value of sapply() is generated by concatenating together all of the individual vectors returned by the lambda, in exactly the same manner as the c() function (including the possibility of type promotion).

Since this function can be hard to understand at first, here is an example:

-

sapply(1:10, "if (applyValue % 2) applyValue ^ 2; else NULL;");

+

sapply(1:10, "if (applyValue % 2) applyValue ^ 2; else NULL;");

This produces the output 1 9 25 49 81.  The sapply() operation begins with the vector 1:10.  For each element of that vector, the lambda is called and applyValue is defined with the element value.  In this respect, sapply() is actually very much like a for loop.  If applyValue is even (as evaluated by the modulo operator, %), the condition of the if statement is F and so NULL is returned by the lambda; this must be done explicitly, since a void return is not allowed by sapply().  If applyValue is odd, on the other hand, the lambda returns its square (as calculated by the exponential operator, ^).  Just as with the c() function, NULL values are dropped during concatenation, so the final result contains only the squares of the odd values.

This example illustrates that the lambda can “drop” values by returning NULL, so sapply() can be used to select particular elements of a vector that satisfy some condition, much like the subscript operator, [].  The example also illustrates that input and result types do not have to match; the vector passed in is integer, whereas the result vector is float.

Beginning in Eidos 1.6, a new optional parameter named simplify allows the result of sapply() to be a matrix or array in certain cases, better organizing the elements of the result.  If the simplify parameter is "vector", the concatenated result value is returned as a plain vector in all cases; this is the default behavior, for backward compatibility.  Two other possible values for simplify are presently supported.  If simplify is "matrix", the concatenated result value will be turned into a matrix with one column for each non-NULL value returned by the lambda, as if the values were joined together with cbind(), as long as all of the lambda’s return values are either (a) NULL or (b) the same length as the other non-NULL values returned.  If simplify is "match", the concatenated result value will be turned into a vector, matrix, or array that exactly matches the dimensions of x, with a one-to-one correspondence between x and the elements of the return value just like a unary operator, as long as all of the lambda’s return values are singletons (with no NULL values).  Both "matrix" and "match" will raise an error if their preconditions are not met, to avoid unexpected behavior, so care should be taken that the preconditions are always met when these options are used.
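For example, a small sketch of simplify="matrix":

sapply(1:4, "applyValue * c(1, 10);", simplify="matrix");

Each call to the lambda returns a length-2 vector, so the result is a 2×4 matrix with one column per element of x; with the default simplify="vector" the same call would produce a plain vector of length 8.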

@@ -509,19 +617,19 @@

Turns suppression of warning messages on or off.  The suppress flag indicates whether suppression of warnings should be enabled (T) or disabled (F).  The previous warning-suppression value is returned by suppressWarnings(), making it easy to suppress warnings from a given call and then return to the previous suppression state afterwards.  It is recommended that warnings be suppressed only around short blocks of code (not all the time), so that unexpected but perhaps important warnings are not missed.  And of course warnings are generally emitted for good reasons; before deciding to disregard a given warning, make sure that you understand exactly why it is being issued, and are certain that it does not represent a serious problem.
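For example, a sketch of the save-and-restore pattern described above:

previous = suppressWarnings(T);
// ... a short block whose warnings are understood and expected ...
suppressWarnings(previous);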

(*)sysinfo(string$ key)

Returns information about the system.  The information returned by sysinfo() depends upon the value of key, which selects one of the pieces of information listed:

-

key value

-

os the name of the OS; "macOS" or "Windows", or "Unix" for all others

-

sysname the name of the kernel

-

release the operating system (kernel) release

-

version the operating system (kernel) version

-

nodename the name by which the machine is known on the network

-

machine the hardware type; often the CPU type (e.g., "x86_64")

+

key value

+

os the name of the OS; "macOS" or "Windows", or "Unix" for all others

+

sysname the name of the kernel

+

release the operating system (kernel) release

+

version the operating system (kernel) version

+

nodename the name by which the machine is known on the network

+

machine the hardware type; often the CPU type (e.g., "x86_64")

The value "unknown" will be returned for a key if the correct value cannot be ascertained.  Note that the values of keys that refer to the kernel may not be what you expect; for example, on one particular macOS 10.15.7 system, sysname returns "Darwin", release returns "19.6.0", and version returns "Darwin Kernel Version 19.6.0: Thu Sep 16 20:58:47 PDT 2021; root:xnu-6153.141.40.1~1/RELEASE_X86_64".

Further keys can be added if there is information that would be useful, particularly if a cross-platform way to obtain the information can be found.

(string)system(string$ command, [string args = ""], [string input = ""], [logical$ stderr = F], [logical$ wait = T])

Runs a Un*x command in a /bin/sh shell with optional arguments and input, and returns the result as a vector of output lines.  The args parameter may contain a vector of arguments to command; they will be passed directly to the shell without any quoting, so applying the appropriate quoting as needed by /bin/sh is the caller’s responsibility.  The arguments are appended to command, separated by spaces, and the result is passed to the shell as a single command string, so arguments may simply be given as part of command instead, if preferred.  By default no input is supplied to command; if input is non-empty, however, it will be written to a temporary file (one line per string element) and the standard input of command will be redirected to that temporary file (using standard /bin/sh redirection with <, appended to the command string passed to the shell).  By default, output sent to standard error will not be captured (and thus may end up in the output of the SLiM process, or may be lost); if stderr is T, however, the standard error stream will be redirected into standard out (using standard /bin/sh redirection with 2>&1, appended to the command string passed to the shell).

Arbitrary command strings involving multiple commands, pipes, redirection, etc., may be used with system(), but may be incompatible with the way that args, input, and stderr are handled by this function, so in this case supplying the whole command string in command may be the simplest course.  You may redirect standard error into standard output yourself in command with 2>&1.  Supplying input to a complex command line can often be facilitated by the use of parentheses to create a subshell; for example,

-

system("(wc -l | sed 's/ //g')", input=c('foo', 'bar', 'baz'));

+

system("(wc -l | sed 's/ //g')", input=c('foo', 'bar', 'baz'));

will supply the input lines to wc courtesy of the subshell started for the () operator.  If this strategy doesn’t work for the command line you want to execute, you can always write a temporary file yourself using writeFile() or writeTempFile() and redirect that file to standard input in command with <.

If wait is T (the default), system() will wait for the command to finish, and return the output generated as a string vector, as described above.  If wait is F, system() will instead append " &" to the end of the command line to request that it be run in the background, and it will not collect and return the output from the command; instead it will return string(0) immediately.  If the output from the command is needed, it could be redirected to a file, and that file could be checked periodically in Eidos for some indication that the command had completed; if output is not redirected to a file, it may appear in SLiM’s output stream.  If the final command line executed by system() ends in " &", the behavior of system() should be just as if wait=F had been supplied, but it is recommended to use wait=F instead to ensure that the command line is correctly assembled.
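For example, a sketch of a background launch (the command here is arbitrary):

system("sleep 5", wait=F);   // returns string(0) immediately; output is not collected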

(string$)time(void)

diff --git a/SLiMgui/SLiMWindowController.mm b/SLiMgui/SLiMWindowController.mm index 992ad8e5d..a93599b31 100644 --- a/SLiMgui/SLiMWindowController.mm +++ b/SLiMgui/SLiMWindowController.mm @@ -467,6 +467,7 @@ - (void)startNewSimulationFromScript // note that we do not save/restore the value across context switches between models, as we do RNGs and such, // since we don't support end users running SLiMgui multithreaded anyhow gEidosNumThreads = gEidosMaxThreads; + gEidosNumThreadsOverride = false; omp_set_num_threads(gEidosMaxThreads); // Free the old simulation RNG and make a new one, to have clean state diff --git a/VERSIONS b/VERSIONS index 27861995a..f69c40acd 100644 --- a/VERSIONS +++ b/VERSIONS @@ -78,6 +78,10 @@ PARALLEL changes (now in the master branch): thread-safety work - break in backward reproducibility for scripts that use a type 's' DFE, because the code path for that shifted algorithm change for nearestNeighbors() and nearestNeighborsOfPoint(), when count is >1 and AllocateChunk()) EidosValue_Logical())->resize_no_initialize(p_elements_size); bool null_genome_seen = false; -#pragma omp parallel for schedule(dynamic, 16) default(none) shared(p_elements_size) firstprivate(p_elements, mutation_type_ptr, marker_position, last_position, result_logical_vec) reduction(||: null_genome_seen) if(p_elements_size >= EIDOS_OMPMIN_CONTAINS_MARKER_MUT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_CONTAINS_MARKER_MUT); +#pragma omp parallel for schedule(dynamic, 16) default(none) shared(p_elements_size) firstprivate(p_elements, mutation_type_ptr, marker_position, last_position, result_logical_vec) reduction(||: null_genome_seen) if(p_elements_size >= EIDOS_OMPMIN_CONTAINS_MARKER_MUT) num_threads(thread_count) for (size_t element_index = 0; element_index < p_elements_size; ++element_index) { Genome *element = (Genome *)(p_elements[element_index]); @@ -818,7 +819,8 @@ EidosValue_SP Genome::ExecuteMethod_Accelerated_countOfMutationsOfType(EidosObje EidosValue_Int_vector *integer_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Int_vector())->resize_no_initialize(p_elements_size); bool saw_error = false; -#pragma omp parallel for schedule(dynamic, 1) default(none) shared(p_elements_size) firstprivate(p_elements, mut_block_ptr, mutation_type_ptr, integer_result, mutrun_count) reduction(||: saw_error) if(p_elements_size >= EIDOS_OMPMIN_G_COUNT_OF_MUTS_OF_TYPE) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_G_COUNT_OF_MUTS_OF_TYPE); +#pragma omp parallel for schedule(dynamic, 1) default(none) shared(p_elements_size) firstprivate(p_elements, mut_block_ptr, mutation_type_ptr, integer_result, mutrun_count) reduction(||: saw_error) if(p_elements_size >= EIDOS_OMPMIN_G_COUNT_OF_MUTS_OF_TYPE) num_threads(thread_count) for (size_t element_index = 0; element_index < p_elements_size; ++element_index) { Genome *element = (Genome *)(p_elements[element_index]); diff --git a/core/individual.cpp b/core/individual.cpp index 022fc3cca..d9c05526f 100644 --- a/core/individual.cpp +++ b/core/individual.cpp @@ -1073,7 +1073,8 @@ bool Individual::_SetFitnessScaling_1(double source_value, EidosObject **p_value // potential race condition if the same Individual is referenced more than once in // p_values; that is considered a bug in the user's script, and we could check for it // in DEBUG mode if we wanted to. 
-#pragma omp parallel for simd schedule(simd:static) default(none) shared(p_values_size) firstprivate(p_values, source_value) if(parallel:p_values_size >= EIDOS_OMPMIN_SET_FITNESS_S1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SET_FITNESS_SCALE_1); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(p_values_size) firstprivate(p_values, source_value) if(parallel:p_values_size >= EIDOS_OMPMIN_SET_FITNESS_SCALE_1) num_threads(thread_count) for (size_t value_index = 0; value_index < p_values_size; ++value_index) ((Individual *)(p_values[value_index]))->fitness_scaling_ = source_value; @@ -1089,7 +1090,8 @@ bool Individual::_SetFitnessScaling_N(const double *source_data, EidosObject **p // potential race condition if the same Individual is referenced more than once in // p_values; that is considered a bug in the user's script, and we could check for it // in DEBUG mode if we wanted to. -#pragma omp parallel for schedule(static) default(none) shared(p_values_size) firstprivate(p_values, source_data) reduction(||: saw_error) if(p_values_size >= EIDOS_OMPMIN_SET_FITNESS_S2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SET_FITNESS_SCALE_2); +#pragma omp parallel for schedule(static) default(none) shared(p_values_size) firstprivate(p_values, source_data) reduction(||: saw_error) if(p_values_size >= EIDOS_OMPMIN_SET_FITNESS_SCALE_2) num_threads(thread_count) for (size_t value_index = 0; value_index < p_values_size; ++value_index) { double source_value = source_data[value_index]; @@ -1343,7 +1345,8 @@ EidosValue_SP Individual::ExecuteMethod_Accelerated_countOfMutationsOfType(Eidos Mutation *mut_block_ptr = gSLiM_Mutation_Block; EidosValue_Int_vector *integer_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Int_vector())->resize_no_initialize(p_elements_size); -#pragma omp parallel for schedule(dynamic, 1) default(none) shared(p_elements_size) firstprivate(p_elements, mut_block_ptr, mutation_type_ptr, integer_result) if(p_elements_size >= EIDOS_OMPMIN_I_COUNT_OF_MUTS_OF_TYPE) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_I_COUNT_OF_MUTS_OF_TYPE); +#pragma omp parallel for schedule(dynamic, 1) default(none) shared(p_elements_size) firstprivate(p_elements, mut_block_ptr, mutation_type_ptr, integer_result) if(p_elements_size >= EIDOS_OMPMIN_I_COUNT_OF_MUTS_OF_TYPE) num_threads(thread_count) for (size_t element_index = 0; element_index < p_elements_size; ++element_index) { Individual *element = (Individual *)(p_elements[element_index]); @@ -1427,7 +1430,8 @@ EidosValue_SP Individual::ExecuteMethod_relatedness(EidosGlobalStringID p_method { // this parallelizes the case of one_individual.relatedness(many_individuals) // it would be nice to also parallelize the case of many_individuals.relatedness(one_individual); that would require accelerating this method -#pragma omp parallel for schedule(dynamic, 128) default(none) shared(individuals_count, individuals_value) firstprivate(float_result) if(individuals_count >= EIDOS_OMPMIN_RELATEDNESS) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RELATEDNESS); +#pragma omp parallel for schedule(dynamic, 128) default(none) shared(individuals_count, individuals_value) firstprivate(float_result) if(individuals_count >= EIDOS_OMPMIN_RELATEDNESS) num_threads(thread_count) for (int value_index = 0; value_index < individuals_count; ++value_index) { Individual *ind = (Individual *)(individuals_value->ObjectElementAtIndex(value_index, nullptr)); @@ -1476,7 +1480,8 @@ EidosValue_SP Individual::ExecuteMethod_Accelerated_sumOfMutationsOfType(EidosOb Mutation 
*mut_block_ptr = gSLiM_Mutation_Block; EidosValue_Float_vector *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float_vector())->resize_no_initialize(p_elements_size); -#pragma omp parallel for schedule(dynamic, 1) default(none) shared(p_elements_size) firstprivate(p_elements, mut_block_ptr, mutation_type_ptr, float_result) if(p_elements_size >= EIDOS_OMPMIN_SUM_OF_MUTS_OF_TYPE) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SUM_OF_MUTS_OF_TYPE); +#pragma omp parallel for schedule(dynamic, 1) default(none) shared(p_elements_size) firstprivate(p_elements, mut_block_ptr, mutation_type_ptr, float_result) if(p_elements_size >= EIDOS_OMPMIN_SUM_OF_MUTS_OF_TYPE) num_threads(thread_count) for (size_t element_index = 0; element_index < p_elements_size; ++element_index) { Individual *element = (Individual *)(p_elements[element_index]); @@ -1901,7 +1906,8 @@ EidosValue_SP Individual_Class::ExecuteMethod_setSpatialPosition(EidosGlobalStri { double x = position_value->FloatAtIndex(0, nullptr); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(target_size) firstprivate(targets, x) if(target_size >= EIDOS_OMPMIN_SET_SPATIAL_POS_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SET_SPATIAL_POS_1); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(target_size) firstprivate(targets, x) if(target_size >= EIDOS_OMPMIN_SET_SPATIAL_POS_1) num_threads(thread_count) for (int target_index = 0; target_index < target_size; ++target_index) { Individual *target = targets[target_index]; @@ -1914,7 +1920,8 @@ EidosValue_SP Individual_Class::ExecuteMethod_setSpatialPosition(EidosGlobalStri double x = position_value->FloatAtIndex(0, nullptr); double y = position_value->FloatAtIndex(1, nullptr); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(target_size) firstprivate(targets, x, y) if(target_size >= EIDOS_OMPMIN_SET_SPATIAL_POS_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SET_SPATIAL_POS_1); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(target_size) firstprivate(targets, x, y) if(target_size >= EIDOS_OMPMIN_SET_SPATIAL_POS_1) num_threads(thread_count) for (int target_index = 0; target_index < target_size; ++target_index) { Individual *target = targets[target_index]; @@ -1929,7 +1936,8 @@ EidosValue_SP Individual_Class::ExecuteMethod_setSpatialPosition(EidosGlobalStri double y = position_value->FloatAtIndex(1, nullptr); double z = position_value->FloatAtIndex(2, nullptr); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(target_size) firstprivate(targets, x, y, z) if(target_size >= EIDOS_OMPMIN_SET_SPATIAL_POS_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SET_SPATIAL_POS_1); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(target_size) firstprivate(targets, x, y, z) if(target_size >= EIDOS_OMPMIN_SET_SPATIAL_POS_1) num_threads(thread_count) for (int target_index = 0; target_index < target_size; ++target_index) { Individual *target = targets[target_index]; @@ -1957,7 +1965,8 @@ EidosValue_SP Individual_Class::ExecuteMethod_setSpatialPosition(EidosGlobalStri { case 1: { -#pragma omp parallel for schedule(static) default(none) shared(target_size) firstprivate(targets, positions) // if(EIDOS_OMPMIN_SET_SPATIAL_POS_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SET_SPATIAL_POS_2); +#pragma omp parallel for schedule(static) default(none) shared(target_size) firstprivate(targets, positions) num_threads(thread_count) // if(EIDOS_OMPMIN_SET_SPATIAL_POS_2) for (int target_index = 0; 
target_index < target_size; ++target_index) { targets[target_index]->spatial_x_ = positions[target_index]; @@ -1966,7 +1975,8 @@ EidosValue_SP Individual_Class::ExecuteMethod_setSpatialPosition(EidosGlobalStri } case 2: { -#pragma omp parallel for schedule(static) default(none) shared(target_size) firstprivate(targets, positions) // if(EIDOS_OMPMIN_SET_SPATIAL_POS_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SET_SPATIAL_POS_2); +#pragma omp parallel for schedule(static) default(none) shared(target_size) firstprivate(targets, positions) num_threads(thread_count) // if(EIDOS_OMPMIN_SET_SPATIAL_POS_2) for (int target_index = 0; target_index < target_size; ++target_index) { Individual *target = targets[target_index]; @@ -1979,7 +1989,8 @@ EidosValue_SP Individual_Class::ExecuteMethod_setSpatialPosition(EidosGlobalStri } case 3: { -#pragma omp parallel for schedule(static) default(none) shared(target_size) firstprivate(targets, positions) // if(EIDOS_OMPMIN_SET_SPATIAL_POS_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SET_SPATIAL_POS_2); +#pragma omp parallel for schedule(static) default(none) shared(target_size) firstprivate(targets, positions) num_threads(thread_count) // if(EIDOS_OMPMIN_SET_SPATIAL_POS_2) for (int target_index = 0; target_index < target_size; ++target_index) { Individual *target = targets[target_index]; diff --git a/core/interaction_type.cpp b/core/interaction_type.cpp index 79249e631..de4e90ee4 100755 --- a/core/interaction_type.cpp +++ b/core/interaction_type.cpp @@ -3602,7 +3602,8 @@ EidosValue_SP InteractionType::ExecuteMethod_clippedIntegral(EidosGlobalStringID { if (spatiality_string_ == "x") { -#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_x) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_CLIPPEDINTEGRAL_1); +#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_x) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_1) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { const Individual *receiver = receivers_data[receiver_index]; @@ -3635,7 +3636,8 @@ EidosValue_SP InteractionType::ExecuteMethod_clippedIntegral(EidosGlobalStringID } else if (spatiality_string_ == "y") { -#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_y) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_CLIPPEDINTEGRAL_2); +#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_y) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_2) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { const Individual *receiver = receivers_data[receiver_index]; @@ -3668,7 +3670,8 @@ EidosValue_SP InteractionType::ExecuteMethod_clippedIntegral(EidosGlobalStringID } else // (spatiality_string_ == "z") { -#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) 
firstprivate(receivers_data, float_result, periodic_z) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_3) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_CLIPPEDINTEGRAL_3); +#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_z) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_3) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { const Individual *receiver = receivers_data[receiver_index]; @@ -3704,7 +3707,8 @@ EidosValue_SP InteractionType::ExecuteMethod_clippedIntegral(EidosGlobalStringID { if (spatiality_string_ == "xy") { -#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_x, periodic_y) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_4) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_CLIPPEDINTEGRAL_4); +#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_x, periodic_y) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_4) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { const Individual *receiver = receivers_data[receiver_index]; @@ -3738,7 +3742,8 @@ EidosValue_SP InteractionType::ExecuteMethod_clippedIntegral(EidosGlobalStringID } else if (spatiality_string_ == "xz") { -#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_x, periodic_z) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_5) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_CLIPPEDINTEGRAL_5); +#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_x, periodic_z) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_5) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { const Individual *receiver = receivers_data[receiver_index]; @@ -3772,7 +3777,8 @@ EidosValue_SP InteractionType::ExecuteMethod_clippedIntegral(EidosGlobalStringID } else // (spatiality_string_ == "yz") { -#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_y, periodic_z) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_6) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_CLIPPEDINTEGRAL_6); +#pragma omp parallel for schedule(static) default(none) shared(receivers_count, receiver_subpop_data) firstprivate(receivers_data, float_result, periodic_y, periodic_z) reduction(||: saw_error1) reduction(||: saw_error2) if(receivers_count >= EIDOS_OMPMIN_CLIPPEDINTEGRAL_6) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { const Individual *receiver = receivers_data[receiver_index]; @@ -4329,7 +4335,8 @@ EidosValue_SP InteractionType::ExecuteMethod_drawByStrength(EidosGlobalStringID InteractionsData 
&receiver_subpop_data = InteractionsDataForSubpop(data_, receiver_subpop); EnsureKDTreePresent(exerter_subpop_data); -#pragma omp parallel for schedule(dynamic, 16) default(none) shared(gEidos_RNG_PERTHREAD, receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data, optimize_fixed_interaction_strengths) firstprivate(receiver_value, result_vectors, count, exerter_subpop_size) reduction(||: saw_error_1) reduction(||: saw_error_2) reduction(||: saw_error_3) if(!has_interaction_callbacks && (receivers_count >= EIDOS_OMPMIN_DRAWBYSTRENGTH)) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_DRAWBYSTRENGTH); +#pragma omp parallel for schedule(dynamic, 16) default(none) shared(gEidos_RNG_PERTHREAD, receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data, optimize_fixed_interaction_strengths) firstprivate(receiver_value, result_vectors, count, exerter_subpop_size) reduction(||: saw_error_1) reduction(||: saw_error_2) reduction(||: saw_error_3) if(!has_interaction_callbacks && (receivers_count >= EIDOS_OMPMIN_DRAWBYSTRENGTH)) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { Individual *receiver = (Individual *)receiver_value->ObjectElementAtIndex(receiver_index, nullptr); @@ -4547,7 +4554,8 @@ EidosValue_SP InteractionType::ExecuteMethod_interactingNeighborCount(EidosGloba EidosValue_Int_vector *result_vec = (new (gEidosValuePool->AllocateChunk()) EidosValue_Int_vector())->resize_no_initialize(receivers_count); bool saw_error_1 = false, saw_error_2 = false; -#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data) firstprivate(receivers_value, result_vec) reduction(||: saw_error_1) reduction(||: saw_error_2) if(receivers_count >= EIDOS_OMPMIN_INTNEIGHCOUNT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_INTNEIGHCOUNT); +#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data) firstprivate(receivers_value, result_vec) reduction(||: saw_error_1) reduction(||: saw_error_2) if(receivers_count >= EIDOS_OMPMIN_INTNEIGHCOUNT) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { Individual *receiver = (Individual *)receivers_value->ObjectElementAtIndex(receiver_index, nullptr); @@ -4730,7 +4738,8 @@ EidosValue_SP InteractionType::ExecuteMethod_localPopulationDensity(EidosGlobalS EidosValue_Float_vector *result_vec = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float_vector())->resize_no_initialize(receivers_count); bool saw_error_1 = false, saw_error_2 = false; -#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data, strength_for_zero_distance, clipped_integrals, optimize_fixed_interaction_strengths) firstprivate(receivers_value, result_vec) reduction(||: saw_error_1) reduction(||: saw_error_2) if(!has_interaction_callbacks && (receivers_count >= EIDOS_OMPMIN_LOCALPOPDENSITY)) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_LOCALPOPDENSITY); +#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data, strength_for_zero_distance, clipped_integrals, optimize_fixed_interaction_strengths) firstprivate(receivers_value, result_vec) 
reduction(||: saw_error_1) reduction(||: saw_error_2) if(!has_interaction_callbacks && (receivers_count >= EIDOS_OMPMIN_LOCALPOPDENSITY)) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { Individual *receiver = (Individual *)receivers_value->ObjectElementAtIndex(receiver_index, nullptr); @@ -5111,7 +5120,8 @@ EidosValue_SP InteractionType::ExecuteMethod_nearestInteractingNeighbors(EidosGl { bool saw_error_1 = false, saw_error_2 = false; -#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, exerters, receiver_subpop_data, exerter_subpop_data) firstprivate(receiver_value, result_vectors, count, exerter_subpop_size) reduction(||: saw_error_1) reduction(||: saw_error_2) if(receivers_count >= EIDOS_OMPMIN_NEARESTINTNEIGH) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_NEARESTINTNEIGH); +#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, exerters, receiver_subpop_data, exerter_subpop_data) firstprivate(receiver_value, result_vectors, count, exerter_subpop_size) reduction(||: saw_error_1) reduction(||: saw_error_2) if(receivers_count >= EIDOS_OMPMIN_NEARESTINTNEIGH) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { Individual *receiver = (Individual *)receiver_value->ObjectElementAtIndex(receiver_index, nullptr); @@ -5331,7 +5341,8 @@ EidosValue_SP InteractionType::ExecuteMethod_nearestNeighbors(EidosGlobalStringI InteractionsData &exerter_subpop_data = InteractionsDataForSubpop(data_, exerter_subpop); EnsureKDTreePresent(exerter_subpop_data); -#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data) firstprivate(receiver_value, result_vectors, count, exerter_subpop_size) reduction(||: saw_error_1) reduction(||: saw_error_2) if(receivers_count >= EIDOS_OMPMIN_NEARESTNEIGH) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_NEARESTNEIGH); +#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data) firstprivate(receiver_value, result_vectors, count, exerter_subpop_size) reduction(||: saw_error_1) reduction(||: saw_error_2) if(receivers_count >= EIDOS_OMPMIN_NEARESTNEIGH) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { Individual *receiver = (Individual *)receiver_value->ObjectElementAtIndex(receiver_index, nullptr); @@ -5499,7 +5510,8 @@ EidosValue_SP InteractionType::ExecuteMethod_neighborCount(EidosGlobalStringID p EidosValue_Int_vector *result_vec = (new (gEidosValuePool->AllocateChunk()) EidosValue_Int_vector())->resize_no_initialize(receivers_count); bool saw_error_1 = false, saw_error_2 = false; -#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data) firstprivate(receivers_value, result_vec) reduction(||: saw_error_1) reduction(||: saw_error_2) if(receivers_count >= EIDOS_OMPMIN_NEIGHCOUNT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_NEIGHCOUNT); +#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data) firstprivate(receivers_value, result_vec) reduction(||: saw_error_1) reduction(||: saw_error_2) 
if(receivers_count >= EIDOS_OMPMIN_NEIGHCOUNT) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { Individual *receiver = (Individual *)receivers_value->ObjectElementAtIndex(receiver_index, nullptr); @@ -5960,7 +5972,8 @@ EidosValue_SP InteractionType::ExecuteMethod_totalOfNeighborStrengths(EidosGloba #endif bool saw_error_1 = false, saw_error_2 = false, saw_error_3 = false; -#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data) firstprivate(receivers_value, result_vec) reduction(||: saw_error_1) reduction(||: saw_error_2) reduction(||: saw_error_3) if(!has_interaction_callbacks && (receivers_count >= EIDOS_OMPMIN_TOTNEIGHSTRENGTH)) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_TOTNEIGHSTRENGTH); +#pragma omp parallel for schedule(dynamic, 16) default(none) shared(receivers_count, receiver_subpop, exerter_subpop, receiver_subpop_data, exerter_subpop_data) firstprivate(receivers_value, result_vec) reduction(||: saw_error_1) reduction(||: saw_error_2) reduction(||: saw_error_3) if(!has_interaction_callbacks && (receivers_count >= EIDOS_OMPMIN_TOTNEIGHSTRENGTH)) num_threads(thread_count) for (int receiver_index = 0; receiver_index < receivers_count; ++receiver_index) { Individual *receiver = (Individual *)receivers_value->ObjectElementAtIndex(receiver_index, nullptr); diff --git a/core/main.cpp b/core/main.cpp index 49b757728..5e73ca11a 100644 --- a/core/main.cpp +++ b/core/main.cpp @@ -396,9 +396,9 @@ int main(int argc, char *argv[]) max_thread_count = count; changed_max_thread_count = true; - if ((max_thread_count < 1) || (max_thread_count > 1024)) + if ((max_thread_count < 1) || (max_thread_count > EIDOS_OMP_MAX_THREADS)) { - SLIM_OUTSTREAM << "The -maxthreads command-line option enforces a range of [1, 1024]." << std::endl; + SLIM_OUTSTREAM << "The -maxthreads command-line option enforces a range of [1, " << EIDOS_OMP_MAX_THREADS << "]." 
<< std::endl; exit(EXIT_FAILURE); } diff --git a/core/population.cpp b/core/population.cpp index 26628c6fa..c00e96f99 100644 --- a/core/population.cpp +++ b/core/population.cpp @@ -604,7 +604,8 @@ void Population::DoDeferredReproduction(void) #endif // now generate the genomes of the deferred offspring in parallel -#pragma omp parallel for schedule(dynamic, 1) default(none) shared(deferred_count_nonrecombinant) if(deferred_count_nonrecombinant >= EIDOS_OMPMIN_DEFERRED_REPRO) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_DEFERRED_REPRO); +#pragma omp parallel for schedule(dynamic, 1) default(none) shared(deferred_count_nonrecombinant) if(deferred_count_nonrecombinant >= EIDOS_OMPMIN_DEFERRED_REPRO) num_threads(thread_count) for (size_t deferred_index = 0; deferred_index < deferred_count_nonrecombinant; ++deferred_index) { SLiM_DeferredReproduction_NonRecombinant &deferred_rec = deferred_reproduction_nonrecombinant_[deferred_index]; @@ -623,7 +624,8 @@ void Population::DoDeferredReproduction(void) } } -#pragma omp parallel for schedule(dynamic, 1) default(none) shared(deferred_count_recombinant) if(deferred_count_recombinant >= EIDOS_OMPMIN_DEFERRED_REPRO) + //EIDOS_THREAD_COUNT(gEidos_OMP_threads_DEFERRED_REPRO); +#pragma omp parallel for schedule(dynamic, 1) default(none) shared(deferred_count_recombinant) if(deferred_count_recombinant >= EIDOS_OMPMIN_DEFERRED_REPRO) num_threads(thread_count) for (size_t deferred_index = 0; deferred_index < deferred_count_recombinant; ++deferred_index) { SLiM_DeferredReproduction_Recombinant &deferred_rec = deferred_reproduction_recombinant_[deferred_index]; diff --git a/core/slim_test.cpp b/core/slim_test.cpp index b1d447796..2c02ebd6b 100644 --- a/core/slim_test.cpp +++ b/core/slim_test.cpp @@ -1083,11 +1083,13 @@ extern void _RunParallelSLiMTests() // Note that we ensure that we are using the maximum number of threads at start & end { gEidosNumThreads = gEidosMaxThreads; + gEidosNumThreadsOverride = false; omp_set_num_threads(gEidosMaxThreads); SLiMAssertScriptSuccess(test_string); gEidosNumThreads = gEidosMaxThreads; + gEidosNumThreadsOverride = false; omp_set_num_threads(gEidosMaxThreads); } diff --git a/core/species.cpp b/core/species.cpp index fc185885d..e11c2e403 100644 --- a/core/species.cpp +++ b/core/species.cpp @@ -2444,7 +2444,8 @@ void Species::nonWF_MergeOffspring(void) std::vector<Individual *> &parents = subpop->parent_individuals_; size_t parent_count = parents.size(); -#pragma omp parallel for schedule(static) default(none) shared(parent_count) firstprivate(parents) if(parent_count >= EIDOS_OMPMIN_MIGRANTCLEAR) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_MIGRANT_CLEAR); +#pragma omp parallel for schedule(static) default(none) shared(parent_count) firstprivate(parents) if(parent_count >= EIDOS_OMPMIN_MIGRANT_CLEAR) num_threads(thread_count) for (size_t parent_index = 0; parent_index < parent_count; ++parent_index) parents[parent_index]->migrant_ = false; } diff --git a/core/species_eidos.cpp b/core/species_eidos.cpp index b20d3e3dc..c7012acc5 100644 --- a/core/species_eidos.cpp +++ b/core/species_eidos.cpp @@ -1938,7 +1938,8 @@ EidosValue_SP Species::ExecuteMethod_individualsWithPedigreeIDs(EidosGlobalStrin Individual **result_data = (Individual **)result->data(); bool any_unmatched = false; -#pragma omp parallel for schedule(static) default(none) shared(pedigreeIDs_count, fromIDToIndividual) firstprivate(pedigree_id_data, result_data) reduction(||: any_unmatched) // if(EIDOS_OMPMIN_INDS_W_PEDIGREE_IDS) +
EIDOS_THREAD_COUNT(gEidos_OMP_threads_INDS_W_PEDIGREE_IDS); +#pragma omp parallel for schedule(static) default(none) shared(pedigreeIDs_count, fromIDToIndividual) firstprivate(pedigree_id_data, result_data) reduction(||: any_unmatched) num_threads(thread_count) // if(EIDOS_OMPMIN_INDS_W_PEDIGREE_IDS) for (int value_index = 0; value_index < pedigreeIDs_count; ++value_index) { auto find_iter = fromIDToIndividual.find(pedigree_id_data[value_index]); diff --git a/core/subpopulation.cpp b/core/subpopulation.cpp index 804b5580c..9e8fc1612 100644 --- a/core/subpopulation.cpp +++ b/core/subpopulation.cpp @@ -1546,7 +1546,8 @@ void Subpopulation::UpdateFitness(std::vector &p_mutationEffect { if (Individual::s_any_individual_fitness_scaling_set_) { -#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(subpop_fitness_scaling) reduction(+: totalFemaleFitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_SEX_F_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_FITNESS_SEX_F_1); +#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(subpop_fitness_scaling) reduction(+: totalFemaleFitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_SEX_F_1) num_threads(thread_count) for (slim_popsize_t female_index = 0; female_index < parent_first_male_index_; female_index++) { double fitness = parent_individuals_[female_index]->fitness_scaling_; @@ -1578,7 +1579,8 @@ void Subpopulation::UpdateFitness(std::vector &p_mutationEffect } else { -#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(fitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_SEX_F_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_FITNESS_SEX_F_2); +#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(fitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_SEX_F_2) num_threads(thread_count) for (slim_popsize_t female_index = 0; female_index < parent_first_male_index_; female_index++) parent_individuals_[female_index]->cached_fitness_UNSAFE_ = fitness; } @@ -1764,7 +1766,8 @@ void Subpopulation::UpdateFitness(std::vector &p_mutationEffect { if (Individual::s_any_individual_fitness_scaling_set_) { -#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(subpop_fitness_scaling) reduction(+: totalMaleFitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_SEX_M_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_FITNESS_SEX_M_1); +#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(subpop_fitness_scaling) reduction(+: totalMaleFitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_SEX_M_1) num_threads(thread_count) for (slim_popsize_t male_index = parent_first_male_index_; male_index < parent_subpop_size_; male_index++) { double fitness = parent_individuals_[male_index]->fitness_scaling_; @@ -1796,7 +1799,8 @@ void Subpopulation::UpdateFitness(std::vector &p_mutationEffect } else { -#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(fitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_SEX_M_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_FITNESS_SEX_M_2); +#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(fitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_SEX_M_2) num_threads(thread_count) for (slim_popsize_t male_index = parent_first_male_index_; male_index < parent_subpop_size_; male_index++) 
parent_individuals_[male_index]->cached_fitness_UNSAFE_ = fitness; } @@ -1984,7 +1988,8 @@ void Subpopulation::UpdateFitness(std::vector &p_mutationEffect { if (Individual::s_any_individual_fitness_scaling_set_) { -#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(subpop_fitness_scaling) reduction(+: totalFitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_ASEX_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_FITNESS_ASEX_1); +#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(subpop_fitness_scaling) reduction(+: totalFitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_ASEX_1) num_threads(thread_count) for (slim_popsize_t individual_index = 0; individual_index < parent_subpop_size_; individual_index++) { double fitness = parent_individuals_[individual_index]->fitness_scaling_; @@ -2016,7 +2021,8 @@ void Subpopulation::UpdateFitness(std::vector &p_mutationEffect } else { -#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(fitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_ASEX_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_FITNESS_ASEX_2); +#pragma omp parallel for schedule(static) default(none) shared(parent_subpop_size_) firstprivate(fitness) if(parent_subpop_size_ >= EIDOS_OMPMIN_FITNESS_ASEX_2) num_threads(thread_count) for (slim_popsize_t individual_index = 0; individual_index < parent_subpop_size_; individual_index++) parent_individuals_[individual_index]->cached_fitness_UNSAFE_ = fitness; } @@ -3893,7 +3899,8 @@ void Subpopulation::ViabilitySurvival(std::vector &p_survival_c if (no_callbacks) { // this is the simple case with no callbacks and thus no shuffle buffer -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, survival_buffer, parent_subpop_size_) firstprivate(individual_data) if(parent_subpop_size_ >= EIDOS_OMPMIN_SURVIVAL) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SURVIVAL); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, survival_buffer, parent_subpop_size_) firstprivate(individual_data) if(parent_subpop_size_ >= EIDOS_OMPMIN_SURVIVAL) num_threads(thread_count) { uint8_t *survival_buf_perthread = survival_buffer; gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -4022,7 +4029,8 @@ void Subpopulation::IncrementIndividualAges(void) std::vector<Individual *> &parents = parent_individuals_; size_t parent_count = parents.size(); -#pragma omp parallel for schedule(static) default(none) shared(parent_count) firstprivate(parents) if(parent_count >= EIDOS_OMPMIN_AGEINC) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_AGE_INCR); +#pragma omp parallel for schedule(static) default(none) shared(parent_count) firstprivate(parents) if(parent_count >= EIDOS_OMPMIN_AGE_INCR) num_threads(thread_count) for (size_t parent_index = 0; parent_index < parent_count; ++parent_index) (parents[parent_index]->age_)++; } @@ -6000,7 +6008,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointInBounds(EidosGlobalStringID p_m { double bx0 = bounds_x0_, bx1 = bounds_x1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, logical_result_data, bx0, bx1) if(point_count >= EIDOS_OMPMIN_POINT_IN_BOUNDS) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_IN_BOUNDS); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, logical_result_data, bx0, bx1) if(point_count >= EIDOS_OMPMIN_POINT_IN_BOUNDS) num_threads(thread_count) for (int point_index = 0; point_index < point_count;
++point_index) { double x = point_buf[point_index]; @@ -6014,7 +6023,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointInBounds(EidosGlobalStringID p_m { double bx0 = bounds_x0_, bx1 = bounds_x1_, by0 = bounds_y0_, by1 = bounds_y1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, logical_result_data, bx0, bx1, by0, by1) if(point_count >= EIDOS_OMPMIN_POINT_IN_BOUNDS) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_IN_BOUNDS); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, logical_result_data, bx0, bx1, by0, by1) if(point_count >= EIDOS_OMPMIN_POINT_IN_BOUNDS) num_threads(thread_count) for (int point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index * 2]; @@ -6029,7 +6039,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointInBounds(EidosGlobalStringID p_m { double bx0 = bounds_x0_, bx1 = bounds_x1_, by0 = bounds_y0_, by1 = bounds_y1_, bz0 = bounds_z0_, bz1 = bounds_z1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, logical_result_data, bx0, bx1, by0, by1, bz0, bz1) if(point_count >= EIDOS_OMPMIN_POINT_IN_BOUNDS) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_IN_BOUNDS); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, logical_result_data, bx0, bx1, by0, by1, bz0, bz1) if(point_count >= EIDOS_OMPMIN_POINT_IN_BOUNDS) num_threads(thread_count) for (int point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index * 3]; @@ -6095,7 +6106,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointReflected(EidosGlobalStringID p_ { double bx0 = bounds_x0_, bx1 = bounds_x1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1) if(point_count >= EIDOS_OMPMIN_POINT_REFLECTED) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_REFLECTED); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1) if(point_count >= EIDOS_OMPMIN_POINT_REFLECTED) num_threads(thread_count) for (int64_t point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index]; @@ -6113,7 +6125,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointReflected(EidosGlobalStringID p_ { double bx0 = bounds_x0_, bx1 = bounds_x1_, by0 = bounds_y0_, by1 = bounds_y1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1, by0, by1) if(point_count >= EIDOS_OMPMIN_POINT_REFLECTED) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_REFLECTED); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1, by0, by1) if(point_count >= EIDOS_OMPMIN_POINT_REFLECTED) num_threads(thread_count) for (int64_t point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index * 2]; @@ -6140,7 +6153,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointReflected(EidosGlobalStringID p_ { double bx0 = bounds_x0_, bx1 = bounds_x1_, by0 = bounds_y0_, by1 = bounds_y1_, bz0 = bounds_z0_, bz1 = bounds_z1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1, by0, by1, bz0, bz1) if(point_count >= EIDOS_OMPMIN_POINT_REFLECTED) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_REFLECTED); 
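// NOTE: EIDOS_THREAD_COUNT() paired with num_threads(thread_count) is the pattern used
// throughout this patch. The macro's real definition lives in eidos_openmp.h (not shown
// in this diff); a minimal sketch of the intended semantics, assuming the
// gEidosNumThreadsOverride flag seen in slim_test.cpp above, would be:
//
//     #define EIDOS_THREAD_COUNT(task_var) \
//         int thread_count = (gEidosNumThreadsOverride ? gEidosNumThreads : (task_var))
//
// In other words, an explicit parallelSetNumThreads() request overrides the per-task count
// chosen with parallelSetTaskThreadCounts(), and the if() clause on each pragma still forces
// serial execution when the workload is below its EIDOS_OMPMIN_* threshold.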
+#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1, by0, by1, bz0, bz1) if(point_count >= EIDOS_OMPMIN_POINT_REFLECTED) num_threads(thread_count) for (int64_t point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index * 3]; @@ -6219,7 +6233,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointStopped(EidosGlobalStringID p_me { double bx0 = bounds_x0_, bx1 = bounds_x1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1) if(point_count >= EIDOS_OMPMIN_POINT_STOPPED) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_STOPPED); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1) if(point_count >= EIDOS_OMPMIN_POINT_STOPPED) num_threads(thread_count) for (int64_t point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index]; @@ -6231,7 +6246,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointStopped(EidosGlobalStringID p_me { double bx0 = bounds_x0_, bx1 = bounds_x1_, by0 = bounds_y0_, by1 = bounds_y1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1, by0, by1) if(point_count >= EIDOS_OMPMIN_POINT_STOPPED) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_STOPPED); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1, by0, by1) if(point_count >= EIDOS_OMPMIN_POINT_STOPPED) num_threads(thread_count) for (int64_t point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index * 2]; @@ -6246,7 +6262,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointStopped(EidosGlobalStringID p_me { double bx0 = bounds_x0_, bx1 = bounds_x1_, by0 = bounds_y0_, by1 = bounds_y1_, bz0 = bounds_z0_, bz1 = bounds_z1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1, by0, by1, bz0, bz1) if(point_count >= EIDOS_OMPMIN_POINT_STOPPED) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_STOPPED); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx0, bx1, by0, by1, bz0, bz1) if(point_count >= EIDOS_OMPMIN_POINT_STOPPED) num_threads(thread_count) for (int64_t point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index * 3]; @@ -6326,7 +6343,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointPeriodic(EidosGlobalStringID p_m { double bx1 = bounds_x1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx1, periodic_x) if(point_count >= EIDOS_OMPMIN_POINT_PERIODIC) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_PERIODIC); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx1, periodic_x) if(point_count >= EIDOS_OMPMIN_POINT_PERIODIC) num_threads(thread_count) for (int64_t point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index]; @@ -6343,7 +6361,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointPeriodic(EidosGlobalStringID p_m { double bx1 = bounds_x1_, by1 = bounds_y1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx1, by1, 
periodic_x, periodic_y) if(point_count >= EIDOS_OMPMIN_POINT_PERIODIC) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_PERIODIC); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx1, by1, periodic_x, periodic_y) if(point_count >= EIDOS_OMPMIN_POINT_PERIODIC) num_threads(thread_count) for (int64_t point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index * 2]; @@ -6368,7 +6387,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointPeriodic(EidosGlobalStringID p_m { double bx1 = bounds_x1_, by1 = bounds_y1_, bz1 = bounds_z1_; -#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx1, by1, bz1, periodic_x, periodic_y, periodic_z) if(point_count >= EIDOS_OMPMIN_POINT_PERIODIC) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_PERIODIC); +#pragma omp parallel for schedule(static) default(none) shared(point_count) firstprivate(point_buf, float_result_data, bx1, by1, bz1, periodic_x, periodic_y, periodic_z) if(point_count >= EIDOS_OMPMIN_POINT_PERIODIC) num_threads(thread_count) for (int64_t point_index = 0; point_index < point_count; ++point_index) { double x = point_buf[point_index * 3]; @@ -6432,7 +6452,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniform(EidosGlobalStringID p_me { case 1: { -#pragma omp parallel default(none) shared(point_count, gEidos_RNG_PERTHREAD) firstprivate(float_result_data) if(point_count >= EIDOS_OMPMIN_POINT_UNIFORM) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_UNIFORM); +#pragma omp parallel default(none) shared(point_count, gEidos_RNG_PERTHREAD) firstprivate(float_result_data) if(point_count >= EIDOS_OMPMIN_POINT_UNIFORM) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); double xsize = bounds_x1_ - bounds_x0_, xbase = bounds_x0_; @@ -6447,7 +6468,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniform(EidosGlobalStringID p_me } case 2: { -#pragma omp parallel default(none) shared(point_count, gEidos_RNG_PERTHREAD) firstprivate(float_result_data) if(point_count >= EIDOS_OMPMIN_POINT_UNIFORM) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_UNIFORM); +#pragma omp parallel default(none) shared(point_count, gEidos_RNG_PERTHREAD) firstprivate(float_result_data) if(point_count >= EIDOS_OMPMIN_POINT_UNIFORM) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); double xsize = bounds_x1_ - bounds_x0_, xbase = bounds_x0_; @@ -6464,7 +6486,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniform(EidosGlobalStringID p_me } case 3: { -#pragma omp parallel default(none) shared(point_count, gEidos_RNG_PERTHREAD) firstprivate(float_result_data) if(point_count >= EIDOS_OMPMIN_POINT_UNIFORM) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_UNIFORM); +#pragma omp parallel default(none) shared(point_count, gEidos_RNG_PERTHREAD) firstprivate(float_result_data) if(point_count >= EIDOS_OMPMIN_POINT_UNIFORM) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); double xsize = bounds_x1_ - bounds_x0_, xbase = bounds_x0_; @@ -6901,7 +6924,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID EidosValue_Object_vector *result = ((EidosValue_Object_vector *)result_SP.get())->resize_no_initialize(sample_size); EidosObject **object_result_data = result->data(); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(candidate_count, first_candidate_index, excluded_index, 
object_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_INDIVIDUALS_1); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(candidate_count, first_candidate_index, excluded_index, object_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_1) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -7065,7 +7089,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID if (replace) { // base case with replacement -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size, index_buffer) firstprivate(candidate_count, object_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_INDIVIDUALS_2); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size, index_buffer) firstprivate(candidate_count, object_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_2) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -7689,7 +7714,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_spatialMapValue(EidosGlobalStringID p if (spatiality_type == 0) EIDOS_TERMINATION << "ERROR (Subpopulation::ExecuteMethod_spatialMapValue): (internal error) unrecognized spatiality." << EidosTerminate(); -#pragma omp parallel for schedule(static) default(none) shared(x_count, float_singleton_result) firstprivate(map, map_spatiality, spatiality_type, point, float_result) if(x_count >= EIDOS_OMPMIN_SPATIAL_MAP_VALUE) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SPATIAL_MAP_VALUE); +#pragma omp parallel for schedule(static) default(none) shared(x_count, float_singleton_result) firstprivate(map, map_spatiality, spatiality_type, point, float_result) if(x_count >= EIDOS_OMPMIN_SPATIAL_MAP_VALUE) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { // We need to use the correct spatial bounds for each coordinate, which depends upon our exact spatiality diff --git a/eidos/eidos_functions.cpp b/eidos/eidos_functions.cpp index fa3023e3a..2fd7ff232 100644 --- a/eidos/eidos_functions.cpp +++ b/eidos/eidos_functions.cpp @@ -299,7 +299,9 @@ const std::vector &EidosInterpreter::BuiltInFunction signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("license", Eidos_ExecuteFunction_license, kEidosValueMaskVOID))); signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("parallelGetNumThreads", Eidos_ExecuteFunction_parallelGetNumThreads, kEidosValueMaskInt | kEidosValueMaskSingleton))); signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("parallelGetMaxThreads", Eidos_ExecuteFunction_parallelGetMaxThreads, kEidosValueMaskInt | kEidosValueMaskSingleton))); + signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("parallelGetTaskThreadCounts", Eidos_ExecuteFunction_parallelGetTaskThreadCounts, kEidosValueMaskObject | kEidosValueMaskSingleton, gEidosDictionaryRetained_Class))); signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("parallelSetNumThreads", Eidos_ExecuteFunction_parallelSetNumThreads, kEidosValueMaskVOID))->AddInt_OSN("numThreads", gStaticEidosValueNULL)); + signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("parallelSetTaskThreadCounts", Eidos_ExecuteFunction_parallelSetTaskThreadCounts, kEidosValueMaskVOID))->AddObject_SN("dict", nullptr)); 
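// NOTE: the Add...() suffixes on these signature registrations appear to encode argument
// flags (O = optional, S = singleton, N = NULL allowed), so AddInt_OSN("numThreads",
// gStaticEidosValueNULL) declares an optional singleton integer defaulting to NULL, while
// AddObject_SN("dict", nullptr) declares a required singleton object. A hypothetical
// registration in the same style, where "myFunc" and Eidos_ExecuteFunction_myFunc are
// illustrative names only, not part of this patch:
//
//     signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("myFunc",
//         Eidos_ExecuteFunction_myFunc, kEidosValueMaskInt | kEidosValueMaskSingleton))
//         ->AddInt_S("count"));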
signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature(gEidosStr_rm, Eidos_ExecuteFunction_rm, kEidosValueMaskVOID))->AddString_ON("variableNames", gStaticEidosValueNULL)); signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("setSeed", Eidos_ExecuteFunction_setSeed, kEidosValueMaskVOID))->AddInt_S("seed")); signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("getSeed", Eidos_ExecuteFunction_getSeed, kEidosValueMaskInt | kEidosValueMaskSingleton))); diff --git a/eidos/eidos_functions.h b/eidos/eidos_functions.h index afb911de9..c7d850371 100644 --- a/eidos/eidos_functions.h +++ b/eidos/eidos_functions.h @@ -283,7 +283,9 @@ EidosValue_SP Eidos_ExecuteFunction_license(const std::vector &p_ EidosValue_SP Eidos_ExecuteFunction_ls(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter); EidosValue_SP Eidos_ExecuteFunction_parallelGetNumThreads(__attribute__((unused)) const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter); EidosValue_SP Eidos_ExecuteFunction_parallelGetMaxThreads(__attribute__((unused)) const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter); +EidosValue_SP Eidos_ExecuteFunction_parallelGetTaskThreadCounts(__attribute__((unused)) const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter); EidosValue_SP Eidos_ExecuteFunction_parallelSetNumThreads(__attribute__((unused)) const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter); +EidosValue_SP Eidos_ExecuteFunction_parallelSetTaskThreadCounts(__attribute__((unused)) const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter); EidosValue_SP Eidos_ExecuteFunction_rm(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter); EidosValue_SP Eidos_ExecuteFunction_sapply(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter); EidosValue_SP Eidos_ExecuteFunction_setSeed(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter); diff --git a/eidos/eidos_functions_distributions.cpp b/eidos/eidos_functions_distributions.cpp index 33dc2e5cc..38f45f899 100644 --- a/eidos/eidos_functions_distributions.cpp +++ b/eidos/eidos_functions_distributions.cpp @@ -385,7 +385,8 @@ EidosValue_SP Eidos_ExecuteFunction_dnorm(const std::vector &p_ar EidosValue_Float_vector *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float_vector())->resize_no_initialize(num_quantiles); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for schedule(static) default(none) shared(num_quantiles) firstprivate(float_data, float_result, mu0, sigma0) if(num_quantiles >= EIDOS_OMPMIN_DNORM_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_DNORM_1); +#pragma omp parallel for schedule(static) default(none) shared(num_quantiles) firstprivate(float_data, float_result, mu0, sigma0) if(num_quantiles >= EIDOS_OMPMIN_DNORM_1) num_threads(thread_count) for (int value_index = 0; value_index < num_quantiles; ++value_index) float_result->set_float_no_check(gsl_ran_gaussian_pdf(float_data[value_index] - mu0, sigma0), value_index); } @@ -398,7 +399,8 @@ EidosValue_SP Eidos_ExecuteFunction_dnorm(const std::vector &p_ar bool saw_error = false; -#pragma omp parallel for schedule(static) default(none) shared(num_quantiles) firstprivate(float_data, float_result, mu_singleton, sigma_singleton, mu0, sigma0, arg_mu, arg_sigma) reduction(||: saw_error) if(num_quantiles >= EIDOS_OMPMIN_DNORM_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_DNORM_2); +#pragma omp parallel for schedule(static) default(none) shared(num_quantiles)
firstprivate(float_data, float_result, mu_singleton, sigma_singleton, mu0, sigma0, arg_mu, arg_sigma) reduction(||: saw_error) if(num_quantiles >= EIDOS_OMPMIN_DNORM_2) num_threads(thread_count) for (int value_index = 0; value_index < num_quantiles; ++value_index) { double mu = (mu_singleton ? mu0 : arg_mu->FloatAtIndex(value_index, nullptr)); @@ -750,7 +752,8 @@ EidosValue_SP Eidos_ExecuteFunction_rbinom(const std::vector &p_a if ((probability0 == 0.5) && (size0 == 1)) { -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result) if(num_draws >= EIDOS_OMPMIN_RBINOM_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RBINOM_1); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result) if(num_draws >= EIDOS_OMPMIN_RBINOM_1) num_threads(thread_count) { Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); @@ -761,7 +764,8 @@ EidosValue_SP Eidos_ExecuteFunction_rbinom(const std::vector &p_a } else { -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, probability0, size0) if(num_draws >= EIDOS_OMPMIN_RBINOM_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RBINOM_2); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, probability0, size0) if(num_draws >= EIDOS_OMPMIN_RBINOM_2) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -779,7 +783,8 @@ EidosValue_SP Eidos_ExecuteFunction_rbinom(const std::vector &p_a bool saw_error1 = false, saw_error2 = false; -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, size_singleton, prob_singleton, size0, probability0, arg_size, arg_prob) reduction(||: saw_error1) reduction(||: saw_error2) if(num_draws >= EIDOS_OMPMIN_RBINOM_3) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RBINOM_3); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, size_singleton, prob_singleton, size0, probability0, arg_size, arg_prob) reduction(||: saw_error1) reduction(||: saw_error2) if(num_draws >= EIDOS_OMPMIN_RBINOM_3) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -933,7 +938,8 @@ EidosValue_SP Eidos_ExecuteFunction_rdunif(const std::vector &p_a if (count0 == 2) { -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, min_value0) if(num_draws >= EIDOS_OMPMIN_RDUNIF_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RDUNIF_1); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, min_value0) if(num_draws >= EIDOS_OMPMIN_RDUNIF_1) num_threads(thread_count) { Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); @@ -944,7 +950,8 @@ EidosValue_SP Eidos_ExecuteFunction_rdunif(const std::vector &p_a } else { -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, min_value0, count0) if(num_draws >= EIDOS_OMPMIN_RDUNIF_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RDUNIF_2); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, min_value0, count0) if(num_draws >= EIDOS_OMPMIN_RDUNIF_2) num_threads(thread_count) { Eidos_MT_State *mt = EIDOS_MT_RNG(omp_get_thread_num()); @@ -962,7 +969,8 @@ EidosValue_SP Eidos_ExecuteFunction_rdunif(const std::vector &p_a bool saw_error = false; -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) 
firstprivate(int_result, min_singleton, max_singleton, min_value0, max_value0, arg_min, arg_max) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RDUNIF_3) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RDUNIF_3); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, min_singleton, max_singleton, min_value0, max_value0, arg_min, arg_max) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RDUNIF_3) num_threads(thread_count) { Eidos_MT_State *mt = EIDOS_MT_RNG(omp_get_thread_num()); @@ -1074,7 +1082,8 @@ EidosValue_SP Eidos_ExecuteFunction_rexp(const std::vector &p_arg EidosValue_Float_vector *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float_vector())->resize_no_initialize(num_draws); result_SP = EidosValue_SP(float_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, mu0) if(num_draws >= EIDOS_OMPMIN_REXP_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_REXP_1); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, mu0) if(num_draws >= EIDOS_OMPMIN_REXP_1) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1089,7 +1098,8 @@ EidosValue_SP Eidos_ExecuteFunction_rexp(const std::vector &p_arg EidosValue_Float_vector *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float_vector())->resize_no_initialize((int)num_draws); result_SP = EidosValue_SP(float_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, arg_mu) if(num_draws >= EIDOS_OMPMIN_REXP_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_REXP_2); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, arg_mu) if(num_draws >= EIDOS_OMPMIN_REXP_2) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1667,7 +1677,8 @@ EidosValue_SP Eidos_ExecuteFunction_rnorm(const std::vector &p_ar if (mu_singleton && sigma_singleton) { -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, sigma0, mu0) if(num_draws >= EIDOS_OMPMIN_RNORM_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RNORM_1); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, sigma0, mu0) if(num_draws >= EIDOS_OMPMIN_RNORM_1) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1678,7 +1689,8 @@ EidosValue_SP Eidos_ExecuteFunction_rnorm(const std::vector &p_ar } else if (sigma_singleton) // && !mu_singleton { -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, sigma0, arg_mu) if(num_draws >= EIDOS_OMPMIN_RNORM_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RNORM_2); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, sigma0, arg_mu) if(num_draws >= EIDOS_OMPMIN_RNORM_2) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1695,7 +1707,8 @@ EidosValue_SP Eidos_ExecuteFunction_rnorm(const std::vector &p_ar { bool saw_error = false; -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, mu_singleton, mu0, arg_mu, arg_sigma) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RNORM_3) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RNORM_3); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, mu_singleton, 
mu0, arg_mu, arg_sigma) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RNORM_3) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1762,7 +1775,8 @@ EidosValue_SP Eidos_ExecuteFunction_rpois(const std::vector &p_ar EidosValue_Int_vector *int_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Int_vector())->resize_no_initialize(num_draws); result_SP = EidosValue_SP(int_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, lambda0) if(num_draws >= EIDOS_OMPMIN_RPOIS_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RPOIS_1); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, lambda0) if(num_draws >= EIDOS_OMPMIN_RPOIS_1) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1779,7 +1793,8 @@ EidosValue_SP Eidos_ExecuteFunction_rpois(const std::vector &p_ar bool saw_error = false; -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, arg_lambda) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RPOIS_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RPOIS_2); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, arg_lambda) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RPOIS_2) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1845,7 +1860,8 @@ EidosValue_SP Eidos_ExecuteFunction_runif(const std::vector &p_ar EidosValue_Float_vector *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float_vector())->resize_no_initialize(num_draws); result_SP = EidosValue_SP(float_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result) if(num_draws >= EIDOS_OMPMIN_RUNIF_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RUNIF_1); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result) if(num_draws >= EIDOS_OMPMIN_RUNIF_1) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1875,7 +1891,8 @@ EidosValue_SP Eidos_ExecuteFunction_runif(const std::vector &p_ar EidosValue_Float_vector *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float_vector())->resize_no_initialize(num_draws); result_SP = EidosValue_SP(float_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, range0, min_value0) if(num_draws >= EIDOS_OMPMIN_RUNIF_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RUNIF_2); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, range0, min_value0) if(num_draws >= EIDOS_OMPMIN_RUNIF_2) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1892,7 +1909,8 @@ EidosValue_SP Eidos_ExecuteFunction_runif(const std::vector &p_ar bool saw_error = false; -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, min_singleton, max_singleton, min_value0, max_value0, arg_min, arg_max) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RUNIF_3) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_RUNIF_3); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, min_singleton, max_singleton, min_value0, max_value0, arg_min, arg_max) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RUNIF_3) num_threads(thread_count) { gsl_rng *rng =
EIDOS_GSL_RNG(omp_get_thread_num()); diff --git a/eidos/eidos_functions_math.cpp b/eidos/eidos_functions_math.cpp index a65355d5c..9d9f9a72d 100644 --- a/eidos/eidos_functions_math.cpp +++ b/eidos/eidos_functions_math.cpp @@ -113,7 +113,8 @@ EidosValue_SP Eidos_ExecuteFunction_abs(const std::vector &p_argu double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_ABS_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_ABS_FLOAT); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_ABS_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = fabs(float_data[value_index]); } @@ -262,7 +263,8 @@ EidosValue_SP Eidos_ExecuteFunction_ceil(const std::vector &p_arg double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_CEIL) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_CEIL); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_CEIL) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = ceil(float_data[value_index]); } @@ -452,7 +454,8 @@ EidosValue_SP Eidos_ExecuteFunction_exp(const std::vector &p_argu double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_EXP_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_EXP_FLOAT); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_EXP_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = exp(float_data[value_index]); } @@ -482,7 +485,8 @@ EidosValue_SP Eidos_ExecuteFunction_floor(const std::vector &p_ar double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_FLOOR) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_FLOOR); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_FLOOR) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = floor(float_data[value_index]); } @@ -818,7 +822,8 @@ EidosValue_SP Eidos_ExecuteFunction_log(const std::vector &p_argu double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_LOG_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_LOG_FLOAT); +#pragma omp parallel 
for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_LOG_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = log(float_data[value_index]); } @@ -856,7 +861,8 @@ EidosValue_SP Eidos_ExecuteFunction_log10(const std::vector &p_ar double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_LOG10_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_LOG10_FLOAT); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_LOG10_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = log10(float_data[value_index]); } @@ -894,7 +900,8 @@ EidosValue_SP Eidos_ExecuteFunction_log2(const std::vector &p_arg double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_LOG2_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_LOG2_FLOAT); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_LOG2_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = log2(float_data[value_index]); } @@ -997,7 +1004,8 @@ EidosValue_SP Eidos_ExecuteFunction_round(const std::vector &p_ar double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_ROUND) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_ROUND); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_ROUND) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = round(float_data[value_index]); } @@ -2547,7 +2555,8 @@ EidosValue_SP Eidos_ExecuteFunction_sqrt(const std::vector &p_arg double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_SQRT_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SQRT_FLOAT); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_SQRT_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = sqrt(float_data[value_index]); } @@ -2616,7 +2625,8 @@ EidosValue_SP Eidos_ExecuteFunction_sum(const std::vector &p_argu const int64_t *int_data = x_value->IntVector()->data(); double sum_d = 0; -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int_data) reduction(+: sum_d) if(parallel:x_count >= 
EIDOS_OMPMIN_SUM_INTEGER) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SUM_INTEGER); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int_data) reduction(+: sum_d) if(parallel:x_count >= EIDOS_OMPMIN_SUM_INTEGER) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) sum_d += int_data[value_index]; @@ -2643,7 +2653,8 @@ EidosValue_SP Eidos_ExecuteFunction_sum(const std::vector &p_argu const double *float_data = x_value->FloatVector()->data(); double sum = 0; -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data) reduction(+: sum) if(parallel:x_count >= EIDOS_OMPMIN_SUM_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SUM_FLOAT); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data) reduction(+: sum) if(parallel:x_count >= EIDOS_OMPMIN_SUM_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) sum += float_data[value_index]; @@ -2656,7 +2667,8 @@ EidosValue_SP Eidos_ExecuteFunction_sum(const std::vector &p_argu const eidos_logical_t *logical_data = x_value->LogicalVector()->data(); int64_t sum = 0; -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(logical_data) reduction(+: sum) if(parallel:x_count >= EIDOS_OMPMIN_SUM_LOGICAL) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SUM_LOGICAL); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(logical_data) reduction(+: sum) if(parallel:x_count >= EIDOS_OMPMIN_SUM_LOGICAL) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) sum += logical_data[value_index]; @@ -2736,7 +2748,8 @@ EidosValue_SP Eidos_ExecuteFunction_trunc(const std::vector &p_ar double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_TRUNC) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_TRUNC); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_TRUNC) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) float_result_data[value_index] = trunc(float_data[value_index]); } diff --git a/eidos/eidos_functions_other.cpp b/eidos/eidos_functions_other.cpp index dfdf8885b..0ef063a0c 100644 --- a/eidos/eidos_functions_other.cpp +++ b/eidos/eidos_functions_other.cpp @@ -23,6 +23,7 @@ #include "eidos_call_signature.h" #include "eidos_rng.h" #include "eidos_beep.h" +#include "eidos_openmp.h" #include #include @@ -719,6 +720,122 @@ EidosValue_SP Eidos_ExecuteFunction_parallelGetMaxThreads(__attribute__((unused) return result_SP; } +// (object$)parallelGetTaskThreadCounts(void) +EidosValue_SP Eidos_ExecuteFunction_parallelGetTaskThreadCounts(__attribute__((unused)) const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) +{ + EidosDictionaryRetained *objectElement = new EidosDictionaryRetained(); + EidosValue_SP result_SP = EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Object_singleton(objectElement, gEidosDictionaryRetained_Class)); + +#ifdef _OPENMP + objectElement->SetKeyValue_StringKeys("ABS_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk())
EidosValue_Int_singleton(gEidos_OMP_threads_ABS_FLOAT))); + objectElement->SetKeyValue_StringKeys("CEIL", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_CEIL))); + objectElement->SetKeyValue_StringKeys("EXP_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_EXP_FLOAT))); + objectElement->SetKeyValue_StringKeys("FLOOR", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_FLOOR))); + objectElement->SetKeyValue_StringKeys("LOG_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_LOG_FLOAT))); + objectElement->SetKeyValue_StringKeys("LOG10_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_LOG10_FLOAT))); + objectElement->SetKeyValue_StringKeys("LOG2_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_LOG2_FLOAT))); + objectElement->SetKeyValue_StringKeys("ROUND", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_ROUND))); + objectElement->SetKeyValue_StringKeys("SQRT_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SQRT_FLOAT))); + objectElement->SetKeyValue_StringKeys("SUM_INTEGER", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SUM_INTEGER))); + objectElement->SetKeyValue_StringKeys("SUM_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SUM_FLOAT))); + objectElement->SetKeyValue_StringKeys("SUM_LOGICAL", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SUM_LOGICAL))); + objectElement->SetKeyValue_StringKeys("TRUNC", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_TRUNC))); + + objectElement->SetKeyValue_StringKeys("MAX_INT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_MAX_INT))); + objectElement->SetKeyValue_StringKeys("MAX_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_MAX_FLOAT))); + objectElement->SetKeyValue_StringKeys("MIN_INT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_MIN_INT))); + objectElement->SetKeyValue_StringKeys("MIN_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_MIN_FLOAT))); + objectElement->SetKeyValue_StringKeys("PMAX_INT_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_PMAX_INT_1))); + objectElement->SetKeyValue_StringKeys("PMAX_INT_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_PMAX_INT_2))); + objectElement->SetKeyValue_StringKeys("PMAX_FLOAT_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_PMAX_FLOAT_1))); + objectElement->SetKeyValue_StringKeys("PMAX_FLOAT_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_PMAX_FLOAT_2))); + objectElement->SetKeyValue_StringKeys("PMIN_INT_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_PMIN_INT_1))); + objectElement->SetKeyValue_StringKeys("PMIN_INT_2", 
EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_PMIN_INT_2))); + objectElement->SetKeyValue_StringKeys("PMIN_FLOAT_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_PMIN_FLOAT_1))); + objectElement->SetKeyValue_StringKeys("PMIN_FLOAT_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_PMIN_FLOAT_2))); + + objectElement->SetKeyValue_StringKeys("MATCH_INT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_MATCH_INT))); + objectElement->SetKeyValue_StringKeys("MATCH_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_MATCH_FLOAT))); + objectElement->SetKeyValue_StringKeys("MATCH_STRING", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_MATCH_STRING))); + objectElement->SetKeyValue_StringKeys("MATCH_OBJECT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_MATCH_OBJECT))); + objectElement->SetKeyValue_StringKeys("SAMPLE_INDEX", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SAMPLE_INDEX))); + objectElement->SetKeyValue_StringKeys("SAMPLE_R_INT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SAMPLE_R_INT))); + objectElement->SetKeyValue_StringKeys("SAMPLE_R_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SAMPLE_R_FLOAT))); + objectElement->SetKeyValue_StringKeys("SAMPLE_R_OBJECT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SAMPLE_R_OBJECT))); + objectElement->SetKeyValue_StringKeys("SAMPLE_WR_INT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SAMPLE_WR_INT))); + objectElement->SetKeyValue_StringKeys("SAMPLE_WR_FLOAT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SAMPLE_WR_FLOAT))); + objectElement->SetKeyValue_StringKeys("SAMPLE_WR_OBJECT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SAMPLE_WR_OBJECT))); + objectElement->SetKeyValue_StringKeys("TABULATE", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_TABULATE))); + + objectElement->SetKeyValue_StringKeys("CONTAINS_MARKER_MUT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_CONTAINS_MARKER_MUT))); + objectElement->SetKeyValue_StringKeys("I_COUNT_OF_MUTS_OF_TYPE", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_I_COUNT_OF_MUTS_OF_TYPE))); + objectElement->SetKeyValue_StringKeys("G_COUNT_OF_MUTS_OF_TYPE", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_G_COUNT_OF_MUTS_OF_TYPE))); + objectElement->SetKeyValue_StringKeys("INDS_W_PEDIGREE_IDS", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_INDS_W_PEDIGREE_IDS))); + objectElement->SetKeyValue_StringKeys("RELATEDNESS", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RELATEDNESS))); + objectElement->SetKeyValue_StringKeys("SAMPLE_INDIVIDUALS_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) 
EidosValue_Int_singleton(gEidos_OMP_threads_SAMPLE_INDIVIDUALS_1))); + objectElement->SetKeyValue_StringKeys("SAMPLE_INDIVIDUALS_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SAMPLE_INDIVIDUALS_2))); + objectElement->SetKeyValue_StringKeys("SET_FITNESS_SCALE_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SET_FITNESS_SCALE_1))); + objectElement->SetKeyValue_StringKeys("SET_FITNESS_SCALE_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SET_FITNESS_SCALE_2))); + objectElement->SetKeyValue_StringKeys("SUM_OF_MUTS_OF_TYPE", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SUM_OF_MUTS_OF_TYPE))); + + objectElement->SetKeyValue_StringKeys("DNORM_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_DNORM_1))); + objectElement->SetKeyValue_StringKeys("DNORM_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_DNORM_2))); + objectElement->SetKeyValue_StringKeys("RBINOM_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RBINOM_1))); + objectElement->SetKeyValue_StringKeys("RBINOM_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RBINOM_2))); + objectElement->SetKeyValue_StringKeys("RBINOM_3", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RBINOM_3))); + objectElement->SetKeyValue_StringKeys("RDUNIF_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RDUNIF_1))); + objectElement->SetKeyValue_StringKeys("RDUNIF_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RDUNIF_2))); + objectElement->SetKeyValue_StringKeys("RDUNIF_3", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RDUNIF_3))); + objectElement->SetKeyValue_StringKeys("REXP_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_REXP_1))); + objectElement->SetKeyValue_StringKeys("REXP_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_REXP_2))); + objectElement->SetKeyValue_StringKeys("RNORM_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RNORM_1))); + objectElement->SetKeyValue_StringKeys("RNORM_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RNORM_2))); + objectElement->SetKeyValue_StringKeys("RNORM_3", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RNORM_3))); + objectElement->SetKeyValue_StringKeys("RPOIS_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RPOIS_1))); + objectElement->SetKeyValue_StringKeys("RPOIS_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RPOIS_2))); + objectElement->SetKeyValue_StringKeys("RUNIF_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RUNIF_1))); + objectElement->SetKeyValue_StringKeys("RUNIF_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RUNIF_2))); + 
objectElement->SetKeyValue_StringKeys("RUNIF_3", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_RUNIF_3))); + + objectElement->SetKeyValue_StringKeys("POINT_IN_BOUNDS", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_POINT_IN_BOUNDS))); + objectElement->SetKeyValue_StringKeys("POINT_PERIODIC", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_POINT_PERIODIC))); + objectElement->SetKeyValue_StringKeys("POINT_REFLECTED", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_POINT_REFLECTED))); + objectElement->SetKeyValue_StringKeys("POINT_STOPPED", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_POINT_STOPPED))); + objectElement->SetKeyValue_StringKeys("POINT_UNIFORM", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_POINT_UNIFORM))); + objectElement->SetKeyValue_StringKeys("SET_SPATIAL_POS_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SET_SPATIAL_POS_1))); + objectElement->SetKeyValue_StringKeys("SET_SPATIAL_POS_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SET_SPATIAL_POS_2))); + objectElement->SetKeyValue_StringKeys("SPATIAL_MAP_VALUE", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SPATIAL_MAP_VALUE))); + + objectElement->SetKeyValue_StringKeys("CLIPPEDINTEGRAL_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_CLIPPEDINTEGRAL_1))); + objectElement->SetKeyValue_StringKeys("CLIPPEDINTEGRAL_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_CLIPPEDINTEGRAL_2))); + objectElement->SetKeyValue_StringKeys("CLIPPEDINTEGRAL_3", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_CLIPPEDINTEGRAL_3))); + objectElement->SetKeyValue_StringKeys("CLIPPEDINTEGRAL_4", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_CLIPPEDINTEGRAL_4))); + objectElement->SetKeyValue_StringKeys("CLIPPEDINTEGRAL_5", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_CLIPPEDINTEGRAL_5))); + objectElement->SetKeyValue_StringKeys("CLIPPEDINTEGRAL_6", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_CLIPPEDINTEGRAL_6))); + objectElement->SetKeyValue_StringKeys("DRAWBYSTRENGTH", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_DRAWBYSTRENGTH))); + objectElement->SetKeyValue_StringKeys("INTNEIGHCOUNT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_INTNEIGHCOUNT))); + objectElement->SetKeyValue_StringKeys("LOCALPOPDENSITY", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_LOCALPOPDENSITY))); + objectElement->SetKeyValue_StringKeys("NEARESTINTNEIGH", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_NEARESTINTNEIGH))); + objectElement->SetKeyValue_StringKeys("NEARESTNEIGH", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_NEARESTNEIGH))); + 
objectElement->SetKeyValue_StringKeys("NEIGHCOUNT", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_NEIGHCOUNT))); + objectElement->SetKeyValue_StringKeys("TOTNEIGHSTRENGTH", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_TOTNEIGHSTRENGTH))); + + objectElement->SetKeyValue_StringKeys("AGE_INCR", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_AGE_INCR))); + objectElement->SetKeyValue_StringKeys("DEFERRED_REPRO", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_DEFERRED_REPRO))); + objectElement->SetKeyValue_StringKeys("FITNESS_ASEX_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_FITNESS_ASEX_1))); + objectElement->SetKeyValue_StringKeys("FITNESS_ASEX_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_FITNESS_ASEX_2))); + objectElement->SetKeyValue_StringKeys("FITNESS_SEX_F_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_FITNESS_SEX_F_1))); + objectElement->SetKeyValue_StringKeys("FITNESS_SEX_F_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_FITNESS_SEX_F_2))); + objectElement->SetKeyValue_StringKeys("FITNESS_SEX_M_1", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_FITNESS_SEX_M_1))); + objectElement->SetKeyValue_StringKeys("FITNESS_SEX_M_2", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_FITNESS_SEX_M_2))); + objectElement->SetKeyValue_StringKeys("MIGRANT_CLEAR", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_MIGRANT_CLEAR))); + objectElement->SetKeyValue_StringKeys("SURVIVAL", EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Int_singleton(gEidos_OMP_threads_SURVIVAL))); +#endif + + objectElement->ContentsChanged("parallelGetTaskThreadCounts()"); + return result_SP; +} + // (void)parallelSetNumThreads([Ni$ numThreads = NULL]) EidosValue_SP Eidos_ExecuteFunction_parallelSetNumThreads(__attribute__((unused)) const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { @@ -727,7 +844,16 @@ EidosValue_SP Eidos_ExecuteFunction_parallelSetNumThreads(__attribute__((unused) int64_t numThreads = gEidosMaxThreads; // the default value, used for NULL if (numThreads_value->Type() == EidosValueType::kValueInt) + { + // An explicit override has been requested, even if numThreads == gEidosMaxThreads numThreads = numThreads_value->IntAtIndex(0, nullptr); + gEidosNumThreadsOverride = true; + } + else + { + // The user has requested, with NULL, that the default thread usage pattern not be overridden + gEidosNumThreadsOverride = false; + } if (numThreads < 1) numThreads = 1; @@ -743,6 +869,172 @@ EidosValue_SP Eidos_ExecuteFunction_parallelSetNumThreads(__attribute__((unused) return gStaticEidosValueVOID; } +// (void)parallelSetTaskThreadCounts(object$ dict) +EidosValue_SP Eidos_ExecuteFunction_parallelSetTaskThreadCounts(__attribute__((unused)) const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) +{ + EidosValue *source_value = p_arguments[0].get(); + + if (source_value->Type() == EidosValueType::kValueNULL) + { + // A dict value of NULL means "reset to default settings", which we 
have a function for + _Eidos_SetDefaultOpenMPThreadCounts(); + } + else + { + // Check that source is a subclass of EidosDictionaryUnretained. We do this check here because we want to avoid making + // EidosDictionaryUnretained visible in the public API; we want to pretend that there is just one class, Dictionary. + // I'm not sure whether that's going to be right in the long term, but I want to keep my options open for now. + EidosDictionaryUnretained *source = dynamic_cast(source_value->ObjectElementAtIndex(0, nullptr)); + + if (!source) + EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_parallelSetTaskThreadCounts): parallelSetTaskThreadCounts() can only take values from a Dictionary or a subclass of Dictionary." << EidosTerminate(nullptr); + + if (source->KeysAreStrings()) + { + const EidosDictionaryHashTable_StringKeys *source_symbols = source->DictionarySymbols_StringKeys(); + const std::vector source_keys = source->SortedKeys_StringKeys(); + + if (source_symbols && source_symbols->size()) + { + for (const std::string &key : source_keys) + { + auto kv_pair = source_symbols->find(key); + const EidosValue_SP &value = kv_pair->second; + + if ((value->Type() == EidosValueType::kValueInt) && (value->Count() == 1)) + { + int64_t value_int64 = value->IntAtIndex(0, nullptr); + + if ((value_int64 < 1) || (value_int64 > EIDOS_OMP_MAX_THREADS)) + EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_parallelSetTaskThreadCounts): parallelSetTaskThreadCounts() requires thread counts to be in [1, " << EIDOS_OMP_MAX_THREADS << "]." << EidosTerminate(nullptr); + +#ifdef _OPENMP + // We only actually process the key-value pairs when running multithreaded; + // single-threaded, they are all ignored (for cross-compatibility). + if (key == "ABS_FLOAT") gEidos_OMP_threads_ABS_FLOAT = (int)value_int64; + else if (key == "CEIL") gEidos_OMP_threads_CEIL = (int)value_int64; + else if (key == "EXP_FLOAT") gEidos_OMP_threads_EXP_FLOAT = (int)value_int64; + else if (key == "FLOOR") gEidos_OMP_threads_FLOOR = (int)value_int64; + else if (key == "LOG_FLOAT") gEidos_OMP_threads_LOG_FLOAT = (int)value_int64; + else if (key == "LOG10_FLOAT") gEidos_OMP_threads_LOG10_FLOAT = (int)value_int64; + else if (key == "LOG2_FLOAT") gEidos_OMP_threads_LOG2_FLOAT = (int)value_int64; + else if (key == "ROUND") gEidos_OMP_threads_ROUND = (int)value_int64; + else if (key == "SQRT_FLOAT") gEidos_OMP_threads_SQRT_FLOAT = (int)value_int64; + else if (key == "SUM_INTEGER") gEidos_OMP_threads_SUM_INTEGER = (int)value_int64; + else if (key == "SUM_FLOAT") gEidos_OMP_threads_SUM_FLOAT = (int)value_int64; + else if (key == "SUM_LOGICAL") gEidos_OMP_threads_SUM_LOGICAL = (int)value_int64; + else if (key == "TRUNC") gEidos_OMP_threads_TRUNC = (int)value_int64; + + else if (key == "MAX_INT") gEidos_OMP_threads_MAX_INT = (int)value_int64; + else if (key == "MAX_FLOAT") gEidos_OMP_threads_MAX_FLOAT = (int)value_int64; + else if (key == "MIN_INT") gEidos_OMP_threads_MIN_INT = (int)value_int64; + else if (key == "MIN_FLOAT") gEidos_OMP_threads_MIN_FLOAT = (int)value_int64; + else if (key == "PMAX_INT_1") gEidos_OMP_threads_PMAX_INT_1 = (int)value_int64; + else if (key == "PMAX_INT_2") gEidos_OMP_threads_PMAX_INT_2 = (int)value_int64; + else if (key == "PMAX_FLOAT_1") gEidos_OMP_threads_PMAX_FLOAT_1 = (int)value_int64; + else if (key == "PMAX_FLOAT_2") gEidos_OMP_threads_PMAX_FLOAT_2 = (int)value_int64; + else if (key == "PMIN_INT_1") gEidos_OMP_threads_PMIN_INT_1 = (int)value_int64; + else if (key == "PMIN_INT_2") gEidos_OMP_threads_PMIN_INT_2 = 
(int)value_int64; + else if (key == "PMIN_FLOAT_1") gEidos_OMP_threads_PMIN_FLOAT_1 = (int)value_int64; + else if (key == "PMIN_FLOAT_2") gEidos_OMP_threads_PMIN_FLOAT_2 = (int)value_int64; + + else if (key == "MATCH_INT") gEidos_OMP_threads_MATCH_INT = (int)value_int64; + else if (key == "MATCH_FLOAT") gEidos_OMP_threads_MATCH_FLOAT = (int)value_int64; + else if (key == "MATCH_STRING") gEidos_OMP_threads_MATCH_STRING = (int)value_int64; + else if (key == "MATCH_OBJECT") gEidos_OMP_threads_MATCH_OBJECT = (int)value_int64; + else if (key == "SAMPLE_INDEX") gEidos_OMP_threads_SAMPLE_INDEX = (int)value_int64; + else if (key == "SAMPLE_R_INT") gEidos_OMP_threads_SAMPLE_R_INT = (int)value_int64; + else if (key == "SAMPLE_R_FLOAT") gEidos_OMP_threads_SAMPLE_R_FLOAT = (int)value_int64; + else if (key == "SAMPLE_R_OBJECT") gEidos_OMP_threads_SAMPLE_R_OBJECT = (int)value_int64; + else if (key == "SAMPLE_WR_INT") gEidos_OMP_threads_SAMPLE_WR_INT = (int)value_int64; + else if (key == "SAMPLE_WR_FLOAT") gEidos_OMP_threads_SAMPLE_WR_FLOAT = (int)value_int64; + else if (key == "SAMPLE_WR_OBJECT") gEidos_OMP_threads_SAMPLE_WR_OBJECT = (int)value_int64; + else if (key == "TABULATE") gEidos_OMP_threads_TABULATE = (int)value_int64; + + else if (key == "CONTAINS_MARKER_MUT") gEidos_OMP_threads_CONTAINS_MARKER_MUT = (int)value_int64; + else if (key == "I_COUNT_OF_MUTS_OF_TYPE") gEidos_OMP_threads_I_COUNT_OF_MUTS_OF_TYPE = (int)value_int64; + else if (key == "G_COUNT_OF_MUTS_OF_TYPE") gEidos_OMP_threads_G_COUNT_OF_MUTS_OF_TYPE = (int)value_int64; + else if (key == "INDS_W_PEDIGREE_IDS") gEidos_OMP_threads_INDS_W_PEDIGREE_IDS = (int)value_int64; + else if (key == "RELATEDNESS") gEidos_OMP_threads_RELATEDNESS = (int)value_int64; + else if (key == "SAMPLE_INDIVIDUALS_1") gEidos_OMP_threads_SAMPLE_INDIVIDUALS_1 = (int)value_int64; + else if (key == "SAMPLE_INDIVIDUALS_2") gEidos_OMP_threads_SAMPLE_INDIVIDUALS_2 = (int)value_int64; + else if (key == "SET_FITNESS_SCALE_1") gEidos_OMP_threads_SET_FITNESS_SCALE_1 = (int)value_int64; + else if (key == "SET_FITNESS_SCALE_2") gEidos_OMP_threads_SET_FITNESS_SCALE_2 = (int)value_int64; + else if (key == "SUM_OF_MUTS_OF_TYPE") gEidos_OMP_threads_SUM_OF_MUTS_OF_TYPE = (int)value_int64; + + else if (key == "DNORM_1") gEidos_OMP_threads_DNORM_1 = (int)value_int64; + else if (key == "DNORM_2") gEidos_OMP_threads_DNORM_2 = (int)value_int64; + else if (key == "RBINOM_1") gEidos_OMP_threads_RBINOM_1 = (int)value_int64; + else if (key == "RBINOM_2") gEidos_OMP_threads_RBINOM_2 = (int)value_int64; + else if (key == "RBINOM_3") gEidos_OMP_threads_RBINOM_3 = (int)value_int64; + else if (key == "RDUNIF_1") gEidos_OMP_threads_RDUNIF_1 = (int)value_int64; + else if (key == "RDUNIF_2") gEidos_OMP_threads_RDUNIF_2 = (int)value_int64; + else if (key == "RDUNIF_3") gEidos_OMP_threads_RDUNIF_3 = (int)value_int64; + else if (key == "REXP_1") gEidos_OMP_threads_REXP_1 = (int)value_int64; + else if (key == "REXP_2") gEidos_OMP_threads_REXP_2 = (int)value_int64; + else if (key == "RNORM_1") gEidos_OMP_threads_RNORM_1 = (int)value_int64; + else if (key == "RNORM_2") gEidos_OMP_threads_RNORM_2 = (int)value_int64; + else if (key == "RNORM_3") gEidos_OMP_threads_RNORM_3 = (int)value_int64; + else if (key == "RPOIS_1") gEidos_OMP_threads_RPOIS_1 = (int)value_int64; + else if (key == "RPOIS_2") gEidos_OMP_threads_RPOIS_2 = (int)value_int64; + else if (key == "RUNIF_1") gEidos_OMP_threads_RUNIF_1 = (int)value_int64; + else if (key == "RUNIF_2") gEidos_OMP_threads_RUNIF_2 = (int)value_int64; + else if 
(key == "RUNIF_3") gEidos_OMP_threads_RUNIF_3 = (int)value_int64; + + else if (key == "POINT_IN_BOUNDS") gEidos_OMP_threads_POINT_IN_BOUNDS = (int)value_int64; + else if (key == "POINT_PERIODIC") gEidos_OMP_threads_POINT_PERIODIC = (int)value_int64; + else if (key == "POINT_REFLECTED") gEidos_OMP_threads_POINT_REFLECTED = (int)value_int64; + else if (key == "POINT_STOPPED") gEidos_OMP_threads_POINT_STOPPED = (int)value_int64; + else if (key == "POINT_UNIFORM") gEidos_OMP_threads_POINT_UNIFORM = (int)value_int64; + else if (key == "SET_SPATIAL_POS_1") gEidos_OMP_threads_SET_SPATIAL_POS_1 = (int)value_int64; + else if (key == "SET_SPATIAL_POS_2") gEidos_OMP_threads_SET_SPATIAL_POS_2 = (int)value_int64; + else if (key == "SPATIAL_MAP_VALUE") gEidos_OMP_threads_SPATIAL_MAP_VALUE = (int)value_int64; + + else if (key == "CLIPPEDINTEGRAL_1") gEidos_OMP_threads_CLIPPEDINTEGRAL_1 = (int)value_int64; + else if (key == "CLIPPEDINTEGRAL_2") gEidos_OMP_threads_CLIPPEDINTEGRAL_2 = (int)value_int64; + else if (key == "CLIPPEDINTEGRAL_3") gEidos_OMP_threads_CLIPPEDINTEGRAL_3 = (int)value_int64; + else if (key == "CLIPPEDINTEGRAL_4") gEidos_OMP_threads_CLIPPEDINTEGRAL_4 = (int)value_int64; + else if (key == "CLIPPEDINTEGRAL_5") gEidos_OMP_threads_CLIPPEDINTEGRAL_5 = (int)value_int64; + else if (key == "CLIPPEDINTEGRAL_6") gEidos_OMP_threads_CLIPPEDINTEGRAL_6 = (int)value_int64; + else if (key == "DRAWBYSTRENGTH") gEidos_OMP_threads_DRAWBYSTRENGTH = (int)value_int64; + else if (key == "INTNEIGHCOUNT") gEidos_OMP_threads_INTNEIGHCOUNT = (int)value_int64; + else if (key == "LOCALPOPDENSITY") gEidos_OMP_threads_LOCALPOPDENSITY = (int)value_int64; + else if (key == "NEARESTINTNEIGH") gEidos_OMP_threads_NEARESTINTNEIGH = (int)value_int64; + else if (key == "NEARESTNEIGH") gEidos_OMP_threads_NEARESTNEIGH = (int)value_int64; + else if (key == "NEIGHCOUNT") gEidos_OMP_threads_NEIGHCOUNT = (int)value_int64; + else if (key == "TOTNEIGHSTRENGTH") gEidos_OMP_threads_TOTNEIGHSTRENGTH = (int)value_int64; + + else if (key == "AGE_INCR") gEidos_OMP_threads_AGE_INCR = (int)value_int64; + else if (key == "DEFERRED_REPRO") gEidos_OMP_threads_DEFERRED_REPRO = (int)value_int64; + else if (key == "FITNESS_ASEX_1") gEidos_OMP_threads_FITNESS_ASEX_1 = (int)value_int64; + else if (key == "FITNESS_ASEX_2") gEidos_OMP_threads_FITNESS_ASEX_2 = (int)value_int64; + else if (key == "FITNESS_SEX_F_1") gEidos_OMP_threads_FITNESS_SEX_F_1 = (int)value_int64; + else if (key == "FITNESS_SEX_F_2") gEidos_OMP_threads_FITNESS_SEX_F_2 = (int)value_int64; + else if (key == "FITNESS_SEX_M_1") gEidos_OMP_threads_FITNESS_SEX_M_1 = (int)value_int64; + else if (key == "FITNESS_SEX_M_2") gEidos_OMP_threads_FITNESS_SEX_M_2 = (int)value_int64; + else if (key == "MIGRANT_CLEAR") gEidos_OMP_threads_MIGRANT_CLEAR = (int)value_int64; + else if (key == "SURVIVAL") gEidos_OMP_threads_SURVIVAL = (int)value_int64; + else + EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_parallelSetTaskThreadCounts): parallelSetTaskThreadCounts() does not recognize the task name " << key << "." << EidosTerminate(nullptr); +#endif + } + else + EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_parallelSetTaskThreadCounts): parallelSetTaskThreadCounts() expects dict to contain singleton integer values." 
<< EidosTerminate(nullptr); + } + + // Clip all values to gEidosMaxThreads in preparation for use +#ifdef _OPENMP + _Eidos_ClipOpenMPThreadCounts(); +#endif + } + } + else + EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_parallelSetTaskThreadCounts): parallelSetTaskThreadCounts() expects dict to use string keys." << EidosTerminate(nullptr); + } + + return gStaticEidosValueVOID; +} + // (void)rm([Ns variableNames = NULL]) // [logical$ removeConstants = F] removed in SLiM 4 EidosValue_SP Eidos_ExecuteFunction_rm(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter) { diff --git a/eidos/eidos_functions_stats.cpp b/eidos/eidos_functions_stats.cpp index 0aa443935..b90b6fbe4 100644 --- a/eidos/eidos_functions_stats.cpp +++ b/eidos/eidos_functions_stats.cpp @@ -223,7 +223,8 @@ EidosValue_SP Eidos_ExecuteFunction_max(const std::vector<EidosValue_SP> &p_argu const int64_t *int_data = arg_value->IntVector()->data(); int64_t loop_max = INT64_MIN; -#pragma omp parallel for schedule(static) default(none) shared(arg_count) firstprivate(int_data) reduction(max: loop_max) if(arg_count >= EIDOS_OMPMIN_MAX_INT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_MAX_INT); +#pragma omp parallel for schedule(static) default(none) shared(arg_count) firstprivate(int_data) reduction(max: loop_max) if(arg_count >= EIDOS_OMPMIN_MAX_INT) num_threads(thread_count) for (int value_index = 0; value_index < arg_count; ++value_index) { int64_t temp = int_data[value_index]; @@ -264,7 +265,8 @@ EidosValue_SP Eidos_ExecuteFunction_max(const std::vector<EidosValue_SP> &p_argu double loop_max = -std::numeric_limits<double>::infinity(); bool saw_NAN = false; -#pragma omp parallel for schedule(static) default(none) shared(arg_count) firstprivate(float_data) reduction(max: loop_max) reduction(||: saw_NAN) if(arg_count >= EIDOS_OMPMIN_MAX_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_MAX_FLOAT); +#pragma omp parallel for schedule(static) default(none) shared(arg_count) firstprivate(float_data) reduction(max: loop_max) reduction(||: saw_NAN) if(arg_count >= EIDOS_OMPMIN_MAX_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < arg_count; ++value_index) { double temp = float_data[value_index]; @@ -436,7 +438,8 @@ EidosValue_SP Eidos_ExecuteFunction_min(const std::vector<EidosValue_SP> &p_argu const int64_t *int_data = arg_value->IntVector()->data(); int64_t loop_min = INT64_MAX; -#pragma omp parallel for schedule(static) default(none) shared(arg_count) firstprivate(int_data) reduction(min: loop_min) if(arg_count >= EIDOS_OMPMIN_MIN_INT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_MIN_INT); +#pragma omp parallel for schedule(static) default(none) shared(arg_count) firstprivate(int_data) reduction(min: loop_min) if(arg_count >= EIDOS_OMPMIN_MIN_INT) num_threads(thread_count) for (int value_index = 0; value_index < arg_count; ++value_index) { int64_t temp = int_data[value_index]; @@ -477,7 +480,8 @@ EidosValue_SP Eidos_ExecuteFunction_min(const std::vector<EidosValue_SP> &p_argu double loop_min = std::numeric_limits<double>::infinity(); bool saw_NAN = false; -#pragma omp parallel for schedule(static) default(none) shared(arg_count) firstprivate(float_data) reduction(min: loop_min) reduction(||: saw_NAN) if(arg_count >= EIDOS_OMPMIN_MIN_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_MIN_FLOAT); +#pragma omp parallel for schedule(static) default(none) shared(arg_count) firstprivate(float_data) reduction(min: loop_min) reduction(||: saw_NAN) if(arg_count >= EIDOS_OMPMIN_MIN_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < arg_count; ++value_index) { double temp = 
float_data[value_index]; @@ -623,7 +627,8 @@ EidosValue_SP Eidos_ExecuteFunction_pmax(const std::vector &p_arg // difference. I looked at the alignment of int0_data and int_result_data, and that is uncorrelated with the performance issue. // I haven't figured out how to confirm my hypothesis with profiling tools yet. It's a mystery. Leaving this comment here // for posterity. It's not a big deal in the grand scheme of things, but I would love to know what's going on. FIXME -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int0_data, int_result_data, y_singleton_value) if(parallel:x_count >= EIDOS_OMPMIN_PMAX_INT_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_PMAX_INT_1); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int0_data, int_result_data, y_singleton_value) if(parallel:x_count >= EIDOS_OMPMIN_PMAX_INT_1) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { int64_t int0_value = int0_data[value_index]; @@ -638,7 +643,8 @@ EidosValue_SP Eidos_ExecuteFunction_pmax(const std::vector &p_arg double * __restrict__ float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(float0_data, float_result_data, y_singleton_value) if(x_count >= EIDOS_OMPMIN_PMAX_FLOAT_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_PMAX_FLOAT_1); +#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(float0_data, float_result_data, y_singleton_value) if(x_count >= EIDOS_OMPMIN_PMAX_FLOAT_1) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { // if there is a NAN the result is always NAN @@ -681,7 +687,8 @@ EidosValue_SP Eidos_ExecuteFunction_pmax(const std::vector &p_arg int64_t * __restrict__ int_result_data = int_result->data(); result_SP = EidosValue_SP(int_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int0_data, int1_data, int_result_data) if(parallel:x_count >= EIDOS_OMPMIN_PMAX_INT_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_PMAX_INT_2); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int0_data, int1_data, int_result_data) if(parallel:x_count >= EIDOS_OMPMIN_PMAX_INT_2) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { int64_t int0_value = int0_data[value_index]; @@ -697,7 +704,8 @@ EidosValue_SP Eidos_ExecuteFunction_pmax(const std::vector &p_arg double * __restrict__ float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(float0_data, float1_data, float_result_data) if(x_count >= EIDOS_OMPMIN_PMAX_FLOAT_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_PMAX_FLOAT_2); +#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(float0_data, float1_data, float_result_data) if(x_count >= EIDOS_OMPMIN_PMAX_FLOAT_2) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { // if there is a NAN the result is always NAN @@ -807,7 +815,8 @@ EidosValue_SP Eidos_ExecuteFunction_pmin(const std::vector &p_arg int64_t * __restrict__ int_result_data = int_result->data(); result_SP = EidosValue_SP(int_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int0_data, 
int_result_data, y_singleton_value) if(parallel:x_count >= EIDOS_OMPMIN_PMIN_INT_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_PMIN_INT_1); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int0_data, int_result_data, y_singleton_value) if(parallel:x_count >= EIDOS_OMPMIN_PMIN_INT_1) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { int64_t int0_value = int0_data[value_index]; @@ -822,7 +831,8 @@ EidosValue_SP Eidos_ExecuteFunction_pmin(const std::vector &p_arg double * __restrict__ float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(float0_data, float_result_data, y_singleton_value) if(x_count >= EIDOS_OMPMIN_PMIN_FLOAT_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_PMIN_FLOAT_1); +#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(float0_data, float_result_data, y_singleton_value) if(x_count >= EIDOS_OMPMIN_PMIN_FLOAT_1) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { // if there is a NAN the result is always NAN @@ -865,7 +875,8 @@ EidosValue_SP Eidos_ExecuteFunction_pmin(const std::vector &p_arg int64_t * __restrict__ int_result_data = int_result->data(); result_SP = EidosValue_SP(int_result); -#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int0_data, int1_data, int_result_data) if(parallel:x_count >= EIDOS_OMPMIN_PMIN_INT_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_PMIN_INT_2); +#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int0_data, int1_data, int_result_data) if(parallel:x_count >= EIDOS_OMPMIN_PMIN_INT_2) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { int64_t int0_value = int0_data[value_index]; @@ -881,7 +892,8 @@ EidosValue_SP Eidos_ExecuteFunction_pmin(const std::vector &p_arg double * __restrict__ float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(float0_data, float1_data, float_result_data) if(x_count >= EIDOS_OMPMIN_PMIN_FLOAT_2) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_PMIN_FLOAT_2); +#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(float0_data, float1_data, float_result_data) if(x_count >= EIDOS_OMPMIN_PMIN_FLOAT_2) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { // if there is a NAN the result is always NAN diff --git a/eidos/eidos_functions_values.cpp b/eidos/eidos_functions_values.cpp index ed5f96385..74f0bcf82 100644 --- a/eidos/eidos_functions_values.cpp +++ b/eidos/eidos_functions_values.cpp @@ -383,7 +383,8 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_sample): allocation failed; you may need to raise the memory limit for SLiM." 
<< EidosTerminate(nullptr); } -#pragma omp parallel for schedule(static) default(none) shared(index_buffer, x_count) if(x_count > EIDOS_OMPMIN_SAMPLE_1) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_INDEX); +#pragma omp parallel for schedule(static) default(none) shared(index_buffer, x_count) if(x_count > EIDOS_OMPMIN_SAMPLE_INDEX) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) index_buffer[value_index] = value_index; } @@ -445,7 +446,8 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a int64_t *int_result_data = int_result->data(); result_SP = EidosValue_SP(int_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(discrete_draw, int_data, int_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_WR_INT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_WR_INT); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(discrete_draw, int_data, int_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_WR_INT) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -465,7 +467,8 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(discrete_draw, float_data, float_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_WR_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_WR_FLOAT); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(discrete_draw, float_data, float_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_WR_FLOAT) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -486,7 +489,8 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a EidosObject **object_result_data = object_result->data(); result_SP = EidosValue_SP(object_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(discrete_draw, object_data, object_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_WR_OBJECT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_WR_OBJECT); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(discrete_draw, object_data, object_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_WR_OBJECT) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -759,7 +763,8 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a int64_t *int_result_data = int_result->data(); result_SP = EidosValue_SP(int_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(int_data, int_result_data, x_count) if(sample_size >= EIDOS_OMPMIN_SAMPLE_R_INT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_R_INT); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(int_data, int_result_data, x_count) if(sample_size >= EIDOS_OMPMIN_SAMPLE_R_INT) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -778,7 +783,8 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a double *float_result_data = float_result->data(); result_SP = EidosValue_SP(float_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(float_data, float_result_data, x_count) if(sample_size >= EIDOS_OMPMIN_SAMPLE_R_FLOAT) + 
EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_R_FLOAT); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(float_data, float_result_data, x_count) if(sample_size >= EIDOS_OMPMIN_SAMPLE_R_FLOAT) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -798,7 +804,8 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a EidosObject **object_result_data = object_result->data(); result_SP = EidosValue_SP(object_result); -#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(object_data, object_result_data, x_count) if(sample_size >= EIDOS_OMPMIN_SAMPLE_R_OBJECT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_R_OBJECT); +#pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(object_data, object_result_data, x_count) if(sample_size >= EIDOS_OMPMIN_SAMPLE_R_OBJECT) num_threads(thread_count) { gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); @@ -1916,7 +1923,8 @@ EidosValue_SP Eidos_ExecuteFunction_match(const std::vector &p_ar EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_match): (internal error) function match() encountered a raise from its internal hash table (kValueInt); please report this." << EidosTerminate(nullptr); } -#pragma omp parallel for schedule(static) default(none) shared(x_count, fromValueToIndex) firstprivate(int_data0, int_result_data) if(x_count >= EIDOS_OMPMIN_MATCH_INT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_MATCH_INT); +#pragma omp parallel for schedule(static) default(none) shared(x_count, fromValueToIndex) firstprivate(int_data0, int_result_data) if(x_count >= EIDOS_OMPMIN_MATCH_INT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { auto find_iter = fromValueToIndex.find(int_data0[value_index]); @@ -1963,7 +1971,8 @@ EidosValue_SP Eidos_ExecuteFunction_match(const std::vector &p_ar EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_match): (internal error) function match() encountered a raise from its internal hash table (kValueFloat); please report this." << EidosTerminate(nullptr); } -#pragma omp parallel for schedule(static) default(none) shared(x_count, fromValueToIndex) firstprivate(float_data0, int_result_data) if(x_count >= EIDOS_OMPMIN_MATCH_FLOAT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_MATCH_FLOAT); +#pragma omp parallel for schedule(static) default(none) shared(x_count, fromValueToIndex) firstprivate(float_data0, int_result_data) if(x_count >= EIDOS_OMPMIN_MATCH_FLOAT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { auto find_iter = fromValueToIndex.find(float_data0[value_index]); @@ -2013,7 +2022,8 @@ EidosValue_SP Eidos_ExecuteFunction_match(const std::vector &p_ar } // Note that if string_vec0 were firstprivate, OpenMP would copy the data, NOT the reference!!! 
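// As an aside, a minimal standalone sketch of the pitfall this note describes
// (illustration only, not part of the patch; the variable names are hypothetical):
//
//	std::vector<std::string> strings = /* ... */;
//
//	// firstprivate gives each thread its own deep copy of the vector and its
//	// contents, which can be very expensive for a large string vector:
//	#pragma omp parallel for firstprivate(strings)
//
//	// shared hands every thread a reference to the one vector, which is safe
//	// here because the loop only reads from it:
//	#pragma omp parallel for shared(strings)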
-#pragma omp parallel for schedule(static) default(none) shared(x_count, fromValueToIndex, string_vec0) firstprivate(int_result_data) if(x_count >= EIDOS_OMPMIN_MATCH_STRING) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_MATCH_STRING); +#pragma omp parallel for schedule(static) default(none) shared(x_count, fromValueToIndex, string_vec0) firstprivate(int_result_data) if(x_count >= EIDOS_OMPMIN_MATCH_STRING) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { auto find_iter = fromValueToIndex.find(string_vec0[value_index]); @@ -2058,7 +2068,8 @@ EidosValue_SP Eidos_ExecuteFunction_match(const std::vector &p_ar EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_match): (internal error) function match() encountered a raise from its internal hash table (kValueObject); please report this." << EidosTerminate(nullptr); } -#pragma omp parallel for schedule(static) default(none) shared(x_count, fromValueToIndex) firstprivate(objelement_vec0, int_result_data) if(x_count >= EIDOS_OMPMIN_MATCH_OBJECT) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_MATCH_OBJECT); +#pragma omp parallel for schedule(static) default(none) shared(x_count, fromValueToIndex) firstprivate(objelement_vec0, int_result_data) if(x_count >= EIDOS_OMPMIN_MATCH_OBJECT) num_threads(thread_count) for (int value_index = 0; value_index < x_count; ++value_index) { auto find_iter = fromValueToIndex.find(objelement_vec0[value_index]); @@ -2586,7 +2597,8 @@ EidosValue_SP Eidos_ExecuteFunction_tabulate(const std::vector &p { maxbin = 0; // note that if the parallel loop runs, this gets reinitialized to the most negative number! -#pragma omp parallel for schedule(static) default(none) shared(value_count) firstprivate(int_data) reduction(max: maxbin) if(value_count >= EIDOS_OMPMIN_TABULATE) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_TABULATE); +#pragma omp parallel for schedule(static) default(none) shared(value_count) firstprivate(int_data) reduction(max: maxbin) if(value_count >= EIDOS_OMPMIN_TABULATE) num_threads(thread_count) for (int value_index = 0; value_index < value_count; ++value_index) { int64_t value = int_data[value_index]; @@ -2618,7 +2630,8 @@ EidosValue_SP Eidos_ExecuteFunction_tabulate(const std::vector &p // Our custom OpenMP implementation has some extra overhead that we want to avoid when running single-threaded // We make completely separate tallies in each thread, and then do a reduction at the end into result_data. // I tried some other approaches – per-thread locks, and atomic updates – and they were much slower. 
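// A minimal sketch of the per-thread-tally-plus-reduction pattern described
// above (a standalone illustration with hypothetical names, not the patch's
// exact code, which does its reduction somewhat differently):
//
//	#pragma omp parallel
//	{
//		// each thread tallies into its own zeroed buffer, with no contention
//		int64_t *perthread = (int64_t *)calloc(num_bins, sizeof(int64_t));
//
//		#pragma omp for nowait
//		for (int i = 0; i < value_count; ++i)
//			perthread[int_data[i]]++;
//
//		// one reduction pass at the end merges the buffers into result_data
//		for (int bin = 0; bin < num_bins; ++bin)
//		{
//			#pragma omp atomic
//			result_data[bin] += perthread[bin];
//		}
//
//		free(perthread);
//	}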
-#pragma omp parallel default(none) shared(value_count, num_bins) firstprivate(int_data, result_data) + EIDOS_THREAD_COUNT(gEidos_OMP_threads_TABULATE); +#pragma omp parallel default(none) shared(value_count, num_bins) firstprivate(int_data, result_data) num_threads(thread_count) { int64_t *perthread_tallies = (int64_t *)calloc(num_bins, sizeof(int64_t)); diff --git a/eidos/eidos_globals.cpp b/eidos/eidos_globals.cpp index 54ad2c667..d40bdc270 100644 --- a/eidos/eidos_globals.cpp +++ b/eidos/eidos_globals.cpp @@ -92,6 +92,7 @@ // declared in eidos_openmp.h, set in Eidos_WarmUpOpenMP() when parallel int gEidosMaxThreads = 1; int gEidosNumThreads = 1; +bool gEidosNumThreadsOverride = false; // Require 64-bit; apparently there are some issues on 32-bit, and nobody should be doing that anyway @@ -217,6 +218,336 @@ EidosValue_SP Eidos_ValueForCommandLineExpression(std::string &p_value_expressio #ifdef _OPENMP + +// Declarations for the number of threads we prefer to use for each parallel loop. +// These default values are all EIDOS_OMP_MAX_THREADS, to use the maximum number +// of threads in all cases. This is primarily useful for benchmarking; normally +// these default values get overwritten by _Eidos_SetDefaultOpenMPThreadCounts(). +int gEidos_OMP_threads_ABS_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_CEIL = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_EXP_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_FLOOR = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_LOG_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_LOG10_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_LOG2_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_ROUND = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SQRT_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SUM_INTEGER = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SUM_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SUM_LOGICAL = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_TRUNC = EIDOS_OMP_MAX_THREADS; + +int gEidos_OMP_threads_MAX_INT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_MAX_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_MIN_INT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_MIN_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_PMAX_INT_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_PMAX_INT_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_PMAX_FLOAT_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_PMAX_FLOAT_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_PMIN_INT_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_PMIN_INT_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_PMIN_FLOAT_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_PMIN_FLOAT_2 = EIDOS_OMP_MAX_THREADS; + +int gEidos_OMP_threads_MATCH_INT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_MATCH_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_MATCH_STRING = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_MATCH_OBJECT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SAMPLE_INDEX = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SAMPLE_R_INT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SAMPLE_R_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SAMPLE_R_OBJECT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SAMPLE_WR_INT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SAMPLE_WR_FLOAT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SAMPLE_WR_OBJECT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_TABULATE = EIDOS_OMP_MAX_THREADS; + +int gEidos_OMP_threads_CONTAINS_MARKER_MUT = EIDOS_OMP_MAX_THREADS; 
+int gEidos_OMP_threads_I_COUNT_OF_MUTS_OF_TYPE = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_G_COUNT_OF_MUTS_OF_TYPE = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_INDS_W_PEDIGREE_IDS = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RELATEDNESS = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SAMPLE_INDIVIDUALS_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SAMPLE_INDIVIDUALS_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SET_FITNESS_SCALE_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SET_FITNESS_SCALE_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SUM_OF_MUTS_OF_TYPE = EIDOS_OMP_MAX_THREADS; + +int gEidos_OMP_threads_DNORM_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_DNORM_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RBINOM_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RBINOM_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RBINOM_3 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RDUNIF_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RDUNIF_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RDUNIF_3 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_REXP_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_REXP_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RNORM_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RNORM_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RNORM_3 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RPOIS_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RPOIS_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RUNIF_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RUNIF_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_RUNIF_3 = EIDOS_OMP_MAX_THREADS; + +int gEidos_OMP_threads_POINT_IN_BOUNDS = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_POINT_PERIODIC = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_POINT_REFLECTED = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_POINT_STOPPED = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_POINT_UNIFORM = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SET_SPATIAL_POS_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SET_SPATIAL_POS_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SPATIAL_MAP_VALUE = EIDOS_OMP_MAX_THREADS; + +int gEidos_OMP_threads_CLIPPEDINTEGRAL_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_CLIPPEDINTEGRAL_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_CLIPPEDINTEGRAL_3 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_CLIPPEDINTEGRAL_4 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_CLIPPEDINTEGRAL_5 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_CLIPPEDINTEGRAL_6 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_DRAWBYSTRENGTH = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_INTNEIGHCOUNT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_LOCALPOPDENSITY = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_NEARESTINTNEIGH = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_NEARESTNEIGH = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_NEIGHCOUNT = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_TOTNEIGHSTRENGTH = EIDOS_OMP_MAX_THREADS; + +int gEidos_OMP_threads_AGE_INCR = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_DEFERRED_REPRO = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_FITNESS_ASEX_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_FITNESS_ASEX_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_FITNESS_SEX_F_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_FITNESS_SEX_F_2 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_FITNESS_SEX_M_1 = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_FITNESS_SEX_M_2 = EIDOS_OMP_MAX_THREADS; +int 
gEidos_OMP_threads_MIGRANT_CLEAR = EIDOS_OMP_MAX_THREADS; +int gEidos_OMP_threads_SURVIVAL = EIDOS_OMP_MAX_THREADS; + +void _Eidos_SetDefaultOpenMPThreadCounts(void) +{ + // These default values are determined empirically by a profile on a big machine; where the scaling curve + // tops out, that determines the default number of threads (since performance degrades beyond that point). + // Of course that will be hardware-specific, so these defaults are just guesses really. + gEidos_OMP_threads_ABS_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_CEIL = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_EXP_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_FLOOR = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_LOG_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_LOG10_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_LOG2_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_ROUND = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SQRT_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SUM_INTEGER = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SUM_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SUM_LOGICAL = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_TRUNC = EIDOS_OMP_MAX_THREADS; + + gEidos_OMP_threads_MAX_INT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_MAX_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_MIN_INT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_MIN_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_PMAX_INT_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_PMAX_INT_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_PMAX_FLOAT_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_PMAX_FLOAT_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_PMIN_INT_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_PMIN_INT_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_PMIN_FLOAT_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_PMIN_FLOAT_2 = EIDOS_OMP_MAX_THREADS; + + gEidos_OMP_threads_MATCH_INT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_MATCH_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_MATCH_STRING = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_MATCH_OBJECT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SAMPLE_INDEX = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SAMPLE_R_INT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SAMPLE_R_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SAMPLE_R_OBJECT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SAMPLE_WR_INT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SAMPLE_WR_FLOAT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SAMPLE_WR_OBJECT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_TABULATE = EIDOS_OMP_MAX_THREADS; + + gEidos_OMP_threads_CONTAINS_MARKER_MUT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_I_COUNT_OF_MUTS_OF_TYPE = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_G_COUNT_OF_MUTS_OF_TYPE = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_INDS_W_PEDIGREE_IDS = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RELATEDNESS = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SAMPLE_INDIVIDUALS_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SAMPLE_INDIVIDUALS_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SET_FITNESS_SCALE_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SET_FITNESS_SCALE_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SUM_OF_MUTS_OF_TYPE = EIDOS_OMP_MAX_THREADS; + + gEidos_OMP_threads_DNORM_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_DNORM_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RBINOM_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RBINOM_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RBINOM_3 = EIDOS_OMP_MAX_THREADS; + 
gEidos_OMP_threads_RDUNIF_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RDUNIF_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RDUNIF_3 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_REXP_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_REXP_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RNORM_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RNORM_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RNORM_3 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RPOIS_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RPOIS_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RUNIF_1 = 3; //EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RUNIF_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_RUNIF_3 = EIDOS_OMP_MAX_THREADS; + + gEidos_OMP_threads_POINT_IN_BOUNDS = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_POINT_PERIODIC = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_POINT_REFLECTED = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_POINT_STOPPED = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_POINT_UNIFORM = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SET_SPATIAL_POS_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SET_SPATIAL_POS_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SPATIAL_MAP_VALUE = EIDOS_OMP_MAX_THREADS; + + gEidos_OMP_threads_CLIPPEDINTEGRAL_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_CLIPPEDINTEGRAL_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_CLIPPEDINTEGRAL_3 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_CLIPPEDINTEGRAL_4 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_CLIPPEDINTEGRAL_5 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_CLIPPEDINTEGRAL_6 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_DRAWBYSTRENGTH = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_INTNEIGHCOUNT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_LOCALPOPDENSITY = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_NEARESTINTNEIGH = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_NEARESTNEIGH = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_NEIGHCOUNT = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_TOTNEIGHSTRENGTH = EIDOS_OMP_MAX_THREADS; + + gEidos_OMP_threads_AGE_INCR = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_DEFERRED_REPRO = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_FITNESS_ASEX_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_FITNESS_ASEX_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_FITNESS_SEX_F_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_FITNESS_SEX_F_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_FITNESS_SEX_M_1 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_FITNESS_SEX_M_2 = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_MIGRANT_CLEAR = EIDOS_OMP_MAX_THREADS; + gEidos_OMP_threads_SURVIVAL = EIDOS_OMP_MAX_THREADS; + + // Always clip the above counts to gEidosMaxThreads + _Eidos_ClipOpenMPThreadCounts(); +} + +void _Eidos_ClipOpenMPThreadCounts(void) +{ + // This clips all thread-count ivars to gEidosMaxThreads, so they can be used at runtime without checking + gEidos_OMP_threads_ABS_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_ABS_FLOAT); + gEidos_OMP_threads_CEIL = std::min(gEidosMaxThreads, gEidos_OMP_threads_CEIL); + gEidos_OMP_threads_EXP_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_EXP_FLOAT); + gEidos_OMP_threads_FLOOR = std::min(gEidosMaxThreads, gEidos_OMP_threads_FLOOR); + gEidos_OMP_threads_LOG_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_LOG_FLOAT); + gEidos_OMP_threads_LOG10_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_LOG10_FLOAT); + gEidos_OMP_threads_LOG2_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_LOG2_FLOAT); + gEidos_OMP_threads_ROUND = std::min(gEidosMaxThreads, 
gEidos_OMP_threads_ROUND); + gEidos_OMP_threads_SQRT_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_SQRT_FLOAT); + gEidos_OMP_threads_SUM_INTEGER = std::min(gEidosMaxThreads, gEidos_OMP_threads_SUM_INTEGER); + gEidos_OMP_threads_SUM_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_SUM_FLOAT); + gEidos_OMP_threads_SUM_LOGICAL = std::min(gEidosMaxThreads, gEidos_OMP_threads_SUM_LOGICAL); + gEidos_OMP_threads_TRUNC = std::min(gEidosMaxThreads, gEidos_OMP_threads_TRUNC); + + gEidos_OMP_threads_MAX_INT = std::min(gEidosMaxThreads, gEidos_OMP_threads_MAX_INT); + gEidos_OMP_threads_MAX_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_MAX_FLOAT); + gEidos_OMP_threads_MIN_INT = std::min(gEidosMaxThreads, gEidos_OMP_threads_MIN_INT); + gEidos_OMP_threads_MIN_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_MIN_FLOAT); + gEidos_OMP_threads_PMAX_INT_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_PMAX_INT_1); + gEidos_OMP_threads_PMAX_INT_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_PMAX_INT_2); + gEidos_OMP_threads_PMAX_FLOAT_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_PMAX_FLOAT_1); + gEidos_OMP_threads_PMAX_FLOAT_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_PMAX_FLOAT_2); + gEidos_OMP_threads_PMIN_INT_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_PMIN_INT_1); + gEidos_OMP_threads_PMIN_INT_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_PMIN_INT_2); + gEidos_OMP_threads_PMIN_FLOAT_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_PMIN_FLOAT_1); + gEidos_OMP_threads_PMIN_FLOAT_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_PMIN_FLOAT_2); + + gEidos_OMP_threads_MATCH_INT = std::min(gEidosMaxThreads, gEidos_OMP_threads_MATCH_INT); + gEidos_OMP_threads_MATCH_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_MATCH_FLOAT); + gEidos_OMP_threads_MATCH_STRING = std::min(gEidosMaxThreads, gEidos_OMP_threads_MATCH_STRING); + gEidos_OMP_threads_MATCH_OBJECT = std::min(gEidosMaxThreads, gEidos_OMP_threads_MATCH_OBJECT); + gEidos_OMP_threads_SAMPLE_INDEX = std::min(gEidosMaxThreads, gEidos_OMP_threads_SAMPLE_INDEX); + gEidos_OMP_threads_SAMPLE_R_INT = std::min(gEidosMaxThreads, gEidos_OMP_threads_SAMPLE_R_INT); + gEidos_OMP_threads_SAMPLE_R_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_SAMPLE_R_FLOAT); + gEidos_OMP_threads_SAMPLE_R_OBJECT = std::min(gEidosMaxThreads, gEidos_OMP_threads_SAMPLE_R_OBJECT); + gEidos_OMP_threads_SAMPLE_WR_INT = std::min(gEidosMaxThreads, gEidos_OMP_threads_SAMPLE_WR_INT); + gEidos_OMP_threads_SAMPLE_WR_FLOAT = std::min(gEidosMaxThreads, gEidos_OMP_threads_SAMPLE_WR_FLOAT); + gEidos_OMP_threads_SAMPLE_WR_OBJECT = std::min(gEidosMaxThreads, gEidos_OMP_threads_SAMPLE_WR_OBJECT); + gEidos_OMP_threads_TABULATE = std::min(gEidosMaxThreads, gEidos_OMP_threads_TABULATE); + + gEidos_OMP_threads_CONTAINS_MARKER_MUT = std::min(gEidosMaxThreads, gEidos_OMP_threads_CONTAINS_MARKER_MUT); + gEidos_OMP_threads_I_COUNT_OF_MUTS_OF_TYPE = std::min(gEidosMaxThreads, gEidos_OMP_threads_I_COUNT_OF_MUTS_OF_TYPE); + gEidos_OMP_threads_G_COUNT_OF_MUTS_OF_TYPE = std::min(gEidosMaxThreads, gEidos_OMP_threads_G_COUNT_OF_MUTS_OF_TYPE); + gEidos_OMP_threads_INDS_W_PEDIGREE_IDS = std::min(gEidosMaxThreads, gEidos_OMP_threads_INDS_W_PEDIGREE_IDS); + gEidos_OMP_threads_RELATEDNESS = std::min(gEidosMaxThreads, gEidos_OMP_threads_RELATEDNESS); + gEidos_OMP_threads_SAMPLE_INDIVIDUALS_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_SAMPLE_INDIVIDUALS_1); + gEidos_OMP_threads_SAMPLE_INDIVIDUALS_2 = std::min(gEidosMaxThreads, 
gEidos_OMP_threads_SAMPLE_INDIVIDUALS_2); + gEidos_OMP_threads_SET_FITNESS_SCALE_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_SET_FITNESS_SCALE_1); + gEidos_OMP_threads_SET_FITNESS_SCALE_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_SET_FITNESS_SCALE_2); + gEidos_OMP_threads_SUM_OF_MUTS_OF_TYPE = std::min(gEidosMaxThreads, gEidos_OMP_threads_SUM_OF_MUTS_OF_TYPE); + + gEidos_OMP_threads_DNORM_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_DNORM_1); + gEidos_OMP_threads_DNORM_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_DNORM_2); + gEidos_OMP_threads_RBINOM_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RBINOM_1); + gEidos_OMP_threads_RBINOM_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RBINOM_2); + gEidos_OMP_threads_RBINOM_3 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RBINOM_3); + gEidos_OMP_threads_RDUNIF_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RDUNIF_1); + gEidos_OMP_threads_RDUNIF_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RDUNIF_2); + gEidos_OMP_threads_RDUNIF_3 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RDUNIF_3); + gEidos_OMP_threads_REXP_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_REXP_1); + gEidos_OMP_threads_REXP_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_REXP_2); + gEidos_OMP_threads_RNORM_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RNORM_1); + gEidos_OMP_threads_RNORM_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RNORM_2); + gEidos_OMP_threads_RNORM_3 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RNORM_3); + gEidos_OMP_threads_RPOIS_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RPOIS_1); + gEidos_OMP_threads_RPOIS_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RPOIS_2); + gEidos_OMP_threads_RUNIF_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RUNIF_1); + gEidos_OMP_threads_RUNIF_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RUNIF_2); + gEidos_OMP_threads_RUNIF_3 = std::min(gEidosMaxThreads, gEidos_OMP_threads_RUNIF_3); + + gEidos_OMP_threads_POINT_IN_BOUNDS = std::min(gEidosMaxThreads, gEidos_OMP_threads_POINT_IN_BOUNDS); + gEidos_OMP_threads_POINT_PERIODIC = std::min(gEidosMaxThreads, gEidos_OMP_threads_POINT_PERIODIC); + gEidos_OMP_threads_POINT_REFLECTED = std::min(gEidosMaxThreads, gEidos_OMP_threads_POINT_REFLECTED); + gEidos_OMP_threads_POINT_STOPPED = std::min(gEidosMaxThreads, gEidos_OMP_threads_POINT_STOPPED); + gEidos_OMP_threads_POINT_UNIFORM = std::min(gEidosMaxThreads, gEidos_OMP_threads_POINT_UNIFORM); + gEidos_OMP_threads_SET_SPATIAL_POS_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_SET_SPATIAL_POS_1); + gEidos_OMP_threads_SET_SPATIAL_POS_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_SET_SPATIAL_POS_2); + gEidos_OMP_threads_SPATIAL_MAP_VALUE = std::min(gEidosMaxThreads, gEidos_OMP_threads_SPATIAL_MAP_VALUE); + + gEidos_OMP_threads_CLIPPEDINTEGRAL_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_CLIPPEDINTEGRAL_1); + gEidos_OMP_threads_CLIPPEDINTEGRAL_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_CLIPPEDINTEGRAL_2); + gEidos_OMP_threads_CLIPPEDINTEGRAL_3 = std::min(gEidosMaxThreads, gEidos_OMP_threads_CLIPPEDINTEGRAL_3); + gEidos_OMP_threads_CLIPPEDINTEGRAL_4 = std::min(gEidosMaxThreads, gEidos_OMP_threads_CLIPPEDINTEGRAL_4); + gEidos_OMP_threads_CLIPPEDINTEGRAL_5 = std::min(gEidosMaxThreads, gEidos_OMP_threads_CLIPPEDINTEGRAL_5); + gEidos_OMP_threads_CLIPPEDINTEGRAL_6 = std::min(gEidosMaxThreads, gEidos_OMP_threads_CLIPPEDINTEGRAL_6); + gEidos_OMP_threads_DRAWBYSTRENGTH = std::min(gEidosMaxThreads, gEidos_OMP_threads_DRAWBYSTRENGTH); + 
gEidos_OMP_threads_INTNEIGHCOUNT = std::min(gEidosMaxThreads, gEidos_OMP_threads_INTNEIGHCOUNT); + gEidos_OMP_threads_LOCALPOPDENSITY = std::min(gEidosMaxThreads, gEidos_OMP_threads_LOCALPOPDENSITY); + gEidos_OMP_threads_NEARESTINTNEIGH = std::min(gEidosMaxThreads, gEidos_OMP_threads_NEARESTINTNEIGH); + gEidos_OMP_threads_NEARESTNEIGH = std::min(gEidosMaxThreads, gEidos_OMP_threads_NEARESTNEIGH); + gEidos_OMP_threads_NEIGHCOUNT = std::min(gEidosMaxThreads, gEidos_OMP_threads_NEIGHCOUNT); + gEidos_OMP_threads_TOTNEIGHSTRENGTH = std::min(gEidosMaxThreads, gEidos_OMP_threads_TOTNEIGHSTRENGTH); + + gEidos_OMP_threads_AGE_INCR = std::min(gEidosMaxThreads, gEidos_OMP_threads_AGE_INCR); + gEidos_OMP_threads_DEFERRED_REPRO = std::min(gEidosMaxThreads, gEidos_OMP_threads_DEFERRED_REPRO); + gEidos_OMP_threads_FITNESS_ASEX_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_FITNESS_ASEX_1); + gEidos_OMP_threads_FITNESS_ASEX_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_FITNESS_ASEX_2); + gEidos_OMP_threads_FITNESS_SEX_F_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_FITNESS_SEX_F_1); + gEidos_OMP_threads_FITNESS_SEX_F_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_FITNESS_SEX_F_2); + gEidos_OMP_threads_FITNESS_SEX_M_1 = std::min(gEidosMaxThreads, gEidos_OMP_threads_FITNESS_SEX_M_1); + gEidos_OMP_threads_FITNESS_SEX_M_2 = std::min(gEidosMaxThreads, gEidos_OMP_threads_FITNESS_SEX_M_2); + gEidos_OMP_threads_MIGRANT_CLEAR = std::min(gEidosMaxThreads, gEidos_OMP_threads_MIGRANT_CLEAR); + gEidos_OMP_threads_SURVIVAL = std::min(gEidosMaxThreads, gEidos_OMP_threads_SURVIVAL); +} + void Eidos_WarmUpOpenMP(std::ostream *outstream, bool changed_max_thread_count, int new_max_thread_count, bool active_threads) { // When running under OpenMP, print a log, and also set values for the OpenMP ICV's that we want to guarantee @@ -255,6 +586,15 @@ void Eidos_WarmUpOpenMP(std::ostream *outstream, bool changed_max_thread_count, // Get the maximum number of threads in effect, which might be different from the number requested gEidosMaxThreads = omp_get_max_threads(); gEidosNumThreads = gEidosMaxThreads; + gEidosNumThreadsOverride = false; + +#if USE_OMP_LIMITS + // If we are supposed to use our built-in default OMP limits, set them for our task thread counts + _Eidos_SetDefaultOpenMPThreadCounts(); +#else + // Enforce gEidosMaxThreads for the thread count ivars that govern how many threads various loops will use + _Eidos_ClipOpenMPThreadCounts(); +#endif // Write some diagnostic output about our configuration. If the verbosity level is 0, outstream will be nullptr. if (outstream) diff --git a/eidos/eidos_globals.h b/eidos/eidos_globals.h index 2c78ec596..5a0578888 100644 --- a/eidos/eidos_globals.h +++ b/eidos/eidos_globals.h @@ -61,6 +61,8 @@ class EidosToken; // These should be called once at startup to give Eidos an opportunity to initialize static state #ifdef _OPENMP +void _Eidos_SetDefaultOpenMPThreadCounts(void); +void _Eidos_ClipOpenMPThreadCounts(void); void Eidos_WarmUpOpenMP(std::ostream *outstream, bool changed_max_thread_count, int new_max_thread_count, bool active_threads); #endif diff --git a/eidos/eidos_openmp.h b/eidos/eidos_openmp.h index 53fec1ab3..ff876c2c9 100644 --- a/eidos/eidos_openmp.h +++ b/eidos/eidos_openmp.h @@ -61,14 +61,23 @@ #include +// This is the largest number of threads we allow the user to set. There is no hard limit in the code; +// this is primarily just to prevent people from doing anything stupid. 
+#define EIDOS_OMP_MAX_THREADS 1024
+
 // This is a cached result from omp_get_max_threads() after warmup, providing the final number of threads that we will
 // be using (maximum) in parallel regions. This can be used to preallocate per-thread data structures.
 extern int gEidosMaxThreads;
 
 // This is the number of threads that will be used in the next parallel region to execute, as set by the Eidos
 // function parallelSetNumThreads(). This will generally be equal to omp_get_max_threads(). It will be clamped
-// to the interval [1, gEidosMaxThreads].
+// to the interval [1, gEidosMaxThreads]. If it has been set explicitly, gEidosNumThreadsOverride is set to true;
+// if not, gEidosNumThreadsOverride is false. This allows Eidos to distinguish the case in which gEidosNumThreads ==
+// gEidosMaxThreads simply because no count has been set (gEidosNumThreadsOverride == false), indicating a desire to
+// receive the default number of threads, from the case in which it has been explicitly set to gEidosMaxThreads
+// (gEidosNumThreadsOverride == true), indicating a desire to force the maximum number of threads to be used even
+// when Eidos would normally choose to use fewer.
 extern int gEidosNumThreads;
+extern bool gEidosNumThreadsOverride;
 
 // We want to use SIGTRAP to catch problems in the debugger in a few key spots, but it doesn't exist on Windows.
@@ -121,6 +130,15 @@ extern int gEidosNumThreads;
 #endif
 
+// This macro calculates the correct number of threads to use for a given loop: it uses the thread count set with
+// parallelSetNumThreads() if one has been set explicitly, and otherwise uses the thread count provided by (x),
+// which is expected to be <= gEidosMaxThreads.
+#ifdef _OPENMP
+#define EIDOS_THREAD_COUNT(x) int thread_count = (gEidosNumThreadsOverride ? gEidosNumThreads : (x))
+#else
+#define EIDOS_THREAD_COUNT(x)
+#endif
+
+
 #ifdef _OPENMP
 // Check that the OpenMP version supported by the compiler suffices. Note that _OPENMP is formatted as a "YYYYMM" date of
@@ -218,7 +236,11 @@ class EidosDebugLock
 // here a bit; a slim_openmp.h header could be created to alleviate that if it's a problem, but it seems harmless for now.
 // These counts are collected in one place to make it easier to optimize their values in a pre-build optimization pass.
-#if 1
+// Set this flag to 0 to run parallel loops with the maximum number of threads, ignoring both the per-task default
+// thread counts and the task-size thresholds (the user's parallelSetNumThreads() setting is still honored, however).
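+// With USE_OMP_LIMITS at 0, the EIDOS_OMPMIN_* thresholds below are all defined as 0 and the gEidos_OMP_threads_*
+// counts keep their maximal defaults, so even very small vectors are processed in parallel; this is the
+// non-production configuration.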
+#define USE_OMP_LIMITS 1 + +#if USE_OMP_LIMITS // This set of minimum counts is for production code // Eidos: math functions @@ -255,7 +277,7 @@ class EidosDebugLock #define EIDOS_OMPMIN_MATCH_FLOAT 2000 #define EIDOS_OMPMIN_MATCH_STRING 2000 #define EIDOS_OMPMIN_MATCH_OBJECT 2000 -#define EIDOS_OMPMIN_SAMPLE_1 2000 +#define EIDOS_OMPMIN_SAMPLE_INDEX 2000 #define EIDOS_OMPMIN_SAMPLE_R_INT 2000 #define EIDOS_OMPMIN_SAMPLE_R_FLOAT 2000 #define EIDOS_OMPMIN_SAMPLE_R_OBJECT 2000 @@ -272,8 +294,8 @@ class EidosDebugLock #define EIDOS_OMPMIN_RELATEDNESS 2000 #define EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_1 2000 #define EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_2 2000 -#define EIDOS_OMPMIN_SET_FITNESS_S1 900 -#define EIDOS_OMPMIN_SET_FITNESS_S2 1500 +#define EIDOS_OMPMIN_SET_FITNESS_SCALE_1 900 +#define EIDOS_OMPMIN_SET_FITNESS_SCALE_2 1500 #define EIDOS_OMPMIN_SUM_OF_MUTS_OF_TYPE 2 // Distribution draws and related @@ -322,7 +344,7 @@ class EidosDebugLock #define EIDOS_OMPMIN_TOTNEIGHSTRENGTH 10 // SLiM core -#define EIDOS_OMPMIN_AGEINC 10000 +#define EIDOS_OMPMIN_AGE_INCR 10000 #define EIDOS_OMPMIN_DEFERRED_REPRO 100 #define EIDOS_OMPMIN_FITNESS_ASEX_1 10000 #define EIDOS_OMPMIN_FITNESS_ASEX_2 10000 @@ -330,8 +352,7 @@ class EidosDebugLock #define EIDOS_OMPMIN_FITNESS_SEX_F_2 10000 #define EIDOS_OMPMIN_FITNESS_SEX_M_1 10000 #define EIDOS_OMPMIN_FITNESS_SEX_M_2 10000 -#define EIDOS_OMPMIN_MIGRANTCLEAR 10000 -#define EIDOS_OMPMIN_MUTTALLY 100000 +#define EIDOS_OMPMIN_MIGRANT_CLEAR 10000 #define EIDOS_OMPMIN_SURVIVAL 10000 #else @@ -373,7 +394,7 @@ class EidosDebugLock #define EIDOS_OMPMIN_MATCH_FLOAT 0 #define EIDOS_OMPMIN_MATCH_STRING 0 #define EIDOS_OMPMIN_MATCH_OBJECT 0 -#define EIDOS_OMPMIN_SAMPLE_1 0 +#define EIDOS_OMPMIN_SAMPLE_INDEX 0 #define EIDOS_OMPMIN_SAMPLE_R_INT 0 #define EIDOS_OMPMIN_SAMPLE_R_FLOAT 0 #define EIDOS_OMPMIN_SAMPLE_R_OBJECT 0 @@ -390,8 +411,8 @@ class EidosDebugLock #define EIDOS_OMPMIN_RELATEDNESS 0 #define EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_1 0 #define EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_2 0 -#define EIDOS_OMPMIN_SET_FITNESS_S1 0 -#define EIDOS_OMPMIN_SET_FITNESS_S2 0 +#define EIDOS_OMPMIN_SET_FITNESS_SCALE_1 0 +#define EIDOS_OMPMIN_SET_FITNESS_SCALE_2 0 #define EIDOS_OMPMIN_SUM_OF_MUTS_OF_TYPE 0 // Distribution draws and related @@ -440,7 +461,7 @@ class EidosDebugLock #define EIDOS_OMPMIN_TOTNEIGHSTRENGTH 0 // SLiM core -#define EIDOS_OMPMIN_AGEINC 0 +#define EIDOS_OMPMIN_AGE_INCR 0 #define EIDOS_OMPMIN_DEFERRED_REPRO 0 #define EIDOS_OMPMIN_FITNESS_ASEX_1 0 #define EIDOS_OMPMIN_FITNESS_ASEX_2 0 @@ -448,12 +469,119 @@ class EidosDebugLock #define EIDOS_OMPMIN_FITNESS_SEX_F_2 0 #define EIDOS_OMPMIN_FITNESS_SEX_M_1 0 #define EIDOS_OMPMIN_FITNESS_SEX_M_2 0 -#define EIDOS_OMPMIN_MIGRANTCLEAR 0 -#define EIDOS_OMPMIN_MUTTALLY 0 +#define EIDOS_OMPMIN_MIGRANT_CLEAR 0 #define EIDOS_OMPMIN_SURVIVAL 0 #endif + +// Here we declare variables that hold the number of threads we prefer to use for each parallel loop. +// These have default values, which can be overridden with parallelSetTaskThreadCounts(). 
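+// At a parallel loop's call site, these per-task counts are consumed by the EIDOS_THREAD_COUNT() macro defined
+// above. As a sketch only (the names "sum", "count", and "float_data" here are illustrative, not taken from the
+// actual loops), a typical call site would look something like this:
+//
+//	EIDOS_THREAD_COUNT(gEidos_OMP_threads_SUM_FLOAT);
+//	#pragma omp parallel for schedule(static) reduction(+: sum) num_threads(thread_count) if(count >= EIDOS_OMPMIN_SUM_FLOAT)
+//	for (int64_t i = 0; i < count; ++i)
+//		sum += float_data[i];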
+extern int gEidos_OMP_threads_ABS_FLOAT; +extern int gEidos_OMP_threads_CEIL; +extern int gEidos_OMP_threads_EXP_FLOAT; +extern int gEidos_OMP_threads_FLOOR; +extern int gEidos_OMP_threads_LOG_FLOAT; +extern int gEidos_OMP_threads_LOG10_FLOAT; +extern int gEidos_OMP_threads_LOG2_FLOAT; +extern int gEidos_OMP_threads_ROUND; +extern int gEidos_OMP_threads_SQRT_FLOAT; +extern int gEidos_OMP_threads_SUM_INTEGER; +extern int gEidos_OMP_threads_SUM_FLOAT; +extern int gEidos_OMP_threads_SUM_LOGICAL; +extern int gEidos_OMP_threads_TRUNC; + +extern int gEidos_OMP_threads_MAX_INT; +extern int gEidos_OMP_threads_MAX_FLOAT; +extern int gEidos_OMP_threads_MIN_INT; +extern int gEidos_OMP_threads_MIN_FLOAT; +extern int gEidos_OMP_threads_PMAX_INT_1; +extern int gEidos_OMP_threads_PMAX_INT_2; +extern int gEidos_OMP_threads_PMAX_FLOAT_1; +extern int gEidos_OMP_threads_PMAX_FLOAT_2; +extern int gEidos_OMP_threads_PMIN_INT_1; +extern int gEidos_OMP_threads_PMIN_INT_2; +extern int gEidos_OMP_threads_PMIN_FLOAT_1; +extern int gEidos_OMP_threads_PMIN_FLOAT_2; + +extern int gEidos_OMP_threads_MATCH_INT; +extern int gEidos_OMP_threads_MATCH_FLOAT; +extern int gEidos_OMP_threads_MATCH_STRING; +extern int gEidos_OMP_threads_MATCH_OBJECT; +extern int gEidos_OMP_threads_SAMPLE_INDEX; +extern int gEidos_OMP_threads_SAMPLE_R_INT; +extern int gEidos_OMP_threads_SAMPLE_R_FLOAT; +extern int gEidos_OMP_threads_SAMPLE_R_OBJECT; +extern int gEidos_OMP_threads_SAMPLE_WR_INT; +extern int gEidos_OMP_threads_SAMPLE_WR_FLOAT; +extern int gEidos_OMP_threads_SAMPLE_WR_OBJECT; +extern int gEidos_OMP_threads_TABULATE; + +extern int gEidos_OMP_threads_CONTAINS_MARKER_MUT; +extern int gEidos_OMP_threads_I_COUNT_OF_MUTS_OF_TYPE; +extern int gEidos_OMP_threads_G_COUNT_OF_MUTS_OF_TYPE; +extern int gEidos_OMP_threads_INDS_W_PEDIGREE_IDS; +extern int gEidos_OMP_threads_RELATEDNESS; +extern int gEidos_OMP_threads_SAMPLE_INDIVIDUALS_1; +extern int gEidos_OMP_threads_SAMPLE_INDIVIDUALS_2; +extern int gEidos_OMP_threads_SET_FITNESS_SCALE_1; +extern int gEidos_OMP_threads_SET_FITNESS_SCALE_2; +extern int gEidos_OMP_threads_SUM_OF_MUTS_OF_TYPE; + +extern int gEidos_OMP_threads_DNORM_1; +extern int gEidos_OMP_threads_DNORM_2; +extern int gEidos_OMP_threads_RBINOM_1; +extern int gEidos_OMP_threads_RBINOM_2; +extern int gEidos_OMP_threads_RBINOM_3; +extern int gEidos_OMP_threads_RDUNIF_1; +extern int gEidos_OMP_threads_RDUNIF_2; +extern int gEidos_OMP_threads_RDUNIF_3; +extern int gEidos_OMP_threads_REXP_1; +extern int gEidos_OMP_threads_REXP_2; +extern int gEidos_OMP_threads_RNORM_1; +extern int gEidos_OMP_threads_RNORM_2; +extern int gEidos_OMP_threads_RNORM_3; +extern int gEidos_OMP_threads_RPOIS_1; +extern int gEidos_OMP_threads_RPOIS_2; +extern int gEidos_OMP_threads_RUNIF_1; +extern int gEidos_OMP_threads_RUNIF_2; +extern int gEidos_OMP_threads_RUNIF_3; + +extern int gEidos_OMP_threads_POINT_IN_BOUNDS; +extern int gEidos_OMP_threads_POINT_PERIODIC; +extern int gEidos_OMP_threads_POINT_REFLECTED; +extern int gEidos_OMP_threads_POINT_STOPPED; +extern int gEidos_OMP_threads_POINT_UNIFORM; +extern int gEidos_OMP_threads_SET_SPATIAL_POS_1; +extern int gEidos_OMP_threads_SET_SPATIAL_POS_2; +extern int gEidos_OMP_threads_SPATIAL_MAP_VALUE; + +extern int gEidos_OMP_threads_CLIPPEDINTEGRAL_1; +extern int gEidos_OMP_threads_CLIPPEDINTEGRAL_2; +extern int gEidos_OMP_threads_CLIPPEDINTEGRAL_3; +extern int gEidos_OMP_threads_CLIPPEDINTEGRAL_4; +extern int gEidos_OMP_threads_CLIPPEDINTEGRAL_5; +extern int gEidos_OMP_threads_CLIPPEDINTEGRAL_6; +extern int 
gEidos_OMP_threads_DRAWBYSTRENGTH; +extern int gEidos_OMP_threads_INTNEIGHCOUNT; +extern int gEidos_OMP_threads_LOCALPOPDENSITY; +extern int gEidos_OMP_threads_NEARESTINTNEIGH; +extern int gEidos_OMP_threads_NEARESTNEIGH; +extern int gEidos_OMP_threads_NEIGHCOUNT; +extern int gEidos_OMP_threads_TOTNEIGHSTRENGTH; + +extern int gEidos_OMP_threads_AGE_INCR; +extern int gEidos_OMP_threads_DEFERRED_REPRO; +extern int gEidos_OMP_threads_FITNESS_ASEX_1; +extern int gEidos_OMP_threads_FITNESS_ASEX_2; +extern int gEidos_OMP_threads_FITNESS_SEX_F_1; +extern int gEidos_OMP_threads_FITNESS_SEX_F_2; +extern int gEidos_OMP_threads_FITNESS_SEX_M_1; +extern int gEidos_OMP_threads_FITNESS_SEX_M_2; +extern int gEidos_OMP_threads_MIGRANT_CLEAR; +extern int gEidos_OMP_threads_SURVIVAL; + + #else /* ifdef _OPENMP */ // No OpenMP. This is the "stub header" from the OpenMP 4.5 specification. I've added "inline" in various spots to diff --git a/eidos/eidos_test_functions_other.cpp b/eidos/eidos_test_functions_other.cpp index 905cc6c4f..b036610a8 100644 --- a/eidos/eidos_test_functions_other.cpp +++ b/eidos/eidos_test_functions_other.cpp @@ -1701,11 +1701,13 @@ void _RunUserDefinedFunctionTests(void) // Note that we ensure that we are using the maximum number of threads at start & end gEidosNumThreads = gEidosMaxThreads; + gEidosNumThreadsOverride = false; omp_set_num_threads(gEidosMaxThreads); EidosAssertScriptSuccess_L(test_string_fixed, true); gEidosNumThreads = gEidosMaxThreads; + gEidosNumThreadsOverride = false; omp_set_num_threads(gEidosMaxThreads); } } diff --git a/eidostool/main.cpp b/eidostool/main.cpp index 53dc625e5..d8ea5c5e3 100644 --- a/eidostool/main.cpp +++ b/eidostool/main.cpp @@ -118,9 +118,9 @@ int main(int argc, const char * argv[]) max_thread_count = count; changed_max_thread_count = true; - if ((max_thread_count < 1) || (max_thread_count > 1024)) + if ((max_thread_count < 1) || (max_thread_count > EIDOS_OMP_MAX_THREADS)) { - std::cout << "The -maxthreads command-line option enforces a range of [1, 1024]." << std::endl; + std::cout << "The -maxthreads command-line option enforces a range of [1, " << EIDOS_OMP_MAX_THREADS << "]." << std::endl; exit(EXIT_FAILURE); }
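+			// Note that EIDOS_OMP_MAX_THREADS is only a sanity bound on what may be requested here; the
+			// effective maximum at runtime is still whatever omp_get_max_threads() reports during warmup.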