Skip to content

Commit

Permalink
switch to binary search for findInterval() for performance
Browse files Browse the repository at this point in the history
  • Loading branch information
bhaller committed Dec 5, 2023
1 parent 0c5c98f commit 4b4fcff
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 0 deletions.
1 change: 1 addition & 0 deletions VERSIONS
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ development head (in the master branch):
add xy, xz, yz, and xyz properties to Individual that allow joint access to the x/y/z properties, for easier spatiality operations in complex spatial models (and to support some unit testing additions)
fixed a variety of memory leaks and extended the unit testing for leaks considerably; should be pretty clean in its memory usage now
switch chapters 15 and 16 (nonWF and spatial models) and reorganize/renumber to rationalize the manual organization; many recipe numbers changed
speed up findInterval() by using binary search instead of linear search - O(log n) instead of O(n) to find the interval for one element, with n being the number of intervals (i.e., the length of vec)


version 4.0.1 (Eidos version 3.0.1):
Expand Down
95 changes: 95 additions & 0 deletions eidos/eidos_functions_distributions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@
// (integer)findInterval(numeric x, numeric vec, [logical$ rightmostClosed = F], [logical$ allInside = F])
EidosValue_SP Eidos_ExecuteFunction_findInterval(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
{
// Switching to using binary search for this algorithm, but I want to keep the old code around in case it is needed
#define EIDOS_FIND_INTERVAL_USE_BINARY_SEARCH 1

EidosValue *arg_x = p_arguments[0].get();
EidosValue *arg_vec = p_arguments[1].get();
EidosValue *arg_rightmostClosed = p_arguments[2].get();
Expand All @@ -72,7 +75,11 @@ EidosValue_SP Eidos_ExecuteFunction_findInterval(const std::vector<EidosValue_SP

bool rightmostClosed = arg_rightmostClosed->LogicalAtIndex(0, nullptr);
bool allInside = arg_allInside->LogicalAtIndex(0, nullptr);

#if !(EIDOS_FIND_INTERVAL_USE_BINARY_SEARCH)
// Used by the old linear-search algorithm
int initial_x_result = allInside ? 0 : -1;
#endif

EidosValue_Int_vector *int_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Int_vector())->resize_no_initialize(x_count);

Expand All @@ -92,6 +99,27 @@ EidosValue_SP Eidos_ExecuteFunction_findInterval(const std::vector<EidosValue_SP
const int64_t *x_data = arg_x->IsSingleton() ? &((EidosValue_Int_singleton *)arg_x)->IntValue_Mutable() : ((EidosValue_Int_vector *)arg_x)->data();

// Find intervals for integer vec, integer x

#if EIDOS_FIND_INTERVAL_USE_BINARY_SEARCH
// binary search - testing indicates this is pretty much always as fast or faster than linear search
for (int x_index = 0; x_index < x_count; ++x_index)
{
int64_t x_value = x_data[x_index];
long x_result;

if (x_value < vec_data[0])
x_result = (allInside ? 0 : -1);
else if (x_value > vec_data[vec_count - 1])
x_result = (allInside ? vec_count - 2 : vec_count - 1);
else if (x_value == vec_data[vec_count - 1])
x_result = ((rightmostClosed || allInside) ? vec_count - 2 : vec_count - 1);
else
x_result = (std::upper_bound(vec_data, vec_data + vec_count, x_value) - vec_data) - 1;

int_result->set_int_no_check(x_result, x_index);
}
#else
// linear search
for (int x_index = 0; x_index < x_count; ++x_index)
{
int64_t x_value = x_data[x_index];
Expand All @@ -111,12 +139,34 @@ EidosValue_SP Eidos_ExecuteFunction_findInterval(const std::vector<EidosValue_SP

int_result->set_int_no_check(x_result, x_index);
}
#endif
}
else // (x_type == EidosValueType::kValueFloat)
{
const double *x_data = arg_x->IsSingleton() ? &((EidosValue_Float_singleton *)arg_x)->FloatValue_Mutable() : ((EidosValue_Float_vector *)arg_x)->data();

// Find intervals for integer vec, float x

#if EIDOS_FIND_INTERVAL_USE_BINARY_SEARCH
// binary search - testing indicates this is pretty much always as fast or faster than linear search
for (int x_index = 0; x_index < x_count; ++x_index)
{
double x_value = x_data[x_index];
long x_result;

if (x_value < vec_data[0])
x_result = (allInside ? 0 : -1);
else if (x_value > vec_data[vec_count - 1])
x_result = (allInside ? vec_count - 2 : vec_count - 1);
else if (x_value == vec_data[vec_count - 1])
x_result = ((rightmostClosed || allInside) ? vec_count - 2 : vec_count - 1);
else
x_result = (std::upper_bound(vec_data, vec_data + vec_count, x_value) - vec_data) - 1;

int_result->set_int_no_check(x_result, x_index);
}
#else
// linear search
for (int x_index = 0; x_index < x_count; ++x_index)
{
double x_value = x_data[x_index];
Expand All @@ -136,6 +186,7 @@ EidosValue_SP Eidos_ExecuteFunction_findInterval(const std::vector<EidosValue_SP

int_result->set_int_no_check(x_result, x_index);
}
#endif
}
}
else // (vec_type == EidosValueType::kValueFloat))
Expand All @@ -154,6 +205,27 @@ EidosValue_SP Eidos_ExecuteFunction_findInterval(const std::vector<EidosValue_SP
const int64_t *x_data = arg_x->IsSingleton() ? &((EidosValue_Int_singleton *)arg_x)->IntValue_Mutable() : ((EidosValue_Int_vector *)arg_x)->data();

// Find intervals for float vec, integer x

#if EIDOS_FIND_INTERVAL_USE_BINARY_SEARCH
// binary search - testing indicates this is pretty much always as fast or faster than linear search
for (int x_index = 0; x_index < x_count; ++x_index)
{
int64_t x_value = x_data[x_index];
long x_result;

if (x_value < vec_data[0])
x_result = (allInside ? 0 : -1);
else if (x_value > vec_data[vec_count - 1])
x_result = (allInside ? vec_count - 2 : vec_count - 1);
else if (x_value == vec_data[vec_count - 1])
x_result = ((rightmostClosed || allInside) ? vec_count - 2 : vec_count - 1);
else
x_result = (std::upper_bound(vec_data, vec_data + vec_count, x_value) - vec_data) - 1;

int_result->set_int_no_check(x_result, x_index);
}
#else
// linear search
for (int x_index = 0; x_index < x_count; ++x_index)
{
int64_t x_value = x_data[x_index];
Expand All @@ -173,12 +245,34 @@ EidosValue_SP Eidos_ExecuteFunction_findInterval(const std::vector<EidosValue_SP

int_result->set_int_no_check(x_result, x_index);
}
#endif
}
else // (x_type == EidosValueType::kValueFloat)
{
const double *x_data = arg_x->IsSingleton() ? &((EidosValue_Float_singleton *)arg_x)->FloatValue_Mutable() : ((EidosValue_Float_vector *)arg_x)->data();

// Find intervals for float vec, float x

#if EIDOS_FIND_INTERVAL_USE_BINARY_SEARCH
// binary search - testing indicates this is pretty much always as fast or faster than linear search
for (int x_index = 0; x_index < x_count; ++x_index)
{
double x_value = x_data[x_index];
long x_result;

if (x_value < vec_data[0])
x_result = (allInside ? 0 : -1);
else if (x_value > vec_data[vec_count - 1])
x_result = (allInside ? vec_count - 2 : vec_count - 1);
else if (x_value == vec_data[vec_count - 1])
x_result = ((rightmostClosed || allInside) ? vec_count - 2 : vec_count - 1);
else
x_result = (std::upper_bound(vec_data, vec_data + vec_count, x_value) - vec_data) - 1;

int_result->set_int_no_check(x_result, x_index);
}
#else
// linear search
for (int x_index = 0; x_index < x_count; ++x_index)
{
double x_value = x_data[x_index];
Expand All @@ -198,6 +292,7 @@ EidosValue_SP Eidos_ExecuteFunction_findInterval(const std::vector<EidosValue_SP

int_result->set_int_no_check(x_result, x_index);
}
#endif
}
}

Expand Down

0 comments on commit 4b4fcff

Please sign in to comment.