From 5a325858caa765abc7a5cd9f70cef9beabe000f8 Mon Sep 17 00:00:00 2001 From: Ben Haller Date: Fri, 29 Dec 2023 13:42:48 -0500 Subject: [PATCH] optimizations for mapValue() / spatialMapValue() --- VERSIONS | 1 + core/spatial_map.cpp | 348 +++++++++++++++++++++++++++++++++++-------- core/spatial_map.h | 34 +++++ 3 files changed, 321 insertions(+), 62 deletions(-) diff --git a/VERSIONS b/VERSIONS index c00eb51f..18c07a98 100644 --- a/VERSIONS +++ b/VERSIONS @@ -11,6 +11,7 @@ development head (in the master branch): policy change: float indices are no longer legal for subsetting, indices must be integer (or a logical vector, as usual); this was inherited from R and is a bad idea for Eidos policy change: assignment into object properties must match the type of the property; no more promotion to integer/float from lower types add some internal magic to accelerate statements of the form "x = c(x,y)" by appending y onto the end of x directly + optimize mapValue() / spatialMapValue() - 6x speedup for large-vector 2D case version 4.1 (Eidos version 3.1): diff --git a/core/spatial_map.cpp b/core/spatial_map.cpp index 128ff9df..60706829 100644 --- a/core/spatial_map.cpp +++ b/core/spatial_map.cpp @@ -1992,82 +1992,306 @@ EidosValue_SP SpatialMap::ExecuteMethod_mapValue(EidosGlobalStringID p_method_id #pragma unused (p_method_id, p_arguments, p_interpreter) EidosValue *point = p_arguments[0].get(); - EidosValue_Float *float_result = nullptr; - // Note that point is required to already be in terms of our spatiality; if we are an "xz" map, it must contain "xz" values - int x_count; - - if (point->Count() == spatiality_) - { - x_count = 1; - float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float(0.0)); - } - else if (point->Count() % spatiality_ == 0) - { - x_count = point->Count() / spatiality_; - float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); - } - else + // Also note that we clamp coordinates here, whether they are periodic or not; the caller should use pointPeriodic() + if (point->Count() % spatiality_ != 0) EIDOS_TERMINATION << "ERROR (SpatialMap::ExecuteMethod_mapValue): mapValue() length of point must match spatiality of map " << name_ << ", or be a multiple thereof." << EidosTerminate(); + int x_count = point->Count() / spatiality_; + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); const double *point_data = point->FloatData(); - EIDOS_THREAD_COUNT(gEidos_OMP_threads_SPATIAL_MAP_VALUE); -#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(point_data, float_result) if(x_count >= EIDOS_OMPMIN_SPATIAL_MAP_VALUE) num_threads(thread_count) - for (int value_index = 0; value_index < x_count; ++value_index) + // need to parallelize the three loops below separately now; disabling this parallelization for now + //EIDOS_THREAD_COUNT(gEidos_OMP_threads_SPATIAL_MAP_VALUE); +//#pragma omp parallel for schedule(static) default(none) shared(x_count) firstprivate(point_data, float_result) if(x_count >= EIDOS_OMPMIN_SPATIAL_MAP_VALUE) num_threads(thread_count) + + // It's very common for spatial bounds to be [0, 1] or [0, x] rather than the fully general case, + // so we actually optimize all of the cases here + + switch (spatiality_) // NOLINT(*-missing-default-case) : our spatiality is always in [1,3], and we can't throw from a parallel region { - // We need to use the correct spatial bounds for each coordinate, which depends upon our exact spatiality - // There is doubtless a way to make this code smarter, but brute force is sometimes best... - // Note that we clamp coordinates here, whether they are periodic or not; the caller should use pointPeriodic() - double map_value = 0; - - switch (spatiality_) // NOLINT(*-missing-default-case) : our spatiality is always in [1,3], and we can't throw from a parallel region + case 1: { - case 1: + if (!interpolate_) { - double point_vec[1]; - int value_offset = value_index; - - double a = (point_data[0 + value_offset] - bounds_a0_) / (bounds_a1_ - bounds_a0_); - point_vec[0] = SLiMClampCoordinate(a); - - map_value = ValueAtPoint_S1(point_vec); - break; + // without interpolation, use the inline version of ValueAtPoint_S1() + if (bounds_a0_ == 0.0) + { + if (bounds_a1_ == 1.0) + { + for (int value_index = 0; value_index < x_count; ++value_index) + { + double a = SLiMClampCoordinate(point_data[0 + value_index]); + + float_result->set_float_no_check(ValueAtPoint_S1_NOINTERPOLATE(a), value_index); + } + } + else + { + for (int value_index = 0; value_index < x_count; ++value_index) + { + double a = SLiMClampCoordinate(point_data[0 + value_index] / bounds_a1_); + + float_result->set_float_no_check(ValueAtPoint_S1_NOINTERPOLATE(a), value_index); + } + } + } + else + { + // full 1D case without interpolation + for (int value_index = 0; value_index < x_count; ++value_index) + { + double a = SLiMClampCoordinate((point_data[0 + value_index] - bounds_a0_) / (bounds_a1_ - bounds_a0_)); + + float_result->set_float_no_check(ValueAtPoint_S1_NOINTERPOLATE(a), value_index); + } + } } - case 2: + else { - double point_vec[2]; - int value_offset = value_index * 2; - - double a = (point_data[0 + value_offset] - bounds_a0_) / (bounds_a1_ - bounds_a0_); - point_vec[0] = SLiMClampCoordinate(a); - - double b = (point_data[1 + value_offset] - bounds_b0_) / (bounds_b1_ - bounds_b0_); - point_vec[1] = SLiMClampCoordinate(b); - - map_value = ValueAtPoint_S2(point_vec); - break; + if (bounds_a0_ == 0.0) + { + if (bounds_a1_ == 1.0) + { + for (int value_index = 0; value_index < x_count; ++value_index) + { + double point_vec[1]; + + double a = point_data[0 + value_index]; + point_vec[0] = SLiMClampCoordinate(a); + + float_result->set_float_no_check(ValueAtPoint_S1(point_vec), value_index); + } + } + else + { + for (int value_index = 0; value_index < x_count; ++value_index) + { + double point_vec[1]; + + double a = point_data[0 + value_index] / bounds_a1_; + point_vec[0] = SLiMClampCoordinate(a); + + float_result->set_float_no_check(ValueAtPoint_S1(point_vec), value_index); + } + } + } + else + { + // full 1D case + for (int value_index = 0; value_index < x_count; ++value_index) + { + double point_vec[1]; + + double a = (point_data[0 + value_index] - bounds_a0_) / (bounds_a1_ - bounds_a0_); + point_vec[0] = SLiMClampCoordinate(a); + + float_result->set_float_no_check(ValueAtPoint_S1(point_vec), value_index); + } + } } - case 3: + break; + } + case 2: + { + if (!interpolate_) { - double point_vec[3]; - int value_offset = value_index * 3; - - double a = (point_data[0 + value_offset] - bounds_a0_) / (bounds_a1_ - bounds_a0_); - point_vec[0] = SLiMClampCoordinate(a); - - double b = (point_data[1 + value_offset] - bounds_b0_) / (bounds_b1_ - bounds_b0_); - point_vec[1] = SLiMClampCoordinate(b); - - double c = (point_data[2 + value_offset] - bounds_c0_) / (bounds_c1_ - bounds_c0_); - point_vec[2] = SLiMClampCoordinate(c); - - map_value = ValueAtPoint_S3(point_vec); - break; + // without interpolation, use the inline version of ValueAtPoint_S2() + if ((bounds_a0_ == 0.0) && (bounds_b0_ == 0.0)) + { + if ((bounds_a1_ == 1.0) && (bounds_b1_ == 1.0)) + { + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 2) + { + double a = SLiMClampCoordinate(point_data[0 + value_offset]); + double b = SLiMClampCoordinate(point_data[1 + value_offset]); + + float_result->set_float_no_check(ValueAtPoint_S2_NOINTERPOLATE(a, b), value_index); + } + } + else + { + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 2) + { + double a = SLiMClampCoordinate(point_data[0 + value_offset] / bounds_a1_); + double b = SLiMClampCoordinate(point_data[1 + value_offset] / bounds_b1_); + + float_result->set_float_no_check(ValueAtPoint_S2_NOINTERPOLATE(a, b), value_index); + } + } + } + else + { + // full 2D case without interpolation + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 2) + { + double a = SLiMClampCoordinate((point_data[0 + value_offset] - bounds_a0_) / (bounds_a1_ - bounds_a0_)); + double b = SLiMClampCoordinate((point_data[1 + value_offset] - bounds_b0_) / (bounds_b1_ - bounds_b0_)); + + float_result->set_float_no_check(ValueAtPoint_S2_NOINTERPOLATE(a, b), value_index); + } + } + } + else + { + if ((bounds_a0_ == 0.0) && (bounds_b0_ == 0.0)) + { + if ((bounds_a1_ == 1.0) && (bounds_b1_ == 1.0)) + { + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 2) + { + double point_vec[2]; + + double a = point_data[0 + value_offset]; + point_vec[0] = SLiMClampCoordinate(a); + + double b = point_data[1 + value_offset]; + point_vec[1] = SLiMClampCoordinate(b); + + float_result->set_float_no_check(ValueAtPoint_S2(point_vec), value_index); + } + } + else + { + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 2) + { + double point_vec[2]; + + double a = point_data[0 + value_offset] / bounds_a1_; + point_vec[0] = SLiMClampCoordinate(a); + + double b = point_data[1 + value_offset] / bounds_b1_; + point_vec[1] = SLiMClampCoordinate(b); + + float_result->set_float_no_check(ValueAtPoint_S2(point_vec), value_index); + } + } + } + else + { + // full 2D case + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 2) + { + double point_vec[2]; + + double a = (point_data[0 + value_offset] - bounds_a0_) / (bounds_a1_ - bounds_a0_); + point_vec[0] = SLiMClampCoordinate(a); + + double b = (point_data[1 + value_offset] - bounds_b0_) / (bounds_b1_ - bounds_b0_); + point_vec[1] = SLiMClampCoordinate(b); + + float_result->set_float_no_check(ValueAtPoint_S2(point_vec), value_index); + } + } } + break; + } + case 3: + { + if (!interpolate_) + { + // without interpolation, use the inline version of ValueAtPoint_S3() + if ((bounds_a0_ == 0.0) && (bounds_b0_ == 0.0) && (bounds_c0_ == 0.0)) + { + if ((bounds_a1_ == 1.0) && (bounds_b1_ == 1.0) && (bounds_c1_ == 1.0)) + { + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 3) + { + double a = SLiMClampCoordinate(point_data[0 + value_offset]); + double b = SLiMClampCoordinate(point_data[1 + value_offset]); + double c = SLiMClampCoordinate(point_data[2 + value_offset]); + + float_result->set_float_no_check(ValueAtPoint_S3_NOINTERPOLATE(a, b, c), value_index); + } + } + else + { + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 3) + { + double a = SLiMClampCoordinate(point_data[0 + value_offset] / bounds_a1_); + double b = SLiMClampCoordinate(point_data[1 + value_offset] / bounds_b1_); + double c = SLiMClampCoordinate(point_data[2 + value_offset] / bounds_c1_); + + float_result->set_float_no_check(ValueAtPoint_S3_NOINTERPOLATE(a, b, c), value_index); + } + } + } + else + { + // full 3D case without interpolation + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 3) + { + double a = SLiMClampCoordinate((point_data[0 + value_offset] - bounds_a0_) / (bounds_a1_ - bounds_a0_)); + double b = SLiMClampCoordinate((point_data[1 + value_offset] - bounds_b0_) / (bounds_b1_ - bounds_b0_)); + double c = SLiMClampCoordinate((point_data[2 + value_offset] - bounds_c0_) / (bounds_c1_ - bounds_c0_)); + + float_result->set_float_no_check(ValueAtPoint_S3_NOINTERPOLATE(a, b, c), value_index); + } + } + } + else + { + if ((bounds_a0_ == 0.0) && (bounds_b0_ == 0.0) && (bounds_c0_ == 0.0)) + { + if ((bounds_a1_ == 1.0) && (bounds_b1_ == 1.0) && (bounds_c1_ == 1.0)) + { + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 3) + { + double point_vec[3]; + + double a = point_data[0 + value_offset]; + point_vec[0] = SLiMClampCoordinate(a); + + double b = point_data[1 + value_offset]; + point_vec[1] = SLiMClampCoordinate(b); + + double c = point_data[2 + value_offset]; + point_vec[2] = SLiMClampCoordinate(c); + + float_result->set_float_no_check(ValueAtPoint_S3(point_vec), value_index); + } + } + else + { + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 3) + { + double point_vec[3]; + + double a = point_data[0 + value_offset] / bounds_a1_; + point_vec[0] = SLiMClampCoordinate(a); + + double b = point_data[1 + value_offset] / bounds_b1_; + point_vec[1] = SLiMClampCoordinate(b); + + double c = point_data[2 + value_offset] / bounds_c1_; + point_vec[2] = SLiMClampCoordinate(c); + + float_result->set_float_no_check(ValueAtPoint_S3(point_vec), value_index); + } + } + } + else + { + // full 3D case + for (int value_index = 0, value_offset = 0; value_index < x_count; ++value_index, value_offset += 3) + { + double point_vec[3]; + + double a = (point_data[0 + value_offset] - bounds_a0_) / (bounds_a1_ - bounds_a0_); + point_vec[0] = SLiMClampCoordinate(a); + + double b = (point_data[1 + value_offset] - bounds_b0_) / (bounds_b1_ - bounds_b0_); + point_vec[1] = SLiMClampCoordinate(b); + + double c = (point_data[2 + value_offset] - bounds_c0_) / (bounds_c1_ - bounds_c0_); + point_vec[2] = SLiMClampCoordinate(c); + + float_result->set_float_no_check(ValueAtPoint_S3(point_vec), value_index); + } + } + } + break; } - - float_result->set_float_no_check(map_value, value_index); } return EidosValue_SP(float_result); diff --git a/core/spatial_map.h b/core/spatial_map.h index 54412eba..b45dae22 100644 --- a/core/spatial_map.h +++ b/core/spatial_map.h @@ -106,6 +106,40 @@ class SpatialMap : public EidosDictionaryRetained double ValueAtPoint_S1(double *p_point); double ValueAtPoint_S2(double *p_point); double ValueAtPoint_S3(double *p_point); + + inline double ValueAtPoint_S1_NOINTERPOLATE(double x_fraction) + { + // See ValueAtPoint_S1(); this is a fast inline version that assumes no interpolation + int64_t xsize = grid_size_[0]; + int x_map = (int)round(x_fraction * (xsize - 1)); + + return values_[x_map]; + } + + inline double ValueAtPoint_S2_NOINTERPOLATE(double x_fraction, double y_fraction) + { + // See ValueAtPoint_S2(); this is a fast inline version that assumes no interpolation + int64_t xsize = grid_size_[0]; + int64_t ysize = grid_size_[1]; + int x_map = (int)round(x_fraction * (xsize - 1)); + int y_map = (int)round(y_fraction * (ysize - 1)); + + return values_[x_map + y_map * xsize]; + } + + double ValueAtPoint_S3_NOINTERPOLATE(double x_fraction, double y_fraction, double z_fraction) + { + // See ValueAtPoint_S3(); this is a fast inline version that assumes no interpolation + int64_t xsize = grid_size_[0]; + int64_t ysize = grid_size_[1]; + int64_t zsize = grid_size_[2]; + int x_map = (int)round(x_fraction * (xsize - 1)); + int y_map = (int)round(y_fraction * (ysize - 1)); + int z_map = (int)round(z_fraction * (zsize - 1)); + + return values_[x_map + y_map * xsize + z_map * xsize * ysize]; + } + void ColorForValue(double p_value, double *p_rgb_ptr); void ColorForValue(double p_value, float *p_rgb_ptr);