Skip to content

Commit

Permalink
aarch64 performance fix: replace long double with double (#790)
Browse files Browse the repository at this point in the history
* Replace long double with double

Use lround instead of lroundl, for perf improvements on aarch64 machines.

* Replace long double with double

Use lround instead of lroundl, for perf improvements on aarch64 machines.

* Replace long double with double

Use lround instead of lroundl, for perf improvements on aarch64 machines.

* Update faceijk.c

Update copyright year.

* Update CHANGELOG.md
  • Loading branch information
heshpdx authored Oct 22, 2023
1 parent 017f810 commit 693d084
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 27 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The public API of this library consists of the functions declared in file
## [Unreleased]
### Changed
- Reorganize tests into public / internal. (#762)
- Performance enhancement for aarch64; should not affect other platforms. (#790)

## [4.1.0] - 2023-01-18
### Added
Expand Down
44 changes: 22 additions & 22 deletions src/h3lib/lib/coordijk.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2016-2018, 2020-2022 Uber Technologies, Inc.
* Copyright 2016-2018, 2020-2023 Uber Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -67,7 +67,7 @@ void _hex2dToCoordIJK(const Vec2d *v, CoordIJK *h) {

// first do a reverse conversion
x2 = a2 / M_SIN60;
x1 = a1 + x2 / 2.0L;
x1 = a1 + x2 / 2.0;

// check if we have the center of a hex
m1 = x1;
Expand All @@ -77,43 +77,43 @@ void _hex2dToCoordIJK(const Vec2d *v, CoordIJK *h) {
r1 = x1 - m1;
r2 = x2 - m2;

if (r1 < 0.5L) {
if (r1 < 1.0L / 3.0L) {
if (r2 < (1.0L + r1) / 2.0L) {
if (r1 < 0.5) {
if (r1 < 1.0 / 3.0) {
if (r2 < (1.0 + r1) / 2.0) {
h->i = m1;
h->j = m2;
} else {
h->i = m1;
h->j = m2 + 1;
}
} else {
if (r2 < (1.0L - r1)) {
if (r2 < (1.0 - r1)) {
h->j = m2;
} else {
h->j = m2 + 1;
}

if ((1.0L - r1) <= r2 && r2 < (2.0 * r1)) {
if ((1.0 - r1) <= r2 && r2 < (2.0 * r1)) {
h->i = m1 + 1;
} else {
h->i = m1;
}
}
} else {
if (r1 < 2.0L / 3.0L) {
if (r2 < (1.0L - r1)) {
if (r1 < 2.0 / 3.0) {
if (r2 < (1.0 - r1)) {
h->j = m2;
} else {
h->j = m2 + 1;
}

if ((2.0L * r1 - 1.0L) < r2 && r2 < (1.0L - r1)) {
if ((2.0 * r1 - 1.0) < r2 && r2 < (1.0 - r1)) {
h->i = m1;
} else {
h->i = m1 + 1;
}
} else {
if (r2 < (r1 / 2.0L)) {
if (r2 < (r1 / 2.0)) {
h->i = m1 + 1;
h->j = m2;
} else {
Expand All @@ -125,7 +125,7 @@ void _hex2dToCoordIJK(const Vec2d *v, CoordIJK *h) {

// now fold across the axes if necessary

if (v->x < 0.0L) {
if (v->x < 0.0) {
if ((h->j % 2) == 0) // even
{
long long int axisi = h->j / 2;
Expand All @@ -138,7 +138,7 @@ void _hex2dToCoordIJK(const Vec2d *v, CoordIJK *h) {
}
}

if (v->y < 0.0L) {
if (v->y < 0.0) {
h->i = h->i - (2 * h->j + 1) / 2;
h->j = -1 * h->j;
}
Expand All @@ -156,7 +156,7 @@ void _ijkToHex2d(const CoordIJK *h, Vec2d *v) {
int i = h->i - h->k;
int j = h->j - h->k;

v->x = i - 0.5L * j;
v->x = i - 0.5 * j;
v->y = j * M_SQRT3_2;
}

Expand Down Expand Up @@ -346,8 +346,8 @@ H3Error _upAp7Checked(CoordIJK *ijk) {
}

// TODO: Do the int math parts here in double?
ijk->i = (int)lroundl(((i * 3) - j) / 7.0L);
ijk->j = (int)lroundl((i + (j * 2)) / 7.0L);
ijk->i = (int)lround(((i * 3) - j) / 7.0);
ijk->j = (int)lround((i + (j * 2)) / 7.0);
ijk->k = 0;

// Expected not to be reachable, because max + min or max - min would need
Expand Down Expand Up @@ -395,8 +395,8 @@ H3Error _upAp7rChecked(CoordIJK *ijk) {
}

// TODO: Do the int math parts here in double?
ijk->i = (int)lroundl(((i * 2) + j) / 7.0L);
ijk->j = (int)lroundl(((j * 3) - i) / 7.0L);
ijk->i = (int)lround(((i * 2) + j) / 7.0);
ijk->j = (int)lround(((j * 3) - i) / 7.0);
ijk->k = 0;

// Expected not to be reachable, because max + min or max - min would need
Expand All @@ -419,8 +419,8 @@ void _upAp7(CoordIJK *ijk) {
int i = ijk->i - ijk->k;
int j = ijk->j - ijk->k;

ijk->i = (int)lroundl((3 * i - j) / 7.0L);
ijk->j = (int)lroundl((i + 2 * j) / 7.0L);
ijk->i = (int)lround((3 * i - j) / 7.0);
ijk->j = (int)lround((i + 2 * j) / 7.0);
ijk->k = 0;
_ijkNormalize(ijk);
}
Expand All @@ -436,8 +436,8 @@ void _upAp7r(CoordIJK *ijk) {
int i = ijk->i - ijk->k;
int j = ijk->j - ijk->k;

ijk->i = (int)lroundl((2 * i + j) / 7.0L);
ijk->j = (int)lroundl((3 * j - i) / 7.0L);
ijk->i = (int)lround((2 * i + j) / 7.0);
ijk->j = (int)lround((3 * j - i) / 7.0);
ijk->k = 0;
_ijkNormalize(ijk);
}
Expand Down
6 changes: 3 additions & 3 deletions src/h3lib/lib/faceijk.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2016-2021 Uber Technologies, Inc.
* Copyright 2016-2023 Uber Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -33,7 +33,7 @@
#include "vec3d.h"

/** square root of 7 */
#define M_SQRT7 2.6457513110645905905016157536392604257102L
#define M_SQRT7 2.6457513110645905905016157536392604257102

/** @brief icosahedron face centers in lat/lng radians */
const LatLng faceCenterGeo[NUM_ICOSA_FACES] = {
Expand Down Expand Up @@ -395,7 +395,7 @@ void _geoToHex2d(const LatLng *g, int res, int *face, Vec2d *v) {
double r = acos(1 - sqd / 2);

if (r < EPSILON) {
v->x = v->y = 0.0L;
v->x = v->y = 0.0;
return;
}

Expand Down
4 changes: 2 additions & 2 deletions src/h3lib/lib/latLng.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2016-2021 Uber Technologies, Inc.
* Copyright 2016-2023 Uber Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@
* @return The normalized radians value.
*/
double _posAngleRads(double rads) {
double tmp = ((rads < 0.0L) ? rads + M_2PI : rads);
double tmp = ((rads < 0.0) ? rads + M_2PI : rads);
if (rads >= M_2PI) tmp -= M_2PI;
return tmp;
}
Expand Down

0 comments on commit 693d084

Please sign in to comment.