Skip to content
This repository was archived by the owner on Apr 2, 2021. It is now read-only.

Commit 24a1822

Browse files
committed
Non aligned SSE fix, cellular optimisation
1 parent 718165b commit 24a1822

3 files changed

+38
-29
lines changed

FastNoiseSIMD.cpp

+4 -5
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ int FastNoiseSIMD::s_currentSIMDLevel = -1;
6464
void cpuid(int32_t out[4], int32_t x) {
6565
__cpuidex(out, x, 0);
6666
}
67-
__int64 xgetbv(unsigned int x) {
67+
uint64_t xgetbv(unsigned int x) {
6868
return _xgetbv(x);
6969
}
7070
#else
@@ -107,7 +107,7 @@ int GetFastestSIMD()
107107

108108
if (osAVXSuport && cpuAVXSuport)
109109
{
110-
__int64 xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
110+
uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
111111
if ((xcrFeatureMask & 0x6) != 0x6)
112112
return FN_SSE41;
113113
}
@@ -167,9 +167,9 @@ int FastNoiseSIMD::GetSIMDLevel()
167167

168168
void FastNoiseSIMD::FreeNoiseSet(float* floatArray)
169169
{
170+
#ifdef FN_ALIGNED_SETS
170171
GetSIMDLevel();
171172

172-
#ifdef FN_ALIGNED_SETS
173173
if (s_currentSIMDLevel > FN_NO_SIMD_FALLBACK)
174174
#ifdef _WIN32
175175
_aligned_free(floatArray);
@@ -267,7 +267,6 @@ void FastNoiseSIMD::FillSamplingVectorSet(FastNoiseVectorSet* vectorSet, int sam
267267

268268
int sampleSize = 1 << sampleScale;
269269
int sampleMask = sampleSize - 1;
270-
float scaleModifier = float(sampleSize);
271270

272271
int xSizeSample = xSize;
273272
int ySizeSample = ySize;
@@ -431,5 +430,5 @@ void FastNoiseVectorSet::SetSize(int _size)
431430

432431
xSet = FastNoiseSIMD::GetEmptySet(alignedSize * 3);
433432
ySet = xSet + alignedSize;
434-
zSet = xSet + alignedSize * 2;
433+
zSet = ySet + alignedSize;
435434
}

FastNoiseSIMD_internal.cpp

+33 -23
Original file line number | Diff line number | Diff line change
@@ -125,10 +125,11 @@ static SIMDf SIMDf_NUM(1);
125125

126126
#ifdef FN_ALIGNED_SETS
127127
#define SIMDf_STORE(p,a) _mm256_store_ps(p,a)
128+
#define SIMDf_LOAD(p) _mm256_load_ps(p)
128129
#else
129130
#define SIMDf_STORE(p,a) _mm256_storeu_ps(p,a)
131+
#define SIMDf_LOAD(p) _mm256_loadu_ps(p)
130132
#endif
131-
#define SIMDf_LOAD(p) _mm256_load_ps(p)
132133

133134
#define SIMDf_ADD(a,b) _mm256_add_ps(a,b)
134135
#define SIMDf_SUB(a,b) _mm256_sub_ps(a,b)
@@ -178,10 +179,11 @@ static SIMDf SIMDf_NUM(1);
178179

179180
#ifdef FN_ALIGNED_SETS
180181
#define SIMDf_STORE(p,a) _mm_store_ps(p,a)
182+
#define SIMDf_LOAD(p) _mm_load_ps(p)
181183
#else
182184
#define SIMDf_STORE(p,a) _mm_storeu_ps(p,a)
185+
#define SIMDf_LOAD(p) _mm_loadu_ps(p)
183186
#endif
184-
#define SIMDf_LOAD(p) _mm_load_ps(p)
185187

186188
#define SIMDf_ADD(a,b) _mm_add_ps(a,b)
187189
#define SIMDf_SUB(a,b) _mm_sub_ps(a,b)
@@ -1389,8 +1391,6 @@ void SIMD_LEVEL_CLASS::FillCellularSet(float* noiseSet, FastNoiseVectorSet* vect
13891391

13901392
SIMDi seedV = SIMDi_SET(m_seed);
13911393
SIMDf freqV = SIMDf_SET(m_frequency);
1392-
SIMDf lacunarityV = SIMDf_SET(m_lacunarity);
1393-
SIMDf gainV = SIMDf_SET(m_gain);
13941394
SIMDf xOffsetV = SIMDf_SET(xOffset*m_frequency);
13951395
SIMDf yOffsetV = SIMDf_SET(yOffset*m_frequency);
13961396
SIMDf zOffsetV = SIMDf_SET(zOffset*m_frequency);
@@ -1443,9 +1443,9 @@ void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, int xStart, int ySta
14431443
int sampleMask = sampleSize - 1;
14441444
float scaleModifier = float(sampleSize);
14451445

1446-
int xOffset = sampleSize - (xStart & sampleMask) & sampleMask;
1447-
int yOffset = sampleSize - (yStart & sampleMask) & sampleMask;
1448-
int zOffset = sampleSize - (zStart & sampleMask) & sampleMask;
1446+
int xOffset = (sampleSize - (xStart & sampleMask)) & sampleMask;
1447+
int yOffset = (sampleSize - (yStart & sampleMask)) & sampleMask;
1448+
int zOffset = (sampleSize - (zStart & sampleMask)) & sampleMask;
14491449

14501450
int xSizeSample = xSize + xOffset;
14511451
int ySizeSample = ySize + yOffset;
@@ -1487,16 +1487,21 @@ void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, int xStart, int ySta
14871487
for (int y = 0; y < ySizeSample - 1; y++)
14881488
{
14891489
SIMDi zSIMD = zBase;
1490+
1491+
SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, 0)]);
1492+
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, 0)]);
1493+
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, 0)]);
1494+
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, 0)]);
14901495
for (int z = 0; z < zSizeSample - 1; z++)
14911496
{
1492-
SIMDf c000 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z)]);
1493-
SIMDf c100 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z)]);
1494-
SIMDf c010 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z)]);
1495-
SIMDf c110 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z)]);
1496-
SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
1497-
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
1498-
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
1499-
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);
1497+
SIMDf c000 = c001;
1498+
SIMDf c100 = c101;
1499+
SIMDf c010 = c011;
1500+
SIMDf c110 = c111;
1501+
c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
1502+
c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
1503+
c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
1504+
c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);
15001505

15011506
SIMDi localCountSIMD = SIMDi_NUM(incremental);
15021507

@@ -1605,16 +1610,21 @@ void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, FastNoiseVectorSet*
16051610
for (int y = 0; y < ySizeSample - 1; y++)
16061611
{
16071612
SIMDi zSIMD = SIMDi_SET_ZERO();
1613+
1614+
SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, 0)]);
1615+
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, 0)]);
1616+
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, 0)]);
1617+
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, 0)]);
16081618
for (int z = 0; z < zSizeSample - 1; z++)
16091619
{
1610-
SIMDf c000 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z)]);
1611-
SIMDf c100 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z)]);
1612-
SIMDf c010 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z)]);
1613-
SIMDf c110 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z)]);
1614-
SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
1615-
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
1616-
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
1617-
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);
1620+
SIMDf c000 = c001;
1621+
SIMDf c100 = c101;
1622+
SIMDf c010 = c011;
1623+
SIMDf c110 = c111;
1624+
c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
1625+
c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
1626+
c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
1627+
c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);
16181628

16191629
SIMDi localCountSIMD = SIMDi_NUM(incremental);
16201630

FastNoiseSIMD_internal.h

+1 -1
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@
2727
//
2828

2929
#ifndef SIMD_LEVEL_H
30-
#error Don't include this file without defining SIMD_LEVEL_H
30+
#error Dont include this file without defining SIMD_LEVEL_H
3131
#else
3232
#define FASTNOISE_SIMD_CLASS2(x) FastNoiseSIMD_L##x
3333
#define FASTNOISE_SIMD_CLASS(level) FASTNOISE_SIMD_CLASS2(level)

0 commit comments

Comments
 (0)