@@ -125,10 +125,11 @@ static SIMDf SIMDf_NUM(1);
125
125
126
126
// Float-vector store/load wrappers for the 256-bit (_mm256_*) code path.
// FN_ALIGNED_SETS selects which intrinsic pair SIMDf_STORE and SIMDf_LOAD
// expand to: the plain store/load forms when it is defined, the "u"
// (storeu/loadu) forms otherwise. Both macros switch together, so a buffer
// written through SIMDf_STORE can be read back through SIMDf_LOAD under the
// same build setting.
// Per Intel's intrinsics documentation the non-"u" forms require the pointer
// to be 32-byte aligned, while the "u" forms accept any address.
// NOTE(review): as rendered here there is a space between each macro name and
// its '(' -- in real C++ that would declare an object-like macro. Presumably
// an artifact of how this diff was rendered; confirm against the actual file.
#ifdef FN_ALIGNED_SETS
127
127
#define SIMDf_STORE (p,a ) _mm256_store_ps(p,a)
128
+ #define SIMDf_LOAD (p ) _mm256_load_ps(p)
128
129
#else
// FN_ALIGNED_SETS not defined: use the unaligned-tolerant intrinsics.
129
130
#define SIMDf_STORE (p,a ) _mm256_storeu_ps(p,a)
131
+ #define SIMDf_LOAD (p ) _mm256_loadu_ps(p)
130
132
#endif
131
- #define SIMDf_LOAD (p ) _mm256_load_ps(p)
132
133
133
134
#define SIMDf_ADD (a,b ) _mm256_add_ps(a,b)
134
135
#define SIMDf_SUB (a,b ) _mm256_sub_ps(a,b)
@@ -178,10 +179,11 @@ static SIMDf SIMDf_NUM(1);
178
179
179
180
// Float-vector store/load wrappers for the 128-bit (_mm_*) code path.
// FN_ALIGNED_SETS selects which intrinsic pair SIMDf_STORE and SIMDf_LOAD
// expand to: the plain store/load forms when it is defined, the "u"
// (storeu/loadu) forms otherwise. Both macros switch together, so a buffer
// written through SIMDf_STORE can be read back through SIMDf_LOAD under the
// same build setting.
// Per Intel's intrinsics documentation the non-"u" forms require the pointer
// to be 16-byte aligned, while the "u" forms accept any address.
// NOTE(review): as rendered here there is a space between each macro name and
// its '(' -- in real C++ that would declare an object-like macro. Presumably
// an artifact of how this diff was rendered; confirm against the actual file.
#ifdef FN_ALIGNED_SETS
180
181
#define SIMDf_STORE (p,a ) _mm_store_ps(p,a)
182
+ #define SIMDf_LOAD (p ) _mm_load_ps(p)
181
183
#else
// FN_ALIGNED_SETS not defined: use the unaligned-tolerant intrinsics.
182
184
#define SIMDf_STORE (p,a ) _mm_storeu_ps(p,a)
185
+ #define SIMDf_LOAD (p ) _mm_loadu_ps(p)
183
186
#endif
184
- #define SIMDf_LOAD (p ) _mm_load_ps(p)
185
187
186
188
#define SIMDf_ADD (a,b ) _mm_add_ps(a,b)
187
189
#define SIMDf_SUB (a,b ) _mm_sub_ps(a,b)
@@ -1389,8 +1391,6 @@ void SIMD_LEVEL_CLASS::FillCellularSet(float* noiseSet, FastNoiseVectorSet* vect
1389
1391
1390
1392
SIMDi seedV = SIMDi_SET (m_seed);
1391
1393
SIMDf freqV = SIMDf_SET (m_frequency);
1392
- SIMDf lacunarityV = SIMDf_SET (m_lacunarity);
1393
- SIMDf gainV = SIMDf_SET (m_gain);
1394
1394
SIMDf xOffsetV = SIMDf_SET (xOffset*m_frequency);
1395
1395
SIMDf yOffsetV = SIMDf_SET (yOffset*m_frequency);
1396
1396
SIMDf zOffsetV = SIMDf_SET (zOffset*m_frequency);
@@ -1443,9 +1443,9 @@ void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, int xStart, int ySta
1443
1443
int sampleMask = sampleSize - 1 ;
1444
1444
float scaleModifier = float (sampleSize);
1445
1445
1446
- int xOffset = sampleSize - (xStart & sampleMask) & sampleMask;
1447
- int yOffset = sampleSize - (yStart & sampleMask) & sampleMask;
1448
- int zOffset = sampleSize - (zStart & sampleMask) & sampleMask;
1446
+ int xOffset = ( sampleSize - (xStart & sampleMask) ) & sampleMask;
1447
+ int yOffset = ( sampleSize - (yStart & sampleMask) ) & sampleMask;
1448
+ int zOffset = ( sampleSize - (zStart & sampleMask) ) & sampleMask;
1449
1449
1450
1450
int xSizeSample = xSize + xOffset;
1451
1451
int ySizeSample = ySize + yOffset;
@@ -1487,16 +1487,21 @@ void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, int xStart, int ySta
1487
1487
for (int y = 0 ; y < ySizeSample - 1 ; y++)
1488
1488
{
1489
1489
SIMDi zSIMD = zBase;
1490
+
1491
+ SIMDf c001 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y, 0 )]);
1492
+ SIMDf c101 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y, 0 )]);
1493
+ SIMDf c011 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y + 1 , 0 )]);
1494
+ SIMDf c111 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y + 1 , 0 )]);
1490
1495
for (int z = 0 ; z < zSizeSample - 1 ; z++)
1491
1496
{
1492
- SIMDf c000 = SIMDf_SET (noiseSetSample[ SAMPLE_INDEX (x, y, z)]) ;
1493
- SIMDf c100 = SIMDf_SET (noiseSetSample[ SAMPLE_INDEX (x + 1 , y, z)]) ;
1494
- SIMDf c010 = SIMDf_SET (noiseSetSample[ SAMPLE_INDEX (x, y + 1 , z)]) ;
1495
- SIMDf c110 = SIMDf_SET (noiseSetSample[ SAMPLE_INDEX (x + 1 , y + 1 , z)]) ;
1496
- SIMDf c001 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y, z + 1 )]);
1497
- SIMDf c101 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y, z + 1 )]);
1498
- SIMDf c011 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y + 1 , z + 1 )]);
1499
- SIMDf c111 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y + 1 , z + 1 )]);
1497
+ SIMDf c000 = c001 ;
1498
+ SIMDf c100 = c101 ;
1499
+ SIMDf c010 = c011 ;
1500
+ SIMDf c110 = c111 ;
1501
+ c001 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y, z + 1 )]);
1502
+ c101 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y, z + 1 )]);
1503
+ c011 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y + 1 , z + 1 )]);
1504
+ c111 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y + 1 , z + 1 )]);
1500
1505
1501
1506
SIMDi localCountSIMD = SIMDi_NUM (incremental);
1502
1507
@@ -1605,16 +1610,21 @@ void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, FastNoiseVectorSet*
1605
1610
for (int y = 0 ; y < ySizeSample - 1 ; y++)
1606
1611
{
1607
1612
SIMDi zSIMD = SIMDi_SET_ZERO ();
1613
+
1614
+ SIMDf c001 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y, 0 )]);
1615
+ SIMDf c101 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y, 0 )]);
1616
+ SIMDf c011 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y + 1 , 0 )]);
1617
+ SIMDf c111 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y + 1 , 0 )]);
1608
1618
for (int z = 0 ; z < zSizeSample - 1 ; z++)
1609
1619
{
1610
- SIMDf c000 = SIMDf_SET (noiseSetSample[ SAMPLE_INDEX (x, y, z)]) ;
1611
- SIMDf c100 = SIMDf_SET (noiseSetSample[ SAMPLE_INDEX (x + 1 , y, z)]) ;
1612
- SIMDf c010 = SIMDf_SET (noiseSetSample[ SAMPLE_INDEX (x, y + 1 , z)]) ;
1613
- SIMDf c110 = SIMDf_SET (noiseSetSample[ SAMPLE_INDEX (x + 1 , y + 1 , z)]) ;
1614
- SIMDf c001 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y, z + 1 )]);
1615
- SIMDf c101 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y, z + 1 )]);
1616
- SIMDf c011 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y + 1 , z + 1 )]);
1617
- SIMDf c111 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y + 1 , z + 1 )]);
1620
+ SIMDf c000 = c001 ;
1621
+ SIMDf c100 = c101 ;
1622
+ SIMDf c010 = c011 ;
1623
+ SIMDf c110 = c111 ;
1624
+ c001 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y, z + 1 )]);
1625
+ c101 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y, z + 1 )]);
1626
+ c011 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x, y + 1 , z + 1 )]);
1627
+ c111 = SIMDf_SET (noiseSetSample[SAMPLE_INDEX (x + 1 , y + 1 , z + 1 )]);
1618
1628
1619
1629
SIMDi localCountSIMD = SIMDi_NUM (incremental);
1620
1630
0 commit comments