Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ NBL_CONCEPT_END(
#include <nbl/builtin/hlsl/concepts/__end.hlsl>

// An accessor usable for both loads and stores: it must satisfy the read AND the write concept.
// (The previous definition tested GenericWriteAccessor twice, so readability was never checked.)
template<typename T, typename V, typename I=uint32_t>
NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor<T,V,I> && GenericWriteAccessor<T,V,I>;

}
}
Expand Down
41 changes: 41 additions & 0 deletions include/nbl/builtin/hlsl/concepts/warp.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_
#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_

#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl"
#include "nbl/builtin/hlsl/fft/common.hlsl"

namespace nbl
{
namespace hlsl
{
namespace concepts
{

// declare concept
// WARP<U,C> is satisfied when a value of type `U` can be used as:
//   warp(float32_t2)           -> C
//   forwardDensity(float32_t2) -> float32_t
//   backwardDensity(C)         -> float32_t
#define NBL_CONCEPT_NAME WARP
#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)
#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(C)
// not the greatest syntax but works
// Placeholder names must not collide with the member names used in the requirement expressions
// (an object-like macro called `warp` would also rewrite `.warp(...)`), nor with HLSL keywords (`out`).
#define NBL_CONCEPT_PARAM_0 (warper,U)
#define NBL_CONCEPT_PARAM_1 (uv,float32_t2)
#define NBL_CONCEPT_PARAM_2 (warped,C)
// start concept
NBL_CONCEPT_BEGIN(3)
#define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
#define warped NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
// The calls are written without `.template` because no explicit template arguments are supplied;
// this accepts both plain member functions and ones whose argument type is deduced.
NBL_CONCEPT_END(
((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.warp(uv)) , ::nbl::hlsl::is_same_v, C))
((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.forwardDensity(uv)) , ::nbl::hlsl::is_same_v, float32_t))
((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.backwardDensity(warped)) , ::nbl::hlsl::is_same_v, float32_t))
);
#undef warped
#undef uv
#undef warper
#include <nbl/builtin/hlsl/concepts/__end.hlsl>

}
}
}

#endif
136 changes: 136 additions & 0 deletions include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_

#include <nbl/builtin/hlsl/concepts/warp.hlsl>
#include <nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl>

namespace nbl
{
namespace hlsl
{
namespace sampling
{

// Importance sampling of an image driven by its luminance mip chain.
// `binarySearch` descends the mip chain to turn a random number into a texture coordinate,
// `sampleWarpmap` does the same through a precomputed warpmap, and `sample` evaluates the four
// neighbouring warpmap entries on the fly instead of reading them from a warpmap.
class HierarchicalImage
{
    private:

        // Bilinearly interpolates four warped UVs, pushes the result through `PostWarp` and computes the pdf.
        //
        // dirsX/dirsY      x and y components of the four neighbouring UVs in gather order:
        //                  [0]=(0,1), [1]=(1,1), [2]=(1,0), [3]=(0,0)
        // unnormCoord      xi scaled by `lastWarpmapPixel`; its fractional part is the interpolant
        // lastWarpmapPixel warpmap size minus one
        // pdf              [out] PostWarp::forwardDensity(uv) divided by the magnitude of the Jacobian of xi -> uv
        // returns          PostWarp::warp(uv)
        template <typename PostWarp>
        static float32_t3 calculateSampleAndPdf(float32_t4 dirsX, float32_t4 dirsY, float32_t2 unnormCoord, uint32_t2 lastWarpmapPixel, NBL_REF_ARG(float32_t) pdf)
        {
            const float32_t2 interpolant = frac(unnormCoord);
            const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY));

            // horizontal differences of the y=0 row (texels 3->2) and the y=1 row (texels 0->1)
            const float32_t2 xDiffs[] = {
                uvs[2] - uvs[3],
                uvs[1] - uvs[0]
            };
            // both rows interpolated along x
            const float32_t2 yVals[] = {
                xDiffs[0] * interpolant.x + uvs[3],
                xDiffs[1] * interpolant.x + uvs[0]
            };
            const float32_t2 yDiff = yVals[1] - yVals[0];
            const float32_t2 uv = yDiff * interpolant.y + yVals[0];

            // Note(kevinyu): sinTheta is calculated twice inside PostWarp::warp and PostWarp::forwardDensity
            const float32_t3 L = PostWarp::warp(uv);

            // Jacobian of the bilinear interpolation with respect to the interpolant
            const float detInterpolJacobian = determinant(float32_t2x2(
                lerp(xDiffs[0], xDiffs[1], interpolant.y), // dF/dx
                yDiff // dF/dy
            ));

            // the interpolant advances `lastWarpmapPixel` times faster than xi, hence the extra factor
            pdf = abs(PostWarp::forwardDensity(uv) / (detInterpolJacobian * float32_t(lastWarpmapPixel.x * lastWarpmapPixel.y)));

            return L;
        }

    public:
        // Descends the luminance mip chain, at every level choosing one texel of a 2x2 footprint
        // with probability proportional to its luminance.
        //
        // luminanceAccessor provides `fetch(coord, level)` and `gather(coord, level)`
        // lumaMapSize       size of mip level 0; the width is fed to findMSB, so presumably a power of two - TODO confirm
        // xi                random numbers, one per axis
        // aspect2x1         true when the map is twice as wide as it is tall
        // returns           the chosen texel plus the leftover random number, divided by `lumaMapSize`
        template <typename LuminanceAccessor NBL_FUNC_REQUIRES (hierarchical_image::LuminanceReadAccessor<LuminanceAccessor>)
        static float32_t2 binarySearch(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, const uint32_t2 lumaMapSize, const float32_t2 xi, const bool aspect2x1)
        {
            // impl::choseSecond takes its random number by reference, so a mutable copy is needed
            float32_t2 xiRemaining = xi;

            uint32_t2 p = uint32_t2(0, 0);

            // TODO(kevinyu): Implement findMSB
            // mip level at which the map is a single texel wide
            uint32_t startMip = findMSB(lumaMapSize.x);

            if (aspect2x1) {
                // do one split in the X axis first cause penultimate full mip would have been 2x1
                --startMip;
                p.x = impl::choseSecond(luminanceAccessor.fetch(uint32_t2(0, 0), startMip), luminanceAccessor.fetch(uint32_t2(1, 0), startMip), xiRemaining.x) ? 1 : 0;
            }
            // NOTE(review): for a 1:1 map the descent starts from the 1x1 level so the first gather covers
            // the whole 2x2 level - confirm this is the intended behaviour for octahedral maps

            for (uint32_t i = startMip; i != 0;)
            {
                --i;
                p <<= 1;
                // gather order: [0]=(0,1), [1]=(1,1), [2]=(1,0), [3]=(0,0)
                const float32_t4 values = luminanceAccessor.gather(p, i);
                float32_t wx_0, wx_1;
                {
                    // total weight of the y=0 row versus the y=1 row
                    const float32_t wy_0 = values[3] + values[2];
                    const float32_t wy_1 = values[1] + values[0];
                    if (impl::choseSecond(wy_0, wy_1, xiRemaining.y))
                    {
                        p.y |= 1;
                        wx_0 = values[0];
                        wx_1 = values[1];
                    }
                    else
                    {
                        wx_0 = values[3];
                        wx_1 = values[2];
                    }
                }

                if (impl::choseSecond(wx_0, wx_1, xiRemaining.x))
                    p.x |= 1;
            }

            // NOTE(review): adding what is left of the random number presumably places the sample
            // inside the chosen texel instead of on its corner - confirm and document
            const float32_t2 directionUV = (float32_t2(p.x, p.y) + xiRemaining) / float32_t2(lumaMapSize);
            return directionUV;
        }


        // Samples a direction using a precomputed warpmap.
        //
        // warpmap     provides `gatherU(uv)` / `gatherV(uv)` returning the four neighbouring entries
        // warpmapSize size of the warpmap in texels
        // xi          random numbers in [0,1]^2
        // pdf         [out] see calculateSampleAndPdf
        template <typename WarpmapAccessor, typename PostWarp NBL_FUNC_REQUIRES(hierarchical_image::WarpmapReadAccessor<WarpmapAccessor>&& concepts::WARP<PostWarp, float32_t3>)
        static float32_t3 sampleWarpmap(NBL_CONST_REF_ARG(WarpmapAccessor) warpmap, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) {

            // NOTE(review): size-1 because the warpmap is presumably corner sampled (xi=0 and xi=1 land
            // exactly on the first and last texel) - confirm
            const uint32_t2 lastWarpmapPixel = warpmapSize - uint32_t2(1, 1);

            const float32_t2 unnormCoord = xi * float32_t2(lastWarpmapPixel);
            const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y);
            const float32_t4 dirsX = warpmap.gatherU(warpSampleCoord);
            const float32_t4 dirsY = warpmap.gatherV(warpSampleCoord);

            return calculateSampleAndPdf<PostWarp>(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf);

        }

        // Samples a direction without a warpmap: the four warpmap entries that `sampleWarpmap` would
        // have gathered are recomputed with `binarySearch`.
        //
        // luminanceMap / lumaMapSize / lumaAspect2x1  forwarded to binarySearch
        // warpmapSize  resolution of the virtual warpmap grid
        // xi           random numbers in [0,1]^2
        // pdf          [out] see calculateSampleAndPdf
        template <typename LuminanceAccessor, typename PostWarp NBL_FUNC_REQUIRES(hierarchical_image::LuminanceReadAccessor<LuminanceAccessor>&& concepts::WARP<PostWarp, float32_t3>)
        static float32_t3 sample(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceMap, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) {

            const uint32_t2 lastWarpmapPixel = warpmapSize - uint32_t2(1, 1);
            const float32_t2 lastPixel = float32_t2(lastWarpmapPixel);
            const float32_t2 unnormCoord = xi * lastPixel;

            // Grid point (x,y) of the virtual warpmap corresponds to the random number (x,y)/lastPixel.
            // The neighbours are clamped to the last grid point so binarySearch never sees a value above 1
            // (NOTE(review): mirrors clamp-to-edge addressing of a real warpmap - confirm).
            const float32_t2 baseTexel = floor(unnormCoord);
            const float32_t2 xi0 = min(baseTexel + float32_t2(0, 1), lastPixel) / lastPixel;
            const float32_t2 xi1 = min(baseTexel + float32_t2(1, 1), lastPixel) / lastPixel;
            const float32_t2 xi2 = min(baseTexel + float32_t2(1, 0), lastPixel) / lastPixel;
            const float32_t2 xi3 = baseTexel / lastPixel;

            // same order a texture gather would return
            const float32_t2 dir0 = binarySearch(luminanceMap, lumaMapSize, xi0, lumaAspect2x1);
            const float32_t2 dir1 = binarySearch(luminanceMap, lumaMapSize, xi1, lumaAspect2x1);
            const float32_t2 dir2 = binarySearch(luminanceMap, lumaMapSize, xi2, lumaAspect2x1);
            const float32_t2 dir3 = binarySearch(luminanceMap, lumaMapSize, xi3, lumaAspect2x1);

            const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x);
            const float32_t4 dirsY = float32_t4(dir0.y, dir1.y, dir2.y, dir3.y);

            return calculateSampleAndPdf<PostWarp>(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf);

        }
};

}
}
}

#endif
53 changes: 53 additions & 0 deletions include/nbl/builtin/hlsl/warp/spherical.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#ifndef _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_
#define _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_

#include <nbl/builtin/hlsl/numbers.hlsl>

namespace nbl
{
namespace hlsl
{
namespace warp
{

// Latitude-longitude warp from the unit square onto the unit sphere:
// uv.x selects the azimuth phi = 2*pi*uv.x, uv.y selects the polar angle theta = pi*uv.y measured from +Z.
class Spherical
{
    public:
        using codomain_type = float32_t3;

        // Returns the direction (cos(phi)*sin(theta), sin(phi)*sin(theta), cos(theta)) for the given uv.
        template <typename UV NBL_FUNC_REQUIRES(is_same_v<UV, float32_t2>)
        static codomain_type warp(const UV uv)
        {
            const float32_t phi = 2 * uv.x * numbers::pi<float32_t>;
            const float32_t theta = uv.y * numbers::pi<float32_t>;
            float32_t3 dir;
            dir.x = cos(phi);
            // sin(phi) recovered from cos(phi); it is negative on the second half of the turn
            dir.y = sqrt(1.f - dir.x * dir.x);
            if (uv.x > 0.5f) dir.y = -dir.y;
            const float32_t cosTheta = cos(theta);
            // theta is in [0,pi] for uv.y in [0,1], so the positive root is the right one
            const float32_t sinTheta = sqrt(1.f - cosTheta * cosTheta);
            dir.xy *= sinTheta;
            dir.z = cosTheta;
            return dir;
        }

        // Density of the warped direction for uniformly distributed uv, as a function of uv.
        // d(omega) = sin(theta) d(theta) d(phi) = sin(theta) * pi * 2*pi * du dv
        template <typename UV NBL_FUNC_REQUIRES(is_same_v<UV, float32_t2>)
        static float32_t forwardDensity(const UV uv)
        {
            const float32_t theta = uv.y * numbers::pi<float32_t>;
            return 1.0f / (sin(theta) * 2.f * numbers::pi<float32_t> * numbers::pi<float32_t>);
        }

        // The same density as `forwardDensity`, evaluated from the warped direction instead of uv.
        // Assumes `direction` is unit length, so that direction.z == cos(theta).
        // NOTE(review): the original was an unimplemented TODO; confirm this is the intended convention.
        // (parameter renamed from `out`, which is a keyword in HLSL)
        template <typename C NBL_FUNC_REQUIRES(is_same_v<C, codomain_type>)
        static float32_t backwardDensity(const C direction)
        {
            const float32_t cosTheta = direction.z;
            const float32_t sinTheta = sqrt(1.f - cosTheta * cosTheta);
            return 1.0f / (sinTheta * 2.f * numbers::pi<float32_t> * numbers::pi<float32_t>);
        }
};

}
}
}

#endif
108 changes: 108 additions & 0 deletions include/nbl/builtin/hlsl/workgroup/envmap.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@

#ifndef _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_
#define _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_

namespace nbl
{
namespace hlsl
{
namespace workgroup
{
namespace envmap
Comment on lines +9 to +11

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you're not in workgroup anything, also envmap should be image_importance_sampling

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually nbl::hlsl::sampling and nbl/builtin/hlsl/sampling folder

{
namespace impl
{
// Decides between two weighted alternatives using the random number `xi`.
// The probability handed to math::partitionRandVariable for the first alternative is
// first/(first+second); the value that call returns is passed straight through.
// `xi` is taken by reference and forwarded as such.
bool choseSecond(float first, float second, NBL_REF_ARG(float) xi)
{
    // written as 1/(1+second/first) rather than first/(first+second):
    // numerical resilience against IEEE754
    const float ratio = second / first;
    float probabilityOfFirst = 1.0f / (1.0f + ratio);
    // third argument of partitionRandVariable is an output this caller does not need
    float unusedOutput = 0.0f;
    const bool choice = math::partitionRandVariable(probabilityOfFirst, xi, unusedOutput);
    return choice;
}

}

}
}
}
}

#ifdef __HLSL_VERSION
namespace nbl
{
namespace hlsl
{
namespace workgroup
{
namespace envmap
{

// Computes one warpmap entry per invocation: the invocation index is turned into a random number xi,
// the luminance mip chain is descended with that xi, and the resulting UV is written to the output accessor.
//
// REVIEW: rename to Image , this could be used to importance sample without a warpmap given a xi like all the other sampling structs in nbl/builtin/hlsl/sampling
// REVIEW: warpmap just caches the results in a LUT for a grid of xi inputs
struct WarpmapGeneration
{

    // luminanceAccessor provides `get(coord, level, offset)`
    // outputAccessor    provides `set(index, uv)`
    // lumaMapSize       size of luminance mip level 0
    // warpMapSize       size of the warpmap being generated
    //
    // TODO(kevinyu): Should lumaMapSize and warpMapSize be provided by the Accessor?
    // REVIEW: not necessary, this is fine, make the const though
    // REVIEW: Take the xi from the outside and never ask about the warpMapSize
    // REVIEW: also no write accessor necessary
    template <typename LuminanceAccessor, typename OutputAccessor NBL_FUNC_REQUIRES (envmap::LuminanceReadAccessor<LuminanceAccessor> && envmap::WarpmapWriteAccessor<OutputAccessor>)
    static void __call(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, NBL_REF_ARG(OutputAccessor) outputAccessor, uint32_t2 lumaMapSize, uint32_t2 warpMapSize)
    {
        const uint32_t threadID = uint32_t(SubgroupContiguousIndex());
        const uint32_t2 lastWarpMapPixel = warpMapSize - uint32_t2(1, 1);

        // REVIEW: nope, threadID is 1D, AND its in 0,512
        // REVIEW: There's nothing "workgroup" about this.
        // REVIEW: Take the xi from the outside and never ask about the warpMapSize
        // REVIEW: also when you move the code outside, leave comments about why the xi calculation is like that (corner sampled images)
        // NOTE(review): the scalar threadID is still broadcast to both axes below; mapping it to a 2D
        // texel needs the redesign requested above and is intentionally not guessed at here
        if (all(threadID < warpMapSize))
        {
            float32_t2 xi = float32_t2(threadID) / float32_t2(lastWarpMapPixel);

            uint32_t2 p;
            p.y = 0;

            // TODO(kevinyu): Implement findMSB
            // REVIEW: maybe ask for the uint32_t lumaMapWidthLog2 and bool aspectRatio2_1 instead of lumaMapSize
            const uint32_t mip2x1 = findMSB(lumaMapSize.x) - 1;

            // do one split in the X axis first cause penultimate full mip would have been 2x1
            // REVIEW: to support octahedral maps (which are 1:1 apsect raito) only do this split if (aspectRatio2_1), and initialize p.x to 0 at the time p.y gets initialized
            p.x = impl::choseSecond(luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(0, 0)), luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(1, 0)), xi.x) ? 1 : 0;

            for (uint32_t i = mip2x1; i != 0;)
            {
                --i;
                p <<= 1;
                // REVIEW: the get signature should be different, should be as if a textureGatherLod existed (which allows accessor to use textureGather + descriptor indexing for the mip-levels internally)
                // 2x2 footprint in gather order: [0]=(0,1), [1]=(1,1), [2]=(1,0), [3]=(0,0)
                const float32_t4 values = float32_t4(
                    luminanceAccessor.get(p, i, uint32_t2(0, 1)),
                    luminanceAccessor.get(p, i, uint32_t2(1, 1)),
                    luminanceAccessor.get(p, i, uint32_t2(1, 0)),
                    luminanceAccessor.get(p, i, uint32_t2(0, 0))
                );

                float32_t wx_0, wx_1;
                {
                    // total weight of the y=0 row versus the y=1 row
                    const float32_t wy_0 = values[3] + values[2];
                    const float32_t wy_1 = values[1] + values[0];
                    if (impl::choseSecond(wy_0, wy_1, xi.y))
                    {
                        p.y |= 1;
                        wx_0 = values[0];
                        wx_1 = values[1];
                    }
                    else
                    {
                        wx_0 = values[3];
                        wx_1 = values[2];
                    }
                }

                if (impl::choseSecond(wx_0, wx_1, xi.x))
                {
                    p.x |= 1;
                }
            }

            // REVIEW: comments, comments, lets not let the discord discussions go to waste
            const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize);

            // REVIEW: don't use an output accessor here because we just want a sampling function, so just return directionUV
            outputAccessor.set(threadID, directionUV);
        }
    }

};

}
}
}
}
#endif

#endif
Loading