-
Notifications
You must be signed in to change notification settings - Fork 67
Rework environment map importance sampling to vulkan and hlsl #946
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
b99ae6e
b9537ea
a737173
64349db
e44fcf4
9b29dfd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| #ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ | ||
| #define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_ | ||
|
|
||
| #include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" | ||
| #include "nbl/builtin/hlsl/fft/common.hlsl" | ||
|
|
||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace concepts | ||
| { | ||
|
|
||
| // Concept `WARP<U,C>`: `U` is a warp from the unit square to `C`, i.e. it exposes | |
| // - `warp(float32_t2)` returning `C` | |
| // - `forwardDensity(float32_t2)` returning `float32_t` | |
| // - `backwardDensity(C)` returning `float32_t` | |
| // declare concept | |
| #define NBL_CONCEPT_NAME WARP | |
| #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename) | |
| #define NBL_CONCEPT_TPLT_PRM_NAMES (U)(C) | |
| // not the greatest syntax but works | |
| // the placeholder names below get #define'd, so they must not collide with the member names being tested (`warp`) nor with the HLSL `out` keyword | |
| #define NBL_CONCEPT_PARAM_0 (warper,U) | |
| #define NBL_CONCEPT_PARAM_1 (uv,float32_t2) | |
| #define NBL_CONCEPT_PARAM_2 (dst,C) | |
| // start concept | |
| NBL_CONCEPT_BEGIN(3) | |
| #define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 | |
| #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 | |
| #define dst NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 | |
| NBL_CONCEPT_END( | |
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp(uv)) , ::nbl::hlsl::is_same_v, C)) | |
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity(uv)) , ::nbl::hlsl::is_same_v, float32_t)) | |
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity(dst)) , ::nbl::hlsl::is_same_v, float32_t)) | |
| ); | |
| #undef dst | |
| #undef warper | |
| #undef uv | |
| #include <nbl/builtin/hlsl/concepts/__end.hlsl> | |
|
|
||
| } | ||
| } | ||
| } | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. | ||
| // This file is part of the "Nabla Engine". | ||
| // For conditions of distribution and use, see copyright notice in nabla.h | ||
|
|
||
| #ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ | ||
| #define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ | ||
|
|
||
| #include <nbl/builtin/hlsl/concepts/warp.hlsl> | ||
| #include <nbl/builtin/hlsl/concepts/accessors/hierarchical_image.hlsl> | ||
|
|
||
| namespace nbl | |
| { | |
| namespace hlsl | |
| { | |
| namespace sampling | |
| { | |
|
|
||
| // Importance sampling of an image through the mip chain of its luminance. | |
| // `sampleWarpmap` reads a precomputed warp map, `sample` redoes the hierarchical search for the | |
| // four warp map texels a gather would have returned; both finish in `calculateSampleAndPdf`. | |
| // NOTE(review): `impl::choseSecond` is declared in nbl::hlsl::workgroup::envmap::impl in this change set, | |
| // it has to be made visible from this namespace for `binarySearch` to compile. | |
| class HierarchicalImage | |
| { | |
| private: | |
|
|
||
| // Bilinearly blends four warp map entries, pushes the result through `PostWarp::warp` and computes the pdf. | |
| // dirsX, dirsY: x and y components of the four entries, ordered by texel offset (0,1), (1,1), (1,0), (0,0) | |
| // unnormCoord: position in warp map texel units, its fractional part is the blend weight | |
| // lastWarpmapPixel: warp map size minus one | |
| // pdf: receives abs(PostWarp::forwardDensity(uv) / (Jacobian determinant of the blend * lastWarpmapPixel.x * lastWarpmapPixel.y)) | |
| // returns `PostWarp::warp` of the blended uv | |
| template <typename PostWarp> | |
| static float32_t3 calculateSampleAndPdf(float32_t4 dirsX, float32_t4 dirsY, float32_t2 unnormCoord, uint32_t2 lastWarpmapPixel, NBL_REF_ARG(float32_t) pdf) | |
| { | |
| const float32_t2 interpolant = frac(unnormCoord); | |
| const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY)); | |
|
|
||
| // differences along x, first for the row at offset y=0 then for the row at offset y=1 | |
| const float32_t2 xDiffs[2] = { | |
| uvs[2] - uvs[3], | |
| uvs[1] - uvs[0] | |
| }; | |
| const float32_t2 yVals[2] = { | |
| xDiffs[0] * interpolant.x + uvs[3], | |
| xDiffs[1] * interpolant.x + uvs[0] | |
| }; | |
| const float32_t2 yDiff = yVals[1] - yVals[0]; | |
| const float32_t2 uv = yDiff * interpolant.y + yVals[0]; | |
|
|
||
| // Note(kevinyu): sinTheta is calculated twice inside PostWarp::warp and PostWarp::forwardDensity | |
| const float32_t3 L = PostWarp::warp(uv); | |
|
|
||
| const float detInterpolJacobian = determinant(float32_t2x2( | |
| lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx | |
| yDiff // second column dFdy | |
| )); | |
|
|
||
| // abs() spans the whole quotient because the determinant can be negative | |
| pdf = abs(PostWarp::forwardDensity(uv) / (detInterpolJacobian * float32_t(lastWarpmapPixel.x * lastWarpmapPixel.y))); | |
|
|
||
| return L; | |
| } | |
|
|
||
| public: | |
| // Walks the luminance mip chain from the coarsest level down to level 0, picking one child per level with `xi`. | |
| // luminanceAccessor: provides `fetch(texel, mip)` and `gather(texel, mip)` | |
| // lumaMapSize: size of mip level 0 (assumed to be a power of two - TODO confirm) | |
| // xi: random pair that drives the choices | |
| // aspect2x1: true when the map is twice as wide as tall, so that the last full mip is 2x1 | |
| // returns the chosen position as a UV in the luminance map | |
| template <typename LuminanceAccessor NBL_FUNC_REQUIRES (hierarchical_image::LuminanceReadAccessor<LuminanceAccessor>) | |
| static float32_t2 binarySearch(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, const uint32_t2 lumaMapSize, const float32_t2 xi, const bool aspect2x1) | |
| { | |
| // choseSecond takes its random number by reference, so it needs a mutable copy | |
| float32_t2 xiRemapped = xi; | |
| uint32_t2 p = uint32_t2(0, 0); | |
|
|
||
| // TODO(kevinyu): Implement findMSB | |
| // mip level the descent starts from: the 1x1 mip for a square map, the 2x1 mip for a 2:1 map | |
| uint32_t startMip = findMSB(lumaMapSize.x); | |
| if (aspect2x1) | |
| { | |
| startMip -= 1; | |
| // do one split in the X axis first cause penultimate full mip would have been 2x1 | |
| p.x = impl::choseSecond(luminanceAccessor.fetch(uint32_t2(0, 0), startMip), luminanceAccessor.fetch(uint32_t2(1, 0), startMip), xiRemapped.x) ? 1 : 0; | |
| } | |
|
|
||
| for (uint32_t i = startMip; i != 0;) | |
| { | |
| --i; | |
| p <<= 1; | |
| const float32_t4 values = luminanceAccessor.gather(p, i); | |
| float32_t wx_0, wx_1; | |
| { | |
| // first choose the row, then the column inside the chosen row | |
| const float32_t wy_0 = values[3] + values[2]; | |
| const float32_t wy_1 = values[1] + values[0]; | |
| if (impl::choseSecond(wy_0, wy_1, xiRemapped.y)) | |
| { | |
| p.y |= 1; | |
| wx_0 = values[0]; | |
| wx_1 = values[1]; | |
| } | |
| else | |
| { | |
| wx_0 = values[3]; | |
| wx_1 = values[2]; | |
| } | |
| } | |
|
|
||
| if (impl::choseSecond(wx_0, wx_1, xiRemapped.x)) | |
| p.x |= 1; | |
| } | |
|
|
||
| // TODO(kevinyu): Add some comment why we add xi. | |
| const float32_t2 directionUV = (float32_t2(p.x, p.y) + xiRemapped) / float32_t2(lumaMapSize); | |
| return directionUV; | |
| } | |
|
|
||
| // Samples through a precomputed warp map. | |
| // warpmap: provides `gatherU(uv)` and `gatherV(uv)` | |
| // warpmapSize: size of the warp map in texels | |
| // xi: random pair in the unit square | |
| // pdf: receives the density computed by `calculateSampleAndPdf` | |
| template <typename WarpmapAccessor, typename PostWarp NBL_FUNC_REQUIRES(hierarchical_image::WarpmapReadAccessor<WarpmapAccessor>&& concepts::WARP<PostWarp, float32_t3>) | |
| static float32_t3 sampleWarpmap(NBL_CONST_REF_ARG(WarpmapAccessor) warpmap, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) | |
| { | |
| // TODO(kevinyu): Add some comment why we substract by 1 | |
| const uint32_t2 lastWarpmapPixel = warpmapSize - uint32_t2(1, 1); | |
|
|
||
| const float32_t2 unnormCoord = xi * float32_t2(lastWarpmapPixel); | |
| const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y); | |
| const float32_t4 dirsX = warpmap.gatherU(warpSampleCoord); | |
| const float32_t4 dirsY = warpmap.gatherV(warpSampleCoord); | |
|
|
||
| return calculateSampleAndPdf<PostWarp>(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); | |
| } | |
|
|
||
| // Same as `sampleWarpmap` but without a warp map: the four entries are recomputed with `binarySearch`. | |
| // luminanceMap, lumaMapSize, lumaAspect2x1: forwarded to `binarySearch` | |
| // warpmapSize: size of the warp map being emulated | |
| // xi: random pair in the unit square | |
| // pdf: receives the density computed by `calculateSampleAndPdf` | |
| template <typename LuminanceAccessor, typename PostWarp NBL_FUNC_REQUIRES(hierarchical_image::LuminanceReadAccessor<LuminanceAccessor>&& concepts::WARP<PostWarp, float32_t3>) | |
| static float32_t3 sample(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceMap, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf) | |
| { | |
| const uint32_t2 lastWarpmapPixel = warpmapSize - uint32_t2(1, 1); | |
| const float32_t2 unnormCoord = xi * float32_t2(lastWarpmapPixel); | |
|
|
||
| // the 2x2 texel footprint around the sample; the far corner is clamped to stay inside the map | |
| const uint32_t2 texel00 = uint32_t2(unnormCoord); | |
| const uint32_t2 texel11 = min(texel00 + uint32_t2(1, 1), lastWarpmapPixel); | |
| // NOTE(review): presumes warp map texel k holds the search result for xi = k / lastWarpmapPixel - confirm against the warp map generation | |
| const float32_t2 xi00 = float32_t2(texel00) / float32_t2(lastWarpmapPixel); | |
| const float32_t2 xi11 = float32_t2(texel11) / float32_t2(lastWarpmapPixel); | |
|
|
||
| // same ordering as consumed by calculateSampleAndPdf: (0,1), (1,1), (1,0), (0,0) | |
| const float32_t2 dir0 = binarySearch(luminanceMap, lumaMapSize, float32_t2(xi00.x, xi11.y), lumaAspect2x1); | |
| const float32_t2 dir1 = binarySearch(luminanceMap, lumaMapSize, xi11, lumaAspect2x1); | |
| const float32_t2 dir2 = binarySearch(luminanceMap, lumaMapSize, float32_t2(xi11.x, xi00.y), lumaAspect2x1); | |
| const float32_t2 dir3 = binarySearch(luminanceMap, lumaMapSize, xi00, lumaAspect2x1); | |
|
|
||
| const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x); | |
| const float32_t4 dirsY = float32_t4(dir0.y, dir1.y, dir2.y, dir3.y); | |
|
|
||
| return calculateSampleAndPdf<PostWarp>(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf); | |
| } | |
| }; | |
|
|
||
| } | |
| } | |
| } | |
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| #ifndef _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ | ||
| #define _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_ | ||
|
|
||
| #include <nbl/builtin/hlsl/numbers.hlsl> | ||
|
|
||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace warp | ||
| { | ||
|
|
||
| // Warp from the unit square to the unit sphere: uv.x spans the azimuth (2*pi), uv.y spans the polar angle (pi), | |
| // z is the polar axis. | |
| class Spherical | |
| { | |
| public: | |
| using codomain_type = float32_t3; | |
|
|
||
| // Returns the unit direction for `uv`. | |
| template <typename UV NBL_FUNC_REQUIRES(is_same_v<UV, float32_t2>) | |
| static codomain_type warp(const UV uv) | |
| { | |
| const float32_t theta = uv.y * numbers::pi<float32_t>; | |
| float32_t3 dir; | |
| dir.x = cos(uv.x * 2.f * numbers::pi<float32_t>); | |
| dir.y = sqrt(1.f - dir.x * dir.x); | |
| // the square root only gives the magnitude of sin(phi), which is negative past half a turn | |
| if (uv.x > 0.5f) dir.y = -dir.y; | |
| const float32_t cosTheta = cos(theta); | |
| const float32_t sinTheta = sqrt(1.f - cosTheta * cosTheta); | |
| dir.xy *= sinTheta; | |
| dir.z = cosTheta; | |
| return dir; | |
| } | |
|
|
||
| // Returns 1 / (2 * pi^2 * sin(theta)) with theta = uv.y * pi. | |
| template <typename UV NBL_FUNC_REQUIRES(is_same_v<UV, float32_t2>) | |
| static float32_t forwardDensity(const UV uv) | |
| { | |
| const float32_t theta = uv.y * numbers::pi<float32_t>; | |
| return 1.0f / (sin(theta) * 2.f * numbers::pi<float32_t> * numbers::pi<float32_t>); | |
| } | |
|
|
||
| // Same density as `forwardDensity`, evaluated from a direction produced by `warp`. | |
| // The parameter is not called `out` because that is a keyword in HLSL. | |
| template <typename C NBL_FUNC_REQUIRES(is_same_v<C, codomain_type>) | |
| static float32_t backwardDensity(const C direction) | |
| { | |
| // warp() stores cos(theta) in z, so sin(theta) is recovered without inverse trigonometry; | |
| // max() guards against a z that is marginally outside [-1,1] | |
| const float32_t sinTheta = sqrt(max(1.f - direction.z * direction.z, 0.f)); | |
| return 1.0f / (sinTheta * 2.f * numbers::pi<float32_t> * numbers::pi<float32_t>); | |
| } | |
| }; | |
|
|
||
| } | ||
| } | ||
| } | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
|
|
||
| #ifndef _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ | ||
| #define _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ | ||
|
|
||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace workgroup | ||
| { | ||
| namespace envmap | ||
| { | ||
| namespace impl | ||
| { | ||
| // Decides between two weighted alternatives using `xi`. | |
| // first, second: weights of the two alternatives | |
| // xi: random number, handed by reference to math::partitionRandVariable | |
| // returns whatever math::partitionRandVariable returns for the probability of the first alternative | |
| bool choseSecond(float first, float second, NBL_REF_ARG(float) xi) | |
| { | |
| // numerical resilience against IEEE754 | |
| float weightRatio = second / first; | |
| float probabilityOfFirst = 1.0f / (1.0f + weightRatio); | |
| // the call requires a third argument, its value is not used afterwards | |
| float unused = 0.0f; | |
| return math::partitionRandVariable(probabilityOfFirst, xi, unused); | |
| } | |
|
|
||
| } | ||
|
|
||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #ifdef __HLSL_VERSION | ||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace workgroup | ||
| { | ||
| namespace envmap | ||
| { | ||
|
|
||
| struct WarpmapGeneration | |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rename to warpmap just caches the results in a LUT for a grid of |
||
| { | |
|
|
||
| // Computes one warp map entry per invocation by descending the luminance mip chain, and stores it through `outputAccessor`. | |
| // luminanceAccessor: provides `get(texel, mip, offset)` | |
| // outputAccessor: provides `set(index, uv)` | |
| // lumaMapSize: size of luminance mip level 0, warpMapSize: size of the warp map | |
| template <typename LuminanceAccessor, typename OutputAccessor NBL_FUNC_REQUIRES (envmap::LuminanceReadAccessor<LuminanceAccessor> && envmap::WarpmapWriteAccessor<OutputAccessor>) | |
| // TODO(kevinyu): Should lumapMapSize and warpMapSize provided by Accessor? | |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not necessary, this is fine, make the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Take the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also no write accessor necessary |
||
| static void __call(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, NBL_REF_ARG(OutputAccessor) outputAccessor, uint32_t2 lumaMapSize, uint32_t2 warpMapSize) | |
| { | |
| const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); | |
| const uint32_t2 lastWarpMapPixel = warpMapSize - uint32_t2(1, 1); | |
| // NOTE(review): threadID is a 1D index, it is read here as a row-major index into the warp map and | |
| // outputAccessor.set is presumed to take that same index - confirm against the dispatch and the accessor | |
| const uint32_t2 pixel = uint32_t2(threadID % warpMapSize.x, threadID / warpMapSize.x); | |
|
|
||
| if (pixel.y < warpMapSize.y) | |
| { | |
| float32_t2 xi = float32_t2(pixel) / float32_t2(lastWarpMapPixel); | |
|
Comment on lines
+47
to
+52
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nope, threadID is 1D, AND its in 0,512 There's nothing "workgroup" about this. Take the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also when you move the code outside, leave comments about why the |
||
|
|
||
| uint32_t2 p; | |
| p.y = 0; | |
|
|
||
| // TODO(kevinyu): Implement findMSB | |
| const uint32_t mip2x1 = findMSB(lumaMapSize.x) - 1; | |
|
Comment on lines
+57
to
+58
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe ask for the |
||
| // do one split in the X axis first cause penultimate full mip would have been 2x1 | |
| p.x = impl::choseSecond(luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(0, 0)), luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(1, 0)), xi.x) ? 1 : 0; | |
|
Comment on lines
+59
to
+60
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. to support octahedral maps (which are 1:1 apsect raito) only do this split |
||
| for (uint32_t i = mip2x1; i != 0;) | |
| { | |
| --i; | |
| p <<= 1; | |
| // the four children of p in mip i, ordered by offset (0,1), (1,1), (1,0), (0,0) | |
| const float32_t4 values = float32_t4( | |
| luminanceAccessor.get(p, i, uint32_t2(0, 1)), | |
| luminanceAccessor.get(p, i, uint32_t2(1, 1)), | |
| luminanceAccessor.get(p, i, uint32_t2(1, 0)), | |
| luminanceAccessor.get(p, i, uint32_t2(0, 0)) | |
| ); | |
|
Comment on lines
+65
to
+70
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the |
||
|
|
||
| float32_t wx_0, wx_1; | |
| { | |
| // first choose the row, then the column inside the chosen row | |
| const float32_t wy_0 = values[3] + values[2]; | |
| const float32_t wy_1 = values[1] + values[0]; | |
| if (impl::choseSecond(wy_0, wy_1, xi.y)) | |
| { | |
| p.y |= 1; | |
| wx_0 = values[0]; | |
| wx_1 = values[1]; | |
| } | |
| else | |
| { | |
| wx_0 = values[3]; | |
| wx_1 = values[2]; | |
| } | |
| } | |
|
|
||
| if (impl::choseSecond(wx_0, wx_1, xi.x)) | |
| { | |
| p.x |= 1; | |
| } | |
| } | |
|
|
||
| const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize); | |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comments, comments, lets not let the discord discussions go to waste |
||
| outputAccessor.set(threadID, directionUV); | |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't use an output accessor here because we just want a sampling function, so just return |
||
| } | |
| } | |
|
|
||
| }; | |
|
|
||
| } | ||
| } | ||
| } | ||
| } | ||
| #endif | ||
|
|
||
| #endif | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you're not in workgroup anything, also
`envmap` should be `image_importance_sampling`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually
`nbl::hlsl::sampling` and `nbl/builtin/hlsl/sampling` folder