Skip to content

Commit

Permalink
Refactor downsample compute shader
Browse files Browse the repository at this point in the history
separate individual mip algorithms into functions.
  • Loading branch information
httpdigest committed Oct 31, 2021
1 parent a35f8c5 commit 5149b90
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 19 deletions.
51 changes: 35 additions & 16 deletions res/org/lwjgl/demo/opengl/shader/downsampling/downsample.cs.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,25 @@ int unpack(int x) {

shared vec4 sm[4][4];

void main(void) {
void mip1(ivec2 i, inout vec4 t) {
// compute mip 1 using linear filtering
/*
* We just use a sampler with linear filter and
* sample exactly between four texels.
*/
ivec2 ts = textureSize(baseImage, 0);

// the actual size of our work items is only half the baseImage size, because for the first mip level
// each work item already uses linear filtering with a sampler to gather a 2x2 texel average
ivec2 s = ts / ivec2(2);

// Compute the (x, y) coordinates of the current work item within its workgroup using z-order curve
ivec2 l = ivec2(unpack(int(gl_LocalInvocationID.x)),
unpack(int(gl_LocalInvocationID.x >> 1u)));

// Compute the global (x, y) coordinate of this work item
ivec2 i = ivec2(gl_WorkGroupID.xy) * ivec2(16) + l;

// compute mip 1 using linear filtering
if (i.x >= s.x || i.y >= s.y)
return;
// Compute a texture coordinate right at the corner between four texels
vec2 tc = (vec2(i * 2) + vec2(1.0)) / vec2(ts);
vec4 t = textureLod(baseImage, tc, 0.0);
t = textureLod(baseImage, tc, 0.0);
imageStore(mips[0], i, t);
}

void mip2(ivec2 i, inout vec4 t) {
// compute mip 2 using subgroup quad sharing
/*
* The trick here is to assume a 1:1 correspondence between subgroup invocation ids
Expand All @@ -68,19 +65,23 @@ void main(void) {
t = (t + h + v + d) * vec4(0.25);
if ((gl_SubgroupInvocationID & 3) == 0)
imageStore(mips[1], i/ivec2(2), t);
}

void mip3(ivec2 i, inout vec4 t) {
// compute mip 3 using subgroup xor shuffles
/*
* The trick here is to exchange information between subgroup items with a stride
* of 4 items. In order to do this, we have subgroupShuffleXor().
*/
h = subgroupShuffleXor(t, 4);
v = subgroupShuffleXor(t, 8);
d = subgroupShuffleXor(t, 12);
vec4 h = subgroupShuffleXor(t, 4);
vec4 v = subgroupShuffleXor(t, 8);
vec4 d = subgroupShuffleXor(t, 12);
t = (t + h + v + d) * vec4(0.25);
if ((gl_SubgroupInvocationID & 15) == 0)
imageStore(mips[2], i/ivec2(4), t);
}

void mip4(ivec2 l, ivec2 i, inout vec4 t) {
// compute mip 4 using shared memory
/*
* For mip 4 we essentially have 8x8 work items.
Expand All @@ -94,12 +95,14 @@ void main(void) {
t = (sm[smc.x][smc.y] + sm[smi.x][smc.y] + sm[smc.x][smi.y] + sm[smi.x][smi.y]) * 0.25;
imageStore(mips[3], i/ivec2(8), t);
}
}

void mip5(ivec2 l, ivec2 i, vec4 t) {
// compute mip 5 also using shared memory
/*
* For mip 5 we have 16x16 work items.
*/
smc = l / ivec2(8);
ivec2 smc = l / ivec2(8);
if ((l.x & 7) == 0 && (l.y & 7) == 0)
sm[smc.x][smc.y] = t;
barrier();
Expand All @@ -108,3 +111,19 @@ void main(void) {
imageStore(mips[4], i/ivec2(16), t);
}
}

/*
 * Shader entry point: derives the coordinates of the current work item
 * and then runs the individual mip stages one after another.
 */
void main(void) {
  // Local (x, y) position of this work item within its workgroup, decoded
  // from the linear local invocation id using a z-order curve
  uint lid = gl_LocalInvocationID.x;
  int localX = unpack(int(lid));
  int localY = unpack(int(lid >> 1u));
  ivec2 localPos = ivec2(localX, localY);

  // Global (x, y) position: workgroup id scaled by 16 plus the local offset
  ivec2 globalPos = ivec2(gl_WorkGroupID.xy) * ivec2(16) + localPos;

  // Value threaded through the stages: mip1..mip4 receive it as an inout
  // parameter, mip5 receives it by value
  vec4 texel = vec4(0.0);

  mip1(globalPos, texel);
  mip2(globalPos, texel);
  mip3(globalPos, texel);
  mip4(localPos, globalPos, texel);
  mip5(localPos, globalPos, texel);
}
6 changes: 3 additions & 3 deletions src/org/lwjgl/demo/vulkan/raytracing/SdfBricks.java
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ private static VoxelField buildVoxelField() throws IOException {
Vector3i min = new Vector3i(Integer.MAX_VALUE);
Vector3i max = new Vector3i(Integer.MIN_VALUE);
byte[] field = new byte[(256 + 2) * (256 + 2) * (256 + 2)];
try (InputStream is = getSystemResourceAsStream("voxelmodel/vox/monument/monu2.vox");
try (InputStream is = getSystemResourceAsStream("org/lwjgl/demo/models/mikelovesrobots_mmmm/scene_house5.vox");
BufferedInputStream bis = new BufferedInputStream(is)) {
new MagicaVoxelLoader().read(bis, new MagicaVoxelLoader.Callback() {
public void voxel(int x, int y, int z, byte c) {
Expand Down Expand Up @@ -1042,8 +1042,8 @@ private static Geometry createGeometry() throws IOException {
int[] num = {0};
new GreedyVoxels(voxelField.ny, voxelField.py, voxelField.w, voxelField.d, new GreedyVoxels.Callback() {
public void voxel(int x0, int y0, int z0, int w, int h, int d, int v) {
aabbs.putFloat(x0*8).putFloat(y0*9.6f).putFloat(z0*8);
aabbs.putFloat((x0+w)*8).putFloat((y0+h)*9.6f+1.8f).putFloat((z0+d)*8);
aabbs.putFloat(x0).putFloat(y0).putFloat(z0);
aabbs.putFloat(x0+w).putFloat(y0+h+0.1f).putFloat(z0+d);
num[0]++;
}
}).merge(voxelField.field);
Expand Down

0 comments on commit 5149b90

Please sign in to comment.