165 lines
5.2 KiB
GLSL
165 lines
5.2 KiB
GLSL
#version 450
|
|
#pragma shader_stage(compute)
|
|
|
|
#extension GL_KHR_shader_subgroup_quad : require
|
|
#extension GL_KHR_shader_subgroup_arithmetic : require
|
|
#extension GL_EXT_samplerless_texture_functions : enable
|
|
|
|
#define COMPUTE 1
|
|
#define SPD 1
|
|
|
|
#include "../../common/macros.h"
|
|
#include "../../common/structs.h"
|
|
#include "../../common/functions.h"
|
|
|
|
// Workgroup shape: 8 x 8 x 8 = 512 invocations, matching the 512-entry shared array below.
layout (local_size_x = 8, local_size_y = 8, local_size_z = 8) in;
|
|
|
|
// Specialization constants sizing the descriptor arrays declared below.
layout (constant_id = 0) const uint CASCADES = 8;
|
|
layout (constant_id = 1) const uint MIPS = 9; // 256^3 texture = 9 mips
|
|
|
|
// Per-dispatch parameters.
//   mips            - compared against mip level numbers in main() to decide which levels are written;
//                     also used as the stride when indexing voxelMips.
//   cascade         - selects the element of voxelRadiance that is read, and offsets the voxelMips index.
//   numWorkGroups   - compared against the atomic ticket to detect the last workgroup to finish.
//   workGroupOffset - not referenced by the code in this file section.
layout(push_constant) uniform PushBlock {
|
|
uint mips;
|
|
uint cascade;
|
|
uint numWorkGroups;
|
|
uint workGroupOffset;
|
|
} PushConstant_;
|
|
|
|
// Source volumes; main() reads level 0 of voxelRadiance[cascade] with texelFetch.
layout (binding = 0) uniform sampler3D voxelRadiance[CASCADES];
|
|
// Destination images; main() addresses them as cascade + mips * (level - 1) for level >= 1.
// NOTE(review): with the default constants that expression can exceed CASCADES * (MIPS - 1) - 1;
// confirm the intended layout against the host-side descriptor setup.
layout (binding = 1, rgba8) coherent uniform image3D voxelMips[CASCADES * (MIPS - 1)];
|
|
|
|
// Counter incremented once per workgroup (by invocation 0) and reset to 0 by the last workgroup.
layout (binding = 2, std430) buffer AtomicCounter {
|
|
uint counter;
|
|
} spdCounter;
|
|
|
|
|
|
// Workgroup-shared scratch: one reduced texel per invocation.
// 8^3 = 512 threads
|
|
shared vec4 s_colorAlpha[512];
|
|
// Set by invocation 0 to 1 when this workgroup drew the final ticket, otherwise 0.
shared uint s_isLastWG;
|
|
|
|
// Collapses eight texels into one.
//
// The alpha channel of every texel is treated as an 8-bit value whose low
// nibble is a weight in [0, 15]; the rgb channels are averaged using those
// weights. The returned alpha re-packs two nibbles: the high one holds the
// quantised result of luma() on the averaged colour, the low one holds the
// quantised mean weight. luma() is defined outside this block.
//
//   v       - the eight input texels.
//   returns - averaged rgb plus the re-packed alpha, normalised to [0, 1].
vec4 reduce8(vec4 v[8]) {
    vec3 weightedRgb = vec3(0.0);
    float weightSum = 0.0;

    for ( int k = 0; k < 8; ++k ) {
        // Recover the 8-bit alpha (rounded) and keep only its low nibble.
        uint bits = uint(v[k].a * 255.0 + 0.5);
        float w = float(bits & 0xFu) / 15.0;
        weightedRgb += v[k].rgb * w;
        weightSum += w;
    }

    // Normalise only when there is a meaningful total weight; otherwise the
    // accumulated colour is passed through unchanged.
    vec3 color = ( weightSum > 0.001 ) ? weightedRgb / weightSum : weightedRgb;
    float alpha = weightSum / 8.0;

    // Quantise both values to 4 bits (truncating) and pack them into one byte.
    uint lumNibble = uint(clamp(luma(color), 0.0, 1.0) * 15.0) & 0xFu;
    uint alphaNibble = uint(clamp(alpha, 0.0, 1.0) * 15.0) & 0xFu;
    uint repacked = (lumNibble << 4) | alphaNibble;

    return vec4(color, float(repacked) / 255.0);
}
|
|
|
|
// Unflattens a linear index into 3D coordinates for a grid that is sizeX wide
// and sizeY tall, with x varying fastest and z slowest.
//
//   idx     - linear index.
//   sizeX   - grid extent along x.
//   sizeY   - grid extent along y.
//   returns - (x, y, z); z is not wrapped, so it grows with idx.
ivec3 index3D(uint idx, uint sizeX, uint sizeY) {
    uint row = idx / sizeX;              // number of complete x-rows before idx
    uint x = idx % sizeX;
    uint y = row % sizeY;
    uint z = idx / (sizeX * sizeY);
    return ivec3( x, y, z );
}
|
|
|
|
// Reduces one 2x2x2 neighbourhood of s_colorAlpha into a single texel.
//
// s_colorAlpha is interpreted as a cube with edge length 2 * dst, flattened
// with x varying fastest. The invocation's index selects a cell of the dst^3
// output grid, and the eight source entries covering that cell are gathered
// and passed to reduce8().
//
//   lid     - linear index of the output cell within the dst^3 grid.
//   dst     - edge length of the output grid.
//   returns - the reduced texel.
vec4 reduceFromShared( uint lid, uint dst ) {
    uint srcEdge = dst * 2;
    ivec3 corner = index3D(lid, dst, dst) * 2;

    vec4 samples[8];
    for ( int k = 0; k < 8; ++k ) {
        // Bits 0, 1 and 2 of k are the x, y and z offsets, giving the
        // sample order x + 2*y + 4*z.
        ivec3 p = corner + ivec3(k & 1, (k >> 1) & 1, (k >> 2) & 1);
        uint flatIdx = uint(p.x) + uint(p.y) * srcEdge + uint(p.z) * srcEdge * srcEdge;
        samples[k] = s_colorAlpha[flatIdx];
    }

    return reduce8(samples);
}
|
|
|
|
// Index into voxelMips of the image that holds mip `level` (level >= 1) of the
// cascade selected by the push constants.
// NOTE(review): the stride is PushConstant_.mips while the array is declared
// with CASCADES * (MIPS - 1) elements; with the default constants the largest
// index produced here exceeds that size. Confirm the layout against the host.
uint mipImageIndex( uint level ) {
    return PushConstant_.cascade + PushConstant_.mips * (level - 1u);
}

// Runs three consecutive 2x2x2 reductions (8^3 -> 4^3 -> 2^3 -> 1^3) over the
// texels currently held in s_colorAlpha, writing the results to mips
// firstMip, firstMip + 1 and firstMip + 2. Levels >= PushConstant_.mips are
// skipped.
//
//   lid      - gl_LocalInvocationIndex of the caller.
//   tileID   - tile coordinate; each result is stored at tileID * edge + cell.
//   firstMip - mip level produced by the first of the three steps.
//
// Must be called by every invocation of the workgroup: it contains barriers.
void reduceSharedChain( uint lid, ivec3 tileID, uint firstMip ) {
    const int threads[3] = int[3](64, 8, 1);   // output cells per step
    const int edges[3]   = int[3](4, 2, 1);    // output grid edge per step

    for ( int i = 0; i < 3; ++i ) {
        uint mipLevel = firstMip + uint(i);
        uint edge = uint(edges[i]);
        bool participates = (mipLevel < PushConstant_.mips) && (lid < uint(threads[i]));

        vec4 color = vec4(0.0);
        if ( participates ) {
            color = reduceFromShared(lid, edge);
            ivec3 pos = tileID * edges[i] + index3D(lid, edge, edge);
            imageStore(voxelMips[mipImageIndex(mipLevel)], pos, color);
        }

        // The results are written back into the slots other invocations are
        // still reading from (e.g. invocation 8 reads slot 32, which
        // invocation 32 overwrites). Wait until every read of the previous
        // level has completed before any slot is overwritten.
        memoryBarrierShared(); barrier();

        if ( participates ) s_colorAlpha[lid] = color;

        // Publish this level before the next step reads it.
        memoryBarrierShared(); barrier();
    }
}

// Entry point: builds the mip chain of one cascade in a single dispatch.
//
// Stage 1 (every workgroup): each invocation reduces a 2x2x2 block of mip 0
// into one texel of mip 1, then the workgroup reduces its 8^3 tile through
// shared memory down to a single texel of mip 4.
// Stage 2 (last workgroup only, when mips > 5): the workgroup that draws the
// final ticket from spdCounter reads mip 4 back and produces mips 5 to 8.
void main() {
    uint lid = gl_LocalInvocationIndex;
    ivec3 wgID = ivec3(gl_WorkGroupID);

    // mip 0 => 1
    if ( 1u < PushConstant_.mips ) {
        ivec3 gid = wgID * 8 + index3D(lid, 8u, 8u);
        ivec3 src = gid * 2;

        vec4 v[8];
        for ( int k = 0; k < 8; ++k ) {
            // Sample order x + 2*y + 4*z.
            ivec3 offset = ivec3(k & 1, (k >> 1) & 1, (k >> 2) & 1);
            v[k] = texelFetch(voxelRadiance[PushConstant_.cascade], src + offset, 0);
        }

        vec4 color = reduce8(v);
        imageStore(voxelMips[mipImageIndex(1u)], gid, color);
        s_colorAlpha[lid] = color;
    }
    memoryBarrierShared(); barrier();

    // mip 1 => 2 => 3 => 4
    reduceSharedChain(lid, wgID, 2u);

    if ( PushConstant_.mips <= 5u ) return; // bail if 16^3 or smaller

    // atomic sync: the last workgroup to arrive continues, all others exit
    if ( lid == 0u ) {
        // Invocation 0 stored this workgroup's mip 4 texel; order that store
        // before the ticket so the last workgroup can observe it.
        memoryBarrier();
        uint ticket = atomicAdd(spdCounter.counter, 1u);
        s_isLastWG = (ticket == PushConstant_.numWorkGroups - 1u) ? 1u : 0u;
    }
    memoryBarrierShared(); barrier();

    if ( s_isLastWG == 0u ) return; // use last workgroup

    // Order the mip 4 reads below after the ticket was observed.
    memoryBarrier();

    if ( lid == 0u ) spdCounter.counter = 0u; // reset for next frame

    // mip 4 => 5 (mips > 5 is guaranteed by the early return above)
    {
        // Each invocation reads a 2x2x2 block, so the workgroup covers a 16^3
        // region of mip 4.
        // NOTE(review): for volumes smaller than 256^3 that region is larger
        // than mip 4 itself; confirm the supported sizes with the caller.
        ivec3 dstPos = index3D(lid, 8u, 8u);
        ivec3 srcPos = dstPos * 2;
        uint mip4 = mipImageIndex(4u);

        vec4 v[8];
        for ( int k = 0; k < 8; ++k ) {
            ivec3 offset = ivec3(k & 1, (k >> 1) & 1, (k >> 2) & 1);
            v[k] = imageLoad(voxelMips[mip4], srcPos + offset);
        }

        vec4 color = reduce8(v);
        imageStore(voxelMips[mipImageIndex(5u)], dstPos, color);
        s_colorAlpha[lid] = color;
    }
    memoryBarrierShared(); barrier();

    // mip 5 => 6 => 7 => 8
    reduceSharedChain(lid, ivec3(0), 6u);
}