engine/dep/include/FidelityFX/gpu/sssr/ffx_sssr_depth_downsample.h

116 lines
3.9 KiB
C

// This file is part of the FidelityFX SDK.
//
// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
// Counter declared with FFX_GROUPSHARED. It is read back by SpdGetAtomicCounter() and
// handed to FFX_SSSR_SPDIncreaseAtomicCounter() from SpdIncreaseAtomicCounter().
FFX_GROUPSHARED FfxUInt32 spdCounter;
// 16x16 scratch array declared with FFX_GROUPSHARED. The SpdStoreIntermediate*/SpdLoadIntermediate*
// functions below write and read it; each slot holds a single FfxFloat32 value.
FFX_GROUPSHARED FfxFloat32 spdIntermediate[16][16];
// Define fetch and store functions
// Returns the value currently stored in spdCounter.
FfxUInt32 SpdGetAtomicCounter()
{
    FfxUInt32 counter_value = spdCounter;
    return counter_value;
}
#if FFX_HALF
// Half-precision 4-to-1 reduction.
// Combines the four inputs component-wise: with max() when
// FFX_SSSR_OPTION_INVERTED_DEPTH is non-zero, with min() otherwise.
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
{
#if FFX_SSSR_OPTION_INVERTED_DEPTH
    FfxFloat16x4 first_pair  = max(v0, v1);
    FfxFloat16x4 second_pair = max(v2, v3);
    return max(first_pair, second_pair);
#else
    FfxFloat16x4 first_pair  = min(v0, v1);
    FfxFloat16x4 second_pair = min(v2, v3);
    return min(first_pair, second_pair);
#endif
}
// Half-precision load from the scratch array.
// Reads spdIntermediate[x][y], converts it to FfxFloat16 and replicates that
// scalar into all four components of the returned vector.
FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
{
    FfxFloat16 stored = FfxFloat16(spdIntermediate[x][y]);
    return FfxFloat16x4(stored, stored, stored, stored);
}
// Half-precision store into the scratch array.
// Only the first component of `value` is written to spdIntermediate[x][y];
// the remaining components are not stored.
void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
{
    FfxFloat32 widened = value.x;
    spdIntermediate[x][y] = widened;
}
#endif // FFX_HALF
// Full-precision store into the scratch array.
// Only the first component of `value` is written to spdIntermediate[x][y];
// the remaining components are not stored.
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
    FfxFloat32 first_component = value.x;
    spdIntermediate[x][y] = first_component;
}
// Full-precision load from the scratch array.
// Reads the scalar at spdIntermediate[x][y] and replicates it into all four
// components of the returned vector.
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
    FfxFloat32 stored = spdIntermediate[x][y];
    return FfxFloat32x4(stored, stored, stored, stored);
}
// Full-precision 4-to-1 reduction.
// Combines the four inputs component-wise: with max() when
// FFX_SSSR_OPTION_INVERTED_DEPTH is non-zero, with min() otherwise.
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
#if FFX_SSSR_OPTION_INVERTED_DEPTH
    FfxFloat32x4 first_pair  = max(v0, v1);
    FfxFloat32x4 second_pair = max(v2, v3);
    return max(first_pair, second_pair);
#else
    FfxFloat32x4 first_pair  = min(v0, v1);
    FfxFloat32x4 second_pair = min(v2, v3);
    return min(first_pair, second_pair);
#endif
}
// Forwards spdCounter to FFX_SSSR_SPDIncreaseAtomicCounter().
// `slice` is accepted but not used by this implementation.
// NOTE(review): FFX_SSSR_SPDIncreaseAtomicCounter is defined outside this file; it presumably
// performs the atomic increment and writes a value back into spdCounter — confirm.
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
FFX_SSSR_SPDIncreaseAtomicCounter(spdCounter);
}
#include "../spd/ffx_spd.h"
// Returns how many threadgroups are needed to cover an image of `image_size`
// texels when each group handles one 64x64 tile. Partial tiles round up.
FfxUInt32 GetThreadgroupCount(FfxUInt32x2 image_size)
{
    // Each threadgroup works on 64x64 texels
    FfxUInt32 tiles_across = (image_size.x + 63) / 64;
    FfxUInt32 tiles_down   = (image_size.y + 63) / 64;
    return tiles_across * tiles_down;
}
// Returns the number of mip levels for a texture of the given size,
// computed as 1 + floor(log2(largest dimension)). The result is a float.
FfxFloat32 GetMipsCount(FfxFloat32x2 texture_size)
{
    FfxFloat32 largest_side = max(texture_size.x, texture_size.y);
    FfxFloat32 extra_levels = floor(log2(largest_side));
    return 1.0 + extra_levels;
}
// Entry point of the depth downsample pass.
//
//   group_index        - forwarded to SpdDownsample as its second argument.
//   group_id           - its .xy is forwarded to SpdDownsample as its first argument.
//   dispatch_thread_id - selects the 2x8 texel block of mip 0 that this thread copies.
//
// Step 1: every thread copies up to 2x8 texels of the input depth into mip 0 of the
//         depth hierarchy, skipping texels outside the input depth dimensions.
// Step 2: SpdDownsample is invoked with the mip count and threadgroup count derived
//         from the dimensions of hierarchy mip 0.
void DepthDownsample(FfxUInt32 group_index, FfxUInt32x3 group_id, FfxUInt32x3 dispatch_thread_id)
{
    FfxFloat32x2 depth_image_size = FfxFloat32x2(0.0f, 0.0f);
    FFX_SSSR_GetInputDepthDimensions(depth_image_size);

    // Copy most detailed level into the hierarchy.
    FfxUInt32x2 u_depth_image_size = FfxUInt32x2(depth_image_size);
    for (FfxInt32 i = 0; i < 2; ++i)
    {
        for (FfxInt32 j = 0; j < 8; ++j)
        {
            // Each thread owns a block 2 texels wide and 8 texels tall.
            FfxUInt32x2 idx = FfxUInt32x2(2 * dispatch_thread_id.x + i, 8 * dispatch_thread_id.y + j);

            // Threads on the right/bottom edge may map outside the image.
            if (idx.x < u_depth_image_size.x && idx.y < u_depth_image_size.y)
            {
                FFX_SSSR_WriteDepthHierarchy(0, idx, FFX_SSSR_GetInputDepth(idx));
            }
        }
    }

    FfxFloat32x2 image_size = FfxFloat32x2(0.0f, 0.0f);
    FFX_SSSR_GetDepthHierarchyMipDimensions(0, image_size);

    FfxFloat32 mips_count = GetMipsCount(image_size);

    // GetThreadgroupCount takes an unsigned vector, so convert straight to FfxUInt32x2
    // (as is done for u_depth_image_size) instead of going through a signed vector and
    // relying on an implicit signed-to-unsigned conversion at the call site.
    FfxUInt32 threadgroup_count = GetThreadgroupCount(FfxUInt32x2(image_size));

    // The trailing 0 is presumably the slice index expected by ffx_spd.h — confirm.
    SpdDownsample(
        FfxUInt32x2(group_id.xy),
        FfxUInt32(group_index),
        FfxUInt32(mips_count),
        FfxUInt32(threadgroup_count),
        0);
}