engine/dep/include/FidelityFX/gpu/classifier/ffx_classifier_shadows.h

222 lines
9.2 KiB
C

// This file is part of the FidelityFX SDK.
//
// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
/// @defgroup FfxGPUClassifier FidelityFX Classifier
/// FidelityFX Classifier GPU documentation
///
/// @ingroup FfxGPUEffects
#include "ffx_classifier_common.h"
struct ClassifyResults
{
FfxBoolean bIsActiveLane;
FfxBoolean bIsInLight;
FfxFloat32 minT;
FfxFloat32 maxT;
};
ClassifyResults FfxClassify(const FfxUInt32x2 pixelCoord,
const FfxBoolean bUseNormal,
const FfxBoolean bUseCascadeBlocking)
{
const FfxBoolean bIsInViewport = all(FFX_LESS_THAN(pixelCoord, TextureSize().xy));
const FfxFloat32 depth = FfxClassifierSampleDepth(pixelCoord);
#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
FfxBoolean bIsActiveLane = bIsInViewport && (depth > 0.0f);
#else
FfxBoolean bIsActiveLane = bIsInViewport && (depth < 1.0f);
#endif
FfxBoolean bIsInLight = FFX_FALSE;
FfxFloat32 minT = FFX_POSITIVE_INFINITY_FLOAT;
FfxFloat32 maxT = 0.f;
if (bUseNormal && bIsActiveLane)
{
const FfxFloat32x3 normal = normalize(FfxClassifierSampleNormal(pixelCoord));
const FfxBoolean bIsNormalFacingLight = dot(normal, -LightDir()) > 0;
bIsActiveLane = bIsActiveLane && bIsNormalFacingLight;
}
if (bUseCascadeBlocking && bIsActiveLane)
{
const FfxFloat32x2 uv = pixelCoord * TextureSize().zw;
const FfxFloat32x4 homogeneous = FFX_MATRIX_MULTIPLY(ViewToWorld(), FfxFloat32x4(2.0f * FfxFloat32x2(uv.x, 1.0f - uv.y) - 1.0f, depth, 1));
const FfxFloat32x3 worldPos = homogeneous.xyz / homogeneous.w;
const FfxFloat32x3 lightViewSpacePos = FFX_MATRIX_MULTIPLY(LightView(), FfxFloat32x4(worldPos, 1)).xyz;
FfxBoolean bIsInActiveCascade = FFX_FALSE;
if (bUseCascadeBlocking)
{
const FfxFloat32 radius = SunSizeLightSpace() * lightViewSpacePos.z;
FfxFloat32x3 shadowCoord = FfxFloat32x3(0, 0, 0);
FfxUInt32 cascadeIndex = 0;
for (FfxUInt32 i = 0; i < CascadeCount(); ++i)
{
shadowCoord = lightViewSpacePos * CascadeScale(i).xyz + CascadeOffset(i).xyz;
if (all(FFX_GREATER_THAN(shadowCoord.xy, FfxFloat32x2(0, 0))) && all(FFX_LESS_THAN(shadowCoord.xy, FfxFloat32x2(1, 1))))
{
cascadeIndex = i;
break;
}
}
// grow search area by a pixel to make sure we search a wide enough area
// also scale everything from UV to pixel coord for image loads.
const FfxFloat32x2 radiusCoord = abs(FfxFloat32x2(radius, radius) * CascadeScale(cascadeIndex).xy) * FfxFloat32x2(CascadeSize(), CascadeSize()) + FfxFloat32x2(1,1);
shadowCoord.xy *= CascadeSize();
#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
const FfxFloat32 depthCmp = shadowCoord.z + BlockerOffset();
#else
const FfxFloat32 depthCmp = shadowCoord.z - BlockerOffset();
#endif
#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
FfxFloat32 maxD = 1;
FfxFloat32 minD = 0;
FfxFloat32 closetDepth = 1;
#else
FfxFloat32 maxD = 0;
FfxFloat32 minD = 1;
FfxFloat32 closetDepth = 0;
#endif
// With small shadow maps we will be bound on filtering since the shadow map can end up completely in LO cache
// using an image load is faster then a sample in RDNA but we will be losing the benefit of doing some of the ALU
// in the filter and getting 4 pixels of data per tap.
for (FfxUInt32 x = 0; x < k_poissonDiscSampleCountHigh; ++x)
{
const FfxFloat32x2 sampleUV = shadowCoord.xy + k_poissonDisc[x] * radiusCoord + 0.5f;
// UV bounds check
if (!(all(FFX_GREATER_THAN_EQUAL(sampleUV.xy, FfxFloat32x2(0, 0))) &&
all(FFX_LESS_THAN(sampleUV.xy, FfxFloat32x2(CascadeSize(), CascadeSize())))))
continue;
const FfxFloat32 pixelDepth = FfxClassifierSampleShadowMap(sampleUV, cascadeIndex);
// using min and max to reduce number of cmps
#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
maxD = min(maxD, pixelDepth);
minD = max(minD, pixelDepth);
// need to find closet point in front of the receiver
if (pixelDepth > depthCmp)
{
closetDepth = min(closetDepth, pixelDepth);
}
#else
maxD = max(maxD, pixelDepth);
minD = min(minD, pixelDepth);
// need to find closet point in front of the receiver
if (pixelDepth < depthCmp)
{
closetDepth = max(closetDepth, pixelDepth);
}
#endif
}
#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
const FfxBoolean bIsInShadow = (maxD >= depthCmp);
bIsInLight = RejectLitPixels() && (minD <= depthCmp);
#else
const FfxBoolean bIsInShadow = (maxD <= depthCmp);
bIsInLight = RejectLitPixels() && (minD >= depthCmp);
#endif
bIsInActiveCascade = !bIsInShadow && !bIsInLight;
if (bIsInActiveCascade && UseCascadesForRayT())
{
#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
const FfxFloat32 viewMinT = abs(min(shadowCoord.z + closetDepth + BlockerOffset(), 0) / CascadeScale(cascadeIndex).z);
const FfxFloat32 viewMaxT = abs((shadowCoord.z + minD - BlockerOffset()) / CascadeScale(cascadeIndex).z);
#else
const FfxFloat32 viewMinT = abs(max(shadowCoord.z - closetDepth - BlockerOffset(), 0) / CascadeScale(cascadeIndex).z);
const FfxFloat32 viewMaxT = abs((shadowCoord.z - minD + BlockerOffset()) / CascadeScale(cascadeIndex).z);
#endif
// if its known that the light view matrix is only a rotation or has uniform scale this can be optimized.
minT = length(FFX_MATRIX_MULTIPLY(InverseLightView(), FfxFloat32x4(0, 0, viewMinT, 0)).xyz);
maxT = length(FFX_MATRIX_MULTIPLY(InverseLightView(), FfxFloat32x4(0, radius, viewMaxT, 0)).xyz);
}
}
bIsActiveLane = bIsActiveLane && bIsInActiveCascade;
}
const ClassifyResults results = { bIsActiveLane, bIsInLight, minT, maxT };
return results;
}
/// Classifier pass entry point.
///
/// @param LocalThreadId The "flattened" index of a thread within a thread group (SV_GroupIndex).
/// @param WorkGroupId Index of the thread group currently executed (SV_GroupID).
/// @ingroup FfxGPUClassifier
void FfxClassifyShadows(FfxUInt32 LocalThreadId, FfxUInt32x3 WorkGroupId)
{
const FfxUInt32x2 localID = ffxRemapForWaveReduction(LocalThreadId);
const FfxUInt32x2 pixelCoord = WorkGroupId.xy * k_tileSize + localID.xy;
#if FFX_CLASSIFIER_OPTION_CLASSIFIER_MODE == 0
ClassifyResults results = FfxClassify(pixelCoord, FFX_TRUE, FFX_FALSE);
#endif
#if FFX_CLASSIFIER_OPTION_CLASSIFIER_MODE == 1
ClassifyResults results = FfxClassify(pixelCoord, FFX_TRUE, FFX_TRUE);
#endif
Tile currentTile = TileCreate(WorkGroupId.xy);
const FfxUInt32 mask = BoolToWaveMask(results.bIsActiveLane, localID);
currentTile.mask = mask;
#if FFX_CLASSIFIER_OPTION_CLASSIFIER_MODE == 1
if (UseCascadesForRayT())
{
// At lest one lane must be active for the tile to be written out, so the infinitly and zero will be emoved by the wave min and max.
// Otherwise we will get minT to be infinite and maxT to be 0
currentTile.minT = max(ffxWaveMin(results.minT), currentTile.minT);
currentTile.maxT = min(ffxWaveMax(results.maxT), currentTile.maxT);
}
#endif
const FfxUInt32 lightMask = BoolToWaveMask(results.bIsInLight, localID);
const FfxBoolean bDiscardTile = (CountBits(mask) <= TileTolerance());
if (LocalThreadId == 0)
{
if (!bDiscardTile)
{
FfxClassifierStoreTile(TileToUint(currentTile));
}
FfxClassifierStoreLightMask(WorkGroupId.xy, lightMask);
}
}