// This file is part of the FidelityFX SDK. // // Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions : // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
/// @defgroup FfxGPUClassifier FidelityFX Classifier
/// FidelityFX Classifier GPU documentation
///
/// @ingroup FfxGPUEffects

#include "ffx_classifier_common.h"

/// Per-pixel classification output produced by FfxClassify.
struct ClassifyResults
{
    /// FFX_TRUE when the pixel passed every rejection test that was enabled for the call.
    FfxBoolean bIsActiveLane;
    /// Only ever set by the cascade test, and only when RejectLitPixels() is set; FFX_FALSE otherwise.
    FfxBoolean bIsInLight;
    /// Ray interval start. Stays at FFX_POSITIVE_INFINITY_FLOAT unless the cascade test derives a value.
    FfxFloat32 minT;
    /// Ray interval end. Stays at 0 unless the cascade test derives a value.
    FfxFloat32 maxT;
};

/// Classifies a single pixel using depth, an optional normal test and an optional
/// shadow-cascade test.
///
/// @param pixelCoord          Pixel coordinate to classify.
/// @param bUseNormal          When set, pixels whose normal does not face -LightDir() are rejected.
/// @param bUseCascadeBlocking When set, the shadow cascades are sampled to reject pixels and,
///                            if UseCascadesForRayT() is set, to derive minT / maxT.
///
/// @returns The classification of the pixel. minT / maxT keep their defaults
///          (FFX_POSITIVE_INFINITY_FLOAT and 0) unless the cascade test computes them.
ClassifyResults FfxClassify(const FfxUInt32x2 pixelCoord, const FfxBoolean bUseNormal, const FfxBoolean bUseCascadeBlocking)
{
    const FfxBoolean bInsideViewport = all(FFX_LESS_THAN(pixelCoord, TextureSize().xy));
    const FfxFloat32 sceneDepth      = FfxClassifierSampleDepth(pixelCoord);

    // Pixels whose depth sits at the 0 / 1 limit are skipped
    // (presumably background pixels without geometry -- confirm against the depth clear value).
#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
    const FfxBoolean bDepthInRange = (sceneDepth > 0.0f);
#else
    const FfxBoolean bDepthInRange = (sceneDepth < 1.0f);
#endif

    FfxBoolean bLaneActive = bInsideViewport && bDepthInRange;
    FfxBoolean bLit        = FFX_FALSE;
    FfxFloat32 rayMinT     = FFX_POSITIVE_INFINITY_FLOAT;
    FfxFloat32 rayMaxT     = 0.f;

    // Normal test: only surfaces facing the light stay active.
    if (bUseNormal && bLaneActive)
    {
        const FfxFloat32x3 surfaceNormal = normalize(FfxClassifierSampleNormal(pixelCoord));

        // The lane is known to be active here, so the facing test alone decides its new state.
        bLaneActive = (dot(surfaceNormal, -LightDir()) > 0);
    }

    // Cascade test: compare the receiver against the shadow map of the cascade that contains it.
    if (bUseCascadeBlocking && bLaneActive)
    {
        // Reconstruct the position of the pixel and move it into light view space.
        const FfxFloat32x2 uv            = pixelCoord * TextureSize().zw;
        const FfxFloat32x2 clipXY        = 2.0f * FfxFloat32x2(uv.x, 1.0f - uv.y) - 1.0f;
        const FfxFloat32x4 unprojected   = FFX_MATRIX_MULTIPLY(ViewToWorld(), FfxFloat32x4(clipXY, sceneDepth, 1));
        const FfxFloat32x3 worldPos      = unprojected.xyz / unprojected.w;
        const FfxFloat32x3 lightSpacePos = FFX_MATRIX_MULTIPLY(LightView(), FfxFloat32x4(worldPos, 1)).xyz;

        const FfxFloat32 radius = SunSizeLightSpace() * lightSpacePos.z;

        // Pick the first cascade whose [0, 1] range strictly contains the receiver.
        // NOTE(review): when no cascade matches, cascadeIndex stays 0 while shadowCoord
        // keeps the value computed for the last cascade -- confirm this is intended.
        FfxFloat32x3 shadowCoord  = FfxFloat32x3(0, 0, 0);
        FfxUInt32    cascadeIndex = 0;
        for (FfxUInt32 i = 0; i < CascadeCount(); ++i)
        {
            shadowCoord = lightSpacePos * CascadeScale(i).xyz + CascadeOffset(i).xyz;

            const FfxBoolean bAboveLowerBound = all(FFX_GREATER_THAN(shadowCoord.xy, FfxFloat32x2(0, 0)));
            const FfxBoolean bBelowUpperBound = all(FFX_LESS_THAN(shadowCoord.xy, FfxFloat32x2(1, 1)));
            if (bAboveLowerBound && bBelowUpperBound)
            {
                cascadeIndex = i;
                break;
            }
        }

        // Grow the search area by a pixel to make sure we search a wide enough area,
        // and scale everything from UV to pixel coordinates for image loads.
        const FfxFloat32x2 radiusCoord = abs(FfxFloat32x2(radius, radius) * CascadeScale(cascadeIndex).xy) * FfxFloat32x2(CascadeSize(), CascadeSize()) + FfxFloat32x2(1,1);
        shadowCoord.xy *= CascadeSize();

        // Biased receiver depth plus the starting values of the running depth extremes.
        // In inverted-depth builds the min / max roles of the trackers are swapped.
#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
        const FfxFloat32 depthCmp = shadowCoord.z + BlockerOffset();
        FfxFloat32 maxTapDepth         = 1;
        FfxFloat32 minTapDepth         = 0;
        FfxFloat32 closestBlockerDepth = 1;
#else
        const FfxFloat32 depthCmp = shadowCoord.z - BlockerOffset();
        FfxFloat32 maxTapDepth         = 0;
        FfxFloat32 minTapDepth         = 1;
        FfxFloat32 closestBlockerDepth = 0;
#endif

        // With small shadow maps we will be bound on filtering, since the shadow map can end up
        // completely in L0 cache. Using an image load is faster than a sample on RDNA, but we
        // lose the benefit of doing some of the ALU in the filter and getting 4 pixels of data per tap.
        for (FfxUInt32 tap = 0; tap < k_poissonDiscSampleCountHigh; ++tap)
        {
            const FfxFloat32x2 tapCoord = shadowCoord.xy + k_poissonDisc[tap] * radiusCoord + 0.5f;

            // Bounds check: taps that fall outside the cascade are ignored.
            const FfxBoolean bTapInsideCascade = all(FFX_GREATER_THAN_EQUAL(tapCoord.xy, FfxFloat32x2(0, 0))) &&
                                                 all(FFX_LESS_THAN(tapCoord.xy, FfxFloat32x2(CascadeSize(), CascadeSize())));
            if (bTapInsideCascade)
            {
                const FfxFloat32 tapDepth = FfxClassifierSampleShadowMap(tapCoord, cascadeIndex);

                // Using min and max to reduce the number of compares.
#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
                maxTapDepth = min(maxTapDepth, tapDepth);
                minTapDepth = max(minTapDepth, tapDepth);

                // Need to find the closest point in front of the receiver.
                if (tapDepth > depthCmp)
                {
                    closestBlockerDepth = min(closestBlockerDepth, tapDepth);
                }
#else
                maxTapDepth = max(maxTapDepth, tapDepth);
                minTapDepth = min(minTapDepth, tapDepth);

                // Need to find the closest point in front of the receiver.
                if (tapDepth < depthCmp)
                {
                    closestBlockerDepth = max(closestBlockerDepth, tapDepth);
                }
#endif
            }
        }

#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
        const FfxBoolean bShadowed = (maxTapDepth >= depthCmp);
        bLit = RejectLitPixels() && (minTapDepth <= depthCmp);
#else
        const FfxBoolean bShadowed = (maxTapDepth <= depthCmp);
        bLit = RejectLitPixels() && (minTapDepth >= depthCmp);
#endif

        // The lane stays active only when the taps settle neither "shadowed" nor "lit".
        const FfxBoolean bInActiveCascade = !(bShadowed || bLit);

        if (bInActiveCascade && UseCascadesForRayT())
        {
            const FfxFloat32 cascadeScaleZ = CascadeScale(cascadeIndex).z;

#if FFX_CLASSIFIER_OPTION_INVERTED_DEPTH
            const FfxFloat32 viewMinT = abs(min(shadowCoord.z + closestBlockerDepth + BlockerOffset(), 0) / cascadeScaleZ);
            const FfxFloat32 viewMaxT = abs((shadowCoord.z + minTapDepth - BlockerOffset()) / cascadeScaleZ);
#else
            const FfxFloat32 viewMinT = abs(max(shadowCoord.z - closestBlockerDepth - BlockerOffset(), 0) / cascadeScaleZ);
            const FfxFloat32 viewMaxT = abs((shadowCoord.z - minTapDepth + BlockerOffset()) / cascadeScaleZ);
#endif

            // If it is known that the light view matrix is only a rotation or has uniform scale,
            // this can be optimized.
            rayMinT = length(FFX_MATRIX_MULTIPLY(InverseLightView(), FfxFloat32x4(0, 0, viewMinT, 0)).xyz);
            rayMaxT = length(FFX_MATRIX_MULTIPLY(InverseLightView(), FfxFloat32x4(0, radius, viewMaxT, 0)).xyz);
        }

        // The lane is known to be active here, so the cascade verdict alone decides its new state.
        bLaneActive = bInActiveCascade;
    }

    ClassifyResults results;
    results.bIsActiveLane = bLaneActive;
    results.bIsInLight    = bLit;
    results.minT          = rayMinT;
    results.maxT          = rayMaxT;
    return results;
}

/// Classifier pass entry point.
///
/// @param LocalThreadId The "flattened" index of a thread within a thread group (SV_GroupIndex).
/// @param WorkGroupId Index of the thread group currently executed (SV_GroupID).
/// @ingroup FfxGPUClassifier
void FfxClassifyShadows(FfxUInt32 LocalThreadId, FfxUInt32x3 WorkGroupId)
{
    // Map the flat thread index to a 2D position inside the tile, then to the pixel it classifies.
    const FfxUInt32x2 laneCoord  = ffxRemapForWaveReduction(LocalThreadId);
    const FfxUInt32x2 pixelCoord = WorkGroupId.xy * k_tileSize + laneCoord.xy;

    // Mode 0 classifies with the normal test only; mode 1 additionally enables the cascade test.
#if FFX_CLASSIFIER_OPTION_CLASSIFIER_MODE == 0
    ClassifyResults classified = FfxClassify(pixelCoord, FFX_TRUE, FFX_FALSE);
#elif FFX_CLASSIFIER_OPTION_CLASSIFIER_MODE == 1
    ClassifyResults classified = FfxClassify(pixelCoord, FFX_TRUE, FFX_TRUE);
#endif

    Tile tile = TileCreate(WorkGroupId.xy);

    // Gather the per-lane "active" flags into a single mask for the tile.
    const FfxUInt32 activeMask = BoolToWaveMask(classified.bIsActiveLane, laneCoord);
    tile.mask = activeMask;

#if FFX_CLASSIFIER_OPTION_CLASSIFIER_MODE == 1
    if (UseCascadesForRayT())
    {
        // At least one lane must be active for the tile to be written out, so the infinity and
        // zero defaults will be removed by the wave min and max.
        // Otherwise we would get minT to be infinite and maxT to be 0.
        tile.minT = max(ffxWaveMin(classified.minT), tile.minT);
        tile.maxT = min(ffxWaveMax(classified.maxT), tile.maxT);
    }
#endif

    // Gather the per-lane "in light" flags into a second mask.
    const FfxUInt32 lightMask = BoolToWaveMask(classified.bIsInLight, laneCoord);

    // A tile is kept only when it has more active lanes than TileTolerance().
    const FfxBoolean bKeepTile = (CountBits(activeMask) > TileTolerance());

    // Only the first thread of the group writes the results.
    if (LocalThreadId == 0)
    {
        if (bKeepTile)
        {
            FfxClassifierStoreTile(TileToUint(tile));
        }

        // The light mask is stored whether or not the tile was kept.
        FfxClassifierStoreLightMask(WorkGroupId.xy, lightMask);
    }
}