431 lines
18 KiB
C
431 lines
18 KiB
C
// This file is part of the FidelityFX SDK.
|
|
//
|
|
// Copyright (C) 2024 Advanced Micro Devices, Inc.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files(the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions :
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
// THE SOFTWARE.
|
|
|
|
#ifndef FFX_DNSR_SHADOWS_TILECLASSIFICATION_HLSL
|
|
#define FFX_DNSR_SHADOWS_TILECLASSIFICATION_HLSL
|
|
|
|
#include "ffx_denoiser_shadows_util.h"
|
|
|
|
// Group-shared flag used by the fallback path of FFX_DNSR_Shadows_ThreadGroupAllTrue.
// It is reset to 0 and then set to 1 by every thread whose vote is false.
FFX_GROUPSHARED FfxInt32 g_FFX_DNSR_Shadows_false_count;

// Votes across the thread group and returns true only if `val` is true for all voters.
//
// When the wave size reported by ffxWaveLaneCount() equals 64 (the value this code names
// lane_count_in_thread_group) a single wave vote answers the question. Otherwise the vote
// is carried out through group-shared memory, which is why every thread of the group has
// to reach this call: the fallback path executes group barriers.
FfxBoolean FFX_DNSR_Shadows_ThreadGroupAllTrue(FfxBoolean val)
{
    const FfxUInt32 lane_count_in_thread_group = 64;

    // Fast path: answer with one wave-wide vote.
    if (ffxWaveLaneCount() == lane_count_in_thread_group)
    {
        return ffxWaveAllTrue(val);
    }

    // Slow path: vote through group-shared memory.
    FFX_GROUP_MEMORY_BARRIER;                 // Wait for earlier users of the flag.
    g_FFX_DNSR_Shadows_false_count = 0;       // Every thread writes the same reset value.
    FFX_GROUP_MEMORY_BARRIER;                 // Reset must land before any thread raises the flag.
    if (!val)
    {
        g_FFX_DNSR_Shadows_false_count = 1;
    }
    FFX_GROUP_MEMORY_BARRIER;                 // All votes must land before the flag is read.

    return g_FFX_DNSR_Shadows_false_count == 0;
}
|
|
|
|
// Checks whether the whole spatial filter footprint around thread group `gid` is uniformly lit or shadowed.
//
// gid            - thread group index; the group covers the 8x8 pixel area starting at gid * 8.
// all_in_light   - set to true when every loaded mask has all 32 bits set.
// all_in_shadow  - set to true when every loaded mask has no bit set.
void FFX_DNSR_Shadows_SearchSpatialRegion(FfxUInt32x2 gid, out FfxBoolean all_in_light, out FfxBoolean all_in_shadow)
{
    // The spatial passes can reach a total region of 1+2+4 = 7x7 around each block.
    // The masks are 8x4, so a larger vertical stride is needed.
    //
    // Visualization - each x represents a 4x4 block, xx is one entire 8x4 mask as read from
    // the raytracer result. The same holds for yy, which are the masks being worked on.
    //
    //   xx xx xx
    //   xx xx xx
    //   xx yy xx   <-- this yy is base_tile below
    //   xx yy xx
    //   xx xx xx
    //   xx xx xx
    //
    // Everything here depends only on gid, so it should result in scalar ops.
    const FfxUInt32x2 base_tile = FFX_DNSR_Shadows_GetTileIndexFromPixelPosition(gid * FfxInt32x2(8, 8));

    // Index of the last valid mask tile in x and y; used to clamp reads at the buffer border.
    const FfxInt32x2 last_tile = FfxInt32x2(FFX_DNSR_Shadows_RoundedDivide(BufferDimensions().x, 8), FFX_DNSR_Shadows_RoundedDivide(BufferDimensions().y, 4)) - 1;

    // Fold all masks of the region into one OR mask and one AND mask.
    FfxUInt32 any_bits = 0;
    FfxUInt32 common_bits = 0xFFFFFFFF;
    for (FfxInt32 dy = -2; dy <= 3; ++dy)
    {
        for (FfxInt32 dx = -1; dx <= 1; ++dx)
        {
            FfxInt32x2 tile = FfxInt32x2(base_tile) + FfxInt32x2(dx, dy);
            tile = clamp(tile, FfxInt32x2(0,0), last_tile);

            const FfxUInt32 tile_mask = LoadRaytracedShadowMask(FFX_DNSR_Shadows_LinearTileIndex(tile, BufferDimensions().x));

            any_bits |= tile_mask;
            common_bits &= tile_mask;
        }
    }

    all_in_light = common_bits == 0xFFFFFFFFu;
    all_in_shadow = any_bits == 0u;
}
|
|
|
|
// Converts a depth value at pixel `did` into a positive linear depth.
//
// The pixel center is mapped to NDC (y flipped), unprojected with ProjectionInverse(),
// and the absolute value of z / w of the result is returned.
FfxFloat32 FFX_DNSR_Shadows_GetLinearDepth(FfxUInt32x2 did, FfxFloat32 depth)
{
    // Pixel center in [0, 1] texture space.
    const FfxFloat32x2 texcoord = (did + 0.5f) * InvBufferDimensions();

    // Texture space -> NDC; the y axis points the other way.
    const FfxFloat32x2 flipped_texcoord = FfxFloat32x2(texcoord.x, 1.0f - texcoord.y);
    const FfxFloat32x2 ndc_xy = 2.0f * flipped_texcoord - 1.0f;

    const FfxFloat32x4 unprojected = FFX_MATRIX_MULTIPLY(ProjectionInverse(), FfxFloat32x4(ndc_xy, depth, 1));

    return abs(unprojected.z / unprojected.w);
}
|
|
|
|
// Decides whether the history of pixel `did` has to be discarded.
//
// did      - pixel position in the current frame.
// depth    - depth value of that pixel.
// velocity - offset added to the pixel's uv to obtain its position in the previous frame.
//
// Returns FFX_TRUE when the reprojected position lies outside of the open (0, 1) uv range,
// or when the relative difference between the reprojected linear depth and the linear depth
// stored in the previous frame reaches a tolerance. The tolerance is interpolated between
// 1e-2 and 1e-1 depending on the angle between the surface normal and the view direction.
FfxBoolean FFX_DNSR_Shadows_IsDisoccluded(FfxUInt32x2 did, FfxFloat32 depth, FfxFloat32x2 velocity)
{
    const FfxInt32x2 dims = BufferDimensions();
    const FfxFloat32x2 texel_size = InvBufferDimensions();
    const FfxFloat32x2 uv = (did + 0.5f) * texel_size;
    const FfxFloat32x2 ndc = (2.0f * uv - 1.0f) * FfxFloat32x2(1.0f, -1.0f);
    const FfxFloat32x2 previous_uv = uv + velocity;

    // Without a valid position in the previous frame there is no history to compare against.
    const FfxBoolean has_history = all(FFX_GREATER_THAN(previous_uv, FfxFloat32x2(0,0))) && all(FFX_LESS_THAN(previous_uv, FfxFloat32x2(1,1)));
    if (!has_history)
    {
        return FFX_TRUE;
    }

    // Read the center values
    const FfxFloat32x3 normal = LoadNormals(did);

    // Transform the current sample with ReprojectionMatrix() and keep its depth.
    FfxFloat32x4 clip_space = FFX_MATRIX_MULTIPLY(ReprojectionMatrix(), FfxFloat32x4(ndc, depth, 1.0f));
    clip_space.z /= clip_space.w; // perspective divide

    // How aligned with the view vector? (the more Z aligned, the higher the depth errors)
    const FfxFloat32x4 homogeneous = FFX_MATRIX_MULTIPLY(ViewProjectionInverse(), FfxFloat32x4(ndc, depth, 1.0f));
    const FfxFloat32x3 world_position = FfxFloat32x3(homogeneous.xyz / homogeneous.w); // perspective divide
    const FfxFloat32x3 view_direction = normalize(Eye().xyz - world_position);

    // The dot product can exceed 1 by a rounding error (or when the stored normal is not
    // exactly unit length), which would hand a negative base to pow(). pow() of a negative
    // base is NaN in HLSL and undefined in GLSL; a NaN tolerance makes the comparison below
    // always false, i.e. the pixel would never be reported as disoccluded. Clamp to zero.
    FfxFloat32 z_alignment = max(1.0f - dot(view_direction, normal), 0.0f);
    z_alignment = pow(z_alignment, 8);

    // Calculate the depth difference
    const FfxFloat32 linear_depth = FFX_DNSR_Shadows_GetLinearDepth(did, clip_space.z); // get linear depth

    const FfxInt32x2 idx = FfxInt32x2(previous_uv * FfxFloat32x2(dims));
    const FfxFloat32 previous_depth = FFX_DNSR_Shadows_GetLinearDepth(idx, LoadPreviousDepth(idx));
    const FfxFloat32 depth_difference = abs(previous_depth - linear_depth) / linear_depth;

    // Resolve into the disocclusion mask
    const FfxFloat32 depth_tolerance = ffxLerp(1e-2f, 1e-1f, z_alignment);
    return depth_difference >= depth_tolerance;
}
|
|
|
|
// Returns the velocity of the closest sample among this lane and the lanes it exchanges
// data with through ffxQuadReadX / ffxQuadReadY.
//
// did   - pixel position whose velocity is loaded as the starting candidate.
// depth - depth of that pixel.
//
// "Closer" means a greater depth when FFX_DENOISER_OPTION_INVERTED_DEPTH is set and a
// smaller depth otherwise. The quad reads exchange data between lanes, so this has to be
// called while the participating lanes are still active.
FfxFloat32x2 FFX_DNSR_Shadows_GetClosestVelocity(FfxInt32x2 did, FfxFloat32 depth)
{
    FfxFloat32x2 best_velocity = LoadVelocity(did);
    FfxFloat32 best_depth = depth;

    // First exchange: candidate delivered by ffxQuadReadX.
    FfxFloat32 candidate_depth = ffxQuadReadX(best_depth);
    FfxFloat32x2 candidate_velocity = ffxQuadReadX(best_velocity);

#if FFX_DENOISER_OPTION_INVERTED_DEPTH
    FfxBoolean candidate_is_closer = candidate_depth > best_depth;
#else
    FfxBoolean candidate_is_closer = candidate_depth < best_depth;
#endif
    if (candidate_is_closer)
    {
        best_depth = candidate_depth;
        best_velocity = candidate_velocity;
    }

    // Second exchange: candidate delivered by ffxQuadReadY, which already carries the
    // winner of the first exchange.
    candidate_depth = ffxQuadReadY(best_depth);
    candidate_velocity = ffxQuadReadY(best_velocity);

#if FFX_DENOISER_OPTION_INVERTED_DEPTH
    candidate_is_closer = candidate_depth > best_depth;
#else
    candidate_is_closer = candidate_depth < best_depth;
#endif
    if (candidate_is_closer)
    {
        best_depth = candidate_depth;
        best_velocity = candidate_velocity;
    }

    return best_velocity;
}
|
|
|
|
#define KERNEL_RADIUS 8

// Returns the normalized weight of the filter tap at distance `i` from the kernel center.
//
// The unnormalized weight is exp(-3 * i^2 / (KERNEL_RADIUS + 1)^2). It is divided by the sum
// of the unnormalized weights of all 2 * KERNEL_RADIUS + 1 taps, so the weights of a full
// kernel add up to one.
FfxFloat32 FFX_DNSR_Shadows_KernelWeight(FfxFloat32 i)
{
// The argument is parenthesized so that compound expressions (e.g. `a - b`) expand correctly.
#define KERNEL_WEIGHT(i) (exp(-3.0 * FfxFloat32((i) * (i)) / ((KERNEL_RADIUS + 1.0) * (KERNEL_RADIUS + 1.0))))

    // Statically initialize kernel_weights_sum
    FfxFloat32 kernel_weights_sum = 0;
    kernel_weights_sum += KERNEL_WEIGHT(0);
    for (FfxInt32 c = 1; c <= KERNEL_RADIUS; ++c)
    {
        kernel_weights_sum += 2 * KERNEL_WEIGHT(c); // Add other half of the kernel to the sum
    }
    FfxFloat32 inv_kernel_weights_sum = ffxReciprocal(kernel_weights_sum);

    // The only runtime code in this function
    return KERNEL_WEIGHT(i) * inv_kernel_weights_sum;
}
|
|
|
|
// Adds `value`, scaled by `weight`, onto the running sum `moments`.
//
// `value` comes from the horizontal neighborhood calculations. With one sample per pixel
// the accumulated quantity serves as both mean and variance.
void FFX_DNSR_Shadows_AccumulateMoments(FfxFloat32 value, FfxFloat32 weight, inout FfxFloat32 moments)
{
    const FfxFloat32 weighted_value = value * weight;
    moments += weighted_value;
}
|
|
|
|
// The horizontal part of a 17x17 local neighborhood kernel.
//
// Returns the kernel-weighted sum of the shadow mask bits of the 17 pixels centered on
// `did` within its row. Rows outside of the buffer yield 0, and mask tiles beyond the left
// or right end of a tile row are treated as empty.
FfxFloat32 FFX_DNSR_Shadows_HorizontalNeighborhood(FfxInt32x2 did)
{
    // Prevent vertical out of bounds access
    if ((did.y < 0) || (did.y >= BufferDimensions().y))
    {
        return 0;
    }

    const FfxUInt32x2 tile = FFX_DNSR_Shadows_GetTileIndexFromPixelPosition(did);
    const FfxUInt32 center_index = FFX_DNSR_Shadows_LinearTileIndex(tile, BufferDimensions().x);

    // Only read horizontal neighbors that belong to the same tile row.
    const FfxBoolean has_left_tile = tile.x != 0;
    const FfxBoolean has_right_tile = tile.x != (FFX_DNSR_Shadows_RoundedDivide(BufferDimensions().x, 8) - 1);

    FfxUInt32 left_mask = 0;
    if (has_left_tile)
    {
        left_mask = LoadRaytracedShadowMask(center_index - 1);
    }
    const FfxUInt32 center_mask = LoadRaytracedShadowMask(center_index);
    FfxUInt32 right_mask = 0;
    if (has_right_tile)
    {
        right_mask = LoadRaytracedShadowMask(center_index + 1);
    }

    // Each 8x4 mask stores one row per byte: pick the byte of our row from all three tiles.
    const FfxUInt32 row_shift = (did.y % 4) * 8;
    const FfxUInt32 left_row = (left_mask >> row_shift) & 0xFF;
    const FfxUInt32 center_row = (center_mask >> row_shift) & 0xFF;
    const FfxUInt32 right_row = (right_mask >> row_shift) & 0xFF;

    // Combine them into a single mask containing [left, center, right] from least
    // significant to most significant bit.
    FfxUInt32 row_bits = left_row | (center_row << 8) | (right_row << 16);

    // Shift the row so that our own pixel lands on bit index 8 (the ninth bit). The lowest
    // 17 bits then hold the 8 pixels before it, the pixel itself and the 8 pixels after it.
    const FfxUInt32 column_in_tile = (did.x % 8);
    row_bits = row_bits >> column_in_tile;

    // For one sample per pixel this is both, mean and variance.
    FfxFloat32 moment = 0.0;

    // Walk the 17 taps from bit 0 to bit 16; the tap at bit 8 is the center pixel.
    for (FfxInt32 bit = 0; bit <= 16; ++bit)
    {
        const FfxInt32 distance = (bit < 8) ? (8 - bit) : (bit - 8);
        if (((1u << bit) & row_bits) != 0u)
        {
            moment += FFX_DNSR_Shadows_KernelWeight(distance);
        }
    }

    return moment;
}
|
|
|
|
// Group-shared scratch for the vertical pass, indexed [gtid.x][row]. Rows 0-7 hold the
// horizontal results 8 pixels above each thread, rows 8-15 the threads' own rows and
// rows 16-23 the results 8 pixels below.
FFX_GROUPSHARED FfxFloat32 g_FFX_DNSR_Shadows_neighborhood[8][24];

// Combines the horizontal neighborhood results of 17 rows into the local neighborhood value.
//
// did  - pixel position of this thread.
// gtid - position of this thread inside its group; both components index the scratch array
//        above, so they are expected to be in [0, 7].
//
// Contains a group barrier, so every thread of the group has to call it.
FfxFloat32 FFX_DNSR_Shadows_ComputeLocalNeighborhood(FfxInt32x2 did, FfxInt32x2 gtid)
{
    // Each thread evaluates the horizontal kernel for three rows: 8 above, its own, 8 below.
    const FfxFloat32 row_above = FFX_DNSR_Shadows_HorizontalNeighborhood(FfxInt32x2(did.x, did.y - 8));
    const FfxFloat32 row_own = FFX_DNSR_Shadows_HorizontalNeighborhood(FfxInt32x2(did.x, did.y));
    const FfxFloat32 row_below = FFX_DNSR_Shadows_HorizontalNeighborhood(FfxInt32x2(did.x, did.y + 8));

    // Publish them so that the other threads of the column can read them.
    const FfxInt32 own_slot = gtid.y + 8;
    g_FFX_DNSR_Shadows_neighborhood[gtid.x][own_slot - 8] = row_above;
    g_FFX_DNSR_Shadows_neighborhood[gtid.x][own_slot] = row_own;
    g_FFX_DNSR_Shadows_neighborhood[gtid.x][own_slot + 8] = row_below;

    FFX_GROUP_MEMORY_BARRIER;

    FfxFloat32 result = 0;

    // First combine the own values.
    // KERNEL_RADIUS pixels up is own upper and KERNEL_RADIUS pixels down is own lower value
    FFX_DNSR_Shadows_AccumulateMoments(row_own, FFX_DNSR_Shadows_KernelWeight(0), result);
    FFX_DNSR_Shadows_AccumulateMoments(row_above, FFX_DNSR_Shadows_KernelWeight(KERNEL_RADIUS), result);
    FFX_DNSR_Shadows_AccumulateMoments(row_below, FFX_DNSR_Shadows_KernelWeight(KERNEL_RADIUS), result);

    // Then read the neighboring values: the rows at distance 1 .. KERNEL_RADIUS - 1.
    for (FfxInt32 distance = 1; distance < KERNEL_RADIUS; ++distance)
    {
        const FfxFloat32 tap_weight = FFX_DNSR_Shadows_KernelWeight(distance);
        const FfxFloat32 value_above = g_FFX_DNSR_Shadows_neighborhood[gtid.x][8 + gtid.y - distance];
        const FfxFloat32 value_below = g_FFX_DNSR_Shadows_neighborhood[gtid.x][8 + gtid.y + distance];

        FFX_DNSR_Shadows_AccumulateMoments(value_above, tap_weight, result);
        FFX_DNSR_Shadows_AccumulateMoments(value_below, tap_weight, result);
    }

    return result;
}
|
|
|
|
// Stores the meta data word of the tile handled by thread group `gid`.
//
// gid          - thread group index; the word is stored at gid.y * (tiles per row) + gid.x.
// gtid         - thread position inside the group; only thread (0, 0) performs the store.
// is_cleared   - sets TILE_META_DATA_CLEAR_MASK in the stored word.
// all_in_light - sets TILE_META_DATA_LIGHT_MASK in the stored word.
void FFX_DNSR_Shadows_WriteTileMetaData(FfxUInt32x2 gid, FfxUInt32x2 gtid, FfxBoolean is_cleared, FfxBoolean all_in_light)
{
    // One store per tile is enough.
    if (!all(FFX_EQUAL(gtid, FfxUInt32x2(0,0))))
    {
        return;
    }

    const FfxUInt32 light_bits = all_in_light ? TILE_META_DATA_LIGHT_MASK : 0;
    const FfxUInt32 clear_bits = is_cleared ? TILE_META_DATA_CLEAR_MASK : 0;
    const FfxUInt32 meta_data = FfxUInt32(light_bits | clear_bits);

    StoreMetadata(gid.y * FFX_DNSR_Shadows_RoundedDivide(BufferDimensions().x, 8) + gid.x, meta_data);
}
|
|
|
|
// Writes the outputs of a pixel that belongs to a skipped tile.
//
// The tile is flagged as cleared in the meta data, and both the reprojection result and the
// moments of pixel `did` are set to a mean of `shadow_value` with zero variance. The
// temporal sample count is 1 for shadow receivers and 0 otherwise.
void FFX_DNSR_Shadows_ClearTargets(FfxUInt32x2 did, FfxUInt32x2 gtid, FfxUInt32x2 gid, FfxFloat32 shadow_value, FfxBoolean is_shadow_receiver, FfxBoolean all_in_light)
{
    FFX_DNSR_Shadows_WriteTileMetaData(gid, gtid, FFX_TRUE, all_in_light);

    // mean, variance
    const FfxFloat32x2 cleared_result = FfxFloat32x2(shadow_value, 0);
    StoreReprojectionResults(did, cleared_result);

    FfxFloat32 temporal_sample_count = 0;
    if (is_shadow_receiver)
    {
        temporal_sample_count = 1;
    }

    // mean, variance, temporal sample count
    const FfxFloat32x3 cleared_moments = FfxFloat32x3(shadow_value, 0, temporal_sample_count);
    StoreMoments(did, cleared_moments);
}
|
|
|
|
// Classifies the tile of thread group `gid` and runs the temporal pass for its pixels.
//
// group_index - flat index of the thread inside its group.
// gid         - thread group index; the group covers the 8x8 pixels starting at gid * 8.
//
// Tiles without any shadow receiver, and tiles whose surrounding filter region is entirely
// lit or entirely shadowed, are cleared and flagged in the tile meta data. For all other
// tiles the ray traced shadow value of each receiver pixel is blended with the reprojected
// history, and the result is stored together with its variance and updated moments.
void FFX_DNSR_Shadows_TileClassification(FfxUInt32 group_index, FfxUInt32x2 gid)
{
    // Make sure we can use the QuadReadAcross intrinsics to access a 2x2 region.
    const FfxUInt32x2 gtid = ffxRemapForWaveReduction(group_index);
    const FfxUInt32x2 did = gid * 8 + gtid;

    const FfxBoolean is_shadow_receiver = IsShadowReciever(did);

    // Tile without a single shadow receiver.
    if (FFX_DNSR_Shadows_ThreadGroupAllTrue(!is_shadow_receiver))
    {
        // We have to set all resources of the tile we skipped to sensible values as
        // neighboring active denoiser tiles might want to read them.
        FFX_DNSR_Shadows_ClearTargets(did, gtid, gid, 0, is_shadow_receiver, FFX_FALSE);
        return;
    }

    FfxBoolean all_in_light = FFX_FALSE;
    FfxBoolean all_in_shadow = FFX_FALSE;
    FFX_DNSR_Shadows_SearchSpatialRegion(gid, all_in_light, all_in_shadow);

    // Only used when the tile is skipped, in which case either all_in_light or
    // all_in_shadow is true.
    const FfxFloat32 uniform_shadow_value = all_in_light ? 1 : 0;

    // We have to append the entire tile if there is a single lane that we can't skip
    const FfxBoolean lane_can_skip = all_in_light || all_in_shadow;
    if (FFX_DNSR_Shadows_ThreadGroupAllTrue(lane_can_skip))
    {
        // We have to set all resources of the tile we skipped to sensible values as
        // neighboring active denoiser tiles might want to read them.
        FFX_DNSR_Shadows_ClearTargets(did, gtid, gid, uniform_shadow_value, is_shadow_receiver, all_in_light);
        return;
    }

    FFX_DNSR_Shadows_WriteTileMetaData(gid, gtid, FFX_FALSE, FFX_FALSE);

    const FfxFloat32 depth = LoadDepth(FfxInt32x2(did));

    // Must happen before we deactivate lanes
    const FfxFloat32x2 velocity = FFX_DNSR_Shadows_GetClosestVelocity(FfxInt32x2(did), depth);
    const FfxFloat32 local_neighborhood = FFX_DNSR_Shadows_ComputeLocalNeighborhood(FfxInt32x2(did), FfxInt32x2(gtid));

    // Position of this pixel in the history.
    const FfxFloat32x2 texel_size = InvBufferDimensions();
    const FfxFloat32x2 uv = (did.xy + 0.5f) * texel_size;
    const FfxFloat32x2 history_uv = uv + velocity;
    const FfxInt32x2 history_pos = FfxInt32x2(history_uv * BufferDimensions());

    // Ray traced shadow mask of the tile that contains this pixel.
    const FfxUInt32x2 mask_tile = FFX_DNSR_Shadows_GetTileIndexFromPixelPosition(FfxInt32x2(did));
    const FfxUInt32 mask_tile_linear = FFX_DNSR_Shadows_LinearTileIndex(mask_tile, BufferDimensions().x);
    const FfxUInt32 shadow_mask = LoadRaytracedShadowMask(mask_tile_linear);

    // Outputs; they stay zero for pixels that are not shadow receivers.
    FfxFloat32x3 out_moments = FfxFloat32x3(0,0,0);
    FfxFloat32 out_variance = 0;
    FfxFloat32 out_shadow = 0;

    if (is_shadow_receiver) // do not process sky pixels
    {
        const FfxBoolean hit_light = FfxBoolean(shadow_mask & FFX_DNSR_Shadows_GetBitMaskFromPixelPosition(did));
        const FfxFloat32 shadow_current = hit_light ? 1.0 : 0.0;

        // --- Moments and temporal variance -------------------------------------------

        const FfxBoolean is_disoccluded = FFX_DNSR_Shadows_IsDisoccluded(did, depth, velocity);
        const FfxFloat32x3 history_moments = is_disoccluded ? FfxFloat32x3(0.0f, 0.0f, 0.0f) // Can't trust previous moments on disocclusion
                                                            : LoadPreviousMomentsBuffer(history_pos);

        // Running update of mean (m) and sum of squared differences (s).
        const FfxFloat32 old_m = history_moments.x;
        const FfxFloat32 old_s = history_moments.y;
        const FfxFloat32 sample_count = history_moments.z + 1.0f;
        const FfxFloat32 delta_old = shadow_current - old_m;
        const FfxFloat32 new_m = old_m + delta_old / sample_count;
        const FfxFloat32 new_s = old_s + delta_old * (shadow_current - new_m);

        out_variance = (sample_count > 1.0f ? new_s / (sample_count - 1.0f) : 1.0f);
        out_moments = FfxFloat32x3(new_m, new_s, sample_count);

        // --- Local neighborhood and reprojection -------------------------------------

        // The neighborhood value is used as mean and as second moment alike.
        const FfxFloat32 mean = local_neighborhood;
        const FfxFloat32 spatial_variance = max(local_neighborhood - mean * mean, 0.0f);

        // Compute the clamping bounding box
        const FfxFloat32 std_deviation = sqrt(spatial_variance);
        const FfxFloat32 half_deviation = 0.5f * std_deviation;
        const FfxFloat32 nmin = mean - half_deviation;
        const FfxFloat32 nmax = mean + half_deviation;

        // Clamp reprojected sample to local neighborhood
        FfxFloat32 shadow_previous = shadow_current;
        if (IsFirstFrame() == 0)
        {
            shadow_previous = LoadHistory(history_uv);
        }
        out_shadow = clamp(shadow_previous, nmin, nmax);

        // Reduce history weighting
        const FfxFloat32 sigma = 20.0f;
        const FfxFloat32 temporal_discontinuity = (shadow_previous - mean) / max(half_deviation, 0.001f);
        const FfxFloat32 sample_counter_damper = exp(-temporal_discontinuity * temporal_discontinuity / sigma);
        out_moments.z *= sample_counter_damper;

        // Boost variance on first frames
        if (out_moments.z < 16.0f)
        {
            const FfxFloat32 variance_boost = max(16.0f - out_moments.z, 1.0f);
            out_variance = max(out_variance, spatial_variance);
            out_variance *= variance_boost;
        }

        // --- Temporal blend ----------------------------------------------------------

        const FfxFloat32 history_weight = sqrt(max(8.0f - out_moments.z, 0.0f) / 8.0f);
        const FfxFloat32 current_weight = ffxLerp(0.05f, 1.0f, history_weight);
        out_shadow = ffxLerp(out_shadow, shadow_current, current_weight);
    }

    // Output the results of the temporal pass
    StoreReprojectionResults(did.xy, FfxFloat32x2(out_shadow, out_variance));
    StoreMoments(did.xy, out_moments);
}
|
|
|
|
#endif
|