342 lines
17 KiB
C
342 lines
17 KiB
C
// This file is part of the FidelityFX SDK.
|
|
//
|
|
// Copyright (C) 2024 Advanced Micro Devices, Inc.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files(the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions :
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
// THE SOFTWARE.
|
|
|
|
#include "ffx_sssr_common.h"
|
|
|
|
#define M_PI 3.14159265358979f
|
|
#define FFX_SSSR_FLOAT_MAX 3.402823466e+38
|
|
#define FFX_SSSR_DEPTH_HIERARCHY_MAX_MIP 6
|
|
|
|
FfxFloat32x3 FFX_SSSR_ScreenSpaceToViewSpace(FfxFloat32x3 screen_space_position) {
|
|
return InvProjectPosition(screen_space_position, InvProjection());
|
|
}
|
|
|
|
FfxFloat32x3 ScreenSpaceToWorldSpace(FfxFloat32x3 screen_space_position) {
|
|
return InvProjectPosition(screen_space_position, InvViewProjection());
|
|
}
|
|
|
|
// http://jcgt.org/published/0007/04/01/paper.pdf by Eric Heitz
|
|
// Input Ve: view direction
|
|
// Input alpha_x, alpha_y: roughness parameters
|
|
// Input U1, U2: uniform random numbers
|
|
// Output Ne: normal sampled with PDF D_Ve(Ne) = G1(Ve) * max(0, dot(Ve, Ne)) * D(Ne) / Ve.z
|
|
FfxFloat32x3 SampleGGXVNDF(FfxFloat32x3 Ve, FfxFloat32 alpha_x, FfxFloat32 alpha_y, FfxFloat32 U1, FfxFloat32 U2) {
|
|
// Section 3.2: transforming the view direction to the hemisphere configuration
|
|
FfxFloat32x3 Vh = normalize(FfxFloat32x3(alpha_x * Ve.x, alpha_y * Ve.y, Ve.z));
|
|
// Section 4.1: orthonormal basis (with special case if cross product is zero)
|
|
FfxFloat32 lensq = Vh.x * Vh.x + Vh.y * Vh.y;
|
|
FfxFloat32x3 T1 = lensq > 0 ? FfxFloat32x3(-Vh.y, Vh.x, 0) * ffxRsqrt(lensq) : FfxFloat32x3(1, 0, 0);
|
|
FfxFloat32x3 T2 = cross(Vh, T1);
|
|
// Section 4.2: parameterization of the projected area
|
|
FfxFloat32 r = sqrt(U1);
|
|
FfxFloat32 phi = 2.0 * M_PI * U2;
|
|
FfxFloat32 t1 = r * cos(phi);
|
|
FfxFloat32 t2 = r * sin(phi);
|
|
FfxFloat32 s = 0.5 * (1.0 + Vh.z);
|
|
t2 = (1.0 - s) * sqrt(1.0 - t1 * t1) + s * t2;
|
|
// Section 4.3: reprojection onto hemisphere
|
|
FfxFloat32x3 Nh = t1 * T1 + t2 * T2 + sqrt(max(0.0, 1.0 - t1 * t1 - t2 * t2)) * Vh;
|
|
// Section 3.4: transforming the normal back to the ellipsoid configuration
|
|
FfxFloat32x3 Ne = normalize(FfxFloat32x3(alpha_x * Nh.x, alpha_y * Nh.y, max(0.0, Nh.z)));
|
|
return Ne;
|
|
}
|
|
|
|
FfxFloat32x3 Sample_GGX_VNDF_Ellipsoid(FfxFloat32x3 Ve, FfxFloat32 alpha_x, FfxFloat32 alpha_y, FfxFloat32 U1, FfxFloat32 U2) {
|
|
return SampleGGXVNDF(Ve, alpha_x, alpha_y, U1, U2);
|
|
}
|
|
|
|
FfxFloat32x3 Sample_GGX_VNDF_Hemisphere(FfxFloat32x3 Ve, FfxFloat32 alpha, FfxFloat32 U1, FfxFloat32 U2) {
|
|
return Sample_GGX_VNDF_Ellipsoid(Ve, alpha, alpha, U1, U2);
|
|
}
|
|
|
|
FfxFloat32x3 SampleReflectionVector(FfxFloat32x3 view_direction, FfxFloat32x3 normal, FfxFloat32 roughness, FfxInt32x2 dispatch_thread_id) {
|
|
FfxFloat32x3 U;
|
|
FfxFloat32x3 N = normal;
|
|
if (abs(N.z) > 0.0) {
|
|
FfxFloat32 k = sqrt(N.y * N.y + N.z * N.z);
|
|
U.x = 0.0; U.y = -N.z / k; U.z = N.y / k;
|
|
}
|
|
else {
|
|
FfxFloat32 k = sqrt(N.x * N.x + N.y * N.y);
|
|
U.x = N.y / k; U.y = -N.x / k; U.z = 0.0;
|
|
}
|
|
|
|
// TBN 3x3 matrix
|
|
FfxFloat32x3 TBN_row0 = U;
|
|
FfxFloat32x3 TBN_row1 = cross(N, U);
|
|
FfxFloat32x3 TBN_row2 = N;
|
|
|
|
// TBN * -view_direction
|
|
FfxFloat32x3 view_direction_tbn = FfxFloat32x3(dot(TBN_row0, -view_direction), dot(TBN_row1, -view_direction), dot(TBN_row2, -view_direction));
|
|
|
|
FfxFloat32x2 u = FFX_SSSR_SampleRandomVector2D(dispatch_thread_id);
|
|
|
|
FfxFloat32x3 sampled_normal_tbn = Sample_GGX_VNDF_Hemisphere(view_direction_tbn, roughness, u.x, u.y);
|
|
#ifdef PERFECT_REFLECTIONS
|
|
sampled_normal_tbn = FfxFloat32x3(0, 0, 1); // Overwrite normal sample to produce perfect reflection.
|
|
#endif
|
|
|
|
FfxFloat32x3 reflected_direction_tbn = reflect(-view_direction_tbn, sampled_normal_tbn);
|
|
|
|
// Transpose of TBN
|
|
FfxFloat32x3 TBN_col0 = FfxFloat32x3(TBN_row0[0], TBN_row1[0], TBN_row2[0]);
|
|
FfxFloat32x3 TBN_col1 = FfxFloat32x3(TBN_row0[1], TBN_row1[1], TBN_row2[1]);
|
|
FfxFloat32x3 TBN_col2 = FfxFloat32x3(TBN_row0[2], TBN_row1[2], TBN_row2[2]);
|
|
|
|
// transpose(TBN) * reflected_direction_tbn
|
|
return FfxFloat32x3(dot(TBN_col0, reflected_direction_tbn), dot(TBN_col1, reflected_direction_tbn), dot(TBN_col2, reflected_direction_tbn));
|
|
}
|
|
|
|
void FFX_SSSR_InitialAdvanceRay(FfxFloat32x3 origin, FfxFloat32x3 direction, FfxFloat32x3 inv_direction, FfxFloat32x2 current_mip_resolution, FfxFloat32x2 current_mip_resolution_inv, FfxFloat32x2 floor_offset, FfxFloat32x2 uv_offset, FFX_PARAMETER_OUT FfxFloat32x3 position, FFX_PARAMETER_OUT FfxFloat32 current_t) {
|
|
FfxFloat32x2 current_mip_position = current_mip_resolution * origin.xy;
|
|
|
|
// Intersect ray with the half box that is pointing away from the ray origin.
|
|
FfxFloat32x2 xy_plane = floor(current_mip_position) + floor_offset;
|
|
xy_plane = xy_plane * current_mip_resolution_inv + uv_offset;
|
|
|
|
// o + d * t = p' => t = (p' - o) / d
|
|
FfxFloat32x2 t = xy_plane * inv_direction.xy - origin.xy * inv_direction.xy;
|
|
current_t = min(t.x, t.y);
|
|
position = origin + current_t * direction;
|
|
}
|
|
|
|
FfxBoolean FFX_SSSR_AdvanceRay(FfxFloat32x3 origin, FfxFloat32x3 direction, FfxFloat32x3 inv_direction, FfxFloat32x2 current_mip_position, FfxFloat32x2 current_mip_resolution_inv, FfxFloat32x2 floor_offset, FfxFloat32x2 uv_offset, FfxFloat32 surface_z, FFX_PARAMETER_INOUT FfxFloat32x3 position, FFX_PARAMETER_INOUT FfxFloat32 current_t) {
|
|
// Create boundary planes
|
|
FfxFloat32x2 xy_plane = floor(current_mip_position) + floor_offset;
|
|
xy_plane = xy_plane * current_mip_resolution_inv + uv_offset;
|
|
FfxFloat32x3 boundary_planes = FfxFloat32x3(xy_plane, surface_z);
|
|
|
|
// Intersect ray with the half box that is pointing away from the ray origin.
|
|
// o + d * t = p' => t = (p' - o) / d
|
|
FfxFloat32x3 t = boundary_planes * inv_direction - origin * inv_direction;
|
|
|
|
// Prevent using z plane when shooting out of the depth buffer.
|
|
#if FFX_SSSR_OPTION_INVERTED_DEPTH
|
|
t.z = direction.z < 0 ? t.z : FFX_SSSR_FLOAT_MAX;
|
|
#else
|
|
t.z = direction.z > 0 ? t.z : FFX_SSSR_FLOAT_MAX;
|
|
#endif
|
|
|
|
// Choose nearest intersection with a boundary.
|
|
FfxFloat32 t_min = min(min(t.x, t.y), t.z);
|
|
|
|
#if FFX_SSSR_OPTION_INVERTED_DEPTH
|
|
// Larger z means closer to the camera.
|
|
FfxBoolean above_surface = surface_z < position.z;
|
|
#else
|
|
// Smaller z means closer to the camera.
|
|
FfxBoolean above_surface = surface_z > position.z;
|
|
#endif
|
|
|
|
// Decide whether we are able to advance the ray until we hit the xy boundaries or if we had to clamp it at the surface.
|
|
// We use the asuint comparison to avoid NaN / Inf logic, also we actually care about bitwise equality here to see if t_min is the t.z we fed into the min3 above.
|
|
FfxBoolean skipped_tile = ffxAsUInt32(t_min) != ffxAsUInt32(t.z) && above_surface;
|
|
|
|
// Make sure to only advance the ray if we're still above the surface.
|
|
current_t = above_surface ? t_min : current_t;
|
|
|
|
// Advance ray
|
|
position = origin + current_t * direction;
|
|
|
|
return skipped_tile;
|
|
}
|
|
|
|
FfxFloat32x2 FFX_SSSR_GetMipResolution(FfxFloat32x2 screen_dimensions, FfxInt32 mip_level) {
|
|
return screen_dimensions * pow(0.5, mip_level);
|
|
}
|
|
|
|
// Requires origin and direction of the ray to be in screen space [0, 1] x [0, 1]
|
|
FfxFloat32x3 FFX_SSSR_HierarchicalRaymarch(FfxFloat32x3 origin, FfxFloat32x3 direction, FfxBoolean is_mirror, FfxFloat32x2 screen_size, FfxInt32 most_detailed_mip, FfxUInt32 min_traversal_occupancy, FfxUInt32 max_traversal_intersections, FFX_PARAMETER_OUT FfxBoolean valid_hit)
|
|
{
|
|
const FfxFloat32x3 inv_direction = FFX_SELECT(direction != FfxFloat32x3(0.0f, 0.0f, 0.0f), FfxFloat32x3(1.0f, 1.0f, 1.0f) / direction, FfxFloat32x3(FFX_SSSR_FLOAT_MAX, FFX_SSSR_FLOAT_MAX, FFX_SSSR_FLOAT_MAX));
|
|
|
|
// Start on mip with highest detail.
|
|
FfxInt32 current_mip = most_detailed_mip;
|
|
|
|
// Could recompute these every iteration, but it's faster to hoist them out and update them.
|
|
FfxFloat32x2 current_mip_resolution = FFX_SSSR_GetMipResolution(screen_size, current_mip);
|
|
FfxFloat32x2 current_mip_resolution_inv = ffxReciprocal(current_mip_resolution);
|
|
|
|
// Offset to the bounding boxes uv space to intersect the ray with the center of the next pixel.
|
|
// This means we ever so slightly over shoot into the next region.
|
|
FfxFloat32x2 uv_offset = 0.005 * exp2(most_detailed_mip) / screen_size;
|
|
uv_offset.x = direction.x < 0.0f ? -uv_offset.x : uv_offset.x;
|
|
uv_offset.y = direction.y < 0.0f ? -uv_offset.y : uv_offset.y;
|
|
|
|
// Offset applied depending on current mip resolution to move the boundary to the left/right upper/lower border depending on ray direction.
|
|
FfxFloat32x2 floor_offset;
|
|
floor_offset.x = direction.x < 0.0f ? 0.0f : 1.0f;
|
|
floor_offset.y = direction.y < 0.0f ? 0.0f : 1.0f;
|
|
|
|
// Initially advance ray to avoid immediate self intersections.
|
|
FfxFloat32 current_t;
|
|
FfxFloat32x3 position;
|
|
FFX_SSSR_InitialAdvanceRay(origin, direction, inv_direction, current_mip_resolution, current_mip_resolution_inv, floor_offset, uv_offset, position, current_t);
|
|
|
|
FfxBoolean exit_due_to_low_occupancy = false;
|
|
FfxInt32 i = 0;
|
|
while (i < max_traversal_intersections && current_mip >= most_detailed_mip && !exit_due_to_low_occupancy) {
|
|
FfxFloat32x2 current_mip_position = current_mip_resolution * position.xy;
|
|
FfxFloat32 surface_z = FFX_SSSR_LoadDepth(FfxInt32x2(current_mip_position), current_mip);
|
|
exit_due_to_low_occupancy = !is_mirror && ffxWaveActiveCountBits(true) <= min_traversal_occupancy;
|
|
FfxBoolean skipped_tile = FFX_SSSR_AdvanceRay(origin, direction, inv_direction, current_mip_position, current_mip_resolution_inv, floor_offset, uv_offset, surface_z, position, current_t);
|
|
|
|
// Don't increase mip further than this because we did not generate it
|
|
FfxBoolean nextMipIsOutOfRange = skipped_tile && (current_mip >= FFX_SSSR_DEPTH_HIERARCHY_MAX_MIP);
|
|
if (!nextMipIsOutOfRange)
|
|
{
|
|
current_mip += skipped_tile ? 1 : -1;
|
|
current_mip_resolution *= skipped_tile ? 0.5 : 2;
|
|
current_mip_resolution_inv *= skipped_tile ? 2 : 0.5;;
|
|
}
|
|
|
|
++i;
|
|
}
|
|
|
|
valid_hit = (i <= max_traversal_intersections);
|
|
|
|
return position;
|
|
}
|
|
|
|
FfxFloat32 FFX_SSSR_ValidateHit(FfxFloat32x3 hit, FfxFloat32x2 uv, FfxFloat32x3 world_space_ray_direction, FfxFloat32x2 screen_size, FfxFloat32 depth_buffer_thickness) {
|
|
// Reject hits outside the view frustum
|
|
if ((hit.x < 0.0f) || (hit.y < 0.0f) || (hit.x > 1.0f) || (hit.y > 1.0f)) {
|
|
return 0.0f;
|
|
}
|
|
|
|
// Reject the hit if we didnt advance the ray significantly to avoid immediate self reflection
|
|
FfxFloat32x2 manhattan_dist = abs(hit.xy - uv);
|
|
if((manhattan_dist.x < (2.0f / screen_size.x)) && (manhattan_dist.y < (2.0f / screen_size.y)) ) {
|
|
return 0.0;
|
|
}
|
|
|
|
// Don't lookup radiance from the background.
|
|
FfxInt32x2 texel_coords = FfxInt32x2(screen_size * hit.xy);
|
|
FfxFloat32 surface_z = FFX_SSSR_LoadDepth(texel_coords / 2, 1);
|
|
#if FFX_SSSR_OPTION_INVERTED_DEPTH
|
|
if (surface_z == 0.0) {
|
|
#else
|
|
if (surface_z == 1.0) {
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
// We check if we hit the surface from the back, these should be rejected.
|
|
FfxFloat32x3 hit_normal = FFX_SSSR_LoadWorldSpaceNormal(texel_coords);
|
|
if (dot(hit_normal, world_space_ray_direction) > 0) {
|
|
return 0;
|
|
}
|
|
|
|
FfxFloat32x3 view_space_surface = FFX_SSSR_ScreenSpaceToViewSpace(FfxFloat32x3(hit.xy, surface_z));
|
|
FfxFloat32x3 view_space_hit = FFX_SSSR_ScreenSpaceToViewSpace(hit);
|
|
FfxFloat32 distance = length(view_space_surface - view_space_hit);
|
|
|
|
// Fade out hits near the screen borders
|
|
FfxFloat32x2 fov = 0.05 * FfxFloat32x2(screen_size.y / screen_size.x, 1);
|
|
FfxFloat32x2 border = smoothstep(FfxFloat32x2(0.0f, 0.0f), fov, hit.xy) * (1 - smoothstep(FfxFloat32x2(1.0f, 1.0f) - fov, FfxFloat32x2(1.0f, 1.0f), hit.xy));
|
|
FfxFloat32 vignette = border.x * border.y;
|
|
|
|
// We accept all hits that are within a reasonable minimum distance below the surface.
|
|
// Add constant in linear space to avoid growing of the reflections toward the reflected objects.
|
|
FfxFloat32 confidence = 1.0f - smoothstep(0.0f, depth_buffer_thickness, distance);
|
|
confidence *= confidence;
|
|
|
|
return vignette * confidence;
|
|
}
|
|
|
|
void Intersect(FfxUInt32 group_index, FfxUInt32 group_id)
|
|
{
|
|
FfxUInt32 ray_index = group_id * 64 + group_index;
|
|
if(!IsRayIndexValid(ray_index))
|
|
{
|
|
return;
|
|
}
|
|
|
|
FfxUInt32 packed_coords = GetRaylist(ray_index);
|
|
|
|
FfxUInt32x2 coords;
|
|
FfxBoolean copy_horizontal;
|
|
FfxBoolean copy_vertical;
|
|
FfxBoolean copy_diagonal;
|
|
UnpackRayCoords(packed_coords, coords, copy_horizontal, copy_vertical, copy_diagonal);
|
|
|
|
const FfxUInt32x2 screen_size = RenderSize();
|
|
|
|
FfxFloat32x2 uv = (coords + 0.5) * InverseRenderSize();
|
|
|
|
FfxFloat32x3 world_space_normal = FFX_SSSR_LoadWorldSpaceNormal(FfxInt32x2(coords));
|
|
FfxFloat32 roughness = FFX_SSSR_LoadExtractedRoughness(FfxInt32x3(coords, 0));
|
|
FfxBoolean is_mirror = IsMirrorReflection(roughness);
|
|
|
|
FfxInt32 most_detailed_mip = is_mirror ? 0 : FfxInt32(MostDetailedMip());
|
|
FfxFloat32x2 mip_resolution = FFX_SSSR_GetMipResolution(screen_size, most_detailed_mip);
|
|
FfxFloat32 z = FFX_SSSR_LoadDepth(FfxInt32x2(uv * mip_resolution), most_detailed_mip);
|
|
|
|
FfxFloat32x3 screen_uv_space_ray_origin = FfxFloat32x3(uv, z);
|
|
FfxFloat32x3 view_space_ray = ScreenSpaceToViewSpace(screen_uv_space_ray_origin);
|
|
FfxFloat32x3 view_space_ray_direction = normalize(view_space_ray);
|
|
|
|
FfxFloat32x3 view_space_surface_normal = FFX_MATRIX_MULTIPLY(ViewMatrix(), FfxFloat32x4(world_space_normal, 0)).xyz;
|
|
FfxFloat32x3 view_space_reflected_direction = SampleReflectionVector(view_space_ray_direction, view_space_surface_normal, roughness, FfxInt32x2(coords));
|
|
FfxFloat32x3 screen_space_ray_direction = ProjectDirection(view_space_ray, view_space_reflected_direction, screen_uv_space_ray_origin, Projection());
|
|
|
|
//====SSSR====
|
|
FfxBoolean valid_hit = false;
|
|
FfxFloat32x3 hit = FFX_SSSR_HierarchicalRaymarch(screen_uv_space_ray_origin, screen_space_ray_direction, is_mirror, screen_size, most_detailed_mip, MinTraversalOccupancy(), MaxTraversalIntersections(), valid_hit);
|
|
|
|
FfxFloat32x3 world_space_origin = ScreenSpaceToWorldSpace(screen_uv_space_ray_origin);
|
|
FfxFloat32x3 world_space_hit = ScreenSpaceToWorldSpace(hit);
|
|
FfxFloat32x3 world_space_ray = world_space_hit - world_space_origin.xyz;
|
|
|
|
FfxFloat32 confidence = valid_hit ? FFX_SSSR_ValidateHit(hit, uv, world_space_ray, screen_size, DepthBufferThickness()) : 0;
|
|
FfxFloat32 world_ray_length = max(0, length(world_space_ray));
|
|
|
|
FfxFloat32x3 reflection_radiance = FfxFloat32x3(0.0f, 0.0f, 0.0f);
|
|
if (confidence > 0.0f) {
|
|
// Found an intersection with the depth buffer -> We can lookup the color from lit scene.
|
|
reflection_radiance = FFX_SSSR_LoadInputColor(FfxInt32x3(screen_size * hit.xy, 0));
|
|
}
|
|
|
|
// Sample environment map.
|
|
FfxFloat32x3 world_space_reflected_direction = FFX_MATRIX_MULTIPLY(InvView(), FfxFloat32x4(view_space_reflected_direction, 0)).xyz;
|
|
FfxFloat32x3 environment_lookup = FFX_SSSR_SampleEnvironmentMap(world_space_reflected_direction, 0.0f);
|
|
reflection_radiance = ffxLerp(environment_lookup, reflection_radiance, confidence);
|
|
|
|
FfxFloat32x4 new_sample = FfxFloat32x4(reflection_radiance, world_ray_length);
|
|
|
|
FFX_SSSR_StoreRadiance(coords, new_sample);
|
|
|
|
FfxUInt32x2 copy_target = coords ^ 1; // Flip last bit to find the mirrored coords along the x and y axis within a quad.
|
|
if (copy_horizontal) {
|
|
FfxUInt32x2 copy_coords = FfxUInt32x2(copy_target.x, coords.y);
|
|
FFX_SSSR_StoreRadiance(copy_coords, new_sample);
|
|
}
|
|
if (copy_vertical) {
|
|
FfxUInt32x2 copy_coords = FfxUInt32x2(coords.x, copy_target.y);
|
|
FFX_SSSR_StoreRadiance(copy_coords, new_sample);
|
|
}
|
|
if (copy_diagonal) {
|
|
FfxUInt32x2 copy_coords = copy_target;
|
|
FFX_SSSR_StoreRadiance(copy_coords, new_sample);
|
|
}
|
|
}
|