From 4b5071f062db47b96eca915cedf2cc075f2ff229 Mon Sep 17 00:00:00 2001 From: kiseido <kiseido@users.noreply.github.com> Date: Wed, 19 Oct 2022 16:21:29 -0700 Subject: [PATCH] Copying changes from PotatoOfDoom's repo --- CMakeLists.txt | 2 + src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp | 4 -- src/ffx-fsr2-api/ffx_fsr2.cpp | 9 +-- src/ffx-fsr2-api/ffx_fsr2.h | 2 +- src/ffx-fsr2-api/ffx_types.h | 1 - .../shaders/ffx_fsr2_accumulate_pass.glsl | 1 + .../shaders/ffx_fsr2_callbacks_glsl.h | 11 ++- .../shaders/ffx_fsr2_callbacks_hlsl.h | 68 ++++++++++--------- .../shaders/ffx_fsr2_depth_clip.h | 4 ++ src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h | 40 +++++------ src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h | 6 +- src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp | 6 +- .../vk/shaders/ffx_fsr2_shaders_vk.cpp | 2 + 13 files changed, 81 insertions(+), 75 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a5424c..2911401 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,6 +79,8 @@ option (FFX_FSR2_API_VK "Build FSR 2.0 Vulkan backend" ${GFX_API_VK}) # reference libs used by both backends add_subdirectory(libs/cauldron) add_subdirectory(src/Common) + +set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) add_subdirectory(src/ffx-fsr2-api) if(GFX_API_VK) diff --git a/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp b/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp index 8973648..4b0f507 100644 --- a/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp +++ b/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp @@ -245,8 +245,6 @@ DXGI_FORMAT ffxGetDX12FormatFromSurfaceFormat(FfxSurfaceFormat surfaceFormat) return DXGI_FORMAT_R32G32B32A32_FLOAT; case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT): return DXGI_FORMAT_R16G16B16A16_FLOAT; - case(FFX_SURFACE_FORMAT_R16G16B16A16_UNORM): - return DXGI_FORMAT_R16G16B16A16_UNORM; case(FFX_SURFACE_FORMAT_R32G32_FLOAT): return DXGI_FORMAT_R32G32_FLOAT; case(FFX_SURFACE_FORMAT_R32_UINT): @@ -298,8 +296,6 @@ FfxSurfaceFormat ffxGetSurfaceFormatDX12(DXGI_FORMAT format) return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT; case(DXGI_FORMAT_R16G16B16A16_FLOAT): return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT; - case(DXGI_FORMAT_R16G16B16A16_UNORM): - return FFX_SURFACE_FORMAT_R16G16B16A16_UNORM; case(DXGI_FORMAT_R32G32_FLOAT): return FFX_SURFACE_FORMAT_R32G32_FLOAT; case(DXGI_FORMAT_R32_UINT): diff --git a/src/ffx-fsr2-api/ffx_fsr2.cpp b/src/ffx-fsr2-api/ffx_fsr2.cpp index c7a342f..260a688 100644 --- a/src/ffx-fsr2-api/ffx_fsr2.cpp +++ b/src/ffx-fsr2-api/ffx_fsr2.cpp @@ -29,6 +29,7 @@ #include "shaders/ffx_fsr1.h" #include "shaders/ffx_spd.h" #include "shaders/ffx_fsr2_callbacks_hlsl.h" +#include <cmath> #include "ffx_fsr2_maximum_bias.h" @@ -161,7 +162,7 @@ FfxConstantBuffer globalFsr2ConstantBuffers[3] = { // Lanczos static float lanczos2(float value) { - return abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value)); + return std::abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value)); } // Calculate halton number for index and base. @@ -331,7 +332,7 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con const Fsr2ResourceDescription internalSurfaceDesc[] = { { FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, { FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, @@ -662,8 +663,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D // compute jitter cancellation if (context->contextDescription.flags & FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) { - context->constants.motionVectorJitterCancellation[0] = (context->previousJitterOffset[0] - context->constants.jitterOffset[0]) / motionVectorsTargetSize[0]; - context->constants.motionVectorJitterCancellation[1] = (context->previousJitterOffset[1] - context->constants.jitterOffset[1]) / motionVectorsTargetSize[1]; + context->constants.motionVectorJitterCancellation[0] = (context->previousJitterOffset[0] - context->constants.jitterOffset[0]) / motionVectorsTargetSize[0] * context->constants.motionVectorScale[0]; + context->constants.motionVectorJitterCancellation[1] = (context->previousJitterOffset[1] - context->constants.jitterOffset[1]) / motionVectorsTargetSize[1] * context->constants.motionVectorScale[1]; context->previousJitterOffset[0] = context->constants.jitterOffset[0]; context->previousJitterOffset[1] = context->constants.jitterOffset[1]; diff --git a/src/ffx-fsr2-api/ffx_fsr2.h b/src/ffx-fsr2-api/ffx_fsr2.h index ee2ff7d..c8b0fa5 100644 --- a/src/ffx-fsr2-api/ffx_fsr2.h +++ b/src/ffx-fsr2-api/ffx_fsr2.h @@ -40,7 +40,7 @@ /// FidelityFX Super Resolution 2 patch version. /// /// @ingroup FSR2 -#define FFX_FSR2_VERSION_PATCH (2) +#define FFX_FSR2_VERSION_PATCH (1) /// The size of the context specified in 32bit values. /// diff --git a/src/ffx-fsr2-api/ffx_types.h b/src/ffx-fsr2-api/ffx_types.h index 0079572..75fb0e8 100644 --- a/src/ffx-fsr2-api/ffx_types.h +++ b/src/ffx-fsr2-api/ffx_types.h @@ -57,7 +57,6 @@ typedef enum FfxSurfaceFormat { FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format - FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, ///< 16 bit per channel, 4 channel unsigned normalized format FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel float format FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel float format diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl index e1ee116..4f0d694 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl @@ -42,6 +42,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require +#extension GL_EXT_shader_image_load_formatted : require #define FSR2_BIND_SRV_EXPOSURE 0 #define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h index d598250..0d5519b 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h @@ -221,7 +221,7 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r32f) uniform image2D rw_depth_clip; #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) uniform image2D rw_prepared_input_color; + layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba32f) uniform image2D rw_prepared_input_color; #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba32f) uniform image2D rw_luma_history; @@ -591,6 +591,9 @@ void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue) void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) { #if defined(FSR2_BIND_UAV_DILATED_DEPTH) +#if !FFX_FSR2_OPTION_INVERTED_DEPTH + fDepth = 1.0 - fDepth; // Preserve precision as well as we can in FP16. +#endif //FfxUInt32 uDepth = f32tof16(fDepth); imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f)); #endif @@ -625,7 +628,11 @@ FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) { #if defined(FSR2_BIND_SRV_DILATED_DEPTH) - return texelFetch(r_dilatedDepth, iPxInput, 0).r; + FfxFloat32 fDepth = texelFetch(r_dilatedDepth, iPxInput, 0).r; +#if !FFX_FSR2_OPTION_INVERTED_DEPTH + fDepth = 1.0 - fDepth; // Reconstruct from FP16. +#endif + return fDepth; #else return 0.f; #endif diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h index 4641927..f6412f7 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h @@ -234,26 +234,26 @@ SamplerState s_LinearClamp : register(s1); // SRVs #if defined(FFX_INTERNAL) - Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR); - Texture2D<FfxFloat32x4> r_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); - Texture2D<FfxFloat32> r_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); - Texture2D<FfxFloat32x2> r_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); - Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK); - Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK); - Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); - Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); - Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); - Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - Texture2D<FfxFloat32x3> r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); - Texture2D<unorm FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); - Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT); - Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT); - Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); - Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT); - Texture2D<FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); - Texture2D<FfxFloat32x4> r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); + Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR); + Texture2D<FfxFloat32x4> r_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); + Texture2D<FfxFloat32> r_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); + Texture2D<FfxFloat32x2> r_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); + Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK); + Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK); + Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); + Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); + Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); + Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); + Texture2D<FfxFloat32x3> r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); + Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT); + Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT); + Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); + Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT); + Texture2D<FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); + Texture2D<FfxFloat32x4> r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); // declarations not current form, no accessor functions Texture2D<FfxFloat32x4> r_transparency_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_TRANSPARENCY_MASK); @@ -275,15 +275,15 @@ SamplerState s_LinearClamp : register(s1); Texture2D<FfxFloat32x4> r_motion_vectors_reflection : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_REFLECTION); // UAV declarations - RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); - RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); - RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); - RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - RWTexture2D<FfxFloat32x3> rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); - RWTexture2D<unorm FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); - RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT); + RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); + RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); + RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); + RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); + RWTexture2D<FfxFloat32x3> rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); + RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT); globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE); globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5); @@ -330,7 +330,7 @@ SamplerState s_LinearClamp : register(s1); Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP); #endif #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR - Texture2D<unorm FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); + Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_SRV_LUMA_HISTORY Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); @@ -371,7 +371,7 @@ SamplerState s_LinearClamp : register(s1); RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP); #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - RWTexture2D<unorm FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); + RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); @@ -778,7 +778,11 @@ FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) { #if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL) - return r_dilatedDepth[iPxInput]; + FfxFloat32 fDepth = r_dilatedDepth[iPxInput]; +#if !FFX_FSR2_OPTION_INVERTED_DEPTH + fDepth = 1.0 - fDepth; // Reconstruct from FP16. +#endif + return fDepth; #else return 0.f; #endif diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h index 81db737..4748c96 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h @@ -34,8 +34,12 @@ FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousD const FfxFloat32 fHalfViewportWidth = RenderSize().x * 0.5f; FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); +#if !FFX_FSR2_OPTION_INVERTED_DEPTH + const FfxFloat32 Ksep = 4.0f * 1.37e-05f; // Arbitrary hack to make normal depth work better. +#else // WARNING: Ksep only works with reversed-z with infinite projection. const FfxFloat32 Ksep = 1.37e-05f; +#endif FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth; FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h index cfa9db8..f697d70 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h @@ -563,32 +563,24 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1 // BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102) // is common, so iPxSample can "jitter" -#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ - FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ - { \ - FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ - /* Clamp base coords */ \ - iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \ - iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \ - /* */ \ - FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ - FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ - return fColorXY; \ +#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ + FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ + FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ } -#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ - FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ - { \ - FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ - /* Clamp base coords */ \ - iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \ - iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \ - /* */ \ - FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ - FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ - return fColorXY; \ +#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ + FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ + FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ } #define FFX_FSR2_CONCAT_ID(x, y) x ## y diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h index a66a5be..80524d4 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h @@ -46,10 +46,10 @@ FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32x2 FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight; #if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_APPROXIMATE - FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_LUT +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); #else #error "Invalid Lanczos type" #endif diff --git a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp index be766b5..683ad9a 100644 --- a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp +++ b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp @@ -19,6 +19,8 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +#define fsr2GetPermutationBlobByIndex fsr2GetPermutationBlobByIndexVK + #include "../ffx_fsr2.h" #include "ffx_fsr2_vk.h" #include "shaders/ffx_fsr2_shaders_vk.h" // include all the precompiled VK shaders for the FSR2 passes @@ -282,8 +284,6 @@ VkFormat getVKFormatFromSurfaceFormat(FfxSurfaceFormat fmt) return VK_FORMAT_R32G32B32A32_SFLOAT; case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT): return VK_FORMAT_R16G16B16A16_SFLOAT; - case(FFX_SURFACE_FORMAT_R16G16B16A16_UNORM): - return VK_FORMAT_R16G16B16A16_UNORM; case(FFX_SURFACE_FORMAT_R32G32_FLOAT): return VK_FORMAT_R32G32_SFLOAT; case(FFX_SURFACE_FORMAT_R32_UINT): @@ -409,8 +409,6 @@ FfxSurfaceFormat ffxGetSurfaceFormatVK(VkFormat fmt) return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT; case(VK_FORMAT_R16G16B16A16_SFLOAT): return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT; - case(VK_FORMAT_R16G16B16A16_UNORM): - return FFX_SURFACE_FORMAT_R16G16B16A16_UNORM; case(VK_FORMAT_R32G32_SFLOAT): return FFX_SURFACE_FORMAT_R32G32_FLOAT; case(VK_FORMAT_R32_UINT): diff --git a/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp b/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp index 230ae9b..5842275 100644 --- a/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp +++ b/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp @@ -19,6 +19,8 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +#define fsr2GetPermutationBlobByIndex fsr2GetPermutationBlobByIndexVK + #include "ffx_fsr2_shaders_vk.h" #include "ffx_fsr2_autogen_reactive_pass_permutations.h"