From 4b5071f062db47b96eca915cedf2cc075f2ff229 Mon Sep 17 00:00:00 2001
From: kiseido <kiseido@users.noreply.github.com>
Date: Wed, 19 Oct 2022 16:21:29 -0700
Subject: [PATCH] Copying changes from PotatoOfDoom's repo

---
 CMakeLists.txt                                |  2 +
 src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp       |  4 --
 src/ffx-fsr2-api/ffx_fsr2.cpp                 |  9 +--
 src/ffx-fsr2-api/ffx_fsr2.h                   |  2 +-
 src/ffx-fsr2-api/ffx_types.h                  |  1 -
 .../shaders/ffx_fsr2_accumulate_pass.glsl     |  1 +
 .../shaders/ffx_fsr2_callbacks_glsl.h         | 11 ++-
 .../shaders/ffx_fsr2_callbacks_hlsl.h         | 68 ++++++++++---------
 .../shaders/ffx_fsr2_depth_clip.h             |  4 ++
 src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h    | 40 +++++------
 src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h  |  6 +-
 src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp           |  6 +-
 .../vk/shaders/ffx_fsr2_shaders_vk.cpp        |  2 +
 13 files changed, 81 insertions(+), 75 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9a5424c..2911401 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,6 +79,8 @@ option (FFX_FSR2_API_VK "Build FSR 2.0 Vulkan backend" ${GFX_API_VK})
 # reference libs used by both backends
 add_subdirectory(libs/cauldron)
 add_subdirectory(src/Common)
+
+set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) # default-enables IPO/LTO for targets created after this point. NOTE(review): not guarded by check_ipo_supported() - confirm every supported toolchain accepts it
 add_subdirectory(src/ffx-fsr2-api)
 
 if(GFX_API_VK)
diff --git a/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp b/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp
index 8973648..4b0f507 100644
--- a/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp
+++ b/src/ffx-fsr2-api/dx12/ffx_fsr2_dx12.cpp
@@ -245,8 +245,6 @@ DXGI_FORMAT ffxGetDX12FormatFromSurfaceFormat(FfxSurfaceFormat surfaceFormat)
             return DXGI_FORMAT_R32G32B32A32_FLOAT;
         case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT):
             return DXGI_FORMAT_R16G16B16A16_FLOAT;
-        case(FFX_SURFACE_FORMAT_R16G16B16A16_UNORM):
-            return DXGI_FORMAT_R16G16B16A16_UNORM;
         case(FFX_SURFACE_FORMAT_R32G32_FLOAT):
             return DXGI_FORMAT_R32G32_FLOAT;
         case(FFX_SURFACE_FORMAT_R32_UINT):
@@ -298,8 +296,6 @@ FfxSurfaceFormat ffxGetSurfaceFormatDX12(DXGI_FORMAT format)
             return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT;
         case(DXGI_FORMAT_R16G16B16A16_FLOAT):
             return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT;
-        case(DXGI_FORMAT_R16G16B16A16_UNORM):
-            return FFX_SURFACE_FORMAT_R16G16B16A16_UNORM;
         case(DXGI_FORMAT_R32G32_FLOAT):
             return FFX_SURFACE_FORMAT_R32G32_FLOAT;
         case(DXGI_FORMAT_R32_UINT):
diff --git a/src/ffx-fsr2-api/ffx_fsr2.cpp b/src/ffx-fsr2-api/ffx_fsr2.cpp
index c7a342f..260a688 100644
--- a/src/ffx-fsr2-api/ffx_fsr2.cpp
+++ b/src/ffx-fsr2-api/ffx_fsr2.cpp
@@ -29,6 +29,7 @@
 #include "shaders/ffx_fsr1.h"
 #include "shaders/ffx_spd.h"
 #include "shaders/ffx_fsr2_callbacks_hlsl.h"
+#include <cmath>
 
 #include "ffx_fsr2_maximum_bias.h"
 
@@ -161,7 +162,7 @@ FfxConstantBuffer globalFsr2ConstantBuffers[3] = {
 // Lanczos
 static float lanczos2(float value)
 {
-    return abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
+    return std::abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
 }
 
 // Calculate halton number for index and base.
@@ -331,7 +332,7 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con
     const Fsr2ResourceDescription internalSurfaceDesc[] = {
 
         {   FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV,
-            FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
+            FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
 
         {   FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV,
             FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
@@ -662,8 +663,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
     // compute jitter cancellation
     if (context->contextDescription.flags & FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) {
 
-        context->constants.motionVectorJitterCancellation[0] = (context->previousJitterOffset[0] - context->constants.jitterOffset[0]) / motionVectorsTargetSize[0];
-        context->constants.motionVectorJitterCancellation[1] = (context->previousJitterOffset[1] - context->constants.jitterOffset[1]) / motionVectorsTargetSize[1];
+        context->constants.motionVectorJitterCancellation[0] = (context->previousJitterOffset[0] - context->constants.jitterOffset[0]) / motionVectorsTargetSize[0] * context->constants.motionVectorScale[0];
+        context->constants.motionVectorJitterCancellation[1] = (context->previousJitterOffset[1] - context->constants.jitterOffset[1]) / motionVectorsTargetSize[1] * context->constants.motionVectorScale[1];
 
         context->previousJitterOffset[0] = context->constants.jitterOffset[0];
         context->previousJitterOffset[1] = context->constants.jitterOffset[1];
diff --git a/src/ffx-fsr2-api/ffx_fsr2.h b/src/ffx-fsr2-api/ffx_fsr2.h
index ee2ff7d..c8b0fa5 100644
--- a/src/ffx-fsr2-api/ffx_fsr2.h
+++ b/src/ffx-fsr2-api/ffx_fsr2.h
@@ -40,7 +40,7 @@
 /// FidelityFX Super Resolution 2 patch version.
 ///
 /// @ingroup FSR2
-#define FFX_FSR2_VERSION_PATCH      (2)
+#define FFX_FSR2_VERSION_PATCH      (1)
 
 /// The size of the context specified in 32bit values.
 ///
diff --git a/src/ffx-fsr2-api/ffx_types.h b/src/ffx-fsr2-api/ffx_types.h
index 0079572..75fb0e8 100644
--- a/src/ffx-fsr2-api/ffx_types.h
+++ b/src/ffx-fsr2-api/ffx_types.h
@@ -57,7 +57,6 @@ typedef enum FfxSurfaceFormat {
     FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS,       ///< 32 bit per channel, 4 channel typeless format
     FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT,          ///< 32 bit per channel, 4 channel float format
     FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT,          ///< 16 bit per channel, 4 channel float format
-    FFX_SURFACE_FORMAT_R16G16B16A16_UNORM,          ///< 16 bit per channel, 4 channel unsigned normalized format
     FFX_SURFACE_FORMAT_R32G32_FLOAT,                ///< 32 bit per channel, 2 channel float format
     FFX_SURFACE_FORMAT_R32_UINT,                    ///< 32 bit per channel, 1 channel float format
     FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS,           ///<  8 bit per channel, 4 channel float format
diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl
index e1ee116..4f0d694 100644
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl
@@ -42,6 +42,7 @@
 
 #extension GL_GOOGLE_include_directive : require
 #extension GL_EXT_samplerless_texture_functions : require
+#extension GL_EXT_shader_image_load_formatted : require
 
 #define FSR2_BIND_SRV_EXPOSURE                               0
 #define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS                 1
diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h
index d598250..0d5519b 100644
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h
@@ -221,7 +221,7 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
 	layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r32f)                        uniform image2D    rw_depth_clip;
 #endif
 #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
-	layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16)            uniform image2D    rw_prepared_input_color;
+	layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16f)           uniform image2D    rw_prepared_input_color; // rgba16f: matches the R16G16B16A16_FLOAT resource
 #endif
 #if defined FSR2_BIND_UAV_LUMA_HISTORY
 	layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba32f)                   uniform image2D    rw_luma_history;
@@ -591,6 +591,9 @@ void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue)
 void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
 {
 #if defined(FSR2_BIND_UAV_DILATED_DEPTH)
+#if !FFX_FSR2_OPTION_INVERTED_DEPTH
+	fDepth = 1.0 - fDepth; // Store 1 - depth to preserve precision as well as we can in FP16; LoadDilatedDepth() undoes this.
+#endif
 	//FfxUInt32 uDepth = f32tof16(fDepth);
 	imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
 #endif
@@ -625,7 +628,11 @@ FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
 FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
 {
 #if defined(FSR2_BIND_SRV_DILATED_DEPTH)
-	return texelFetch(r_dilatedDepth, iPxInput, 0).r;
+	FfxFloat32 fDepth = texelFetch(r_dilatedDepth, iPxInput, 0).r;
+#if !FFX_FSR2_OPTION_INVERTED_DEPTH
+	fDepth = 1.0 - fDepth; // Reconstruct from FP16: undo the 1 - depth encoding applied by StoreDilatedDepth().
+#endif
+	return fDepth;
 #else
     return 0.f;
 #endif
diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h
index 4641927..f6412f7 100644
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h
@@ -234,26 +234,26 @@ SamplerState s_LinearClamp : register(s1);
 
 // SRVs
 #if defined(FFX_INTERNAL)
-    Texture2D<FfxFloat32x4>                       r_input_color_jittered                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
-    Texture2D<FfxFloat32x4>                       r_motion_vectors                          : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
-    Texture2D<FfxFloat32>                         r_depth                                   : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
-    Texture2D<FfxFloat32x2>                       r_exposure                                : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
-    Texture2D<FfxFloat32>                         r_reactive_mask                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
-    Texture2D<FfxFloat32>                         r_transparency_and_composition_mask       : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
-    Texture2D<FfxUInt32>                          r_reconstructed_previous_nearest_depth    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
-    Texture2D<FfxFloat32x2>                       r_dilated_motion_vectors                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
-    Texture2D<FfxFloat32>                         r_dilatedDepth                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
-    Texture2D<FfxFloat32x4>                       r_internal_upscaled_color                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
-    Texture2D<FfxFloat32x3>                       r_lock_status                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
-    Texture2D<FfxFloat32>                         r_depth_clip                              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
-    Texture2D<unorm FfxFloat32x4>                 r_prepared_input_color                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
-    Texture2D<unorm FfxFloat32x4>                 r_luma_history                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
-    Texture2D<FfxFloat32x4>                       r_rcas_input                              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
-    Texture2D<FfxFloat32>                         r_lanczos_lut                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
-    Texture2D<FfxFloat32>                         r_imgMips                                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
-    Texture2D<FfxFloat32>                         r_upsample_maximum_bias_lut               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
-    Texture2D<FfxFloat32x2>                       r_dilated_reactive_masks                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
-    Texture2D<FfxFloat32x4>                       r_debug_out                               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
+    Texture2D<FfxFloat32x4>                       r_input_color_jittered                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
+    Texture2D<FfxFloat32x4>                       r_motion_vectors                        : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
+    Texture2D<FfxFloat32>                         r_depth                                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
+    Texture2D<FfxFloat32x2>                       r_exposure                              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
+    Texture2D<FfxFloat32>                         r_reactive_mask                         : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
+    Texture2D<FfxFloat32>                         r_transparency_and_composition_mask     : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
+    Texture2D<FfxUInt32>                          r_reconstructed_previous_nearest_depth  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
+    Texture2D<FfxFloat32x2>                       r_dilated_motion_vectors                : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
+    Texture2D<FfxFloat32>                         r_dilatedDepth                          : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
+    Texture2D<FfxFloat32x4>                       r_internal_upscaled_color               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
+    Texture2D<FfxFloat32x3>                       r_lock_status                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
+    Texture2D<FfxFloat32>                         r_depth_clip                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
+    Texture2D<FfxFloat32x4>                       r_prepared_input_color                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
+    Texture2D<unorm FfxFloat32x4>                 r_luma_history                          : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
+    Texture2D<FfxFloat32x4>                       r_rcas_input                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
+    Texture2D<FfxFloat32>                         r_lanczos_lut                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
+    Texture2D<FfxFloat32>                         r_imgMips                               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
+    Texture2D<FfxFloat32>                         r_upsample_maximum_bias_lut             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
+    Texture2D<FfxFloat32x2>                       r_dilated_reactive_masks                : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
+    Texture2D<FfxFloat32x4>                       r_debug_out                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
 
     // declarations not current form, no accessor functions
     Texture2D<FfxFloat32x4>                       r_transparency_mask                     : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_TRANSPARENCY_MASK);
@@ -275,15 +275,15 @@ SamplerState s_LinearClamp : register(s1);
     Texture2D<FfxFloat32x4>                       r_motion_vectors_reflection             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_REFLECTION);
 
     // UAV declarations
-    RWTexture2D<FfxUInt32>                        rw_reconstructed_previous_nearest_depth   : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
-    RWTexture2D<FfxFloat32x2>                     rw_dilated_motion_vectors                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
-    RWTexture2D<FfxFloat32>                       rw_dilatedDepth                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
-    RWTexture2D<FfxFloat32x4>                     rw_internal_upscaled_color                : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
-    RWTexture2D<FfxFloat32x3>                     rw_lock_status                            : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
-    RWTexture2D<FfxFloat32>                       rw_depth_clip                             : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
-    RWTexture2D<unorm FfxFloat32x4>               rw_prepared_input_color                   : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
-    RWTexture2D<unorm FfxFloat32x4>               rw_luma_history                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
-    RWTexture2D<FfxFloat32x4>                     rw_upscaled_output                        : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
+    RWTexture2D<FfxUInt32>                        rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
+    RWTexture2D<FfxFloat32x2>                     rw_dilated_motion_vectors               : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
+    RWTexture2D<FfxFloat32>                       rw_dilatedDepth                         : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
+    RWTexture2D<FfxFloat32x4>                     rw_internal_upscaled_color              : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
+    RWTexture2D<FfxFloat32x3>                     rw_lock_status                          : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
+    RWTexture2D<FfxFloat32>                       rw_depth_clip                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
+    RWTexture2D<FfxFloat32x4>                     rw_prepared_input_color                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
+    RWTexture2D<unorm FfxFloat32x4>               rw_luma_history                         : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
+    RWTexture2D<FfxFloat32x4>                     rw_upscaled_output                      : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
 
     globallycoherent RWTexture2D<FfxFloat32>      rw_img_mip_shading_change               : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE);
     globallycoherent RWTexture2D<FfxFloat32>      rw_img_mip_5                            : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5);
@@ -330,7 +330,7 @@ SamplerState s_LinearClamp : register(s1);
         Texture2D<FfxFloat32>                     r_depth_clip                            : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP);
     #endif
     #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
-        Texture2D<unorm FfxFloat32x4>             r_prepared_input_color                    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
+        Texture2D<FfxFloat32x4>                   r_prepared_input_color                  : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
     #endif
     #if defined FSR2_BIND_SRV_LUMA_HISTORY
         Texture2D<unorm FfxFloat32x4>             r_luma_history                          : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
@@ -371,7 +371,7 @@ SamplerState s_LinearClamp : register(s1);
         RWTexture2D<FfxFloat32>                   rw_depth_clip                           : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP);
     #endif
     #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
-        RWTexture2D<unorm FfxFloat32x4>           rw_prepared_input_color                   : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
+        RWTexture2D<FfxFloat32x4>                 rw_prepared_input_color                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
     #endif
     #if defined FSR2_BIND_UAV_LUMA_HISTORY
         RWTexture2D<unorm FfxFloat32x4>           rw_luma_history                         : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
@@ -778,7 +778,11 @@ FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
 FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput)
 {
 #if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL)
-    return r_dilatedDepth[iPxInput];
+    FfxFloat32 fDepth = r_dilatedDepth[iPxInput];
+#if !FFX_FSR2_OPTION_INVERTED_DEPTH
+    fDepth = 1.0 - fDepth; // Reconstruct from FP16 (undo a 1 - depth encoding). NOTE(review): this patch adds no matching inversion to the HLSL StoreDilatedDepth - confirm it exists.
+#endif
+    return fDepth;
 #else
     return 0.f;
 #endif
diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h
index 81db737..4748c96 100644
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h
@@ -34,8 +34,12 @@ FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousD
     const FfxFloat32 fHalfViewportWidth = RenderSize().x * 0.5f;
     FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
 
+#if !FFX_FSR2_OPTION_INVERTED_DEPTH
+    const FfxFloat32 Ksep = 4.0f * 1.37e-05f; // Non-inverted depth: 4x the reversed-z constant below. Arbitrary hack to make normal depth work better, not a derived value.
+#else
     // WARNING: Ksep only works with reversed-z with infinite projection.
     const FfxFloat32 Ksep = 1.37e-05f;
+#endif
     FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth;
     FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
 
diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h
index cfa9db8..f697d70 100644
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h
@@ -563,32 +563,24 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1
 
 // BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102)
 // is common, so iPxSample can "jitter"
-#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                           \
-    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
-    {                                                                                                                \
-        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);                  \
-        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                         \
-        /* Clamp base coords */                                                                                      \
-        iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1));                                            \
-        iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1));                                            \
-        /* */                                                                                                        \
-        FfxFloat32x2 fPxFrac = ffxFract(fPxSample);                                                                  \
-        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
-        return fColorXY;                                                                                             \
+#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                                                                    \
+    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                                                                        \
+    {                                                                                                                                                         \
+        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);                                                            \
+        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                                                                  \
+        FfxFloat32x2 fPxFrac = ffxFract(fPxSample);                                                                                                           \
+        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));   \
+        return fColorXY;                                                                                                                                      \
     }
 
-#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                      \
-    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
-    {                                                                                                                \
-        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);                  \
-        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                         \
-        /* Clamp base coords */                                                                                      \
-        iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1));                                            \
-        iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1));                                            \
-        /* */                                                                                                        \
-        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample));                                                    \
-        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
-        return fColorXY;                                                                                             \
+#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                                                             \
+    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                                                                      \
+    {                                                                                                                                                       \
+        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);                                                         \
+        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                                                                \
+        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample));                                                                                            \
+        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
+        return fColorXY;                                                                                                                                    \
     }
 
 #define FFX_FSR2_CONCAT_ID(x, y) x ## y
diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h
index a66a5be..80524d4 100644
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_upsample.h
@@ -46,10 +46,10 @@ FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32x2
     FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight;
 #if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
     FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
-#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_APPROXIMATE
-    FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
-#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_LUT
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
     FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
+    FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
 #else
 #error "Invalid Lanczos type"
 #endif
diff --git a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp
index be766b5..683ad9a 100644
--- a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp
+++ b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp
@@ -19,6 +19,8 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 
+#define fsr2GetPermutationBlobByIndex fsr2GetPermutationBlobByIndexVK // Placed before the includes so it also renames any declaration they pull in; keep in sync with the same #define in vk/shaders/ffx_fsr2_shaders_vk.cpp. NOTE(review): presumably avoids a symbol clash with another backend - confirm.
+
 #include "../ffx_fsr2.h"
 #include "ffx_fsr2_vk.h"
 #include "shaders/ffx_fsr2_shaders_vk.h"  // include all the precompiled VK shaders for the FSR2 passes
@@ -282,8 +284,6 @@ VkFormat getVKFormatFromSurfaceFormat(FfxSurfaceFormat fmt)
         return VK_FORMAT_R32G32B32A32_SFLOAT;
     case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT):
         return VK_FORMAT_R16G16B16A16_SFLOAT;
-    case(FFX_SURFACE_FORMAT_R16G16B16A16_UNORM):
-        return VK_FORMAT_R16G16B16A16_UNORM;
     case(FFX_SURFACE_FORMAT_R32G32_FLOAT):
         return VK_FORMAT_R32G32_SFLOAT;
     case(FFX_SURFACE_FORMAT_R32_UINT):
@@ -409,8 +409,6 @@ FfxSurfaceFormat ffxGetSurfaceFormatVK(VkFormat fmt)
         return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT;
     case(VK_FORMAT_R16G16B16A16_SFLOAT):
         return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT;
-    case(VK_FORMAT_R16G16B16A16_UNORM):
-        return FFX_SURFACE_FORMAT_R16G16B16A16_UNORM;
     case(VK_FORMAT_R32G32_SFLOAT):
         return FFX_SURFACE_FORMAT_R32G32_FLOAT;
     case(VK_FORMAT_R32_UINT):
diff --git a/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp b/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp
index 230ae9b..5842275 100644
--- a/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp
+++ b/src/ffx-fsr2-api/vk/shaders/ffx_fsr2_shaders_vk.cpp
@@ -19,6 +19,8 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 
+#define fsr2GetPermutationBlobByIndex fsr2GetPermutationBlobByIndexVK // Placed before the include so it also renames any declaration it pulls in; keep in sync with the same #define in vk/ffx_fsr2_vk.cpp. NOTE(review): presumably avoids a symbol clash with another backend - confirm.
+
 #include "ffx_fsr2_shaders_vk.h"
 
 #include "ffx_fsr2_autogen_reactive_pass_permutations.h"