my fixes

2023-03-03 19:34:56 +00:00 · 2023-03-03 19:34:56 +00:00 · 81dd44cb88
commit 81dd44cb88
parent ea97a113b0
19 changed files with 171 additions and 126 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -24,6 +24,8 @@ cmake_minimum_required(VERSION 3.12.1)
 option (GFX_API_DX12 "Build with DX12" ON)
 option (GFX_API_VK "Build with Vulkan" ON)

+add_compile_definitions(FFX_GCC)  # NOTE(review): presumably selects the GCC code paths in the FidelityFX headers - confirm
+
 if(NOT DEFINED GFX_API)
    project (FSR2_Sample)
 else()
@ -56,11 +58,6 @@ else()
    endif()
 endif()

-# Check MSVC toolset version, Visual Studio 2019 required
-if(MSVC_TOOLSET_VERSION VERSION_LESS 142)
-    message(FATAL_ERROR "Cannot find MSVC toolset version 142 or greater. Please make sure Visual Studio 2019 or newer installed")
-endif()
-
 # ouput exe to bin directory
 SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin)

@ -69,7 +66,6 @@ foreach( OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES} )
    set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${CMAKE_HOME_DIRECTORY}/bin )
 endforeach( OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES )

-add_compile_options(/MP)
 add_compile_definitions($<$<CONFIG:RelWithDebInfo>:USE_PIX>)

 # override build options in ffx-fsr2-api cmake
--- a/README.md
+++ b/README.md
@ -1,4 +1,12 @@
-# FidelityFX Super Resolution 2.1 (FSR 2.1) 
+This fork cleans up some *questionable* choices with FSR2 by:
+* adding support for compiling under (MSYS2's) GCC
+* making shader permutation generation work (the original relies on being run under CMD.exe, not bash)
+* fixing segfaults from weird linkage against vulkan by grabbing function pointers
+* some other things, I don't remember, I did this a year ago
+
+---
+
+# FidelityFX Super Resolution 2.1 (FSR 2.1)

 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.

@ -634,7 +642,7 @@ With the dilated motion vectors, we can now move to the second part of the [Reco

 When using the FSR2 API, the application's depth buffer and the application's velocity buffer must be specified as separate resources as per the [Resource inputs](#resource-inputs) table above. However, if you are undertaking a bespoke integration into your application, this constraint may be relaxed. Take care that the performance characteristics of this pass do not change if moving to a format for the motion vector texture which is more sparse, e.g.: as part of a packed g-buffer in a deferred renderer.

-## Depth clip 
+## Depth clip
 The goal of the [Depth clip](#depth-clip) stage is to produce a mask which indicates disoccluded areas of the current frame. 

 This stage runs at render resolution.
--- a/build/GenerateSolutionsDLL.bat
+++ b/build/GenerateSolutionsDLL.bat
@ -47,12 +47,7 @@ if "%VULKAN_SDK%"=="" (
 )

 :: Call CMake
-mkdir DX12
-cd DX12
-cmake -A x64 ..\.. -DGFX_API=DX12 -DFSR2_BUILD_AS_DLL=1
-cd ..
-
 mkdir VK
 cd VK
-cmake -A x64 ..\.. -DGFX_API=VK -DFSR2_BUILD_AS_DLL=1
+cmake -G "Unix Makefiles" ..\.. -DGFX_API=VK -DFSR2_BUILD_AS_DLL=1
 cd ..
--- a/common.cmake
+++ b/common.cmake
@ -2,8 +2,6 @@
 # enables multithreading compilation
 #

-add_compile_options(/MP)
-
 #
 # includes cauldron's helper cmakes
 #
--- a/src/VK/FSR2Sample.cpp
+++ b/src/VK/FSR2Sample.cpp
@ -507,8 +507,8 @@ void FSR2Sample::OnUpdate()
            if (m_UIState.m_bBaloonAttachToCamera) {
                Vectormath::Vector3  eye = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * m_UIState.baloon_offset_z;
                Vectormath::Vector3  look = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * -2.0f;
-                m_UIState.baloon_pos = m_UIState.baloon_pos + (eye - m_UIState.baloon_pos) * (1.0f - std::expf(50.0f * -(float)m_deltaTime / 1000.0f));
-                m_UIState.baloon_tip_pos = m_UIState.baloon_tip_pos + (look - m_UIState.baloon_tip_pos) * (1.0f - std::expf(50.0f * -(float)m_deltaTime / 1000.0f));
+                m_UIState.baloon_pos = m_UIState.baloon_pos + (eye - m_UIState.baloon_pos) * (1.0f - std::exp(50.0f * -(float)m_deltaTime / 1000.0f));
+                m_UIState.baloon_tip_pos = m_UIState.baloon_tip_pos + (look - m_UIState.baloon_tip_pos) * (1.0f - std::exp(50.0f * -(float)m_deltaTime / 1000.0f));
                m_UIState.m_CurBaloonTransform = Vectormath::inverse(Vectormath::Matrix4::lookAt(Vectormath::Point3(m_UIState.baloon_pos), Vectormath::Point3(m_UIState.baloon_tip_pos), Vectormath::Vector3(0.0f, 1.0f, 0.0f))) *
                    Vectormath::Matrix4::translation(Vectormath::Vector3(m_UIState.baloon_offset_x, m_UIState.baloon_offset_y, 0.0f)) * //
                    Vectormath::Matrix4::rotation(-3.141592f / 2.0f, Vectormath::Vector3(1.0f, 0.0f, 0.0f)) *       //
--- a/src/VK/GPUFrameRateLimiter.cpp
+++ b/src/VK/GPUFrameRateLimiter.cpp
@ -157,13 +157,13 @@ void GPUFrameRateLimiter::Draw(VkCommandBuffer cmdBuf, DynamicBufferRing* pDynam
    m_frameTimeHistory[m_frameTimeHistoryCount % _countof(m_frameTimeHistory)] = lastFrameTimeMicrosecs;
    m_frameTimeHistoryCount++;

-    double recentFrameTimeAvg = double(m_frameTimeHistorySum) / min(m_frameTimeHistoryCount, _countof(m_frameTimeHistory));
+    double recentFrameTimeAvg = double(m_frameTimeHistorySum) / std::min(m_frameTimeHistoryCount, _countof(m_frameTimeHistory));

-    double clampedTargetFrameTimeMs = max(min(double(targetFrameTimeMicrosecs), MaxTargetFrameTimeUs), MinTargetFrameTimeUs);
+    double clampedTargetFrameTimeMs = std::max(std::min(double(targetFrameTimeMicrosecs), MaxTargetFrameTimeUs), MinTargetFrameTimeUs);
    double deltaRatio = (recentFrameTimeAvg - clampedTargetFrameTimeMs) / clampedTargetFrameTimeMs;

    m_overhead -= m_overhead * deltaRatio * DampenFactor;
-    m_overhead = min(max(1.0, m_overhead), 1000000.0);
+    m_overhead = std::min(std::max(1.0, m_overhead), 1000000.0);

    uint32_t numLoops = uint32_t(m_overhead);

--- a/src/VK/UpscaleContext.h
+++ b/src/VK/UpscaleContext.h
@ -86,7 +86,7 @@ public:
    virtual void                BuildDevUI(UIState* pState) {}
    virtual void                PreDraw(UIState* pState);
    virtual void                GenerateReactiveMask(VkCommandBuffer pCommandList, const FfxUpscaleSetup& cameraSetup, UIState* pState);
-    virtual void                Draw(VkCommandBuffer commandBuffer, const FfxUpscaleSetup& cameraSetup, UIState* pState) = NULL;
+    virtual void                Draw(VkCommandBuffer commandBuffer, const FfxUpscaleSetup& cameraSetup, UIState* pState) = 0;

 protected:
    Device* m_pDevice;
--- a/src/ffx-fsr2-api/CMakeLists.txt
+++ b/src/ffx-fsr2-api/CMakeLists.txt
@ -37,16 +37,8 @@ if(CMAKE_GENERATOR STREQUAL "Visual Studio 16 2019")
    set(FSR2_VS_VERSION 2019)
 endif()

-if(CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR CMAKE_EXE_LINKER_FLAGS STREQUAL "/machine:x64")
-    set(FSR2_PLATFORM_NAME x64)
-elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32" OR CMAKE_EXE_LINKER_FLAGS STREQUAL "/machine:X86")
-    set(FSR2_PLATFORM_NAME x86)
-else()
-    message(FATAL_ERROR "Unsupported target platform - only supporting x64 and Win32 currently")
-endif()
-
 # Embed PDBs in the debug versions of the libs
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g")

 # Write both debug and release versions of the static libs to the /lib folder as they are uniquely named
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG   ${CMAKE_HOME_DIRECTORY}/bin/ffx_fsr2_api/)
--- a/src/ffx-fsr2-api/ffx_fsr2.cpp
+++ b/src/ffx-fsr2-api/ffx_fsr2.cpp
@ -29,6 +29,7 @@
 #include "shaders/ffx_fsr1.h"
 #include "shaders/ffx_spd.h"
 #include "shaders/ffx_fsr2_callbacks_hlsl.h"
+#include <cmath>

 #include "ffx_fsr2_maximum_bias.h"

@ -161,7 +162,7 @@ FfxConstantBuffer globalFsr2ConstantBuffers[3] = {
 // Lanczos
 static float lanczos2(float value)
 {
-    return abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
+    return std::abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
 }

 // Calculate halton number for index and base.
@ -183,7 +184,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
 {
    for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex)
    {
-        int32_t mapIndex = 0;
+        size_t mapIndex = 0;
        for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex)
        {
            if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvResourceBindings[srvIndex].name))
@ -197,7 +198,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)

    for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavCount; ++uavIndex)
    {
-        int32_t mapIndex = 0;
+        size_t mapIndex = 0;
        for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex)
        {
            if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavResourceBindings[uavIndex].name))
@ -211,7 +212,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)

    for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex)
    {
-        int32_t mapIndex = 0;
+        size_t mapIndex = 0;
        for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex)
        {
            if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->cbResourceBindings[cbIndex].name))
@ -331,7 +332,7 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con
    const Fsr2ResourceDescription internalSurfaceDesc[] = {

        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV,
-            FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
+            FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },

        {   FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV,
            FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
@ -475,7 +476,7 @@ static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context)
    return FFX_OK;
 }

-static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
+static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
 {
    FfxComputeJobDescription jobDescriptor = {};

@ -562,8 +563,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D

    // Prepare per frame descriptor tables
    const bool isOddFrame = !!(context->resourceFrameIndex & 1);
-    const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0;
-    const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex;
+    //const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0;
+    //const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex;
    const uint32_t lockStatusSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1;
    const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2;
    const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1;
--- a/src/ffx-fsr2-api/ffx_types.h
+++ b/src/ffx-fsr2-api/ffx_types.h
@ -23,13 +23,26 @@

 #include <stdint.h>

-#if defined (FFX_GCC)
-/// FidelityFX exported functions
+// maister: Force internal linkage.
 #define FFX_API
-#else
-/// FidelityFX exported functions
-#define FFX_API __declspec(dllexport)
-#endif // #if defined (FFX_GCC)
+
+// Workarounds.
+#ifndef _WIN32
+#define _countof(array) (sizeof(array) / sizeof((array)[0]))
+#include <stdio.h>
+#include <stddef.h>
+
+template <size_t N>
+static inline void strcpy_s(char (&buf)[N], const char *str)
+{
+	snprintf(buf, N, "%s", str);
+}
+#endif
+
+#ifndef _MSC_VER
+#define FFX_STATIC static inline
+#endif
+////

 /// Maximum supported number of simultaneously bound SRVs.
 #define FFX_MAX_NUM_SRVS            16
@ -57,6 +70,7 @@ typedef enum FfxSurfaceFormat {
    FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS,       ///< 32 bit per channel, 4 channel typeless format
    FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT,          ///< 32 bit per channel, 4 channel float format
    FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT,          ///< 16 bit per channel, 4 channel float format
+    FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, ///< 16 bit per channel, 4 channel unsigned normalized format
    FFX_SURFACE_FORMAT_R32G32_FLOAT,                ///< 32 bit per channel, 2 channel float format
    FFX_SURFACE_FORMAT_R32_UINT,                    ///< 32 bit per channel, 1 channel float format
    FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS,           ///<  8 bit per channel, 4 channel float format
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h
@ -182,6 +182,7 @@ void Accumulate(FfxInt32x2 iPxHrPos)
    const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fLrUvJittered);
    const FfxFloat32 fReactiveMax = fDilatedReactiveMasks.x;
    const FfxFloat32 fAccumulationMask = fDilatedReactiveMasks.y;
+    const FfxBoolean bIsResetFrame = (0 == FrameIndex());

    FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0, 0, 0, 0);
    FfxFloat32x3 fLockStatus;
@ -191,7 +192,7 @@ void Accumulate(FfxInt32x2 iPxHrPos)
    FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0);
    ComputeReprojectedUVs(iPxHrPos, fMotionVector, fReprojectedHrUv, bIsExistingSample);

-    if (bIsExistingSample) {
+    if (bIsExistingSample && !bIsResetFrame) {
        ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight);
        ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus);
    }
@ -259,4 +260,4 @@ void Accumulate(FfxInt32x2 iPxHrPos)
 #endif
 }

-#endif // FFX_FSR2_ACCUMULATE_H
+#endif // FFX_FSR2_ACCUMULATE_H
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl
@ -42,6 +42,7 @@

 #extension GL_GOOGLE_include_directive : require
 #extension GL_EXT_samplerless_texture_functions : require
+#extension GL_EXT_shader_image_load_formatted : require

 #define FSR2_BIND_SRV_EXPOSURE                               0
 #define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS                 1
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h
@ -206,37 +206,37 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
 	layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D   rw_reconstructed_previous_nearest_depth;
 #endif
 #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
-	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg32f)           uniform image2D    rw_dilated_motion_vectors;
+	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS /* app controlled format */) writeonly uniform image2D rw_dilated_motion_vectors;
 #endif
 #if defined FSR2_BIND_UAV_DILATED_DEPTH
-	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r32f)                     uniform image2D    rw_dilatedDepth;
+	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f)                     uniform image2D    rw_dilatedDepth;
 #endif
 #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
-	layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba32f)              uniform image2D    rw_internal_upscaled_color;
+	layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f)              uniform image2D    rw_internal_upscaled_color;
 #endif
 #if defined FSR2_BIND_UAV_LOCK_STATUS
 	layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, r11f_g11f_b10f)             uniform image2D    rw_lock_status;
 #endif
 #if defined FSR2_BIND_UAV_DEPTH_CLIP
-	layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r32f)                        uniform image2D    rw_depth_clip;
+	layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r16f)                        uniform image2D    rw_depth_clip;
 #endif
 #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
-	layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba32f)           uniform image2D    rw_prepared_input_color;
+	layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16)            uniform image2D    rw_prepared_input_color; // UNORM: resource is created as R16G16B16A16_UNORM
 #endif
 #if defined FSR2_BIND_UAV_LUMA_HISTORY
-	layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba32f)                   uniform image2D    rw_luma_history;
+	layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8)                     uniform image2D    rw_luma_history;
 #endif
 #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
-	layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT, rgba32f)                uniform image2D    rw_upscaled_output;
+	layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output;
 #endif
 #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
-	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r32f) coherent uniform image2D    rw_img_mip_shading_change;
+	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D    rw_img_mip_shading_change;
 #endif
 #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
-	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r32f)           coherent uniform image2D    rw_img_mip_5;
+	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f)           coherent uniform image2D    rw_img_mip_5;
 #endif
 #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
-	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg32f)           uniform image2D    rw_dilated_reactive_masks;
+	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8)             uniform image2D    rw_dilated_reactive_masks; // two channels: consumers read both .x and .y
 #endif 
 #if defined FSR2_BIND_UAV_EXPOSURE 
 	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f)                         uniform image2D    rw_exposure;
@ -592,6 +592,9 @@ void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxF
 {
 #if defined(FSR2_BIND_UAV_DILATED_DEPTH)
 	//FfxUInt32 uDepth = f32tof16(fDepth);
+#if !FFX_FSR2_OPTION_INVERTED_DEPTH
+	fDepth = 1.0 - fDepth; // maister: Preserve precision as well as we can in FP16.
+#endif
 	imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
 #endif
 }
@ -625,7 +628,11 @@ FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
 FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
 {
 #if defined(FSR2_BIND_SRV_DILATED_DEPTH)
-	return texelFetch(r_dilatedDepth, iPxInput, 0).r;
+	FfxFloat32 d = texelFetch(r_dilatedDepth, iPxInput, 0).r;
+#if !FFX_FSR2_OPTION_INVERTED_DEPTH
+	d = 1.0 - d; // maister: Reconstruct from FP16.
+#endif
+	return d;
 #else
    return 0.f;
 #endif
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h
@ -234,26 +234,26 @@ SamplerState s_LinearClamp : register(s1);

 // SRVs
 #if defined(FFX_INTERNAL)
-    Texture2D<FfxFloat32x4>                       r_input_color_jittered                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
-    Texture2D<FfxFloat32x4>                       r_motion_vectors                        : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
-    Texture2D<FfxFloat32>                         r_depth                                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
-    Texture2D<FfxFloat32x2>                       r_exposure                              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
-    Texture2D<FfxFloat32>                         r_reactive_mask                         : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
-    Texture2D<FfxFloat32>                         r_transparency_and_composition_mask     : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
-    Texture2D<FfxUInt32>                          r_reconstructed_previous_nearest_depth  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
-    Texture2D<FfxFloat32x2>                       r_dilated_motion_vectors                : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
-    Texture2D<FfxFloat32>                         r_dilatedDepth                          : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
-    Texture2D<FfxFloat32x4>                       r_internal_upscaled_color               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
-    Texture2D<FfxFloat32x3>                       r_lock_status                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
-    Texture2D<FfxFloat32>                         r_depth_clip                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
-    Texture2D<FfxFloat32x4>                       r_prepared_input_color                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
-    Texture2D<unorm FfxFloat32x4>                 r_luma_history                          : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
-    Texture2D<FfxFloat32x4>                       r_rcas_input                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
-    Texture2D<FfxFloat32>                         r_lanczos_lut                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
-    Texture2D<FfxFloat32>                         r_imgMips                               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
-    Texture2D<FfxFloat32>                         r_upsample_maximum_bias_lut             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
-    Texture2D<FfxFloat32x2>                       r_dilated_reactive_masks                : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
-    Texture2D<FfxFloat32x4>                       r_debug_out                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
+    Texture2D<FfxFloat32x4>                       r_input_color_jittered                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
+    Texture2D<FfxFloat32x4>                       r_motion_vectors                          : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
+    Texture2D<FfxFloat32>                         r_depth                                   : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
+    Texture2D<FfxFloat32x2>                       r_exposure                                : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
+    Texture2D<FfxFloat32>                         r_reactive_mask                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
+    Texture2D<FfxFloat32>                         r_transparency_and_composition_mask       : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
+    Texture2D<FfxUInt32>                          r_reconstructed_previous_nearest_depth    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
+    Texture2D<FfxFloat32x2>                       r_dilated_motion_vectors                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
+    Texture2D<FfxFloat32>                         r_dilatedDepth                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
+    Texture2D<FfxFloat32x4>                       r_internal_upscaled_color                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
+    Texture2D<FfxFloat32x3>                       r_lock_status                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
+    Texture2D<FfxFloat32>                         r_depth_clip                              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
+    Texture2D<unorm FfxFloat32x4>                 r_prepared_input_color                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
+    Texture2D<unorm FfxFloat32x4>                 r_luma_history                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
+    Texture2D<FfxFloat32x4>                       r_rcas_input                              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
+    Texture2D<FfxFloat32>                         r_lanczos_lut                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
+    Texture2D<FfxFloat32>                         r_imgMips                                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
+    Texture2D<FfxFloat32>                         r_upsample_maximum_bias_lut               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
+    Texture2D<FfxFloat32x2>                       r_dilated_reactive_masks                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
+    Texture2D<FfxFloat32x4>                       r_debug_out                               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);

    // declarations not current form, no accessor functions
    Texture2D<FfxFloat32x4>                       r_transparency_mask                     : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_TRANSPARENCY_MASK);
@ -275,15 +275,15 @@ SamplerState s_LinearClamp : register(s1);
    Texture2D<FfxFloat32x4>                       r_motion_vectors_reflection             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_REFLECTION);

    // UAV declarations
-    RWTexture2D<FfxUInt32>                        rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
-    RWTexture2D<FfxFloat32x2>                     rw_dilated_motion_vectors               : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
-    RWTexture2D<FfxFloat32>                       rw_dilatedDepth                         : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
-    RWTexture2D<FfxFloat32x4>                     rw_internal_upscaled_color              : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
-    RWTexture2D<FfxFloat32x3>                     rw_lock_status                          : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
-    RWTexture2D<FfxFloat32>                       rw_depth_clip                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
-    RWTexture2D<FfxFloat32x4>                     rw_prepared_input_color                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
-    RWTexture2D<unorm FfxFloat32x4>               rw_luma_history                         : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
-    RWTexture2D<FfxFloat32x4>                     rw_upscaled_output                      : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
+    RWTexture2D<FfxUInt32>                        rw_reconstructed_previous_nearest_depth   : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
+    RWTexture2D<FfxFloat32x2>                     rw_dilated_motion_vectors                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
+    RWTexture2D<FfxFloat32>                       rw_dilatedDepth                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
+    RWTexture2D<FfxFloat32x4>                     rw_internal_upscaled_color                : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
+    RWTexture2D<FfxFloat32x3>                     rw_lock_status                            : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
+    RWTexture2D<FfxFloat32>                       rw_depth_clip                             : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
+    RWTexture2D<unorm FfxFloat32x4>               rw_prepared_input_color                   : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
+    RWTexture2D<unorm FfxFloat32x4>               rw_luma_history                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
+    RWTexture2D<FfxFloat32x4>                     rw_upscaled_output                        : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);

    globallycoherent RWTexture2D<FfxFloat32>      rw_img_mip_shading_change               : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE);
    globallycoherent RWTexture2D<FfxFloat32>      rw_img_mip_5                            : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5);
@ -330,7 +330,7 @@ SamplerState s_LinearClamp : register(s1);
        Texture2D<FfxFloat32>                     r_depth_clip                            : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP);
    #endif
    #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
-        Texture2D<FfxFloat32x4>                   r_prepared_input_color                  : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
+        Texture2D<unorm FfxFloat32x4>             r_prepared_input_color                    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
    #endif
    #if defined FSR2_BIND_SRV_LUMA_HISTORY
        Texture2D<unorm FfxFloat32x4>             r_luma_history                          : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
@ -371,7 +371,7 @@ SamplerState s_LinearClamp : register(s1);
        RWTexture2D<FfxFloat32>                   rw_depth_clip                           : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP);
    #endif
    #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
-        RWTexture2D<FfxFloat32x4>                 rw_prepared_input_color                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
+        RWTexture2D<unorm FfxFloat32x4>           rw_prepared_input_color                   : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
    #endif
    #if defined FSR2_BIND_UAV_LUMA_HISTORY
        RWTexture2D<unorm FfxFloat32x4>           rw_luma_history                         : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
@ -823,9 +823,9 @@ FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
 {
 #if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
    fUV *= depthclip_uv_scale;
-	return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
+    return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
 #else
-	return 0.f;
+    return 0.f;
 #endif
 }

@ -845,4 +845,4 @@ void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETE
 #endif
 }

-#endif // #if defined(FFX_GPU)
+#endif // #if defined(FFX_GPU)
--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h
@ -35,7 +35,11 @@ FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousD
    FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);

    // WARNING: Ksep only works with reversed-z with infinite projection.
+#if !FFX_FSR2_OPTION_INVERTED_DEPTH
+    const FfxFloat32 Ksep = 4.0f * 1.37e-05f; // maister: Arbitrary hack to make normal depth work.
+#else
    const FfxFloat32 Ksep = 1.37e-05f;
+#endif
    FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth;
    FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;

--- a/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h
+++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h
@ -563,24 +563,32 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1

 // BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102)
 // is common, so iPxSample can "jitter"
-#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                                                                    \
-    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                                                                        \
-    {                                                                                                                                                         \
-        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);                                                            \
-        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                                                                  \
-        FfxFloat32x2 fPxFrac = ffxFract(fPxSample);                                                                                                           \
-        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));   \
-        return fColorXY;                                                                                                                                      \
+// Declares FfxFloat32x4 Name(fUvSample, iTextureSize): scales the UV by the texture size and
+// subtracts half a texel, floors that to a base texel, hands the base texel and the texture size
+// to FetchSamples, and passes its result together with the fractional offset to InterpolateSamples.
+#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                           \
+    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
+    {                                                                                                                \
+        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);                  \
+        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                         \
+        /* Clamp the base texel to [0, iTextureSize - 1] on both axes. */                                            \
+        iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1));                                            \
+        iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1));                                            \
+        /* NOTE(review): fPxFrac below still uses the unclamped fPxSample; confirm at borders. */                    \
+        FfxFloat32x2 fPxFrac = ffxFract(fPxSample);                                                                  \
+        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
+        return fColorXY;                                                                                             \
    }

-#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                                                             \
-    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                                                                      \
-    {                                                                                                                                                       \
-        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);                                                         \
-        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                                                                \
-        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample));                                                                                            \
-        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
-        return fColorXY;                                                                                                                                    \
+// Declares FFX_MIN16_F4 Name(fUvSample, iTextureSize): same steps as the full-precision variant
+// (texel-space conversion, floor, FetchSamples, InterpolateSamples), but the fractional offset is
+// converted to FFX_MIN16_F2 and the result to FFX_MIN16_F4.
+#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                      \
+    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
+    {                                                                                                                \
+        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);                  \
+        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                         \
+        /* Clamp the base texel to [0, iTextureSize - 1] on both axes. */                                            \
+        iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1));                                            \
+        iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1));                                            \
+        /* NOTE(review): fPxFrac below still uses the unclamped fPxSample; confirm at borders. */                    \
+        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample));                                                    \
+        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
+        return fColorXY;                                                                                             \
    }

 #define FFX_FSR2_CONCAT_ID(x, y) x ## y
@ -591,4 +599,4 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1

 #define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x)

-#endif //!defined( FFX_FSR2_SAMPLE_H )
+#endif //!defined( FFX_FSR2_SAMPLE_H )
--- a/src/ffx-fsr2-api/vk/CMakeLists.txt
+++ b/src/ffx-fsr2-api/vk/CMakeLists.txt
@ -23,6 +23,8 @@ if(NOT ${FFX_FSR2_API_VK})
    return()
 endif()

+# FFX_GCC is the define this fork passes for its GCC (MSYS2) build.
+# NOTE(review): presumably it selects the non-MSVC code paths in the FFX headers - confirm.
+# Only set it when the compiler is not MSVC(-compatible) so MSVC builds are left untouched.
+if(NOT MSVC)
+    add_compile_definitions(FFX_GCC)
+endif()
+
 set(FFX_SC_VK_BASE_ARGS
    -compiler=glslang -e main --target-env vulkan1.1 -S comp -Os -DFFX_GLSL=1)

@ -86,7 +88,7 @@ foreach(PASS_SHADER ${PASS_SHADERS})
    if(USE_DEPFILE)
        add_custom_command(
            OUTPUT ${PERMUTATION_HEADER}
-            COMMAND ${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}
+            COMMAND cmd.exe /C '${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}'
            WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
            DEPENDS ${PASS_SHADER}
            DEPFILE ${PERMUTATION_HEADER}.d
@ -95,7 +97,7 @@ foreach(PASS_SHADER ${PASS_SHADERS})
    else()
        add_custom_command(
            OUTPUT ${PERMUTATION_HEADER}
-            COMMAND ${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}
+            # Run the shader compiler through cmd.exe instead of the generator's default shell.
+            # NOTE(review): single quotes are not quoting characters in CMake and this command has
+            # no VERBATIM, so how the line below is split into arguments depends on the shell the
+            # generator uses - confirm it still works with non-MSYS generators (Ninja, Visual Studio).
+            COMMAND cmd.exe /C '${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}'
            WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
            DEPENDS ${PASS_SHADER}
        )
--- a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp
+++ b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp
@ -27,6 +27,8 @@
 #include <math.h>
 #include <stdlib.h>
 #include <codecvt>
+#include <locale>
+

 // prototypes for functions in the interface
 FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDeviceCapabilities* deviceCapabilities, FfxDevice device);
@ -170,13 +172,25 @@ typedef struct BackendContext_VK {

 } BackendContext_VK;

-FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice)
-{
-    uint32_t numExtensions = 0;
-    
-    if (physicalDevice)
-        vkEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &numExtensions, nullptr);
+// Instance-level Vulkan entry points used by this backend instead of the statically linked
+// vk* symbols. They have static storage and no initializer, so they start out null and are
+// only assigned by ffxFsr2SetInstanceFunctions().
+namespace {
+    PFN_vkEnumerateDeviceExtensionProperties EnumerateDeviceExtensionProperties;
+    PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties;
+    PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties;
+    PFN_vkGetPhysicalDeviceProperties2 GetPhysicalDeviceProperties2;
+    PFN_vkGetPhysicalDeviceFeatures2 GetPhysicalDeviceFeatures2;
+}

+/// Resolve the instance-level Vulkan entry points used by the VK backend.
+///
+/// Each pointer is looked up by name through <c>getInstanceProcAddr</c> and stored in the
+/// file-local function pointers; a lookup that fails leaves the corresponding pointer null.
+///
+/// @param [in] instance             The VkInstance passed to every lookup.
+/// @param [in] getInstanceProcAddr  The loader entry point used for the lookups. If it is null
+///                                  nothing is resolved and previously stored pointers are kept.
+FFX_API void ffxFsr2SetInstanceFunctions( VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr ) {
+    // Calling through a null loader pointer would crash; assert in debug builds and bail out otherwise.
+    FFX_ASSERT(NULL != getInstanceProcAddr);
+    if (!getInstanceProcAddr) return;
+
+    ::EnumerateDeviceExtensionProperties = (PFN_vkEnumerateDeviceExtensionProperties) getInstanceProcAddr(instance, "vkEnumerateDeviceExtensionProperties");
+    ::GetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties) getInstanceProcAddr(instance, "vkGetPhysicalDeviceMemoryProperties");
+    ::GetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties) getInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties");
+    ::GetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties2");
+    ::GetPhysicalDeviceFeatures2 = (PFN_vkGetPhysicalDeviceFeatures2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceFeatures2");
+}
+
+/// Compute the scratch memory size required by the VK backend.
+///
+/// The size covers the backend context plus one VkExtensionProperties entry per device
+/// extension, rounded up to a multiple of sizeof(uint64_t).
+///
+/// @param [in] physicalDevice  Device whose extension count is queried when needed; may be null.
+/// @param [in] numExtensions   Extension count supplied by the caller. When it is 0 and
+///                             <c>physicalDevice</c> is non-null the count is queried instead.
+///
+/// @returns
+/// The size (in bytes) of the required scratch memory buffer for the VK backend.
+FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice, uint32_t numExtensions)
+{
+    if (physicalDevice && numExtensions == 0)
+    {
+        // The entry point stays null until ffxFsr2SetInstanceFunctions() has resolved it; calling it
+        // unresolved would crash. Assert in debug builds and otherwise fall through with a count of 0.
+        FFX_ASSERT(NULL != ::EnumerateDeviceExtensionProperties);
+        if (::EnumerateDeviceExtensionProperties)
+            ::EnumerateDeviceExtensionProperties(physicalDevice, nullptr, &numExtensions, nullptr);
+    }
    return FFX_ALIGN_UP(sizeof(BackendContext_VK) + sizeof(VkExtensionProperties) * numExtensions, sizeof(uint64_t));
 }

@ -185,7 +199,8 @@ FfxErrorCode ffxFsr2GetInterfaceVK(
    void* scratchBuffer,
    size_t scratchBufferSize,
    VkPhysicalDevice physicalDevice,
-    PFN_vkGetDeviceProcAddr getDeviceProcAddr)
+    PFN_vkGetDeviceProcAddr getDeviceProcAddr
+)
 {
    FFX_RETURN_ON_ERROR(
        outInterface,
@ -282,6 +297,8 @@ VkFormat getVKFormatFromSurfaceFormat(FfxSurfaceFormat fmt)
        return VK_FORMAT_R32G32B32A32_SFLOAT;
    case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT):
        return VK_FORMAT_R16G16B16A16_SFLOAT;
+    case(FFX_SURFACE_FORMAT_R16G16B16A16_UNORM):
+        return VK_FORMAT_R16G16B16A16_UNORM;
    case(FFX_SURFACE_FORMAT_R32G32_FLOAT):
        return VK_FORMAT_R32G32_SFLOAT;
    case(FFX_SURFACE_FORMAT_R32_UINT):
@ -441,7 +458,7 @@ uint32_t findMemoryTypeIndex(VkPhysicalDevice physicalDevice, VkMemoryRequiremen
    FFX_ASSERT(NULL != physicalDevice);

    VkPhysicalDeviceMemoryProperties memProperties;
-    vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);
+    ::GetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);

    uint32_t bestCandidate = UINT32_MAX;

@ -714,7 +731,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi
            VkPhysicalDeviceProperties2 deviceProperties2 = {};
            deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
            deviceProperties2.pNext = &subgroupSizeControlProperties;
-            vkGetPhysicalDeviceProperties2(context->physicalDevice, &deviceProperties2);
+            ::GetPhysicalDeviceProperties2(context->physicalDevice, &deviceProperties2);

            deviceCapabilities->waveLaneCountMin = subgroupSizeControlProperties.minSubgroupSize;
            deviceCapabilities->waveLaneCountMax = subgroupSizeControlProperties.maxSubgroupSize;
@ -729,7 +746,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi
            physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
            physicalDeviceFeatures2.pNext = &shaderFloat18Int8Features;

-            vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);
+            ::GetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);

            deviceCapabilities->fp16Supported = (bool)shaderFloat18Int8Features.shaderFloat16;
        }
@ -743,7 +760,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi
            physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
            physicalDeviceFeatures2.pNext = &accelerationStructureFeatures;

-            vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);
+            ::GetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);

            deviceCapabilities->raytracingSupported = (bool)accelerationStructureFeatures.accelerationStructure;
        }
@ -781,8 +798,8 @@ FfxErrorCode CreateBackendContextVK(FfxFsr2Interface* backendInterface, FfxDevic

    // enumerate all the device extensions 
    backendContext->numDeviceExtensions = 0;
-    vkEnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, nullptr);
-    vkEnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, backendContext->extensionProperties);
+    ::EnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, nullptr);
+    ::EnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, backendContext->extensionProperties);

    // create descriptor pool
    VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {};
@ -1265,7 +1282,7 @@ FfxErrorCode CreatePipelineVK(FfxFsr2Interface* backendInterface, FfxFsr2Pass pa
    if (pass == FFX_FSR2_PASS_ACCUMULATE || pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN)
    {
        VkPhysicalDeviceProperties physicalDeviceProperties = {};
-        vkGetPhysicalDeviceProperties(backendContext->physicalDevice, &physicalDeviceProperties);
+        ::GetPhysicalDeviceProperties(backendContext->physicalDevice, &physicalDeviceProperties);

        // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
        if (physicalDeviceProperties.vendorID == 0x10DE)
--- a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h
+++ b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h
@ -34,7 +34,8 @@ extern "C" {
    ///
    /// @returns
    /// The size (in bytes) of the required scratch memory buffer for the VK backend.
-    FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice);
+    ///
+    /// @param [in] numExtensions   Optional device-extension count. When it is 0 (the default) and
+    ///                             <c>physicalDevice</c> is non-null, the count is queried from the device.
+    ///                             NOTE(review): the default argument is only valid when this header is compiled as C++.
+    FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice, uint32_t numExtensions = 0);
+
+    /// Resolve the instance-level Vulkan functions used by the VK backend.
+    ///
+    /// Call this before any backend function that queries the physical device, for example
+    /// <c>ffxFsr2GetScratchMemorySizeVK</c> with <c>numExtensions</c> left at 0.
+    ///
+    /// @param [in] instance             The VkInstance the functions are resolved for.
+    /// @param [in] getInstanceProcAddr  The <c>vkGetInstanceProcAddr</c> entry point used for the lookups.
+    FFX_API void ffxFsr2SetInstanceFunctions( VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr );

    /// Populate an interface with pointers for the VK backend.
    ///