From 81dd44cb881ff7f60d5c39de30a74ad987038ba5 Mon Sep 17 00:00:00 2001 From: mrq Date: Fri, 3 Mar 2023 19:34:56 +0000 Subject: [PATCH] my fixes --- CMakeLists.txt | 8 +-- README.md | 12 +++- build/GenerateSolutionsDLL.bat | 7 +- common.cmake | 2 - src/VK/FSR2Sample.cpp | 4 +- src/VK/GPUFrameRateLimiter.cpp | 6 +- src/VK/UpscaleContext.h | 2 +- src/ffx-fsr2-api/CMakeLists.txt | 10 +-- src/ffx-fsr2-api/ffx_fsr2.cpp | 17 ++--- src/ffx-fsr2-api/ffx_types.h | 26 +++++-- .../shaders/ffx_fsr2_accumulate.h | 5 +- .../shaders/ffx_fsr2_accumulate_pass.glsl | 1 + .../shaders/ffx_fsr2_callbacks_glsl.h | 29 +++++--- .../shaders/ffx_fsr2_callbacks_hlsl.h | 68 +++++++++---------- .../shaders/ffx_fsr2_depth_clip.h | 4 ++ src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h | 42 +++++++----- src/ffx-fsr2-api/vk/CMakeLists.txt | 6 +- src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp | 45 ++++++++---- src/ffx-fsr2-api/vk/ffx_fsr2_vk.h | 3 +- 19 files changed, 171 insertions(+), 126 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a5424c..c4ef99c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,8 @@ cmake_minimum_required(VERSION 3.12.1) option (GFX_API_DX12 "Build with DX12" ON) option (GFX_API_VK "Build with Vulkan" ON) +add_definitions(-DFFX_GCC) + if(NOT DEFINED GFX_API) project (FSR2_Sample) else() @@ -56,11 +58,6 @@ else() endif() endif() -# Check MSVC toolset version, Visual Studio 2019 required -if(MSVC_TOOLSET_VERSION VERSION_LESS 142) - message(FATAL_ERROR "Cannot find MSVC toolset version 142 or greater. 
Please make sure Visual Studio 2019 or newer installed") -endif() - # ouput exe to bin directory SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin) @@ -69,7 +66,6 @@ foreach( OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES} ) set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${CMAKE_HOME_DIRECTORY}/bin ) endforeach( OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES ) -add_compile_options(/MP) add_compile_definitions($<$:USE_PIX>) # override build options in ffx-fsr2-api cmake diff --git a/README.md b/README.md index 9f43d44..9cd5468 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,12 @@ -# FidelityFX Super Resolution 2.1 (FSR 2.1) +This fork cleans up some *questionable* choices with FSR2 by: +* adding support for compiling under (MSYS2's) GCC +* making shader permutation generation work (it relies on being run under CMD.exe, not bash) +* fixing segfaults from weird linkage against Vulkan by grabbing function pointers +* some other things, I don't remember, I did this a year ago + +--- + +# FidelityFX Super Resolution 2.1 (FSR 2.1) Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. @@ -634,7 +642,7 @@ With the dilated motion vectors, we can now move to the second part of the [Reco When using the FSR2 API, the application's depth buffer and the application's velocity buffer must be specified as separate resources as per the [Resource inputs](#resource-inputs) table above. However, if you are undertaking a bespoke integration into your application, this constraint may be relaxed. Take care that the performance characteristics of this pass do not change if moving to a format for the motion vector texture which is more sparse, e.g.: as part of a packed g-buffer in a deferred renderer. -## Depth clip +## Depth clip The goal of the [Depth clip](#depth-clip) stage is to produce a mask which indicates disoccluded areas of the current frame. This stage runs at render resolution. 
diff --git a/build/GenerateSolutionsDLL.bat b/build/GenerateSolutionsDLL.bat index 72505fa..ae5cf3a 100644 --- a/build/GenerateSolutionsDLL.bat +++ b/build/GenerateSolutionsDLL.bat @@ -47,12 +47,7 @@ if "%VULKAN_SDK%"=="" ( ) :: Call CMake -mkdir DX12 -cd DX12 -cmake -A x64 ..\.. -DGFX_API=DX12 -DFSR2_BUILD_AS_DLL=1 -cd .. - mkdir VK cd VK -cmake -A x64 ..\.. -DGFX_API=VK -DFSR2_BUILD_AS_DLL=1 +cmake -G "Unix Makefiles" ..\.. -DGFX_API=VK -DFSR2_BUILD_AS_DLL=1 cd .. diff --git a/common.cmake b/common.cmake index 4ea0fda..1ca9206 100644 --- a/common.cmake +++ b/common.cmake @@ -2,8 +2,6 @@ # enables multithreading compilation # -add_compile_options(/MP) - # # includes cauldron's helper cmakes # diff --git a/src/VK/FSR2Sample.cpp b/src/VK/FSR2Sample.cpp index 2491710..389b8b2 100644 --- a/src/VK/FSR2Sample.cpp +++ b/src/VK/FSR2Sample.cpp @@ -507,8 +507,8 @@ void FSR2Sample::OnUpdate() if (m_UIState.m_bBaloonAttachToCamera) { Vectormath::Vector3 eye = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * m_UIState.baloon_offset_z; Vectormath::Vector3 look = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * -2.0f; - m_UIState.baloon_pos = m_UIState.baloon_pos + (eye - m_UIState.baloon_pos) * (1.0f - std::expf(50.0f * -(float)m_deltaTime / 1000.0f)); - m_UIState.baloon_tip_pos = m_UIState.baloon_tip_pos + (look - m_UIState.baloon_tip_pos) * (1.0f - std::expf(50.0f * -(float)m_deltaTime / 1000.0f)); + m_UIState.baloon_pos = m_UIState.baloon_pos + (eye - m_UIState.baloon_pos) * (1.0f - std::exp(50.0f * -(float)m_deltaTime / 1000.0f)); + m_UIState.baloon_tip_pos = m_UIState.baloon_tip_pos + (look - m_UIState.baloon_tip_pos) * (1.0f - std::exp(50.0f * -(float)m_deltaTime / 1000.0f)); m_UIState.m_CurBaloonTransform = Vectormath::inverse(Vectormath::Matrix4::lookAt(Vectormath::Point3(m_UIState.baloon_pos), Vectormath::Point3(m_UIState.baloon_tip_pos), Vectormath::Vector3(0.0f, 1.0f, 0.0f))) * 
Vectormath::Matrix4::translation(Vectormath::Vector3(m_UIState.baloon_offset_x, m_UIState.baloon_offset_y, 0.0f)) * // Vectormath::Matrix4::rotation(-3.141592f / 2.0f, Vectormath::Vector3(1.0f, 0.0f, 0.0f)) * // diff --git a/src/VK/GPUFrameRateLimiter.cpp b/src/VK/GPUFrameRateLimiter.cpp index d24d268..bd51d2c 100644 --- a/src/VK/GPUFrameRateLimiter.cpp +++ b/src/VK/GPUFrameRateLimiter.cpp @@ -157,13 +157,13 @@ void GPUFrameRateLimiter::Draw(VkCommandBuffer cmdBuf, DynamicBufferRing* pDynam m_frameTimeHistory[m_frameTimeHistoryCount % _countof(m_frameTimeHistory)] = lastFrameTimeMicrosecs; m_frameTimeHistoryCount++; - double recentFrameTimeAvg = double(m_frameTimeHistorySum) / min(m_frameTimeHistoryCount, _countof(m_frameTimeHistory)); + double recentFrameTimeAvg = double(m_frameTimeHistorySum) / std::min(m_frameTimeHistoryCount, _countof(m_frameTimeHistory)); - double clampedTargetFrameTimeMs = max(min(double(targetFrameTimeMicrosecs), MaxTargetFrameTimeUs), MinTargetFrameTimeUs); + double clampedTargetFrameTimeMs = std::max(std::min(double(targetFrameTimeMicrosecs), MaxTargetFrameTimeUs), MinTargetFrameTimeUs); double deltaRatio = (recentFrameTimeAvg - clampedTargetFrameTimeMs) / clampedTargetFrameTimeMs; m_overhead -= m_overhead * deltaRatio * DampenFactor; - m_overhead = min(max(1.0, m_overhead), 1000000.0); + m_overhead = std::min(std::max(1.0, m_overhead), 1000000.0); uint32_t numLoops = uint32_t(m_overhead); diff --git a/src/VK/UpscaleContext.h b/src/VK/UpscaleContext.h index ca13ee1..5081fef 100644 --- a/src/VK/UpscaleContext.h +++ b/src/VK/UpscaleContext.h @@ -86,7 +86,7 @@ public: virtual void BuildDevUI(UIState* pState) {} virtual void PreDraw(UIState* pState); virtual void GenerateReactiveMask(VkCommandBuffer pCommandList, const FfxUpscaleSetup& cameraSetup, UIState* pState); - virtual void Draw(VkCommandBuffer commandBuffer, const FfxUpscaleSetup& cameraSetup, UIState* pState) = NULL; + virtual void Draw(VkCommandBuffer commandBuffer, const 
FfxUpscaleSetup& cameraSetup, UIState* pState) = 0; protected: Device* m_pDevice; diff --git a/src/ffx-fsr2-api/CMakeLists.txt b/src/ffx-fsr2-api/CMakeLists.txt index 7ef023c..1f66c6f 100644 --- a/src/ffx-fsr2-api/CMakeLists.txt +++ b/src/ffx-fsr2-api/CMakeLists.txt @@ -37,16 +37,8 @@ if(CMAKE_GENERATOR STREQUAL "Visual Studio 16 2019") set(FSR2_VS_VERSION 2019) endif() -if(CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR CMAKE_EXE_LINKER_FLAGS STREQUAL "/machine:x64") - set(FSR2_PLATFORM_NAME x64) -elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32" OR CMAKE_EXE_LINKER_FLAGS STREQUAL "/machine:X86") - set(FSR2_PLATFORM_NAME x86) -else() - message(FATAL_ERROR "Unsupported target platform - only supporting x64 and Win32 currently") -endif() - # Embed PDBs in the debug versions of the libs -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g") # Write both debug and release versions of the static libs to the /lib folder as they are uniquely named set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_HOME_DIRECTORY}/bin/ffx_fsr2_api/) diff --git a/src/ffx-fsr2-api/ffx_fsr2.cpp b/src/ffx-fsr2-api/ffx_fsr2.cpp index ebd69d5..0dc2118 100644 --- a/src/ffx-fsr2-api/ffx_fsr2.cpp +++ b/src/ffx-fsr2-api/ffx_fsr2.cpp @@ -29,6 +29,7 @@ #include "shaders/ffx_fsr1.h" #include "shaders/ffx_spd.h" #include "shaders/ffx_fsr2_callbacks_hlsl.h" +#include #include "ffx_fsr2_maximum_bias.h" @@ -161,7 +162,7 @@ FfxConstantBuffer globalFsr2ConstantBuffers[3] = { // Lanczos static float lanczos2(float value) { - return abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value)); + return std::abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value)); } // Calculate halton number for index and base. 
@@ -183,7 +184,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline) { for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex) { - int32_t mapIndex = 0; + size_t mapIndex = 0; for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex) { if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvResourceBindings[srvIndex].name)) @@ -197,7 +198,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline) for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavCount; ++uavIndex) { - int32_t mapIndex = 0; + size_t mapIndex = 0; for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex) { if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavResourceBindings[uavIndex].name)) @@ -211,7 +212,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline) for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex) { - int32_t mapIndex = 0; + size_t mapIndex = 0; for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex) { if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->cbResourceBindings[cbIndex].name)) @@ -331,7 +332,7 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con const Fsr2ResourceDescription internalSurfaceDesc[] = { { FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, + FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, { FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, 
contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, @@ -475,7 +476,7 @@ static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context) return FFX_OK; } -static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY) +static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY) { FfxComputeJobDescription jobDescriptor = {}; @@ -562,8 +563,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D // Prepare per frame descriptor tables const bool isOddFrame = !!(context->resourceFrameIndex & 1); - const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0; - const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex; + //const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0; + //const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex; const uint32_t lockStatusSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1; const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2; const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1; diff --git a/src/ffx-fsr2-api/ffx_types.h b/src/ffx-fsr2-api/ffx_types.h index 75fb0e8..9962c4f 100644 --- a/src/ffx-fsr2-api/ffx_types.h +++ b/src/ffx-fsr2-api/ffx_types.h @@ -23,13 +23,26 @@ #include -#if defined (FFX_GCC) -/// FidelityFX exported functions +// maister: Force internal linkage. 
#define FFX_API -#else -/// FidelityFX exported functions -#define FFX_API __declspec(dllexport) -#endif // #if defined (FFX_GCC) + +// Workarounds. +#ifndef _WIN32 +#define _countof(array) (sizeof(array) / sizeof((array)[0])) +#include +#include + +template +static inline void strcpy_s(char (&buf)[N], const char *str) +{ + snprintf(buf, N, "%s", str); +} +#endif + +#ifndef _MSC_VER +#define FFX_STATIC static inline +#endif +//// /// Maximum supported number of simultaneously bound SRVs. #define FFX_MAX_NUM_SRVS 16 @@ -57,6 +70,7 @@ typedef enum FfxSurfaceFormat { FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format + FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, ///< 16 bit per channel, 4 channel unsigned normalized format FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel float format FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel float format diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h index 14620d5..ec56bfe 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate.h @@ -182,6 +182,7 @@ void Accumulate(FfxInt32x2 iPxHrPos) const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fLrUvJittered); const FfxFloat32 fReactiveMax = fDilatedReactiveMasks.x; const FfxFloat32 fAccumulationMask = fDilatedReactiveMasks.y; + const FfxBoolean bIsResetFrame = (0 == FrameIndex()); FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0, 0, 0, 0); FfxFloat32x3 fLockStatus; @@ -191,7 +192,7 @@ void Accumulate(FfxInt32x2 iPxHrPos) FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0); ComputeReprojectedUVs(iPxHrPos, fMotionVector, 
fReprojectedHrUv, bIsExistingSample); - if (bIsExistingSample) { + if (bIsExistingSample && !bIsResetFrame) { ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight); ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus); } @@ -259,4 +260,4 @@ void Accumulate(FfxInt32x2 iPxHrPos) #endif } -#endif // FFX_FSR2_ACCUMULATE_H +#endif // FFX_FSR2_ACCUMULATE_H \ No newline at end of file diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl index e1ee116..4f0d694 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_accumulate_pass.glsl @@ -42,6 +42,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require +#extension GL_EXT_shader_image_load_formatted : require #define FSR2_BIND_SRV_EXPOSURE 0 #define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h index 2cd1d15..1676144 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_glsl.h @@ -206,37 +206,37 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth; #endif #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg32f) uniform image2D rw_dilated_motion_vectors; + layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS /* app controlled format */) writeonly uniform image2D rw_dilated_motion_vectors; #endif #if defined FSR2_BIND_UAV_DILATED_DEPTH - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r32f) uniform image2D rw_dilatedDepth; + layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) uniform image2D 
rw_dilatedDepth; #endif #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED - layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba32f) uniform image2D rw_internal_upscaled_color; + layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) uniform image2D rw_internal_upscaled_color; #endif #if defined FSR2_BIND_UAV_LOCK_STATUS layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, r11f_g11f_b10f) uniform image2D rw_lock_status; #endif #if defined FSR2_BIND_UAV_DEPTH_CLIP - layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r32f) uniform image2D rw_depth_clip; + layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r16f) uniform image2D rw_depth_clip; #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba32f) uniform image2D rw_prepared_input_color; + layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16f) uniform image2D rw_prepared_input_color; #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY - layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba32f) uniform image2D rw_luma_history; + layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history; #endif #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT - layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT, rgba32f) uniform image2D rw_upscaled_output; + layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output; #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r32f) coherent uniform image2D rw_img_mip_shading_change; + layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change; #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 - layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r32f) coherent uniform image2D rw_img_mip_5; + layout (set = 1, binding = 
FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5; #endif #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg32f) uniform image2D rw_dilated_reactive_masks; + layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, r8) uniform image2D rw_dilated_reactive_masks; #endif #if defined FSR2_BIND_UAV_EXPOSURE layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure; @@ -592,6 +592,9 @@ void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxF { #if defined(FSR2_BIND_UAV_DILATED_DEPTH) //FfxUInt32 uDepth = f32tof16(fDepth); +#if !FFX_FSR2_OPTION_INVERTED_DEPTH + fDepth = 1.0 - fDepth; // maister: Preserve precision as well as we can in FP16. +#endif imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f)); #endif } @@ -625,7 +628,11 @@ FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) { #if defined(FSR2_BIND_SRV_DILATED_DEPTH) - return texelFetch(r_dilatedDepth, iPxInput, 0).r; + FfxFloat32 d = texelFetch(r_dilatedDepth, iPxInput, 0).r; +#if !FFX_FSR2_OPTION_INVERTED_DEPTH + d = 1.0 - d; // maister: Reconstruct from FP16. 
+#endif + return d; #else return 0.f; #endif diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h index 646847e..1c368bb 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_callbacks_hlsl.h @@ -234,26 +234,26 @@ SamplerState s_LinearClamp : register(s1); // SRVs #if defined(FFX_INTERNAL) - Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR); - Texture2D r_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); - Texture2D r_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); - Texture2D r_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); - Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK); - Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK); - Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); - Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); - Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); - Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - Texture2D r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); - Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); - Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT); - Texture2D 
r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT); - Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); - Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT); - Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); - Texture2D r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); + Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR); + Texture2D r_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); + Texture2D r_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); + Texture2D r_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); + Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK); + Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK); + Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); + Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); + Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); + Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); + Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + Texture2D r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + Texture2D r_rcas_input 
: FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT); + Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT); + Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); + Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT); + Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); + Texture2D r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); // declarations not current form, no accessor functions Texture2D r_transparency_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_TRANSPARENCY_MASK); @@ -275,15 +275,15 @@ SamplerState s_LinearClamp : register(s1); Texture2D r_motion_vectors_reflection : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_REFLECTION); // UAV declarations - RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); - RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); - RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); - RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - RWTexture2D rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); - RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); - RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT); + RWTexture2D rw_reconstructed_previous_nearest_depth : 
FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); + RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); + RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); + RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); + RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + RWTexture2D rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); + RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT); globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE); globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5); @@ -330,7 +330,7 @@ SamplerState s_LinearClamp : register(s1); Texture2D r_depth_clip : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP); #endif #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR - Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_SRV_LUMA_HISTORY Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); @@ -371,7 +371,7 @@ SamplerState s_LinearClamp : register(s1); RWTexture2D rw_depth_clip : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP); #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); + RWTexture2D 
rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); @@ -823,9 +823,9 @@ FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) { #if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) fUV *= depthclip_uv_scale; - return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); + return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); #else - return 0.f; + return 0.f; #endif } @@ -845,4 +845,4 @@ void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETE #endif } -#endif // #if defined(FFX_GPU) +#endif // #if defined(FFX_GPU) \ No newline at end of file diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h index 81db737..9522e65 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_depth_clip.h @@ -35,7 +35,11 @@ FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousD FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); // WARNING: Ksep only works with reversed-z with infinite projection. +#if !FFX_FSR2_OPTION_INVERTED_DEPTH + const FfxFloat32 Ksep = 4.0f * 1.37e-05f; // maister: Arbitrary hack to make normal depth work. 
+#else const FfxFloat32 Ksep = 1.37e-05f; +#endif FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth; FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; diff --git a/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h b/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h index f697d70..b029330 100644 --- a/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h +++ b/src/ffx-fsr2-api/shaders/ffx_fsr2_sample.h @@ -563,24 +563,32 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1 // BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102) // is common, so iPxSample can "jitter" -#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ - FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ - { \ - FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ - FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ - FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ - return fColorXY; \ +#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ + FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + /* Clamp base coords */ \ + iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \ + iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \ + /* */ \ + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ + FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ } -#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ - FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 
iTextureSize) \ - { \ - FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ - FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ - FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ - return fColorXY; \ +#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ + FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + /* Clamp base coords */ \ + iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \ + iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \ + /* */ \ + FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ + FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ } #define FFX_FSR2_CONCAT_ID(x, y) x ## y @@ -591,4 +599,4 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1 #define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x) -#endif //!defined( FFX_FSR2_SAMPLE_H ) +#endif //!defined( FFX_FSR2_SAMPLE_H ) \ No newline at end of file diff --git a/src/ffx-fsr2-api/vk/CMakeLists.txt b/src/ffx-fsr2-api/vk/CMakeLists.txt index 933d097..2b532cf 100644 --- a/src/ffx-fsr2-api/vk/CMakeLists.txt +++ b/src/ffx-fsr2-api/vk/CMakeLists.txt @@ -23,6 +23,8 @@ if(NOT ${FFX_FSR2_API_VK}) return() endif() +add_definitions(-DFFX_GCC) + set(FFX_SC_VK_BASE_ARGS -compiler=glslang -e main --target-env vulkan1.1 -S comp -Os -DFFX_GLSL=1) @@ -86,7 +88,7 @@ foreach(PASS_SHADER ${PASS_SHADERS}) if(USE_DEPFILE) add_custom_command( OUTPUT ${PERMUTATION_HEADER} - COMMAND ${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} 
-I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER} + COMMAND cmd.exe /C '${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}' WORKING_DIRECTORY ${CMAKE_BINARY_DIR} DEPENDS ${PASS_SHADER} DEPFILE ${PERMUTATION_HEADER}.d @@ -95,7 +97,7 @@ foreach(PASS_SHADER ${PASS_SHADERS}) else() add_custom_command( OUTPUT ${PERMUTATION_HEADER} - COMMAND ${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER} + COMMAND cmd.exe /C '${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}' WORKING_DIRECTORY ${CMAKE_BINARY_DIR} DEPENDS ${PASS_SHADER} ) diff --git a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp index 867da60..6f6e3fa 100644 --- a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp +++ b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.cpp @@ -27,6 +27,8 @@ #include #include #include +#include + // prototypes for functions in the interface FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDeviceCapabilities* deviceCapabilities, FfxDevice device); @@ -170,13 +172,25 @@ typedef struct BackendContext_VK { } BackendContext_VK; -FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice) -{ - uint32_t numExtensions = 0; - - if (physicalDevice) - vkEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &numExtensions, nullptr); +namespace { + PFN_vkEnumerateDeviceExtensionProperties EnumerateDeviceExtensionProperties; + PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties; + PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceProperties2 GetPhysicalDeviceProperties2; + PFN_vkGetPhysicalDeviceFeatures2 GetPhysicalDeviceFeatures2; +} +FFX_API void 
ffxFsr2SetInstanceFunctions( VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr ) { + ::EnumerateDeviceExtensionProperties = (PFN_vkEnumerateDeviceExtensionProperties) getInstanceProcAddr(instance, "vkEnumerateDeviceExtensionProperties"); + ::GetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties) getInstanceProcAddr(instance, "vkGetPhysicalDeviceMemoryProperties"); + ::GetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties) getInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties"); + ::GetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties2"); + ::GetPhysicalDeviceFeatures2 = (PFN_vkGetPhysicalDeviceFeatures2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceFeatures2"); +} + +FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice, uint32_t numExtensions) +{ + if ( physicalDevice && numExtensions == 0) ::EnumerateDeviceExtensionProperties(physicalDevice, nullptr, &numExtensions, nullptr); return FFX_ALIGN_UP(sizeof(BackendContext_VK) + sizeof(VkExtensionProperties) * numExtensions, sizeof(uint64_t)); } @@ -185,7 +199,8 @@ FfxErrorCode ffxFsr2GetInterfaceVK( void* scratchBuffer, size_t scratchBufferSize, VkPhysicalDevice physicalDevice, - PFN_vkGetDeviceProcAddr getDeviceProcAddr) + PFN_vkGetDeviceProcAddr getDeviceProcAddr +) { FFX_RETURN_ON_ERROR( outInterface, @@ -282,6 +297,8 @@ VkFormat getVKFormatFromSurfaceFormat(FfxSurfaceFormat fmt) return VK_FORMAT_R32G32B32A32_SFLOAT; case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT): return VK_FORMAT_R16G16B16A16_SFLOAT; + case(FFX_SURFACE_FORMAT_R16G16B16A16_UNORM): + return VK_FORMAT_R16G16B16A16_UNORM; case(FFX_SURFACE_FORMAT_R32G32_FLOAT): return VK_FORMAT_R32G32_SFLOAT; case(FFX_SURFACE_FORMAT_R32_UINT): @@ -441,7 +458,7 @@ uint32_t findMemoryTypeIndex(VkPhysicalDevice physicalDevice, VkMemoryRequiremen FFX_ASSERT(NULL != physicalDevice); VkPhysicalDeviceMemoryProperties 
memProperties; - vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties); + ::GetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties); uint32_t bestCandidate = UINT32_MAX; @@ -714,7 +731,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi VkPhysicalDeviceProperties2 deviceProperties2 = {}; deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; deviceProperties2.pNext = &subgroupSizeControlProperties; - vkGetPhysicalDeviceProperties2(context->physicalDevice, &deviceProperties2); + ::GetPhysicalDeviceProperties2(context->physicalDevice, &deviceProperties2); deviceCapabilities->waveLaneCountMin = subgroupSizeControlProperties.minSubgroupSize; deviceCapabilities->waveLaneCountMax = subgroupSizeControlProperties.maxSubgroupSize; @@ -729,7 +746,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; physicalDeviceFeatures2.pNext = &shaderFloat18Int8Features; - vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2); + ::GetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2); deviceCapabilities->fp16Supported = (bool)shaderFloat18Int8Features.shaderFloat16; } @@ -743,7 +760,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; physicalDeviceFeatures2.pNext = &accelerationStructureFeatures; - vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2); + ::GetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2); deviceCapabilities->raytracingSupported = (bool)accelerationStructureFeatures.accelerationStructure; } @@ -781,8 +798,8 @@ FfxErrorCode CreateBackendContextVK(FfxFsr2Interface* backendInterface, FfxDevic // enumerate all the device extensions backendContext->numDeviceExtensions = 0; - 
vkEnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, nullptr); - vkEnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, backendContext->extensionProperties); + ::EnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, nullptr); + ::EnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, backendContext->extensionProperties); // create descriptor pool VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {}; @@ -1265,7 +1282,7 @@ FfxErrorCode CreatePipelineVK(FfxFsr2Interface* backendInterface, FfxFsr2Pass pa if (pass == FFX_FSR2_PASS_ACCUMULATE || pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) { VkPhysicalDeviceProperties physicalDeviceProperties = {}; - vkGetPhysicalDeviceProperties(backendContext->physicalDevice, &physicalDeviceProperties); + ::GetPhysicalDeviceProperties(backendContext->physicalDevice, &physicalDeviceProperties); // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. if (physicalDeviceProperties.vendorID == 0x10DE) diff --git a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h index e0e226a..3d5accd 100644 --- a/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h +++ b/src/ffx-fsr2-api/vk/ffx_fsr2_vk.h @@ -34,7 +34,8 @@ extern "C" { /// /// @returns /// The size (in bytes) of the required scratch memory buffer for the VK backend. - FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice); + FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice, uint32_t = 0); + FFX_API void ffxFsr2SetInstanceFunctions( VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr ); /// Populate an interface with pointers for the VK backend. ///