This commit is contained in:
mrq 2023-03-03 19:34:56 +00:00
parent ea97a113b0
commit 81dd44cb88
19 changed files with 171 additions and 126 deletions

View File

@ -24,6 +24,8 @@ cmake_minimum_required(VERSION 3.12.1)
option (GFX_API_DX12 "Build with DX12" ON) option (GFX_API_DX12 "Build with DX12" ON)
option (GFX_API_VK "Build with Vulkan" ON) option (GFX_API_VK "Build with Vulkan" ON)
add_definitions(-DFFX_GCC)
if(NOT DEFINED GFX_API) if(NOT DEFINED GFX_API)
project (FSR2_Sample) project (FSR2_Sample)
else() else()
@ -56,11 +58,6 @@ else()
endif() endif()
endif() endif()
# Check MSVC toolset version, Visual Studio 2019 required
if(MSVC_TOOLSET_VERSION VERSION_LESS 142)
message(FATAL_ERROR "Cannot find MSVC toolset version 142 or greater. Please make sure Visual Studio 2019 or newer installed")
endif()
# output exe to bin directory # output exe to bin directory
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin) SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin)
@ -69,7 +66,6 @@ foreach( OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES} )
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${CMAKE_HOME_DIRECTORY}/bin ) set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${CMAKE_HOME_DIRECTORY}/bin )
endforeach( OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES ) endforeach( OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES )
add_compile_options(/MP)
add_compile_definitions($<$<CONFIG:RelWithDebInfo>:USE_PIX>) add_compile_definitions($<$<CONFIG:RelWithDebInfo>:USE_PIX>)
# override build options in ffx-fsr2-api cmake # override build options in ffx-fsr2-api cmake

View File

@ -1,3 +1,11 @@
This fork cleans up some *questionable* choices in FSR2 by:
* adding support for compiling under (MSYS2's) GCC
* making shader permutation generation work (as it relies on being run under CMD.exe, not bash)
* fixing segfaults caused by weird linkage against Vulkan by grabbing function pointers
* some other things I don't remember; I did this a year ago
---
# FidelityFX Super Resolution 2.1 (FSR 2.1) # FidelityFX Super Resolution 2.1 (FSR 2.1)
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.

View File

@ -47,12 +47,7 @@ if "%VULKAN_SDK%"=="" (
) )
:: Call CMake :: Call CMake
mkdir DX12
cd DX12
cmake -A x64 ..\.. -DGFX_API=DX12 -DFSR2_BUILD_AS_DLL=1
cd ..
mkdir VK mkdir VK
cd VK cd VK
cmake -A x64 ..\.. -DGFX_API=VK -DFSR2_BUILD_AS_DLL=1 cmake -G "Unix Makefiles" ..\.. -DGFX_API=VK -DFSR2_BUILD_AS_DLL=1
cd .. cd ..

View File

@ -2,8 +2,6 @@
# enables multithreading compilation # enables multithreading compilation
# #
add_compile_options(/MP)
# #
# includes cauldron's helper cmakes # includes cauldron's helper cmakes
# #

View File

@ -507,8 +507,8 @@ void FSR2Sample::OnUpdate()
if (m_UIState.m_bBaloonAttachToCamera) { if (m_UIState.m_bBaloonAttachToCamera) {
Vectormath::Vector3 eye = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * m_UIState.baloon_offset_z; Vectormath::Vector3 eye = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * m_UIState.baloon_offset_z;
Vectormath::Vector3 look = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * -2.0f; Vectormath::Vector3 look = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * -2.0f;
m_UIState.baloon_pos = m_UIState.baloon_pos + (eye - m_UIState.baloon_pos) * (1.0f - std::expf(50.0f * -(float)m_deltaTime / 1000.0f)); m_UIState.baloon_pos = m_UIState.baloon_pos + (eye - m_UIState.baloon_pos) * (1.0f - std::exp(50.0f * -(float)m_deltaTime / 1000.0f));
m_UIState.baloon_tip_pos = m_UIState.baloon_tip_pos + (look - m_UIState.baloon_tip_pos) * (1.0f - std::expf(50.0f * -(float)m_deltaTime / 1000.0f)); m_UIState.baloon_tip_pos = m_UIState.baloon_tip_pos + (look - m_UIState.baloon_tip_pos) * (1.0f - std::exp(50.0f * -(float)m_deltaTime / 1000.0f));
m_UIState.m_CurBaloonTransform = Vectormath::inverse(Vectormath::Matrix4::lookAt(Vectormath::Point3(m_UIState.baloon_pos), Vectormath::Point3(m_UIState.baloon_tip_pos), Vectormath::Vector3(0.0f, 1.0f, 0.0f))) * m_UIState.m_CurBaloonTransform = Vectormath::inverse(Vectormath::Matrix4::lookAt(Vectormath::Point3(m_UIState.baloon_pos), Vectormath::Point3(m_UIState.baloon_tip_pos), Vectormath::Vector3(0.0f, 1.0f, 0.0f))) *
Vectormath::Matrix4::translation(Vectormath::Vector3(m_UIState.baloon_offset_x, m_UIState.baloon_offset_y, 0.0f)) * // Vectormath::Matrix4::translation(Vectormath::Vector3(m_UIState.baloon_offset_x, m_UIState.baloon_offset_y, 0.0f)) * //
Vectormath::Matrix4::rotation(-3.141592f / 2.0f, Vectormath::Vector3(1.0f, 0.0f, 0.0f)) * // Vectormath::Matrix4::rotation(-3.141592f / 2.0f, Vectormath::Vector3(1.0f, 0.0f, 0.0f)) * //

View File

@ -157,13 +157,13 @@ void GPUFrameRateLimiter::Draw(VkCommandBuffer cmdBuf, DynamicBufferRing* pDynam
m_frameTimeHistory[m_frameTimeHistoryCount % _countof(m_frameTimeHistory)] = lastFrameTimeMicrosecs; m_frameTimeHistory[m_frameTimeHistoryCount % _countof(m_frameTimeHistory)] = lastFrameTimeMicrosecs;
m_frameTimeHistoryCount++; m_frameTimeHistoryCount++;
double recentFrameTimeAvg = double(m_frameTimeHistorySum) / min(m_frameTimeHistoryCount, _countof(m_frameTimeHistory)); double recentFrameTimeAvg = double(m_frameTimeHistorySum) / std::min(m_frameTimeHistoryCount, _countof(m_frameTimeHistory));
double clampedTargetFrameTimeMs = max(min(double(targetFrameTimeMicrosecs), MaxTargetFrameTimeUs), MinTargetFrameTimeUs); double clampedTargetFrameTimeMs = std::max(std::min(double(targetFrameTimeMicrosecs), MaxTargetFrameTimeUs), MinTargetFrameTimeUs);
double deltaRatio = (recentFrameTimeAvg - clampedTargetFrameTimeMs) / clampedTargetFrameTimeMs; double deltaRatio = (recentFrameTimeAvg - clampedTargetFrameTimeMs) / clampedTargetFrameTimeMs;
m_overhead -= m_overhead * deltaRatio * DampenFactor; m_overhead -= m_overhead * deltaRatio * DampenFactor;
m_overhead = min(max(1.0, m_overhead), 1000000.0); m_overhead = std::min(std::max(1.0, m_overhead), 1000000.0);
uint32_t numLoops = uint32_t(m_overhead); uint32_t numLoops = uint32_t(m_overhead);

View File

@ -86,7 +86,7 @@ public:
virtual void BuildDevUI(UIState* pState) {} virtual void BuildDevUI(UIState* pState) {}
virtual void PreDraw(UIState* pState); virtual void PreDraw(UIState* pState);
virtual void GenerateReactiveMask(VkCommandBuffer pCommandList, const FfxUpscaleSetup& cameraSetup, UIState* pState); virtual void GenerateReactiveMask(VkCommandBuffer pCommandList, const FfxUpscaleSetup& cameraSetup, UIState* pState);
virtual void Draw(VkCommandBuffer commandBuffer, const FfxUpscaleSetup& cameraSetup, UIState* pState) = NULL; virtual void Draw(VkCommandBuffer commandBuffer, const FfxUpscaleSetup& cameraSetup, UIState* pState) = 0;
protected: protected:
Device* m_pDevice; Device* m_pDevice;

View File

@ -37,16 +37,8 @@ if(CMAKE_GENERATOR STREQUAL "Visual Studio 16 2019")
set(FSR2_VS_VERSION 2019) set(FSR2_VS_VERSION 2019)
endif() endif()
if(CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR CMAKE_EXE_LINKER_FLAGS STREQUAL "/machine:x64")
set(FSR2_PLATFORM_NAME x64)
elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32" OR CMAKE_EXE_LINKER_FLAGS STREQUAL "/machine:X86")
set(FSR2_PLATFORM_NAME x86)
else()
message(FATAL_ERROR "Unsupported target platform - only supporting x64 and Win32 currently")
endif()
# Embed PDBs in the debug versions of the libs # Embed PDBs in the debug versions of the libs
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g")
# Write both debug and release versions of the static libs to the /lib folder as they are uniquely named # Write both debug and release versions of the static libs to the /lib folder as they are uniquely named
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_HOME_DIRECTORY}/bin/ffx_fsr2_api/) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_HOME_DIRECTORY}/bin/ffx_fsr2_api/)

View File

@ -29,6 +29,7 @@
#include "shaders/ffx_fsr1.h" #include "shaders/ffx_fsr1.h"
#include "shaders/ffx_spd.h" #include "shaders/ffx_spd.h"
#include "shaders/ffx_fsr2_callbacks_hlsl.h" #include "shaders/ffx_fsr2_callbacks_hlsl.h"
#include <cmath>
#include "ffx_fsr2_maximum_bias.h" #include "ffx_fsr2_maximum_bias.h"
@ -161,7 +162,7 @@ FfxConstantBuffer globalFsr2ConstantBuffers[3] = {
// Lanczos // Lanczos
static float lanczos2(float value) static float lanczos2(float value)
{ {
return abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value)); return std::abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
} }
// Calculate halton number for index and base. // Calculate halton number for index and base.
@ -183,7 +184,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
{ {
for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex) for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex)
{ {
int32_t mapIndex = 0; size_t mapIndex = 0;
for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex) for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex)
{ {
if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvResourceBindings[srvIndex].name)) if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvResourceBindings[srvIndex].name))
@ -197,7 +198,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavCount; ++uavIndex) for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavCount; ++uavIndex)
{ {
int32_t mapIndex = 0; size_t mapIndex = 0;
for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex) for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex)
{ {
if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavResourceBindings[uavIndex].name)) if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavResourceBindings[uavIndex].name))
@ -211,7 +212,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex) for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex)
{ {
int32_t mapIndex = 0; size_t mapIndex = 0;
for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex) for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex)
{ {
if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->cbResourceBindings[cbIndex].name)) if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->cbResourceBindings[cbIndex].name))
@ -331,7 +332,7 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con
const Fsr2ResourceDescription internalSurfaceDesc[] = { const Fsr2ResourceDescription internalSurfaceDesc[] = {
{ FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV, { FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV,
FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
{ FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV, { FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV,
FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
@ -475,7 +476,7 @@ static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context)
return FFX_OK; return FFX_OK;
} }
static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY) static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
{ {
FfxComputeJobDescription jobDescriptor = {}; FfxComputeJobDescription jobDescriptor = {};
@ -562,8 +563,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
// Prepare per frame descriptor tables // Prepare per frame descriptor tables
const bool isOddFrame = !!(context->resourceFrameIndex & 1); const bool isOddFrame = !!(context->resourceFrameIndex & 1);
const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0; //const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0;
const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex; //const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex;
const uint32_t lockStatusSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1; const uint32_t lockStatusSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1;
const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2; const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2;
const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1; const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1;

View File

@ -23,13 +23,26 @@
#include <stdint.h> #include <stdint.h>
#if defined (FFX_GCC) // maister: Force internal linkage.
/// FidelityFX exported functions
#define FFX_API #define FFX_API
#else
/// FidelityFX exported functions // Workarounds.
#define FFX_API __declspec(dllexport) #ifndef _WIN32
#endif // #if defined (FFX_GCC) #define _countof(array) (sizeof(array) / sizeof((array)[0]))
#include <stdio.h>
#include <stddef.h>
/// Array-deducing string copy used where the platform does not supply strcpy_s.
///
/// Copies `str` into the fixed-size character array `buf`. The destination
/// capacity is taken from the array type, so callers pass only the array
/// and the source string.
///
/// @param buf  Destination character array; its length is deduced at compile time.
/// @param str  Null-terminated source string.
///
/// Input longer than the array is truncated rather than reported: snprintf
/// writes at most (capacity - 1) characters and always null-terminates.
template <size_t Capacity>
static inline void strcpy_s(char (&buf)[Capacity], const char *str)
{
    // sizeof on the array reference yields the full array size, i.e. Capacity.
    const size_t capacity = sizeof(buf);
    (void)snprintf(buf, capacity, "%s", str);
}
#endif
#ifndef _MSC_VER
#define FFX_STATIC static inline
#endif
////
/// Maximum supported number of simultaneously bound SRVs. /// Maximum supported number of simultaneously bound SRVs.
#define FFX_MAX_NUM_SRVS 16 #define FFX_MAX_NUM_SRVS 16
@ -57,6 +70,7 @@ typedef enum FfxSurfaceFormat {
FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format
FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format
FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format
FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, ///< 16 bit per channel, 4 channel unsigned normalized format
FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format
FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel float format FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel float format
FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel float format FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel float format

View File

@ -182,6 +182,7 @@ void Accumulate(FfxInt32x2 iPxHrPos)
const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fLrUvJittered); const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fLrUvJittered);
const FfxFloat32 fReactiveMax = fDilatedReactiveMasks.x; const FfxFloat32 fReactiveMax = fDilatedReactiveMasks.x;
const FfxFloat32 fAccumulationMask = fDilatedReactiveMasks.y; const FfxFloat32 fAccumulationMask = fDilatedReactiveMasks.y;
const FfxBoolean bIsResetFrame = (0 == FrameIndex());
FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0, 0, 0, 0); FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0, 0, 0, 0);
FfxFloat32x3 fLockStatus; FfxFloat32x3 fLockStatus;
@ -191,7 +192,7 @@ void Accumulate(FfxInt32x2 iPxHrPos)
FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0); FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0);
ComputeReprojectedUVs(iPxHrPos, fMotionVector, fReprojectedHrUv, bIsExistingSample); ComputeReprojectedUVs(iPxHrPos, fMotionVector, fReprojectedHrUv, bIsExistingSample);
if (bIsExistingSample) { if (bIsExistingSample && !bIsResetFrame) {
ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight); ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight);
ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus); ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus);
} }

View File

@ -42,6 +42,7 @@
#extension GL_GOOGLE_include_directive : require #extension GL_GOOGLE_include_directive : require
#extension GL_EXT_samplerless_texture_functions : require #extension GL_EXT_samplerless_texture_functions : require
#extension GL_EXT_shader_image_load_formatted : require
#define FSR2_BIND_SRV_EXPOSURE 0 #define FSR2_BIND_SRV_EXPOSURE 0
#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 #define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1

View File

@ -206,37 +206,37 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth; layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth;
#endif #endif
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg32f) uniform image2D rw_dilated_motion_vectors; layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS /* app controlled format */) writeonly uniform image2D rw_dilated_motion_vectors;
#endif #endif
#if defined FSR2_BIND_UAV_DILATED_DEPTH #if defined FSR2_BIND_UAV_DILATED_DEPTH
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r32f) uniform image2D rw_dilatedDepth; layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) uniform image2D rw_dilatedDepth;
#endif #endif
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba32f) uniform image2D rw_internal_upscaled_color; layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) uniform image2D rw_internal_upscaled_color;
#endif #endif
#if defined FSR2_BIND_UAV_LOCK_STATUS #if defined FSR2_BIND_UAV_LOCK_STATUS
layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, r11f_g11f_b10f) uniform image2D rw_lock_status; layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, r11f_g11f_b10f) uniform image2D rw_lock_status;
#endif #endif
#if defined FSR2_BIND_UAV_DEPTH_CLIP #if defined FSR2_BIND_UAV_DEPTH_CLIP
layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r32f) uniform image2D rw_depth_clip; layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r16f) uniform image2D rw_depth_clip;
#endif #endif
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba32f) uniform image2D rw_prepared_input_color; layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16f) uniform image2D rw_prepared_input_color;
#endif #endif
#if defined FSR2_BIND_UAV_LUMA_HISTORY #if defined FSR2_BIND_UAV_LUMA_HISTORY
layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba32f) uniform image2D rw_luma_history; layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history;
#endif #endif
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT, rgba32f) uniform image2D rw_upscaled_output; layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output;
#endif #endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r32f) coherent uniform image2D rw_img_mip_shading_change; layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change;
#endif #endif
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r32f) coherent uniform image2D rw_img_mip_5; layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5;
#endif #endif
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg32f) uniform image2D rw_dilated_reactive_masks; layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, r8) uniform image2D rw_dilated_reactive_masks;
#endif #endif
#if defined FSR2_BIND_UAV_EXPOSURE #if defined FSR2_BIND_UAV_EXPOSURE
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure; layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure;
@ -592,6 +592,9 @@ void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxF
{ {
#if defined(FSR2_BIND_UAV_DILATED_DEPTH) #if defined(FSR2_BIND_UAV_DILATED_DEPTH)
//FfxUInt32 uDepth = f32tof16(fDepth); //FfxUInt32 uDepth = f32tof16(fDepth);
#if !FFX_FSR2_OPTION_INVERTED_DEPTH
fDepth = 1.0 - fDepth; // maister: Preserve precision as well as we can in FP16.
#endif
imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f)); imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
#endif #endif
} }
@ -625,7 +628,11 @@ FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
{ {
#if defined(FSR2_BIND_SRV_DILATED_DEPTH) #if defined(FSR2_BIND_SRV_DILATED_DEPTH)
return texelFetch(r_dilatedDepth, iPxInput, 0).r; FfxFloat32 d = texelFetch(r_dilatedDepth, iPxInput, 0).r;
#if !FFX_FSR2_OPTION_INVERTED_DEPTH
d = 1.0 - d; // maister: Reconstruct from FP16.
#endif
return d;
#else #else
return 0.f; return 0.f;
#endif #endif

View File

@ -234,26 +234,26 @@ SamplerState s_LinearClamp : register(s1);
// SRVs // SRVs
#if defined(FFX_INTERNAL) #if defined(FFX_INTERNAL)
Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR); Texture2D<FfxFloat32x4> r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
Texture2D<FfxFloat32x4> r_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); Texture2D<FfxFloat32x4> r_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
Texture2D<FfxFloat32> r_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); Texture2D<FfxFloat32> r_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
Texture2D<FfxFloat32x2> r_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); Texture2D<FfxFloat32x2> r_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK); Texture2D<FfxFloat32> r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK); Texture2D<FfxFloat32> r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); Texture2D<FfxUInt32> r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); Texture2D<FfxFloat32x2> r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); Texture2D<FfxFloat32> r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
Texture2D<FfxFloat32x3> r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); Texture2D<FfxFloat32x3> r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); Texture2D<unorm FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT); Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT); Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); Texture2D<FfxFloat32> r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT); Texture2D<FfxFloat32> r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
Texture2D<FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); Texture2D<FfxFloat32x2> r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
Texture2D<FfxFloat32x4> r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); Texture2D<FfxFloat32x4> r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
// declarations not current form, no accessor functions // declarations not current form, no accessor functions
Texture2D<FfxFloat32x4> r_transparency_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_TRANSPARENCY_MASK); Texture2D<FfxFloat32x4> r_transparency_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_TRANSPARENCY_MASK);
@ -275,15 +275,15 @@ SamplerState s_LinearClamp : register(s1);
Texture2D<FfxFloat32x4> r_motion_vectors_reflection : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_REFLECTION); Texture2D<FfxFloat32x4> r_motion_vectors_reflection : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_REFLECTION);
// UAV declarations // UAV declarations
RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); RWTexture2D<FfxUInt32> rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); RWTexture2D<FfxFloat32x2> rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); RWTexture2D<FfxFloat32> rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
RWTexture2D<FfxFloat32x3> rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); RWTexture2D<FfxFloat32x3> rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); RWTexture2D<unorm FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT); RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE); globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE);
globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5); globallycoherent RWTexture2D<FfxFloat32> rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5);
@ -330,7 +330,7 @@ SamplerState s_LinearClamp : register(s1);
Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP); Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP);
#endif #endif
#if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); Texture2D<unorm FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
#endif #endif
#if defined FSR2_BIND_SRV_LUMA_HISTORY #if defined FSR2_BIND_SRV_LUMA_HISTORY
Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
@ -371,7 +371,7 @@ SamplerState s_LinearClamp : register(s1);
RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP); RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP);
#endif #endif
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); RWTexture2D<unorm FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
#endif #endif
#if defined FSR2_BIND_UAV_LUMA_HISTORY #if defined FSR2_BIND_UAV_LUMA_HISTORY
RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
@ -823,9 +823,9 @@ FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
{ {
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) #if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
fUV *= depthclip_uv_scale; fUV *= depthclip_uv_scale;
return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
#else #else
return 0.f; return 0.f;
#endif #endif
} }

View File

@ -35,7 +35,11 @@ FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousD
FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
// WARNING: Ksep only works with reversed-z with infinite projection. // WARNING: Ksep only works with reversed-z with infinite projection.
#if !FFX_FSR2_OPTION_INVERTED_DEPTH
const FfxFloat32 Ksep = 4.0f * 1.37e-05f; // maister: Arbitrary hack to make normal depth work.
#else
const FfxFloat32 Ksep = 1.37e-05f; const FfxFloat32 Ksep = 1.37e-05f;
#endif
FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth; FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth;
FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;

View File

@ -563,24 +563,32 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1
// BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102) // BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102)
// is common, so iPxSample can "jitter" // is common, so iPxSample can "jitter"
#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ #define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \
FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
{ \ { \
FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \
FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ /* Clamp base coords */ \
FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \
return fColorXY; \ iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \
/* */ \
FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
return fColorXY; \
} }
#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ #define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \
FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
{ \ { \
FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \
FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ /* Clamp base coords */ \
FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \
return fColorXY; \ iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \
/* */ \
FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \
FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
return fColorXY; \
} }
#define FFX_FSR2_CONCAT_ID(x, y) x ## y #define FFX_FSR2_CONCAT_ID(x, y) x ## y

View File

@ -23,6 +23,8 @@ if(NOT ${FFX_FSR2_API_VK})
return() return()
endif() endif()
add_definitions(-DFFX_GCC)
set(FFX_SC_VK_BASE_ARGS set(FFX_SC_VK_BASE_ARGS
-compiler=glslang -e main --target-env vulkan1.1 -S comp -Os -DFFX_GLSL=1) -compiler=glslang -e main --target-env vulkan1.1 -S comp -Os -DFFX_GLSL=1)
@ -86,7 +88,7 @@ foreach(PASS_SHADER ${PASS_SHADERS})
if(USE_DEPFILE) if(USE_DEPFILE)
add_custom_command( add_custom_command(
OUTPUT ${PERMUTATION_HEADER} OUTPUT ${PERMUTATION_HEADER}
COMMAND ${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER} COMMAND cmd.exe /C '${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}'
WORKING_DIRECTORY ${CMAKE_BINARY_DIR} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS ${PASS_SHADER} DEPENDS ${PASS_SHADER}
DEPFILE ${PERMUTATION_HEADER}.d DEPFILE ${PERMUTATION_HEADER}.d
@ -95,7 +97,7 @@ foreach(PASS_SHADER ${PASS_SHADERS})
else() else()
add_custom_command( add_custom_command(
OUTPUT ${PERMUTATION_HEADER} OUTPUT ${PERMUTATION_HEADER}
COMMAND ${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER} COMMAND cmd.exe /C '${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}'
WORKING_DIRECTORY ${CMAKE_BINARY_DIR} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS ${PASS_SHADER} DEPENDS ${PASS_SHADER}
) )

View File

@ -27,6 +27,8 @@
#include <math.h> #include <math.h>
#include <stdlib.h> #include <stdlib.h>
#include <codecvt> #include <codecvt>
#include <locale>
// prototypes for functions in the interface // prototypes for functions in the interface
FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDeviceCapabilities* deviceCapabilities, FfxDevice device); FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDeviceCapabilities* deviceCapabilities, FfxDevice device);
@ -170,13 +172,25 @@ typedef struct BackendContext_VK {
} BackendContext_VK; } BackendContext_VK;
FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice) namespace {
PFN_vkEnumerateDeviceExtensionProperties EnumerateDeviceExtensionProperties;
PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties;
PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties;
PFN_vkGetPhysicalDeviceProperties2 GetPhysicalDeviceProperties2;
PFN_vkGetPhysicalDeviceFeatures2 GetPhysicalDeviceFeatures2;
}
/// Resolve the instance-level Vulkan entry points used by the VK backend.
///
/// Each entry point is looked up through <c><i>getInstanceProcAddr</i></c> and
/// stored in the file-local function pointer of the same name. The backend
/// calls through these pointers (for example when enumerating device
/// extensions or querying physical device properties), so this function has
/// to be called before any backend function that reaches them.
///
/// @param [in] instance                The VkInstance handed to every lookup.
/// @param [in] getInstanceProcAddr     The loader used to resolve the entry points.
///                                     When null, nothing is resolved and the
///                                     stored pointers are left unchanged.
FFX_API void ffxFsr2SetInstanceFunctions( VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr ) {
    // Without a loader there is nothing to resolve; calling through it would crash.
    if (getInstanceProcAddr == nullptr) {
        return;
    }

    ::EnumerateDeviceExtensionProperties = (PFN_vkEnumerateDeviceExtensionProperties) getInstanceProcAddr(instance, "vkEnumerateDeviceExtensionProperties");
    ::GetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties) getInstanceProcAddr(instance, "vkGetPhysicalDeviceMemoryProperties");
    ::GetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties) getInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties");
    ::GetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties2");
    ::GetPhysicalDeviceFeatures2 = (PFN_vkGetPhysicalDeviceFeatures2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceFeatures2");

    // The "2" queries are core only from Vulkan 1.1. If the core name did not
    // resolve, fall back to the VK_KHR_get_physical_device_properties2 alias,
    // which has the same signature.
    if (::GetPhysicalDeviceProperties2 == nullptr) {
        ::GetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties2KHR");
    }
    if (::GetPhysicalDeviceFeatures2 == nullptr) {
        ::GetPhysicalDeviceFeatures2 = (PFN_vkGetPhysicalDeviceFeatures2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceFeatures2KHR");
    }
}
FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice, uint32_t numExtensions)
{ {
uint32_t numExtensions = 0; if ( physicalDevice && numExtensions == 0) ::EnumerateDeviceExtensionProperties(physicalDevice, nullptr, &numExtensions, nullptr);
if (physicalDevice)
vkEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &numExtensions, nullptr);
return FFX_ALIGN_UP(sizeof(BackendContext_VK) + sizeof(VkExtensionProperties) * numExtensions, sizeof(uint64_t)); return FFX_ALIGN_UP(sizeof(BackendContext_VK) + sizeof(VkExtensionProperties) * numExtensions, sizeof(uint64_t));
} }
@ -185,7 +199,8 @@ FfxErrorCode ffxFsr2GetInterfaceVK(
void* scratchBuffer, void* scratchBuffer,
size_t scratchBufferSize, size_t scratchBufferSize,
VkPhysicalDevice physicalDevice, VkPhysicalDevice physicalDevice,
PFN_vkGetDeviceProcAddr getDeviceProcAddr) PFN_vkGetDeviceProcAddr getDeviceProcAddr
)
{ {
FFX_RETURN_ON_ERROR( FFX_RETURN_ON_ERROR(
outInterface, outInterface,
@ -282,6 +297,8 @@ VkFormat getVKFormatFromSurfaceFormat(FfxSurfaceFormat fmt)
return VK_FORMAT_R32G32B32A32_SFLOAT; return VK_FORMAT_R32G32B32A32_SFLOAT;
case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT): case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT):
return VK_FORMAT_R16G16B16A16_SFLOAT; return VK_FORMAT_R16G16B16A16_SFLOAT;
case(FFX_SURFACE_FORMAT_R16G16B16A16_UNORM):
return VK_FORMAT_R16G16B16A16_UNORM;
case(FFX_SURFACE_FORMAT_R32G32_FLOAT): case(FFX_SURFACE_FORMAT_R32G32_FLOAT):
return VK_FORMAT_R32G32_SFLOAT; return VK_FORMAT_R32G32_SFLOAT;
case(FFX_SURFACE_FORMAT_R32_UINT): case(FFX_SURFACE_FORMAT_R32_UINT):
@ -441,7 +458,7 @@ uint32_t findMemoryTypeIndex(VkPhysicalDevice physicalDevice, VkMemoryRequiremen
FFX_ASSERT(NULL != physicalDevice); FFX_ASSERT(NULL != physicalDevice);
VkPhysicalDeviceMemoryProperties memProperties; VkPhysicalDeviceMemoryProperties memProperties;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties); ::GetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);
uint32_t bestCandidate = UINT32_MAX; uint32_t bestCandidate = UINT32_MAX;
@ -714,7 +731,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi
VkPhysicalDeviceProperties2 deviceProperties2 = {}; VkPhysicalDeviceProperties2 deviceProperties2 = {};
deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
deviceProperties2.pNext = &subgroupSizeControlProperties; deviceProperties2.pNext = &subgroupSizeControlProperties;
vkGetPhysicalDeviceProperties2(context->physicalDevice, &deviceProperties2); ::GetPhysicalDeviceProperties2(context->physicalDevice, &deviceProperties2);
deviceCapabilities->waveLaneCountMin = subgroupSizeControlProperties.minSubgroupSize; deviceCapabilities->waveLaneCountMin = subgroupSizeControlProperties.minSubgroupSize;
deviceCapabilities->waveLaneCountMax = subgroupSizeControlProperties.maxSubgroupSize; deviceCapabilities->waveLaneCountMax = subgroupSizeControlProperties.maxSubgroupSize;
@ -729,7 +746,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
physicalDeviceFeatures2.pNext = &shaderFloat18Int8Features; physicalDeviceFeatures2.pNext = &shaderFloat18Int8Features;
vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2); ::GetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);
deviceCapabilities->fp16Supported = (bool)shaderFloat18Int8Features.shaderFloat16; deviceCapabilities->fp16Supported = (bool)shaderFloat18Int8Features.shaderFloat16;
} }
@ -743,7 +760,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
physicalDeviceFeatures2.pNext = &accelerationStructureFeatures; physicalDeviceFeatures2.pNext = &accelerationStructureFeatures;
vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2); ::GetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);
deviceCapabilities->raytracingSupported = (bool)accelerationStructureFeatures.accelerationStructure; deviceCapabilities->raytracingSupported = (bool)accelerationStructureFeatures.accelerationStructure;
} }
@ -781,8 +798,8 @@ FfxErrorCode CreateBackendContextVK(FfxFsr2Interface* backendInterface, FfxDevic
// enumerate all the device extensions // enumerate all the device extensions
backendContext->numDeviceExtensions = 0; backendContext->numDeviceExtensions = 0;
vkEnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, nullptr); ::EnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, nullptr);
vkEnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, backendContext->extensionProperties); ::EnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, backendContext->extensionProperties);
// create descriptor pool // create descriptor pool
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {}; VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {};
@ -1265,7 +1282,7 @@ FfxErrorCode CreatePipelineVK(FfxFsr2Interface* backendInterface, FfxFsr2Pass pa
if (pass == FFX_FSR2_PASS_ACCUMULATE || pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) if (pass == FFX_FSR2_PASS_ACCUMULATE || pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN)
{ {
VkPhysicalDeviceProperties physicalDeviceProperties = {}; VkPhysicalDeviceProperties physicalDeviceProperties = {};
vkGetPhysicalDeviceProperties(backendContext->physicalDevice, &physicalDeviceProperties); ::GetPhysicalDeviceProperties(backendContext->physicalDevice, &physicalDeviceProperties);
// Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
if (physicalDeviceProperties.vendorID == 0x10DE) if (physicalDeviceProperties.vendorID == 0x10DE)

View File

@ -34,7 +34,8 @@ extern "C" {
/// ///
/// @returns /// @returns
/// The size (in bytes) of the required scratch memory buffer for the VK backend. /// The size (in bytes) of the required scratch memory buffer for the VK backend.
FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice); FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice, uint32_t = 0);
/// Resolve the instance-level Vulkan entry points used by the VK backend.
///
/// @param [in] instance                The VkInstance passed to <c><i>getInstanceProcAddr</i></c> for each lookup.
/// @param [in] getInstanceProcAddr     The function used to look up the Vulkan entry points by name.
FFX_API void ffxFsr2SetInstanceFunctions( VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr );
/// Populate an interface with pointers for the VK backend. /// Populate an interface with pointers for the VK backend.
/// ///