my fixes
This commit is contained in:
parent
ea97a113b0
commit
81dd44cb88
|
@ -24,6 +24,8 @@ cmake_minimum_required(VERSION 3.12.1)
|
||||||
option (GFX_API_DX12 "Build with DX12" ON)
|
option (GFX_API_DX12 "Build with DX12" ON)
|
||||||
option (GFX_API_VK "Build with Vulkan" ON)
|
option (GFX_API_VK "Build with Vulkan" ON)
|
||||||
|
|
||||||
|
add_definitions(-DFFX_GCC)
|
||||||
|
|
||||||
if(NOT DEFINED GFX_API)
|
if(NOT DEFINED GFX_API)
|
||||||
project (FSR2_Sample)
|
project (FSR2_Sample)
|
||||||
else()
|
else()
|
||||||
|
@ -56,11 +58,6 @@ else()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Check MSVC toolset version, Visual Studio 2019 required
|
|
||||||
if(MSVC_TOOLSET_VERSION VERSION_LESS 142)
|
|
||||||
message(FATAL_ERROR "Cannot find MSVC toolset version 142 or greater. Please make sure Visual Studio 2019 or newer installed")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# output exe to bin directory
|
# output exe to bin directory
|
||||||
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin)
|
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin)
|
||||||
|
|
||||||
|
@ -69,7 +66,6 @@ foreach( OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES} )
|
||||||
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${CMAKE_HOME_DIRECTORY}/bin )
|
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${CMAKE_HOME_DIRECTORY}/bin )
|
||||||
endforeach( OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES )
|
endforeach( OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES )
|
||||||
|
|
||||||
add_compile_options(/MP)
|
|
||||||
add_compile_definitions($<$<CONFIG:RelWithDebInfo>:USE_PIX>)
|
add_compile_definitions($<$<CONFIG:RelWithDebInfo>:USE_PIX>)
|
||||||
|
|
||||||
# override build options in ffx-fsr2-api cmake
|
# override build options in ffx-fsr2-api cmake
|
||||||
|
|
|
@ -1,3 +1,11 @@
|
||||||
|
This fork cleans up some *questionable* choices with FSR2 by:
|
||||||
|
* adding support for compiling under (MSYS2's) GCC
|
||||||
|
* making shader permutation generation work (it relies on being run under CMD.exe, not bash)
|
||||||
|
* fixing segfaults from weird linkage against Vulkan by grabbing function pointers
|
||||||
|
* some other things that I no longer remember (I did this a year ago)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
# FidelityFX Super Resolution 2.1 (FSR 2.1)
|
# FidelityFX Super Resolution 2.1 (FSR 2.1)
|
||||||
|
|
||||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
|
|
@ -47,12 +47,7 @@ if "%VULKAN_SDK%"=="" (
|
||||||
)
|
)
|
||||||
|
|
||||||
:: Call CMake
|
:: Call CMake
|
||||||
mkdir DX12
|
|
||||||
cd DX12
|
|
||||||
cmake -A x64 ..\.. -DGFX_API=DX12 -DFSR2_BUILD_AS_DLL=1
|
|
||||||
cd ..
|
|
||||||
|
|
||||||
mkdir VK
|
mkdir VK
|
||||||
cd VK
|
cd VK
|
||||||
cmake -A x64 ..\.. -DGFX_API=VK -DFSR2_BUILD_AS_DLL=1
|
cmake -G "Unix Makefiles" ..\.. -DGFX_API=VK -DFSR2_BUILD_AS_DLL=1
|
||||||
cd ..
|
cd ..
|
||||||
|
|
|
@ -2,8 +2,6 @@
|
||||||
# enables multithreading compilation
|
# enables multithreading compilation
|
||||||
#
|
#
|
||||||
|
|
||||||
add_compile_options(/MP)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# includes cauldron's helper cmakes
|
# includes cauldron's helper cmakes
|
||||||
#
|
#
|
||||||
|
|
|
@ -507,8 +507,8 @@ void FSR2Sample::OnUpdate()
|
||||||
if (m_UIState.m_bBaloonAttachToCamera) {
|
if (m_UIState.m_bBaloonAttachToCamera) {
|
||||||
Vectormath::Vector3 eye = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * m_UIState.baloon_offset_z;
|
Vectormath::Vector3 eye = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * m_UIState.baloon_offset_z;
|
||||||
Vectormath::Vector3 look = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * -2.0f;
|
Vectormath::Vector3 look = m_UIState.camera.GetPosition().getXYZ() + m_UIState.camera.GetDirection().getXYZ() * -2.0f;
|
||||||
m_UIState.baloon_pos = m_UIState.baloon_pos + (eye - m_UIState.baloon_pos) * (1.0f - std::expf(50.0f * -(float)m_deltaTime / 1000.0f));
|
m_UIState.baloon_pos = m_UIState.baloon_pos + (eye - m_UIState.baloon_pos) * (1.0f - std::exp(50.0f * -(float)m_deltaTime / 1000.0f));
|
||||||
m_UIState.baloon_tip_pos = m_UIState.baloon_tip_pos + (look - m_UIState.baloon_tip_pos) * (1.0f - std::expf(50.0f * -(float)m_deltaTime / 1000.0f));
|
m_UIState.baloon_tip_pos = m_UIState.baloon_tip_pos + (look - m_UIState.baloon_tip_pos) * (1.0f - std::exp(50.0f * -(float)m_deltaTime / 1000.0f));
|
||||||
m_UIState.m_CurBaloonTransform = Vectormath::inverse(Vectormath::Matrix4::lookAt(Vectormath::Point3(m_UIState.baloon_pos), Vectormath::Point3(m_UIState.baloon_tip_pos), Vectormath::Vector3(0.0f, 1.0f, 0.0f))) *
|
m_UIState.m_CurBaloonTransform = Vectormath::inverse(Vectormath::Matrix4::lookAt(Vectormath::Point3(m_UIState.baloon_pos), Vectormath::Point3(m_UIState.baloon_tip_pos), Vectormath::Vector3(0.0f, 1.0f, 0.0f))) *
|
||||||
Vectormath::Matrix4::translation(Vectormath::Vector3(m_UIState.baloon_offset_x, m_UIState.baloon_offset_y, 0.0f)) * //
|
Vectormath::Matrix4::translation(Vectormath::Vector3(m_UIState.baloon_offset_x, m_UIState.baloon_offset_y, 0.0f)) * //
|
||||||
Vectormath::Matrix4::rotation(-3.141592f / 2.0f, Vectormath::Vector3(1.0f, 0.0f, 0.0f)) * //
|
Vectormath::Matrix4::rotation(-3.141592f / 2.0f, Vectormath::Vector3(1.0f, 0.0f, 0.0f)) * //
|
||||||
|
|
|
@ -157,13 +157,13 @@ void GPUFrameRateLimiter::Draw(VkCommandBuffer cmdBuf, DynamicBufferRing* pDynam
|
||||||
m_frameTimeHistory[m_frameTimeHistoryCount % _countof(m_frameTimeHistory)] = lastFrameTimeMicrosecs;
|
m_frameTimeHistory[m_frameTimeHistoryCount % _countof(m_frameTimeHistory)] = lastFrameTimeMicrosecs;
|
||||||
m_frameTimeHistoryCount++;
|
m_frameTimeHistoryCount++;
|
||||||
|
|
||||||
double recentFrameTimeAvg = double(m_frameTimeHistorySum) / min(m_frameTimeHistoryCount, _countof(m_frameTimeHistory));
|
double recentFrameTimeAvg = double(m_frameTimeHistorySum) / std::min(m_frameTimeHistoryCount, _countof(m_frameTimeHistory));
|
||||||
|
|
||||||
double clampedTargetFrameTimeMs = max(min(double(targetFrameTimeMicrosecs), MaxTargetFrameTimeUs), MinTargetFrameTimeUs);
|
double clampedTargetFrameTimeMs = std::max(std::min(double(targetFrameTimeMicrosecs), MaxTargetFrameTimeUs), MinTargetFrameTimeUs);
|
||||||
double deltaRatio = (recentFrameTimeAvg - clampedTargetFrameTimeMs) / clampedTargetFrameTimeMs;
|
double deltaRatio = (recentFrameTimeAvg - clampedTargetFrameTimeMs) / clampedTargetFrameTimeMs;
|
||||||
|
|
||||||
m_overhead -= m_overhead * deltaRatio * DampenFactor;
|
m_overhead -= m_overhead * deltaRatio * DampenFactor;
|
||||||
m_overhead = min(max(1.0, m_overhead), 1000000.0);
|
m_overhead = std::min(std::max(1.0, m_overhead), 1000000.0);
|
||||||
|
|
||||||
uint32_t numLoops = uint32_t(m_overhead);
|
uint32_t numLoops = uint32_t(m_overhead);
|
||||||
|
|
||||||
|
|
|
@ -86,7 +86,7 @@ public:
|
||||||
virtual void BuildDevUI(UIState* pState) {}
|
virtual void BuildDevUI(UIState* pState) {}
|
||||||
virtual void PreDraw(UIState* pState);
|
virtual void PreDraw(UIState* pState);
|
||||||
virtual void GenerateReactiveMask(VkCommandBuffer pCommandList, const FfxUpscaleSetup& cameraSetup, UIState* pState);
|
virtual void GenerateReactiveMask(VkCommandBuffer pCommandList, const FfxUpscaleSetup& cameraSetup, UIState* pState);
|
||||||
virtual void Draw(VkCommandBuffer commandBuffer, const FfxUpscaleSetup& cameraSetup, UIState* pState) = NULL;
|
virtual void Draw(VkCommandBuffer commandBuffer, const FfxUpscaleSetup& cameraSetup, UIState* pState) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Device* m_pDevice;
|
Device* m_pDevice;
|
||||||
|
|
|
@ -37,16 +37,8 @@ if(CMAKE_GENERATOR STREQUAL "Visual Studio 16 2019")
|
||||||
set(FSR2_VS_VERSION 2019)
|
set(FSR2_VS_VERSION 2019)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR CMAKE_EXE_LINKER_FLAGS STREQUAL "/machine:x64")
|
|
||||||
set(FSR2_PLATFORM_NAME x64)
|
|
||||||
elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32" OR CMAKE_EXE_LINKER_FLAGS STREQUAL "/machine:X86")
|
|
||||||
set(FSR2_PLATFORM_NAME x86)
|
|
||||||
else()
|
|
||||||
message(FATAL_ERROR "Unsupported target platform - only supporting x64 and Win32 currently")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Embed PDBs in the debug versions of the libs
|
# Embed PDBs in the debug versions of the libs
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g")
|
||||||
|
|
||||||
# Write both debug and release versions of the static libs to the /lib folder as they are uniquely named
|
# Write both debug and release versions of the static libs to the /lib folder as they are uniquely named
|
||||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_HOME_DIRECTORY}/bin/ffx_fsr2_api/)
|
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_HOME_DIRECTORY}/bin/ffx_fsr2_api/)
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
#include "shaders/ffx_fsr1.h"
|
#include "shaders/ffx_fsr1.h"
|
||||||
#include "shaders/ffx_spd.h"
|
#include "shaders/ffx_spd.h"
|
||||||
#include "shaders/ffx_fsr2_callbacks_hlsl.h"
|
#include "shaders/ffx_fsr2_callbacks_hlsl.h"
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#include "ffx_fsr2_maximum_bias.h"
|
#include "ffx_fsr2_maximum_bias.h"
|
||||||
|
|
||||||
|
@ -161,7 +162,7 @@ FfxConstantBuffer globalFsr2ConstantBuffers[3] = {
|
||||||
// Lanczos
|
// Lanczos
|
||||||
static float lanczos2(float value)
|
static float lanczos2(float value)
|
||||||
{
|
{
|
||||||
return abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
|
return std::abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate halton number for index and base.
|
// Calculate halton number for index and base.
|
||||||
|
@ -183,7 +184,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
|
||||||
{
|
{
|
||||||
for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex)
|
for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex)
|
||||||
{
|
{
|
||||||
int32_t mapIndex = 0;
|
size_t mapIndex = 0;
|
||||||
for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex)
|
for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex)
|
||||||
{
|
{
|
||||||
if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvResourceBindings[srvIndex].name))
|
if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvResourceBindings[srvIndex].name))
|
||||||
|
@ -197,7 +198,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
|
||||||
|
|
||||||
for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavCount; ++uavIndex)
|
for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavCount; ++uavIndex)
|
||||||
{
|
{
|
||||||
int32_t mapIndex = 0;
|
size_t mapIndex = 0;
|
||||||
for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex)
|
for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex)
|
||||||
{
|
{
|
||||||
if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavResourceBindings[uavIndex].name))
|
if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavResourceBindings[uavIndex].name))
|
||||||
|
@ -211,7 +212,7 @@ static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
|
||||||
|
|
||||||
for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex)
|
for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex)
|
||||||
{
|
{
|
||||||
int32_t mapIndex = 0;
|
size_t mapIndex = 0;
|
||||||
for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex)
|
for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex)
|
||||||
{
|
{
|
||||||
if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->cbResourceBindings[cbIndex].name))
|
if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->cbResourceBindings[cbIndex].name))
|
||||||
|
@ -331,7 +332,7 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con
|
||||||
const Fsr2ResourceDescription internalSurfaceDesc[] = {
|
const Fsr2ResourceDescription internalSurfaceDesc[] = {
|
||||||
|
|
||||||
{ FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV,
|
{ FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV,
|
||||||
FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
|
FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
|
||||||
|
|
||||||
{ FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV,
|
{ FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV,
|
||||||
FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
|
FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
|
||||||
|
@ -475,7 +476,7 @@ static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context)
|
||||||
return FFX_OK;
|
return FFX_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
|
static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
|
||||||
{
|
{
|
||||||
FfxComputeJobDescription jobDescriptor = {};
|
FfxComputeJobDescription jobDescriptor = {};
|
||||||
|
|
||||||
|
@ -562,8 +563,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D
|
||||||
|
|
||||||
// Prepare per frame descriptor tables
|
// Prepare per frame descriptor tables
|
||||||
const bool isOddFrame = !!(context->resourceFrameIndex & 1);
|
const bool isOddFrame = !!(context->resourceFrameIndex & 1);
|
||||||
const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0;
|
//const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0;
|
||||||
const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex;
|
//const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex;
|
||||||
const uint32_t lockStatusSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1;
|
const uint32_t lockStatusSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1;
|
||||||
const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2;
|
const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2;
|
||||||
const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1;
|
const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1;
|
||||||
|
|
|
@ -23,13 +23,26 @@
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#if defined (FFX_GCC)
|
// maister: Force internal linkage.
|
||||||
/// FidelityFX exported functions
|
|
||||||
#define FFX_API
|
#define FFX_API
|
||||||
#else
|
|
||||||
/// FidelityFX exported functions
|
// Workarounds.
|
||||||
#define FFX_API __declspec(dllexport)
|
#ifndef _WIN32
|
||||||
#endif // #if defined (FFX_GCC)
|
#define _countof(array) (sizeof(array) / sizeof((array)[0]))
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
template <size_t N>
|
||||||
|
static inline void strcpy_s(char (&buf)[N], const char *str)
|
||||||
|
{
|
||||||
|
snprintf(buf, N, "%s", str);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef _MSC_VER
|
||||||
|
#define FFX_STATIC static inline
|
||||||
|
#endif
|
||||||
|
////
|
||||||
|
|
||||||
/// Maximum supported number of simultaneously bound SRVs.
|
/// Maximum supported number of simultaneously bound SRVs.
|
||||||
#define FFX_MAX_NUM_SRVS 16
|
#define FFX_MAX_NUM_SRVS 16
|
||||||
|
@ -57,6 +70,7 @@ typedef enum FfxSurfaceFormat {
|
||||||
FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format
|
FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format
|
||||||
FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format
|
FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format
|
||||||
FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format
|
FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format
|
||||||
|
FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, ///< 16 bit per channel, 4 channel unsigned normalized format
|
||||||
FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format
|
FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format
|
||||||
FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel float format
|
FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel float format
|
||||||
FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel float format
|
FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel float format
|
||||||
|
|
|
@ -182,6 +182,7 @@ void Accumulate(FfxInt32x2 iPxHrPos)
|
||||||
const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fLrUvJittered);
|
const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fLrUvJittered);
|
||||||
const FfxFloat32 fReactiveMax = fDilatedReactiveMasks.x;
|
const FfxFloat32 fReactiveMax = fDilatedReactiveMasks.x;
|
||||||
const FfxFloat32 fAccumulationMask = fDilatedReactiveMasks.y;
|
const FfxFloat32 fAccumulationMask = fDilatedReactiveMasks.y;
|
||||||
|
const FfxBoolean bIsResetFrame = (0 == FrameIndex());
|
||||||
|
|
||||||
FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0, 0, 0, 0);
|
FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0, 0, 0, 0);
|
||||||
FfxFloat32x3 fLockStatus;
|
FfxFloat32x3 fLockStatus;
|
||||||
|
@ -191,7 +192,7 @@ void Accumulate(FfxInt32x2 iPxHrPos)
|
||||||
FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0);
|
FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0);
|
||||||
ComputeReprojectedUVs(iPxHrPos, fMotionVector, fReprojectedHrUv, bIsExistingSample);
|
ComputeReprojectedUVs(iPxHrPos, fMotionVector, fReprojectedHrUv, bIsExistingSample);
|
||||||
|
|
||||||
if (bIsExistingSample) {
|
if (bIsExistingSample && !bIsResetFrame) {
|
||||||
ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight);
|
ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight);
|
||||||
ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus);
|
ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus);
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,7 @@
|
||||||
|
|
||||||
#extension GL_GOOGLE_include_directive : require
|
#extension GL_GOOGLE_include_directive : require
|
||||||
#extension GL_EXT_samplerless_texture_functions : require
|
#extension GL_EXT_samplerless_texture_functions : require
|
||||||
|
#extension GL_EXT_shader_image_load_formatted : require
|
||||||
|
|
||||||
#define FSR2_BIND_SRV_EXPOSURE 0
|
#define FSR2_BIND_SRV_EXPOSURE 0
|
||||||
#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1
|
#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1
|
||||||
|
|
|
@ -206,37 +206,37 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth;
|
layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
|
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg32f) uniform image2D rw_dilated_motion_vectors;
|
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS /* app controlled format */) writeonly uniform image2D rw_dilated_motion_vectors;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_DILATED_DEPTH
|
#if defined FSR2_BIND_UAV_DILATED_DEPTH
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r32f) uniform image2D rw_dilatedDepth;
|
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) uniform image2D rw_dilatedDepth;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
|
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba32f) uniform image2D rw_internal_upscaled_color;
|
layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) uniform image2D rw_internal_upscaled_color;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_LOCK_STATUS
|
#if defined FSR2_BIND_UAV_LOCK_STATUS
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, r11f_g11f_b10f) uniform image2D rw_lock_status;
|
layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, r11f_g11f_b10f) uniform image2D rw_lock_status;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_DEPTH_CLIP
|
#if defined FSR2_BIND_UAV_DEPTH_CLIP
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r32f) uniform image2D rw_depth_clip;
|
layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r16f) uniform image2D rw_depth_clip;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
|
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba32f) uniform image2D rw_prepared_input_color;
|
layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16f) uniform image2D rw_prepared_input_color;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_LUMA_HISTORY
|
#if defined FSR2_BIND_UAV_LUMA_HISTORY
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba32f) uniform image2D rw_luma_history;
|
layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
|
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT, rgba32f) uniform image2D rw_upscaled_output;
|
layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
|
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r32f) coherent uniform image2D rw_img_mip_shading_change;
|
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
|
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r32f) coherent uniform image2D rw_img_mip_5;
|
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
|
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg32f) uniform image2D rw_dilated_reactive_masks;
|
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, r8) uniform image2D rw_dilated_reactive_masks;
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_EXPOSURE
|
#if defined FSR2_BIND_UAV_EXPOSURE
|
||||||
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure;
|
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure;
|
||||||
|
@ -592,6 +592,9 @@ void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxF
|
||||||
{
|
{
|
||||||
#if defined(FSR2_BIND_UAV_DILATED_DEPTH)
|
#if defined(FSR2_BIND_UAV_DILATED_DEPTH)
|
||||||
//FfxUInt32 uDepth = f32tof16(fDepth);
|
//FfxUInt32 uDepth = f32tof16(fDepth);
|
||||||
|
#if !FFX_FSR2_OPTION_INVERTED_DEPTH
|
||||||
|
fDepth = 1.0 - fDepth; // maister: Preserve precision as well as we can in FP16.
|
||||||
|
#endif
|
||||||
imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
|
imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -625,7 +628,11 @@ FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
|
||||||
FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
|
FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
|
||||||
{
|
{
|
||||||
#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
|
#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
|
||||||
return texelFetch(r_dilatedDepth, iPxInput, 0).r;
|
FfxFloat32 d = texelFetch(r_dilatedDepth, iPxInput, 0).r;
|
||||||
|
#if !FFX_FSR2_OPTION_INVERTED_DEPTH
|
||||||
|
d = 1.0 - d; // maister: Reconstruct from FP16.
|
||||||
|
#endif
|
||||||
|
return d;
|
||||||
#else
|
#else
|
||||||
return 0.f;
|
return 0.f;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -246,7 +246,7 @@ SamplerState s_LinearClamp : register(s1);
|
||||||
Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
|
Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
|
||||||
Texture2D<FfxFloat32x3> r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
|
Texture2D<FfxFloat32x3> r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
|
||||||
Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
|
Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
|
||||||
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
|
Texture2D<unorm FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
|
||||||
Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
|
Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
|
||||||
Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
|
Texture2D<FfxFloat32x4> r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
|
||||||
Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
|
Texture2D<FfxFloat32> r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
|
||||||
|
@ -281,7 +281,7 @@ SamplerState s_LinearClamp : register(s1);
|
||||||
RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
|
RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
|
||||||
RWTexture2D<FfxFloat32x3> rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
|
RWTexture2D<FfxFloat32x3> rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
|
||||||
RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
|
RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP);
|
||||||
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
|
RWTexture2D<unorm FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
|
||||||
RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
|
RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
|
||||||
RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
|
RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
|
||||||
|
|
||||||
|
@ -330,7 +330,7 @@ SamplerState s_LinearClamp : register(s1);
|
||||||
Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP);
|
Texture2D<FfxFloat32> r_depth_clip : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP);
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
|
#if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
|
||||||
Texture2D<FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
|
Texture2D<unorm FfxFloat32x4> r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_SRV_LUMA_HISTORY
|
#if defined FSR2_BIND_SRV_LUMA_HISTORY
|
||||||
Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
|
Texture2D<unorm FfxFloat32x4> r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
|
||||||
|
@ -371,7 +371,7 @@ SamplerState s_LinearClamp : register(s1);
|
||||||
RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP);
|
RWTexture2D<FfxFloat32> rw_depth_clip : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP);
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
|
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
|
||||||
RWTexture2D<FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
|
RWTexture2D<unorm FfxFloat32x4> rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
|
||||||
#endif
|
#endif
|
||||||
#if defined FSR2_BIND_UAV_LUMA_HISTORY
|
#if defined FSR2_BIND_UAV_LUMA_HISTORY
|
||||||
RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
|
RWTexture2D<unorm FfxFloat32x4> rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
|
||||||
|
|
|
@ -35,7 +35,11 @@ FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousD
|
||||||
FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
|
FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
|
||||||
|
|
||||||
// WARNING: Ksep only works with reversed-z with infinite projection.
|
// WARNING: Ksep only works with reversed-z with infinite projection.
|
||||||
|
#if !FFX_FSR2_OPTION_INVERTED_DEPTH
|
||||||
|
const FfxFloat32 Ksep = 4.0f * 1.37e-05f; // maister: Arbitrary hack to make normal depth work.
|
||||||
|
#else
|
||||||
const FfxFloat32 Ksep = 1.37e-05f;
|
const FfxFloat32 Ksep = 1.37e-05f;
|
||||||
|
#endif
|
||||||
FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth;
|
FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth;
|
||||||
FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
|
FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
|
||||||
|
|
||||||
|
|
|
@ -568,6 +568,10 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1
|
||||||
{ \
|
{ \
|
||||||
FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \
|
FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \
|
||||||
FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
|
FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
|
||||||
|
/* Clamp base coords */ \
|
||||||
|
iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \
|
||||||
|
iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \
|
||||||
|
/* */ \
|
||||||
FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
|
FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
|
||||||
FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
|
FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
|
||||||
return fColorXY; \
|
return fColorXY; \
|
||||||
|
@ -578,6 +582,10 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1
|
||||||
{ \
|
{ \
|
||||||
FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \
|
FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \
|
||||||
FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
|
FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
|
||||||
|
/* Clamp base coords */ \
|
||||||
|
iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \
|
||||||
|
iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \
|
||||||
|
/* */ \
|
||||||
FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \
|
FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \
|
||||||
FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
|
FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
|
||||||
return fColorXY; \
|
return fColorXY; \
|
||||||
|
|
|
@ -23,6 +23,8 @@ if(NOT ${FFX_FSR2_API_VK})
|
||||||
return()
|
return()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
add_definitions(-DFFX_GCC)
|
||||||
|
|
||||||
set(FFX_SC_VK_BASE_ARGS
|
set(FFX_SC_VK_BASE_ARGS
|
||||||
-compiler=glslang -e main --target-env vulkan1.1 -S comp -Os -DFFX_GLSL=1)
|
-compiler=glslang -e main --target-env vulkan1.1 -S comp -Os -DFFX_GLSL=1)
|
||||||
|
|
||||||
|
@ -86,7 +88,7 @@ foreach(PASS_SHADER ${PASS_SHADERS})
|
||||||
if(USE_DEPFILE)
|
if(USE_DEPFILE)
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${PERMUTATION_HEADER}
|
OUTPUT ${PERMUTATION_HEADER}
|
||||||
COMMAND ${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}
|
COMMAND cmd.exe /C '${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}'
|
||||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||||
DEPENDS ${PASS_SHADER}
|
DEPENDS ${PASS_SHADER}
|
||||||
DEPFILE ${PERMUTATION_HEADER}.d
|
DEPFILE ${PERMUTATION_HEADER}.d
|
||||||
|
@ -95,7 +97,7 @@ foreach(PASS_SHADER ${PASS_SHADERS})
|
||||||
else()
|
else()
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${PERMUTATION_HEADER}
|
OUTPUT ${PERMUTATION_HEADER}
|
||||||
COMMAND ${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}
|
COMMAND cmd.exe /C '${FFX_SC_EXECUTABLE} ${FFX_SC_ARGS} -name=${PASS_SHADER_FILENAME} -I${CMAKE_CURRENT_SOURCE_DIR}/shaders -output=${PASS_SHADER_OUTPUT_PATH} ${PASS_SHADER}'
|
||||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||||
DEPENDS ${PASS_SHADER}
|
DEPENDS ${PASS_SHADER}
|
||||||
)
|
)
|
||||||
|
|
|
@ -27,6 +27,8 @@
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <codecvt>
|
#include <codecvt>
|
||||||
|
#include <locale>
|
||||||
|
|
||||||
|
|
||||||
// prototypes for functions in the interface
|
// prototypes for functions in the interface
|
||||||
FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDeviceCapabilities* deviceCapabilities, FfxDevice device);
|
FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDeviceCapabilities* deviceCapabilities, FfxDevice device);
|
||||||
|
@ -170,13 +172,25 @@ typedef struct BackendContext_VK {
|
||||||
|
|
||||||
} BackendContext_VK;
|
} BackendContext_VK;
|
||||||
|
|
||||||
FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice)
|
namespace {
|
||||||
|
PFN_vkEnumerateDeviceExtensionProperties EnumerateDeviceExtensionProperties;
|
||||||
|
PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties;
|
||||||
|
PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties;
|
||||||
|
PFN_vkGetPhysicalDeviceProperties2 GetPhysicalDeviceProperties2;
|
||||||
|
PFN_vkGetPhysicalDeviceFeatures2 GetPhysicalDeviceFeatures2;
|
||||||
|
}
|
||||||
|
|
||||||
|
FFX_API void ffxFsr2SetInstanceFunctions( VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr ) {
|
||||||
|
::EnumerateDeviceExtensionProperties = (PFN_vkEnumerateDeviceExtensionProperties) getInstanceProcAddr(instance, "vkEnumerateDeviceExtensionProperties");
|
||||||
|
::GetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties) getInstanceProcAddr(instance, "vkGetPhysicalDeviceMemoryProperties");
|
||||||
|
::GetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties) getInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties");
|
||||||
|
::GetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceProperties2");
|
||||||
|
::GetPhysicalDeviceFeatures2 = (PFN_vkGetPhysicalDeviceFeatures2) getInstanceProcAddr(instance, "vkGetPhysicalDeviceFeatures2");
|
||||||
|
}
|
||||||
|
|
||||||
|
FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice, uint32_t numExtensions)
|
||||||
{
|
{
|
||||||
uint32_t numExtensions = 0;
|
if ( physicalDevice && numExtensions == 0) ::EnumerateDeviceExtensionProperties(physicalDevice, nullptr, &numExtensions, nullptr);
|
||||||
|
|
||||||
if (physicalDevice)
|
|
||||||
vkEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &numExtensions, nullptr);
|
|
||||||
|
|
||||||
return FFX_ALIGN_UP(sizeof(BackendContext_VK) + sizeof(VkExtensionProperties) * numExtensions, sizeof(uint64_t));
|
return FFX_ALIGN_UP(sizeof(BackendContext_VK) + sizeof(VkExtensionProperties) * numExtensions, sizeof(uint64_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,7 +199,8 @@ FfxErrorCode ffxFsr2GetInterfaceVK(
|
||||||
void* scratchBuffer,
|
void* scratchBuffer,
|
||||||
size_t scratchBufferSize,
|
size_t scratchBufferSize,
|
||||||
VkPhysicalDevice physicalDevice,
|
VkPhysicalDevice physicalDevice,
|
||||||
PFN_vkGetDeviceProcAddr getDeviceProcAddr)
|
PFN_vkGetDeviceProcAddr getDeviceProcAddr
|
||||||
|
)
|
||||||
{
|
{
|
||||||
FFX_RETURN_ON_ERROR(
|
FFX_RETURN_ON_ERROR(
|
||||||
outInterface,
|
outInterface,
|
||||||
|
@ -282,6 +297,8 @@ VkFormat getVKFormatFromSurfaceFormat(FfxSurfaceFormat fmt)
|
||||||
return VK_FORMAT_R32G32B32A32_SFLOAT;
|
return VK_FORMAT_R32G32B32A32_SFLOAT;
|
||||||
case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT):
|
case(FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT):
|
||||||
return VK_FORMAT_R16G16B16A16_SFLOAT;
|
return VK_FORMAT_R16G16B16A16_SFLOAT;
|
||||||
|
case(FFX_SURFACE_FORMAT_R16G16B16A16_UNORM):
|
||||||
|
return VK_FORMAT_R16G16B16A16_UNORM;
|
||||||
case(FFX_SURFACE_FORMAT_R32G32_FLOAT):
|
case(FFX_SURFACE_FORMAT_R32G32_FLOAT):
|
||||||
return VK_FORMAT_R32G32_SFLOAT;
|
return VK_FORMAT_R32G32_SFLOAT;
|
||||||
case(FFX_SURFACE_FORMAT_R32_UINT):
|
case(FFX_SURFACE_FORMAT_R32_UINT):
|
||||||
|
@ -441,7 +458,7 @@ uint32_t findMemoryTypeIndex(VkPhysicalDevice physicalDevice, VkMemoryRequiremen
|
||||||
FFX_ASSERT(NULL != physicalDevice);
|
FFX_ASSERT(NULL != physicalDevice);
|
||||||
|
|
||||||
VkPhysicalDeviceMemoryProperties memProperties;
|
VkPhysicalDeviceMemoryProperties memProperties;
|
||||||
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);
|
::GetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);
|
||||||
|
|
||||||
uint32_t bestCandidate = UINT32_MAX;
|
uint32_t bestCandidate = UINT32_MAX;
|
||||||
|
|
||||||
|
@ -714,7 +731,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi
|
||||||
VkPhysicalDeviceProperties2 deviceProperties2 = {};
|
VkPhysicalDeviceProperties2 deviceProperties2 = {};
|
||||||
deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
||||||
deviceProperties2.pNext = &subgroupSizeControlProperties;
|
deviceProperties2.pNext = &subgroupSizeControlProperties;
|
||||||
vkGetPhysicalDeviceProperties2(context->physicalDevice, &deviceProperties2);
|
::GetPhysicalDeviceProperties2(context->physicalDevice, &deviceProperties2);
|
||||||
|
|
||||||
deviceCapabilities->waveLaneCountMin = subgroupSizeControlProperties.minSubgroupSize;
|
deviceCapabilities->waveLaneCountMin = subgroupSizeControlProperties.minSubgroupSize;
|
||||||
deviceCapabilities->waveLaneCountMax = subgroupSizeControlProperties.maxSubgroupSize;
|
deviceCapabilities->waveLaneCountMax = subgroupSizeControlProperties.maxSubgroupSize;
|
||||||
|
@ -729,7 +746,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi
|
||||||
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
||||||
physicalDeviceFeatures2.pNext = &shaderFloat18Int8Features;
|
physicalDeviceFeatures2.pNext = &shaderFloat18Int8Features;
|
||||||
|
|
||||||
vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);
|
::GetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);
|
||||||
|
|
||||||
deviceCapabilities->fp16Supported = (bool)shaderFloat18Int8Features.shaderFloat16;
|
deviceCapabilities->fp16Supported = (bool)shaderFloat18Int8Features.shaderFloat16;
|
||||||
}
|
}
|
||||||
|
@ -743,7 +760,7 @@ FfxErrorCode GetDeviceCapabilitiesVK(FfxFsr2Interface* backendInterface, FfxDevi
|
||||||
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
||||||
physicalDeviceFeatures2.pNext = &accelerationStructureFeatures;
|
physicalDeviceFeatures2.pNext = &accelerationStructureFeatures;
|
||||||
|
|
||||||
vkGetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);
|
::GetPhysicalDeviceFeatures2(context->physicalDevice, &physicalDeviceFeatures2);
|
||||||
|
|
||||||
deviceCapabilities->raytracingSupported = (bool)accelerationStructureFeatures.accelerationStructure;
|
deviceCapabilities->raytracingSupported = (bool)accelerationStructureFeatures.accelerationStructure;
|
||||||
}
|
}
|
||||||
|
@ -781,8 +798,8 @@ FfxErrorCode CreateBackendContextVK(FfxFsr2Interface* backendInterface, FfxDevic
|
||||||
|
|
||||||
// enumerate all the device extensions
|
// enumerate all the device extensions
|
||||||
backendContext->numDeviceExtensions = 0;
|
backendContext->numDeviceExtensions = 0;
|
||||||
vkEnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, nullptr);
|
::EnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, nullptr);
|
||||||
vkEnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, backendContext->extensionProperties);
|
::EnumerateDeviceExtensionProperties(backendContext->physicalDevice, nullptr, &backendContext->numDeviceExtensions, backendContext->extensionProperties);
|
||||||
|
|
||||||
// create descriptor pool
|
// create descriptor pool
|
||||||
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {};
|
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {};
|
||||||
|
@ -1265,7 +1282,7 @@ FfxErrorCode CreatePipelineVK(FfxFsr2Interface* backendInterface, FfxFsr2Pass pa
|
||||||
if (pass == FFX_FSR2_PASS_ACCUMULATE || pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN)
|
if (pass == FFX_FSR2_PASS_ACCUMULATE || pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN)
|
||||||
{
|
{
|
||||||
VkPhysicalDeviceProperties physicalDeviceProperties = {};
|
VkPhysicalDeviceProperties physicalDeviceProperties = {};
|
||||||
vkGetPhysicalDeviceProperties(backendContext->physicalDevice, &physicalDeviceProperties);
|
::GetPhysicalDeviceProperties(backendContext->physicalDevice, &physicalDeviceProperties);
|
||||||
|
|
||||||
// Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
|
// Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
|
||||||
if (physicalDeviceProperties.vendorID == 0x10DE)
|
if (physicalDeviceProperties.vendorID == 0x10DE)
|
||||||
|
|
|
@ -34,7 +34,8 @@ extern "C" {
|
||||||
///
|
///
|
||||||
/// @returns
|
/// @returns
|
||||||
/// The size (in bytes) of the required scratch memory buffer for the VK backend.
|
/// The size (in bytes) of the required scratch memory buffer for the VK backend.
|
||||||
FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice);
|
FFX_API size_t ffxFsr2GetScratchMemorySizeVK(VkPhysicalDevice physicalDevice, uint32_t = 0);
|
||||||
|
FFX_API void ffxFsr2SetInstanceFunctions( VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr );
|
||||||
|
|
||||||
/// Populate an interface with pointers for the VK backend.
|
/// Populate an interface with pointers for the VK backend.
|
||||||
///
|
///
|
||||||
|
|
Loading…
Reference in New Issue
Block a user