parent
2e6d42ad0a
commit
ea97a113b0
@ -0,0 +1,19 @@
|
||||
|
||||
2022-09-06 | FidelityFX Super Resolution 2.1
|
||||
-------
|
||||
- Reactivity mask now uses the full range of values in the mask (0.0 - 1.0).
|
||||
- Reactivity and Composition and Transparency mask dilation is now based on input colors to avoid expanding reactiveness into non-relevant upscaled areas.
|
||||
- Disocclusion logic improved in order to detect disocclusions in areas with very small depth separation.
|
||||
- RCAS Pass forced to fp32 mode to reduce chance of issues seen with HDR input values.
|
||||
- Fix for display-resolution motion vectors interpretation.
|
||||
- FP16/FP32 computation review, readjusting balance of fp16/fp32 for maximum quality.
|
||||
- Amended motion vector description within the documentation.
|
||||
- Various documentation edits for spelling.
|
||||
- Clarified the frame delta time input value within the readme documentation.
|
||||
- Fixed issue with bad memset within the shader blob selection logic.
|
||||
|
||||
|
||||
2022-06-22 | FidelityFX Super Resolution 2.0.1
|
||||
-------
|
||||
- First release.
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 08e3881a04a0e207d65b4560d023c74c3775732e
|
||||
Subproject commit b92d559bd083f44df9f8f42a6ad149c1584ae94c
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
After Width: | Height: | Size: 349 KiB |
@ -0,0 +1,183 @@
|
||||
// FidelityFX Super Resolution Sample
|
||||
//
|
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
|
||||
#include "AnimatedTexture.h"
|
||||
|
||||
|
||||
// Creates every GPU object the animated billboards need: the root signature,
// two pipeline states (with / without writes to the composition mask target),
// the quad index buffer, and the three textures together with their SRVs.
// The view heaps and the constant buffer ring are only borrowed; their
// addresses are cached for later use by Render().
void AnimatedTextures::OnCreate( Device& device, UploadHeap& uploadHeap, StaticBufferPool& bufferPool, ResourceViewHeaps& resourceViewHeaps, DynamicBufferRing& constantBufferRing )
{
    m_pResourceViewHeaps = &resourceViewHeaps;
    m_constantBufferRing = &constantBufferRing;

    // Shaders
    D3D12_SHADER_BYTECODE vertexShader = {};
    D3D12_SHADER_BYTECODE pixelShader = {};
    CompileShaderFromFile( "AnimatedTexture.hlsl", nullptr, "VSMain", "-T vs_6_0", &vertexShader );
    CompileShaderFromFile( "AnimatedTexture.hlsl", nullptr, "PSMain", "-T ps_6_0", &pixelShader );

    // Root signature: [0] = descriptor table with one SRV (t0), [1] = root CBV (b0), plus one static sampler (s0)
    CD3DX12_DESCRIPTOR_RANGE srvRange[1] = {};
    srvRange[0].Init( D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0 );

    CD3DX12_ROOT_PARAMETER rootParameters[2] = {};
    rootParameters[0].InitAsDescriptorTable( 1, &srvRange[0], D3D12_SHADER_VISIBILITY_PIXEL );
    rootParameters[1].InitAsConstantBufferView( 0, 0, D3D12_SHADER_VISIBILITY_ALL );

    CD3DX12_STATIC_SAMPLER_DESC staticSampler( 0 );
    CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc = CD3DX12_ROOT_SIGNATURE_DESC();
    rootSignatureDesc.Init( _countof( rootParameters ), rootParameters, 1, &staticSampler );

    // Deny unnecessary access to certain pipeline stages
    rootSignatureDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT
                            | D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS
                            | D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS
                            | D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS;

    ID3DBlob* pSerializedSignature = nullptr;
    ID3DBlob* pSerializeErrors = nullptr;
    ThrowIfFailed( D3D12SerializeRootSignature( &rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &pSerializedSignature, &pSerializeErrors ) );
    ThrowIfFailed( device.GetDevice()->CreateRootSignature( 0, pSerializedSignature->GetBufferPointer(), pSerializedSignature->GetBufferSize(), IID_PPV_ARGS( &m_pRootSignature ) ) );
    SetName( m_pRootSignature, "AnimatedTexture" );

    pSerializedSignature->Release();
    if ( pSerializeErrors )
    {
        pSerializeErrors->Release();
    }

    // Pipeline state shared by both variants
    D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
    psoDesc.pRootSignature = m_pRootSignature;
    psoDesc.VS = vertexShader;
    psoDesc.PS = pixelShader;

    psoDesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC( D3D12_DEFAULT );
    psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL;
    psoDesc.DSVFormat = DXGI_FORMAT_D32_FLOAT;

    psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC( D3D12_DEFAULT );
    psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;

    // Per-target write masks: color (all), motion vectors (RG), reactive (none), composition (R)
    psoDesc.BlendState = CD3DX12_BLEND_DESC( D3D12_DEFAULT );
    psoDesc.BlendState.IndependentBlendEnable = true;
    psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
    psoDesc.BlendState.RenderTarget[1].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED | D3D12_COLOR_WRITE_ENABLE_GREEN;
    psoDesc.BlendState.RenderTarget[2].RenderTargetWriteMask = 0x0;
    psoDesc.BlendState.RenderTarget[3].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED;

    psoDesc.SampleMask = UINT_MAX;
    psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
    psoDesc.NumRenderTargets = 4;
    psoDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT;
    psoDesc.RTVFormats[1] = DXGI_FORMAT_R16G16_FLOAT;
    psoDesc.RTVFormats[2] = DXGI_FORMAT_R8_UNORM;
    psoDesc.RTVFormats[3] = DXGI_FORMAT_R8_UNORM;
    psoDesc.SampleDesc.Count = 1;

    // Variant 0 writes the composition mask target
    ThrowIfFailed( device.GetDevice()->CreateGraphicsPipelineState( &psoDesc, IID_PPV_ARGS( &m_pPipelines[0] ) ) );
    SetName( m_pPipelines[0], "AnimatedTexturePipelineComp" );

    // Variant 1 is identical except that writes to target 3 are masked off
    psoDesc.BlendState.RenderTarget[3].RenderTargetWriteMask = 0;
    ThrowIfFailed( device.GetDevice()->CreateGraphicsPipelineState( &psoDesc, IID_PPV_ARGS( &m_pPipelines[1] ) ) );
    SetName( m_pPipelines[1], "AnimatedTexturePipelineNoComp" );

    // Two triangles forming one quad; vertices are generated in the vertex shader
    UINT quadIndices[6] = { 0, 1, 2, 2, 1, 3 };
    bufferPool.AllocIndexBuffer( _countof( quadIndices ), sizeof( UINT ), quadIndices, &m_indexBuffer );

    // One SRV slot per texture, all in a single descriptor table
    resourceViewHeaps.AllocCBV_SRV_UAVDescriptor( _countof( m_textures ), &m_descriptorTable );

    const char* const textureFiles[] =
    {
        "..\\media\\lion.jpg",
        "..\\media\\checkerboard.dds",
        "..\\media\\composition_text.dds",
    };

    for ( int slot = 0; slot < _countof( textureFiles ); ++slot )
    {
        m_textures[ slot ].InitFromFile( &device, &uploadHeap, textureFiles[ slot ], true );
    }

    for ( int slot = 0; slot < _countof( m_textures ); ++slot )
    {
        m_textures[ slot ].CreateSRV( slot, &m_descriptorTable );
    }
}
|
||||
|
||||
|
||||
void AnimatedTextures::OnDestroy()
|
||||
{
|
||||
for ( int i = 0; i < _countof( m_textures ); i++ )
|
||||
{
|
||||
m_textures[i].OnDestroy();
|
||||
}
|
||||
|
||||
for ( int i = 0; i < _countof( m_pPipelines ); i++ )
|
||||
{
|
||||
m_pPipelines[i]->Release();
|
||||
m_pPipelines[i] = nullptr;
|
||||
}
|
||||
|
||||
m_pRootSignature->Release();
|
||||
m_pRootSignature = nullptr;
|
||||
m_pResourceViewHeaps = nullptr;
|
||||
}
|
||||
|
||||
|
||||
// Advances the scroll / rotation / texture-flip animation by frameTime and
// records one instanced draw of the two billboards into pCommandList.
//   frameTime       - time step added to the animation state (scaled by speed
//                     for scroll and rotation, unscaled for the texture flip)
//   speed           - multiplier applied to the scroll and rotation rates
//   compositionMask - true selects the pipeline that also writes render target 3
//   camera          - supplies current and previous view / projection matrices
void AnimatedTextures::Render( ID3D12GraphicsCommandList* pCommandList, float frameTime, float speed, bool compositionMask, const Camera& camera )
{
    // Layout must match cbuffer "cb" in AnimatedTexture.hlsl
    struct ConstantBuffer
    {
        math::Matrix4 currentViewProj;
        math::Matrix4 previousViewProj;
        float jitterCompensation[ 2 ];
        float scrollFactor;
        float rotationFactor;
        int mode;
        int pads[3];
    };

    // Advance the animation state
    m_scrollFactor += frameTime * 1.0f * speed;
    m_rotationFactor += frameTime * 2.0f * speed;
    m_flipTimer += frameTime * 1.0f;

    // Keep the scroll offset bounded
    if ( m_scrollFactor > 10.0f )
    {
        m_scrollFactor -= 10.0f;
    }

    // Keep the rotation angle bounded to one turn
    const float twoPI = 6.283185307179586476925286766559f;
    if ( m_rotationFactor > twoPI )
    {
        m_rotationFactor -= twoPI;
    }

    // A new texture roughly every three seconds; the timer restarts after nine
    int textureIndex = min( (int)floorf( m_flipTimer * 0.33333f ), _countof( m_textures ) - 1 );
    if ( m_flipTimer > 9.0f )
    {
        m_flipTimer = 0.0f;
    }

    // Fill this draw's constants
    D3D12_GPU_VIRTUAL_ADDRESS constantsGpuAddress = {};
    ConstantBuffer* pConstants = nullptr;
    m_constantBufferRing->AllocConstantBuffer( sizeof(*pConstants), (void**)&pConstants, &constantsGpuAddress );

    pConstants->currentViewProj = camera.GetProjection() * camera.GetView();
    pConstants->previousViewProj = camera.GetPrevProjection() * camera.GetPrevView();

    // Difference of the projection matrices' third-column x/y between the previous and the current frame
    pConstants->jitterCompensation[0] = camera.GetPrevProjection().getCol2().getX() - camera.GetProjection().getCol2().getX();
    pConstants->jitterCompensation[1] = camera.GetPrevProjection().getCol2().getY() - camera.GetProjection().getCol2().getY();

    pConstants->scrollFactor = m_scrollFactor;
    pConstants->rotationFactor = m_rotationFactor;
    pConstants->mode = textureIndex;

    // Bind heaps, root signature and root arguments
    ID3D12DescriptorHeap* heaps[] = { m_pResourceViewHeaps->GetCBV_SRV_UAVHeap(), m_pResourceViewHeaps->GetSamplerHeap() };
    pCommandList->SetDescriptorHeaps( _countof( heaps ), heaps );
    pCommandList->SetGraphicsRootSignature( m_pRootSignature );
    pCommandList->SetGraphicsRootDescriptorTable( 0, m_descriptorTable.GetGPU( textureIndex ) );
    pCommandList->SetGraphicsRootConstantBufferView( 1, constantsGpuAddress );

    // No vertex buffer: the vertex shader derives the quad corners from SV_VertexID
    pCommandList->IASetPrimitiveTopology( D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST );
    pCommandList->IASetIndexBuffer( &m_indexBuffer );
    pCommandList->IASetVertexBuffers( 0, 0, nullptr );

    ID3D12PipelineState* pPipeline = compositionMask ? m_pPipelines[0] : m_pPipelines[1];
    pCommandList->SetPipelineState( pPipeline );

    // 6 indices per quad, 2 instances (one per billboard)
    pCommandList->DrawIndexedInstanced( 6, 2, 0, 0, 0 );
}
|
||||
|
@ -0,0 +1,56 @@
|
||||
// FidelityFX Super Resolution Sample
|
||||
//
|
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
|
||||
class AnimatedTextures
|
||||
{
|
||||
public:
|
||||
|
||||
AnimatedTextures() {}
|
||||
virtual ~AnimatedTextures() {}
|
||||
|
||||
void OnCreate( Device& device, UploadHeap& uploadHeap, StaticBufferPool& bufferPool, ResourceViewHeaps& resourceViewHeaps, DynamicBufferRing& constantBufferRing );
|
||||
void OnDestroy();
|
||||
|
||||
void Render( ID3D12GraphicsCommandList* pCommandList, float frameTime, float speed, bool compositionMask, const Camera& camera );
|
||||
|
||||
private:
|
||||
|
||||
ResourceViewHeaps* m_pResourceViewHeaps = nullptr;
|
||||
DynamicBufferRing* m_constantBufferRing = nullptr;
|
||||
|
||||
ID3D12RootSignature* m_pRootSignature = nullptr;
|
||||
ID3D12PipelineState* m_pPipelines[2] = {};
|
||||
D3D12_INDEX_BUFFER_VIEW m_indexBuffer = {};
|
||||
|
||||
Texture m_textures[3] = {};
|
||||
CBV_SRV_UAV m_descriptorTable = {};
|
||||
|
||||
float m_scrollFactor = 0.0f;
|
||||
float m_rotationFactor = 0.0f;
|
||||
float m_flipTimer = 0.0f;
|
||||
};
|
@ -0,0 +1,129 @@
|
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
// Per-draw constants for the animated billboards. The field order mirrors the
// ConstantBuffer struct that AnimatedTextures::Render fills on the CPU side.
cbuffer cb : register(b0)
|
||||
{
|
||||
// View-projection of the current frame; transforms the billboard corners in VSMain.
matrix g_CurrentViewProjection;
|
||||
// View-projection of the previous frame; used to derive motion vectors.
matrix g_PreviousViewProjection;
|
||||
// Added to the clip-space position delta in PSMain before it is scaled into motion vectors.
float2 g_CameraJitterCompensation;
|
||||
// UV scroll offset used by TextureLookup.
float g_ScrollFactor;
|
||||
// Rotation angle passed to sincos() in TextureLookup.
float g_RotationFactor;
|
||||
// Index of the texture currently shown; a value of 2 switches both billboards to horizontal scrolling.
int g_Mode;
|
||||
// Padding; not read by the shaders in this file.
int pad0;
|
||||
int pad1;
|
||||
int pad2;
|
||||
}
|
||||
|
||||
|
||||
// Texture sampled by TextureLookup (SRV slot t0).
Texture2D g_Texture : register(t0);
|
||||
// Sampler used for every lookup of g_Texture (slot s0).
SamplerState g_Sampler : register(s0);
|
||||
|
||||
// Vertex shader output / pixel shader input for the billboards.
struct VERTEX_OUT
|
||||
{
|
||||
// Clip-space position computed with the current frame's view-projection.
float4 CurrentPosition : TEXCOORD0;
|
||||
// Clip-space position computed with the previous frame's view-projection.
float4 PreviousPosition : TEXCOORD1;
|
||||
// xy = texture coordinate, z = instance (billboard) index.
float3 TexCoord : TEXCOORD2;
|
||||
// Rasterizer position; VSMain sets it equal to CurrentPosition.
float4 Position : SV_POSITION;
|
||||
};
|
||||
|
||||
|
||||
// Generates one billboard corner per vertex without any vertex buffer.
// vertexId selects the corner of a 2x2 quad, instanceId selects which of the
// two billboards is drawn (they differ in world position and in the sign of
// the horizontal texture coordinate).
VERTEX_OUT VSMain( uint vertexId : SV_VertexID, uint instanceId : SV_InstanceID )
{
    // Quad corners in the billboard's local XY plane
    const float2 corners[ 4 ] =
    {
        float2( -1,  1 ),
        float2(  1,  1 ),
        float2( -1, -1 ),
        float2(  1, -1 ),
    };

    const float2 corner = corners[ vertexId ];
    const bool isFirstBillboard = ( instanceId == 0 );

    // Map the corner from [-1, 1] to texture space; the first billboard is mirrored horizontally
    const float2 uv = ( corner + 1 ) * float2( isFirstBillboard ? -0.5 : 0.5, -0.5 );

    // The two billboards sit at the same X / Y and are separated along Z
    const float3 billboardCenter = isFirstBillboard ? float3( -13, 1.5, 2 ) : float3( -13, 1.5, -2 );
    const float4 worldPos = float4( float3( corner, 0.0 ) + billboardCenter, 1.0 );

    VERTEX_OUT result = (VERTEX_OUT)0;
    result.CurrentPosition = mul( g_CurrentViewProjection, worldPos );
    result.PreviousPosition = mul( g_PreviousViewProjection, worldPos );
    result.Position = result.CurrentPosition;
    result.TexCoord.xy = uv;
    result.TexCoord.z = instanceId;

    return result;
}
|
||||
|
||||
// Pixel shader outputs, one per bound render target.
struct Output
|
||||
{
|
||||
// Billboard color.
float4 finalColor : SV_TARGET0;
|
||||
// Screen-space motion of the billboard surface between the previous and current frame.
float2 motionVectors : SV_TARGET1;
|
||||
// Reactive mask value; PSMain always writes 0 here.
float upscaleReactive : SV_TARGET2;
|
||||
// Transparency-and-composition mask value; PSMain always writes 1 here.
float upscaleTransparencyAndComposition : SV_TARGET3;
|
||||
};
|
||||
|
||||
|
||||
// Returns the animated color for one billboard pixel (alpha is always 1).
//   billboardIndex - 0 for the scrolling billboard, 1 for the rotating one
//   uv0            - interpolated texture coordinate from the vertex shader
// When g_Mode == 2 both billboards scroll horizontally instead.
// Any other billboardIndex yields opaque white.
float4 TextureLookup( int billboardIndex, float2 uv0 )
{
    const bool horizontalOnly = ( g_Mode == 2 );
    float2 animatedUV;

    if ( billboardIndex == 0 || horizontalOnly )
    {
        // Scrolling: purely horizontal in mode 2, diagonal otherwise
        const float2 scroll = horizontalOnly
            ? float2( -g_ScrollFactor, 0.0 )
            : float2( -g_ScrollFactor, 0.5*g_ScrollFactor );
        animatedUV = uv0 + scroll;
    }
    else if ( billboardIndex == 1 )
    {
        // Rotated UVs: spin around the point (0.5, -0.5)
        float sinAngle, cosAngle;
        sincos( g_RotationFactor, sinAngle, cosAngle );
        const float2x2 rotation = { float2( cosAngle, sinAngle ), float2( -sinAngle, cosAngle ) };

        animatedUV = mul( rotation, uv0-float2( 0.5, -0.5) );
    }
    else
    {
        return 1;
    }

    float4 color = 1;
    color.rgb = g_Texture.SampleLevel( g_Sampler, animatedUV, 0 ).rgb;
    return color;
}
|
||||
|
||||
|
||||
// Shades one billboard pixel: animated color, motion vectors derived from the
// current and previous clip-space positions, and the two upscaler mask values.
Output PSMain( VERTEX_OUT input )
{
    Output result = (Output)0;

    const int billboardIndex = (int)input.TexCoord.z;
    result.finalColor = TextureLookup( billboardIndex, input.TexCoord.xy );

    // Perspective-divide both positions, take previous minus current, then compensate for camera jitter
    const float2 previousNdc = input.PreviousPosition.xy / input.PreviousPosition.w;
    const float2 currentNdc = input.CurrentPosition.xy / input.CurrentPosition.w;
    const float2 ndcDelta = previousNdc - currentNdc + g_CameraJitterCompensation;

    // Halve the delta and flip Y
    result.motionVectors = ndcDelta * float2( 0.5f, -0.5f );

    // Nothing to write to the reactive mask. Color writes are off on this target anyway.
    result.upscaleReactive = 0;

    // Write a value into here to indicate the depth and motion vectors are as expected for a static object, but the surface contents are changing.
    result.upscaleTransparencyAndComposition = 1;

    return result;
}
|
@ -0,0 +1,92 @@
|
||||
//
|
||||
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
#include "ShaderConstants.h"
|
||||
|
||||
|
||||
// Upper-case aliases for the built-in HLSL scalar / vector / matrix types.
// NOTE(review): presumably these let headers shared with C++ code compile as HLSL - confirm.
#define FLOAT float
|
||||
#define FLOAT2 float2
|
||||
#define FLOAT3 float3
|
||||
#define FLOAT4 float4
|
||||
#define FLOAT2X2 float2x2
|
||||
#define UINT uint
|
||||
#define UINT2 uint2
|
||||
#define UINT3 uint3
|
||||
#define UINT4 uint4
|
||||
|
||||
|
||||
// Per-frame constant buffer
|
||||
// Per-frame constants for the particle shaders, bound at register b0.
// The field descriptions below are inferred from the field names only;
// verify them against the host code that fills this buffer.
[[vk::binding( 10, 0 )]] cbuffer PerFrameConstantBuffer : register( b0 )
|
||||
{
|
||||
// Per-emitter values, indexed by emitter (NUM_EMITTERS entries each).
float4 g_StartColor[ NUM_EMITTERS ];
|
||||
float4 g_EndColor[ NUM_EMITTERS ];
|
||||
|
||||
float4 g_EmitterLightingCenter[ NUM_EMITTERS ];
|
||||
|
||||
// Camera matrices.
matrix g_mViewProjection;
|
||||
matrix g_mView;
|
||||
matrix g_mViewInv;
|
||||
matrix g_mProjection;
|
||||
matrix g_mProjectionInv;
|
||||
|
||||
// Camera position and lighting.
float4 g_EyePosition;
|
||||
float4 g_SunDirection;
|
||||
float4 g_SunColor;
|
||||
float4 g_AmbientColor;
|
||||
|
||||
// NOTE(review): the VS suffix presumably means view space - confirm.
float4 g_SunDirectionVS;
|
||||
|
||||
// Screen dimensions and their reciprocals.
uint g_ScreenWidth;
|
||||
uint g_ScreenHeight;
|
||||
float g_InvScreenWidth;
|
||||
float g_InvScreenHeight;
|
||||
|
||||
// Simulation settings.
float g_AlphaThreshold;
|
||||
float g_ElapsedTime;
|
||||
float g_CollisionThickness;
|
||||
int g_CollideParticles;
|
||||
|
||||
int g_ShowSleepingParticles;
|
||||
int g_EnableSleepState;
|
||||
float g_FrameTime;
|
||||
int g_MaxParticles;
|
||||
|
||||
// Tile counts for culling.
uint g_NumTilesX;
|
||||
uint g_NumTilesY;
|
||||
uint g_NumCoarseCullingTilesX;
|
||||
uint g_NumCoarseCullingTilesY;
|
||||
|
||||
uint g_NumCullingTilesPerCoarseTileX;
|
||||
uint g_NumCullingTilesPerCoarseTileY;
|
||||
uint g_AlignedScreenWidth;
|
||||
// Padding.
uint g_Pad1;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Declare the global samplers
|
||||
// Sampler slots s0-s2. The names suggest the addressing and filter mode of each;
// the actual sampler state is set up outside this file - confirm against the host code.
[[vk::binding( 12, 0 )]] SamplerState g_samWrapLinear : register( s0 );
|
||||
[[vk::binding( 13, 0 )]] SamplerState g_samClampLinear : register( s1 );
|
||||
[[vk::binding( 14, 0 )]] SamplerState g_samWrapPoint : register( s2 );
|
||||
|
@ -0,0 +1,123 @@
|
||||
// ParallelSortCS.hlsl
|
||||
//
|
||||
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// ParallelSort Shaders/Includes
|
||||
//--------------------------------------------------------------------------------------
|
||||
#define FFX_HLSL
|
||||
#include "FFX_ParallelSort.h"
|
||||
|
||||
// Sort parameters passed to every FFX_ParallelSort_* call in this file.
[[vk::binding(0, 0)]] ConstantBuffer<FFX_ParallelSortCB> CBuffer : register(b0); // Constant buffer
|
||||
// Input of FPS_SetupIndirectParameters only.
[[vk::binding(0, 1)]] cbuffer SetupIndirectCB : register(b1) // Setup Indirect Constant buffer
|
||||
{
|
||||
// Forwarded unchanged to FFX_ParallelSort_SetupIndirectParams.
uint MaxThreadGroups;
|
||||
};
|
||||
|
||||
// Small per-dispatch constant block read by FPS_Count and FPS_Scatter.
struct RootConstantData
|
||||
{
|
||||
// Shift value passed to the count and scatter passes.
uint CShiftBit;
|
||||
};
|
||||
|
||||
// The same data is declared as a push constant when API_VULKAN is defined,
// and as a constant buffer at b2 otherwise.
#ifdef API_VULKAN
|
||||
[[vk::push_constant]] RootConstantData rootConstData : register(b2); // Store the shift bit directly in the root signature
|
||||
#else
|
||||
ConstantBuffer<RootConstantData> rootConstData : register(b2); // Store the shift bit directly in the root signature
|
||||
#endif
|
||||
|
||||
// Buffers used by the sort passes. Every UAV is declared at register u0 and is
// told apart by its own register space; the [[vk::binding]] attributes carry
// separate binding numbers for the Vulkan build.

// Keys and payload read by the count / scatter passes.
[[vk::binding(0, 2)]] RWStructuredBuffer<uint> SrcBuffer : register(u0, space0); // The unsorted keys or scan data
|
||||
[[vk::binding(2, 2)]] RWStructuredBuffer<uint> SrcPayload : register(u0, space1); // The payload data
|
||||
|
||||
// Per-pass sum tables.
[[vk::binding(0, 4)]] RWStructuredBuffer<uint> SumTable : register(u0, space2); // The sum table we will write sums to
|
||||
[[vk::binding(1, 4)]] RWStructuredBuffer<uint> ReduceTable : register(u0, space3); // The reduced sum table we will write sums to
|
||||
|
||||
// Keys and payload written by the scatter pass.
[[vk::binding(1, 2)]] RWStructuredBuffer<uint> DstBuffer : register(u0, space4); // The sorted keys or prefixed data
|
||||
[[vk::binding(3, 2)]] RWStructuredBuffer<uint> DstPayload : register(u0, space5); // the sorted payload data
|
||||
|
||||
// Buffers used by FPS_Scan and FPS_ScanAdd.
[[vk::binding(0, 3)]] RWStructuredBuffer<uint> ScanSrc : register(u0, space6); // Source for Scan Data
|
||||
[[vk::binding(1, 3)]] RWStructuredBuffer<uint> ScanDst : register(u0, space7); // Destination for Scan Data
|
||||
[[vk::binding(2, 3)]] RWStructuredBuffer<uint> ScanScratch : register(u0, space8); // Scratch data for Scan
|
||||
|
||||
// Inputs and outputs of FPS_SetupIndirectParameters; element 0 of g_ElementCount is the value it reads.
[[vk::binding( 0, 5 )]] StructuredBuffer<uint> g_ElementCount : register( t0 );
|
||||
[[vk::binding(1, 5)]] RWStructuredBuffer<FFX_ParallelSortCB> CBufferUAV : register(u0, space10); // UAV for constant buffer parameters for indirect execution
|
||||
[[vk::binding(2, 5)]] RWStructuredBuffer<uint> CountScatterArgs : register(u0, space11); // Count and Scatter Args for indirect execution
|
||||
[[vk::binding(3, 5)]] RWStructuredBuffer<uint> ReduceScanArgs : register(u0, space12); // Reduce and Scan Args for indirect execution
|
||||
|
||||
|
||||
|
||||
// FPS Count
|
||||
[numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)]
void FPS_Count(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID)
{
    // Count pass (uint variant): reads keys from SrcBuffer, writes counts to SumTable
    const uint shiftBit = rootConstData.CShiftBit;

    FFX_ParallelSort_Count_uint(localID, groupID, CBuffer, shiftBit, SrcBuffer, SumTable);
}
|
||||
|
||||
// FPS Reduce
|
||||
[numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)]
void FPS_CountReduce(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID)
{
    // Reduce pass: SumTable -> ReduceTable
    FFX_ParallelSort_ReduceCount(
        localID,
        groupID,
        CBuffer,
        SumTable,
        ReduceTable);
}
|
||||
|
||||
// FPS Scan
|
||||
[numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)]
void FPS_Scan(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID)
{
    // Each thread group handles one contiguous slice of the scan input
    const uint elementsPerGroup = FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE;
    const uint baseIndex = elementsPerGroup * groupID;

    // Scan without a bin offset and without adding partial sums
    FFX_ParallelSort_ScanPrefix(CBuffer.NumScanValues, localID, groupID, 0, baseIndex, false,
                                CBuffer, ScanSrc, ScanDst, ScanScratch);
}
|
||||
|
||||
// FPS ScanAdd
|
||||
[numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)]
void FPS_ScanAdd(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID)
{
    // The add pass has to index its data the same way the reduce pass does,
    // because reduce uses a specialized access pattern that matches the
    // optimized count pass.
    const uint groupsPerBin = CBuffer.NumReduceThreadgroupPerBin;

    // Which bin this thread group works on, and where that bin's data starts
    const uint binIndex = groupID / groupsPerBin;
    const uint binOffset = binIndex * CBuffer.NumThreadGroups;

    // Base index of this thread group inside its bin
    const uint groupInBin = groupID % groupsPerBin;
    const uint baseIndex = groupInBin * FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE;

    FFX_ParallelSort_ScanPrefix(CBuffer.NumThreadGroups, localID, groupID, binOffset, baseIndex, true,
                                CBuffer, ScanSrc, ScanDst, ScanScratch);
}
|
||||
|
||||
// FPS Scatter
|
||||
[numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)]
void FPS_Scatter(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID)
{
    // Scatter pass (uint variant); the payload buffers are only passed when kRS_ValueCopy is defined
#ifdef kRS_ValueCopy
    FFX_ParallelSort_Scatter_uint(localID, groupID, CBuffer, rootConstData.CShiftBit, SrcBuffer, DstBuffer, SumTable,
                                  SrcPayload, DstPayload);
#else
    FFX_ParallelSort_Scatter_uint(localID, groupID, CBuffer, rootConstData.CShiftBit, SrcBuffer, DstBuffer, SumTable);
#endif // kRS_ValueCopy
}
|
||||
|
||||
[numthreads(1, 1, 1)]
void FPS_SetupIndirectParameters(uint localID : SV_GroupThreadID)
{
    // A single thread reads the element count and fills the sort constants and indirect arguments
    const uint elementCount = g_ElementCount[ 0 ];

    FFX_ParallelSort_SetupIndirectParams(elementCount, MaxThreadGroups, CBufferUAV, CountScatterArgs, ReduceScanArgs);
}
|
@ -0,0 +1,101 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
#include "ParticleStructs.h"
|
||||
#include "SimulationBindings.h"
|
||||
|
||||
|
||||
// Emitter index has 8 bits
|
||||
// Texture index has 5 bits
|
||||
// Packs the emitter description of a particle into one uint:
//   bits 16-23 : emitter index (low 8 bits of emitterIndex)
//   bits 24-28 : texture index (low 5 bits of textureIndex)
//   bit  30    : set when the emitter produces streaks
// All other bits are returned as zero.
uint WriteEmitterProperties( uint emitterIndex, uint textureIndex, bool isStreakEmitter )
{
    const uint emitterBits = ( emitterIndex & 0xff ) << 16;
    const uint textureBits = ( textureIndex & 0x1f ) << 24;
    const uint streakBit = isStreakEmitter ? ( 1u << 30 ) : 0u;

    return emitterBits | textureBits | streakBit;
}
|
||||
|
||||
|
||||
// Number of particles this thread group may emit; written by thread 0, read by all threads
groupshared int g_ldsNumParticlesAvailable;

// Emit particles, one per thread, in blocks of 1024 at a time
[numthreads(1024,1,1)]
void CS_Emit( uint3 localIdx : SV_GroupThreadID, uint3 globalIdx : SV_DispatchThreadID )
{
    // Thread 0 computes the emission budget: the dead-list count, clamped to
    // [0, min(per-frame limit, overall limit)]
    if ( localIdx.x == 0 )
    {
        int emitLimit = min( g_MaxParticlesThisFrame, g_MaxParticles );
        g_ldsNumParticlesAvailable = clamp( g_DeadList[ 0 ], 0, emitLimit );
    }

    GroupMemoryBarrierWithGroupSync();

    // Check to make sure we don't emit more particles than we specified
    if ( globalIdx.x >= g_ldsNumParticlesAvailable )
    {
        return;
    }

    // Decrement the dead-list counter; the value before the decrement selects the slot to reuse
    int deadCountBefore = 0;
    InterlockedAdd( g_DeadList[ 0 ], -1, deadCountBefore );

    if ( deadCountBefore <= 0 || deadCountBefore > g_MaxParticles )
    {
        return;
    }

    // Initialize the particle data to zero to avoid any unexpected results
    GPUParticlePartA partA = (GPUParticlePartA)0;
    GPUParticlePartB partB = (GPUParticlePartB)0;

    // Generate some random numbers from reading the random texture
    float2 positionSeedUV = float2( globalIdx.x / 1024.0, g_ElapsedTime );
    float3 positionRandom = g_RandomBuffer.SampleLevel( g_samWrapPoint, positionSeedUV, 0 ).xyz;

    float2 velocitySeedUV = float2( (globalIdx.x + 1) / 1024.0, g_ElapsedTime );
    float3 velocityRandom = g_RandomBuffer.SampleLevel( g_samWrapPoint, velocitySeedUV, 0 ).xyz;

    float emitterSpeed = length( g_vEmitterVelocity.xyz );

    partA.m_StreakLengthAndEmitterProperties = WriteEmitterProperties( g_EmitterIndex, g_TextureIndex, g_EmitterStreaks ? true : false );
    partA.m_CollisionCount = 0;

    partB.m_Position = g_vEmitterPosition.xyz + ( positionRandom.xyz * g_PositionVariance.xyz );
    partB.m_Mass = g_Mass;
    partB.m_Velocity = g_vEmitterVelocity.xyz + ( velocityRandom.xyz * emitterSpeed * g_VelocityVariance );
    partB.m_Lifespan = g_ParticleLifeSpan;
    partB.m_Age = partB.m_Lifespan;
    partB.m_StartSize = g_StartSize;
    partB.m_EndSize = g_EndSize;

    // Write the new particle state into the global particle buffer
    int particleSlot = g_DeadList[ deadCountBefore ];
    g_ParticleBufferA[ particleSlot ] = partA;
    g_ParticleBufferB[ particleSlot ] = partB;
}
|
@ -0,0 +1,36 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
|
||||
|
||||
// Returns the horizontal UV offset of the particle's texture inside the atlas.
//
// emitterProperties: packed per-particle properties word. The atlas index is
//                    stored in bits 24-29; the neighbouring emitter-index,
//                    streak and sleeping bits are ignored here.
float GetTextureOffset( uint emitterProperties )
{
    // Shift the field down first and then mask its 6 bits, so that neither the
    // emitter index below it nor the flag bits above it can leak into the result
    uint atlasIndex = ( emitterProperties >> 24 ) & 0x3f;

    return (float)atlasIndex * 0.5; // Assumes 2 textures in the atlas!
}
|
||||
|
||||
// Tests bit 30 of the packed emitter properties word, which flags particles
// whose emitter uses velocity-based streaks.
bool IsStreakEmitter( uint emitterProperties )
{
    const uint streakBit = 1u << 30;

    return ( emitterProperties & streakBit ) != 0;
}
|
||||
|
@ -0,0 +1,263 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
//
|
||||
// Shader code for rendering particles as simple quads using rasterization
|
||||
//
|
||||
|
||||
#include "ParticleStructs.h"
|
||||
#include "ParticleHelpers.h"
|
||||
#include "fp16util.h"
|
||||
|
||||
|
||||
// Data passed from the billboard vertex shader to the pixel shader.
// Members marked nointerpolation are per-particle values and are not
// interpolated across the quad.
struct PS_INPUT
{
    nointerpolation float4 ViewSpaceCentreAndRadius : TEXCOORD0;   // xyz: particle centre in view space, w: radius
    float2                 TexCoord                 : TEXCOORD1;   // UV into the particle texture atlas
    float3                 ViewPos                  : TEXCOORD2;   // View space position of this point on the quad
    nointerpolation float3 VelocityXYEmitterNdotL   : TEXCOORD3;   // xy: view space velocity direction, z: emitter lighting term
    nointerpolation float3 Extrusion                : TEXCOORD4;   // xy: streak extrusion vector, z: 1.0 when the particle is streaked
    nointerpolation float2 EllipsoidRadius          : TEXCOORD5;   // Streak radii (x: particle radius, y: streak length)
    nointerpolation float4 Color                    : COLOR0;      // Unpacked tint and alpha
    float4                 Position                 : SV_POSITION; // Projected position
};
|
||||
|
||||
|
||||
// Particle data needed for rendering. This is only "part A" of each particle;
// the simulation-only half is not bound here.
[[vk::binding( 0, 0 )]] StructuredBuffer<GPUParticlePartA> g_ParticleBufferA : register( t0 );

// Pre-computed view space position and radius of every particle, packed as fp16
[[vk::binding( 1, 0 )]] StructuredBuffer<uint2> g_PackedViewSpacePositions : register( t1 );

// Element 0 holds the number of sorted particles
[[vk::binding( 2, 0 )]] StructuredBuffer<int> g_NumParticlesBuffer : register( t2 );

// Sorted list of particle indices
[[vk::binding( 3, 0 )]] StructuredBuffer<int> g_SortedIndexBuffer : register( t3 );

// Texture atlas shared by all particles
[[vk::binding( 4, 0 )]] Texture2D g_ParticleTexture : register( t4 );

// Opaque scene depth buffer, read as a texture for the soft depth fade
[[vk::binding( 5, 0 )]] Texture2D<float> g_DepthTexture : register( t5 );

// Per-frame constants for the particle rendering pass
[[vk::binding( 6, 0 )]] cbuffer RenderingConstantBuffer : register( b0 )
{
    matrix g_mProjection;       // View space -> clip space
    matrix g_mProjectionInv;    // Clip space -> view space

    float4 g_SunColor;
    float4 g_AmbientColor;
    float4 g_SunDirectionVS;    // Sun direction; dotted against the view space particle normal

    uint g_ScreenWidth;
    uint g_ScreenHeight;
    uint g_pads0;               // Padding
    uint g_pads1;               // Padding
};

// Sampler used to read the particle atlas
[[vk::binding( 7, 0 )]] SamplerState g_samClampLinear : register( s0 );
|
||||
|
||||
|
||||
// Vertex shader only path.
//
// Builds one corner of a particle quad per invocation. Every four consecutive
// vertex IDs belong to the same particle; the particle itself is looked up
// through the sorted index list, which is read in reverse order.
//
// VertexId: SV_VertexID of the vertex being generated.
// Returns the fully populated PS_INPUT for that corner.
PS_INPUT VS_StructuredBuffer( uint VertexId : SV_VertexID )
{
    PS_INPUT Output = (PS_INPUT)0;

    // Particle index
    uint particleIndex = VertexId / 4;

    // Per-particle corner index
    uint cornerIndex = VertexId % 4;

    // Quad corners in [-1, 1] space
    const float2 offsets[ 4 ] =
    {
        float2( -1, 1 ),
        float2( 1, 1 ),
        float2( -1, -1 ),
        float2( 1, -1 ),
    };

    int NumParticles = g_NumParticlesBuffer[ 0 ];

    // Walk the sorted list from its last entry towards its first
    int index = g_SortedIndexBuffer[ NumParticles - particleIndex - 1 ];

    GPUParticlePartA pa = g_ParticleBufferA[ index ];

    float4 ViewSpaceCentreAndRadius = UnpackFloat16( g_PackedViewSpacePositions[ index ] );
    float4 VelocityXYEmitterNdotLRotation = UnpackFloat16( pa.m_PackedVelocityXYEmitterNDotLAndRotation );

    uint emitterProperties = pa.m_StreakLengthAndEmitterProperties;

    bool streaks = IsStreakEmitter( emitterProperties );

    // Map the corner from [-1, 1] into this particle's cell of the atlas
    float2 offset = offsets[ cornerIndex ];
    float2 uv = (offset+1)*float2( 0.25, 0.5 );
    uv.x += GetTextureOffset( emitterProperties );

    float radius = ViewSpaceCentreAndRadius.w;
    float3 cameraFacingPos;

#if defined (STREAKS)
    if ( streaks )
    {
        float2 viewSpaceVelocity = VelocityXYEmitterNdotLRotation.xy;

        // x is the particle radius, y is the fp16 streak length stored in the low half of the properties word
        float2 ellipsoidRadius = float2( radius, UnpackFloat16( pa.m_StreakLengthAndEmitterProperties ).x );

        // Basis aligned with the view space velocity
        float2 extrusionVector = viewSpaceVelocity;
        float2 tangentVector = float2( extrusionVector.y, -extrusionVector.x );
        float2x2 transform = float2x2( tangentVector, extrusionVector );

        Output.Extrusion.xy = extrusionVector;
        Output.Extrusion.z = 1.0; // Tells the pixel shader this quad is a streak
        Output.EllipsoidRadius = ellipsoidRadius;

        cameraFacingPos = ViewSpaceCentreAndRadius.xyz;

        cameraFacingPos.xy += mul( offset * ellipsoidRadius, transform );
    }
    else
#endif
    {
        // Non-streaked particles are rotated about the view axis
        float s, c;
        sincos( VelocityXYEmitterNdotLRotation.w, s, c );
        float2x2 rotation = { float2( c, -s ), float2( s, c ) };

        offset = mul( offset, rotation );

        cameraFacingPos = ViewSpaceCentreAndRadius.xyz;
        cameraFacingPos.xy += radius * offset;
    }

    Output.Position = mul( g_mProjection, float4( cameraFacingPos, 1 ) );

    Output.TexCoord = uv;
    Output.Color = UnpackFloat16( pa.m_PackedTintAndAlpha );
    Output.ViewSpaceCentreAndRadius = ViewSpaceCentreAndRadius;
    Output.VelocityXYEmitterNdotL = VelocityXYEmitterNdotLRotation.xyz;
    Output.ViewPos = cameraFacingPos;

    return Output;
}
|
||||
|
||||
|
||||
// Render target outputs of the billboard pixel shader
struct PS_OUTPUT
{
    float4 color        : SV_TARGET0;   // Lit particle color
#if defined (REACTIVE)
    float  reactiveMask : SV_TARGET2;   // Only written when REACTIVE is defined
#endif
};
|
||||
|
||||
|
||||
// Rasterization path's pixel shader.
//
// Shades one pixel of a particle quad: fades the particle out where it gets
// close to the opaque scene depth, applies the atlas texture and tint, and
// lights it with a generated normal blended with the emitter lighting term.
PS_OUTPUT PS_Billboard( PS_INPUT In )
{
    PS_OUTPUT output = (PS_OUTPUT)0;

    // Per-particle data forwarded by the vertex shader
    const float3 centreVS = In.ViewSpaceCentreAndRadius.xyz;
    const float radius = In.ViewSpaceCentreAndRadius.w;

    // Opaque scene depth under this pixel
    const float sceneDepth = g_DepthTexture.Load( uint3( In.Position.x, In.Position.y, 0 ) ).x;

    // Build a point at that depth from the pixel position, scaled and biased into [-1, 1]
    float2 screenPos;
    screenPos.x = In.Position.x / (float)g_ScreenWidth;
    screenPos.y = 1 - ( In.Position.y / (float)g_ScreenHeight );

    float4 sceneVS = float4( (2*screenPos) - 1, sceneDepth, 1 );

    // ...and bring it back into view space with the inverse projection and a divide by W
    sceneVS = mul( g_mProjectionInv, sceneVS );
    sceneVS.xyz /= sceneVS.w;

    // Soft-particle fade based on the distance between the particle and the scene
    const float depthFade = saturate( ( centreVS.z - sceneVS.z ) / radius );

    // White albedo carrying the fade in alpha, modulated by the texture atlas
    float4 albedo = float4( 1, 1, 1, depthFade );
    albedo *= g_ParticleTexture.SampleLevel( g_samClampLinear, In.TexCoord, 0 ); // 2d

    // Multiply in the particle color
    output.color = albedo * In.Color;

    // Position of this pixel relative to the particle centre, in units of the particle radius
    float2 uv;
#if defined (STREAKS)
    if ( In.Extrusion.z > 0.0 )
    {
        // Streaks: express the offset in the basis aligned with the extrusion vector
        const float2 extrusion = In.Extrusion.xy;
        const float2 tangent = float2( extrusion.y, -extrusion.x );
        const float2x2 toStreakSpace = float2x2( tangent, extrusion );

        const float2 fromCentre = mul( toStreakSpace, In.ViewPos.xy - centreVS.xy );

        uv = fromCentre / In.EllipsoidRadius;
    }
    else
#endif
    {
        uv = (In.ViewPos.xy - centreVS.xy ) / radius;
    }

    // Scale and bias
    uv = (1+uv)*0.5;

    const float pi = 3.1415926535897932384626433832795;

    // Generate a normal from the position on the quad
    const float3 n = normalize( float3( -cos( pi * uv.x ), -cos( pi * uv.y ), sin( pi * length( uv ) ) ) );

    // Mix the per-pixel lighting term equally with the emitter's lighting term
    const float emitterNdotL = In.VelocityXYEmitterNdotL.z;
    const float ndotl = lerp( saturate( dot( g_SunDirectionVS.xyz, n ) ), emitterNdotL, 0.5 );

    // Ambient lighting plus directional lighting
    const float3 lighting = g_AmbientColor.rgb + ndotl * g_SunColor.rgb;

    // Multiply lighting term in
    output.color.rgb *= lighting;

#if defined (REACTIVE)
    // Brightest lit channel, scaled by the texture/fade alpha
    output.reactiveMask = max( output.color.r, max( output.color.g, output.color.b ) ) * albedo.a;
#endif

    return output;
}
|
@ -0,0 +1,313 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
#include "ParticleStructs.h"
|
||||
#include "fp16util.h"
|
||||
#include "SimulationBindings.h"
|
||||
#include "ParticleHelpers.h"
|
||||
|
||||
|
||||
|
||||
// Extracts the emitter index stored in bits 16-23 of the packed emitter properties word
uint GetEmitterIndex( uint emitterProperties )
{
    const uint emitterIndexMask = 0x00ff0000;

    return ( emitterProperties & emitterIndexMask ) >> 16;
}
|
||||
|
||||
|
||||
// Tests bit 31 of the packed emitter properties word, which marks a sleeping particle
bool IsSleeping( uint emitterProperties )
{
    const uint sleepingBit = 0x80000000;

    return ( emitterProperties & sleepingBit ) != 0;
}
|
||||
|
||||
|
||||
// Returns the given properties word with the sleeping flag (bit 31) set.
// All other bits are passed through unchanged.
uint SetIsSleepingBit( uint properties )
{
    // Use an unsigned literal: shifting a signed 1 into bit 31 lands on the sign bit
    return properties | ( 1u << 31 );
}
|
||||
|
||||
|
||||
// Computes the streak radii of a particle from its radius and view space speed.
// x is the unmodified radius; y is the radius stretched by 10% of the speed,
// and never less than the radius itself.
float2 calcEllipsoidRadius( float radius, float viewSpaceVelocitySpeed )
{
    const float stretch = max( 1.0, 0.1*viewSpaceVelocitySpeed );

    return float2( radius, radius * stretch );
}
|
||||
|
||||
|
||||
// Reconstructs the view space position of the depth buffer sample found at a
// screen position plus a texel offset.
//
// normalizedScreenPosition: screen position in the [-1, 1] range.
// texelOffset:              offset added to that position before the depth fetch.
float3 calcViewSpacePositionFromDepth( float2 normalizedScreenPosition, int2 texelOffset )
{
    const float screenWidth = (float)g_ScreenWidth;
    const float screenHeight = (float)g_ScreenHeight;

    // Add the texel offset to the normalized screen position
    // NOTE(review): the position spans 2 units across the screen, so an offset of
    // 1 / screen size looks like half a texel rather than a full one - confirm intent
    normalizedScreenPosition.x += (float)texelOffset.x / screenWidth;
    normalizedScreenPosition.y += (float)texelOffset.y / screenHeight;

    // Scale, bias and convert to texel range (Y is flipped)
    float2 texel;
    texel.x = (0.5 + normalizedScreenPosition.x * 0.5) * screenWidth;
    texel.y = (1-(0.5 + normalizedScreenPosition.y * 0.5)) * screenHeight;

    // Fetch the depth value at this point
    const float depth = g_DepthBuffer.Load( uint3( texel.x, texel.y, 0 ) ).x;

    // Transform the point at that depth into view space using the inverse
    // projection matrix, followed by the divide by W
    const float4 unprojected = mul( g_mProjectionInv, float4( normalizedScreenPosition.xy, depth, 1 ) );

    return unprojected.xyz / unprojected.w;
}
|
||||
|
||||
|
||||
// Simulate 256 particles per thread group, one thread per particle.
//
// For every live particle this ages it, integrates gravity and wind, optionally
// collides it against the depth buffer, updates the packed rendering data and
// finally appends it to either the dead list or the alive list.
//
// id: SV_DispatchThreadID; id.x is the index of the particle handled by this thread.
[numthreads(256,1,1)]
void CS_Simulate( uint3 id : SV_DispatchThreadID )
{
    // Initialize the draw args and alive count using the first thread in the Dispatch call
    if ( id.x == 0 )
    {
        g_DrawArgs[ 0 ].IndexCountPerInstance = 0; // Number of primitives reset to zero
        g_DrawArgs[ 0 ].InstanceCount = 1;
        g_DrawArgs[ 0 ].StartIndexLocation = 0;
        g_DrawArgs[ 0 ].BaseVertexLocation = 0;
        g_DrawArgs[ 0 ].StartInstanceLocation = 0;

        g_AliveParticleCount[ 0 ] = 0;
    }

    // Wait after draw args are written so no other threads can write to them before they are initialized
    // NOTE(review): this barrier only covers the threads of one group; it presumably does not
    // order the reset against atomics issued by other thread groups - confirm
    GroupMemoryBarrierWithGroupSync();

    const float3 vGravity = float3( 0.0, -9.81, 0.0 );

    // Fetch the particle from the global buffer
    GPUParticlePartA pa = g_ParticleBufferA[ id.x ];
    GPUParticlePartB pb = g_ParticleBufferB[ id.x ];

    // If the particle is alive
    if ( pb.m_Age > 0.0f )
    {
        // Extract the individual emitter properties from the particle
        uint emitterProperties = pa.m_StreakLengthAndEmitterProperties;
        uint emitterIndex = GetEmitterIndex( emitterProperties );
        bool streaks = IsStreakEmitter( emitterProperties );
        float4 velocityXYEmitterNDotLAndRotation;

        // Age the particle by counting down from Lifespan to zero
        pb.m_Age -= g_FrameTime;

        // Update the rotation
        pa.m_Rotation += 0.24 * g_FrameTime;

        float3 vNewPosition = pb.m_Position;

        // Sleeping particles keep their position and velocity
        if ( !IsSleeping( emitterProperties ) )
        {
            // Apply force due to gravity
            pb.m_Velocity += pb.m_Mass * vGravity * g_FrameTime;

            // Apply a little bit of a wind force
            float3 windDir = float3( 1, 1, 0 );
            float windStrength = 0.1;

            pb.m_Velocity += normalize( windDir ) * windStrength * g_FrameTime;

            // Calculate the new position of the particle
            vNewPosition += pb.m_Velocity * g_FrameTime;
        }

        // Calculate the normalized age (0 at spawn, 1 at end of life)
        float fScaledLife = 1.0 - saturate( pb.m_Age / pb.m_Lifespan );

        // Calculate the size of the particle based on age
        float radius = lerp( pb.m_StartSize, pb.m_EndSize, fScaledLife );

        // By default, we are not going to kill the particle
        bool killParticle = false;

        if ( g_CollideParticles && g_FrameTime > 0.0 )
        {
            // Transform new position into view space
            float3 viewSpaceParticlePosition = mul( g_mView, float4( vNewPosition, 1 ) ).xyz;

            // Also obtain screen space position
            float4 screenSpaceParticlePosition = mul( g_mViewProjection, float4( vNewPosition, 1 ) );
            screenSpaceParticlePosition.xyz /= screenSpaceParticlePosition.w;

            // Only do depth buffer collisions if the particle is onscreen, otherwise assume no collisions
            if ( !IsSleeping( emitterProperties ) && screenSpaceParticlePosition.x > -1 && screenSpaceParticlePosition.x < 1 && screenSpaceParticlePosition.y > -1 && screenSpaceParticlePosition.y < 1 )
            {
                // Get the view space position of the depth buffer
                float3 viewSpacePosOfDepthBuffer = calcViewSpacePositionFromDepth( screenSpaceParticlePosition.xy, int2( 0, 0 ) );

                // If the particle view space position is behind the depth buffer, but not by more than the collision thickness, then a collision has occurred
                if ( ( viewSpaceParticlePosition.z < viewSpacePosOfDepthBuffer.z ) && ( viewSpaceParticlePosition.z > viewSpacePosOfDepthBuffer.z - g_CollisionThickness ) )
                {
                    // Generate the surface normal. Ideally, we would use the normals from the G-buffer as this would be more reliable than deriving them

                    // Take three points on the depth buffer
                    float3 p0 = viewSpacePosOfDepthBuffer;
                    float3 p1 = calcViewSpacePositionFromDepth( screenSpaceParticlePosition.xy, int2( 1, 0 ) );
                    float3 p2 = calcViewSpacePositionFromDepth( screenSpaceParticlePosition.xy, int2( 0, 1 ) );

                    // Generate the view space normal from the two vectors
                    float3 viewSpaceNormal = normalize( cross( p2 - p0, p1 - p0 ) );

                    // Transform into world space using the inverse view matrix
                    float3 surfaceNormal = normalize( mul( g_mViewInv, -viewSpaceNormal ).xyz );

                    // The velocity is reflected in the collision plane
                    float3 newVelocity = reflect( pb.m_Velocity, surfaceNormal );

                    // Update the velocity and apply some restitution
                    pb.m_Velocity = 0.3*newVelocity;

                    // Update the new collided position
                    vNewPosition = pb.m_Position + (pb.m_Velocity * g_FrameTime);

                    pa.m_CollisionCount++;
                }
            }
        }

        // Put particle to sleep if the velocity is small
        if ( g_EnableSleepState && pa.m_CollisionCount > 10 && length( pb.m_Velocity ) < 0.01 )
        {
            // Set the flag on the local properties word: the packed field is rebuilt from
            // this word further down, so setting the bit on pa alone would be overwritten
            emitterProperties = SetIsSleepingBit( emitterProperties );
        }

        // If the position is below the floor, let's kill it now rather than wait for it to retire
        if ( vNewPosition.y < -10 )
        {
            killParticle = true;
        }

        // Write the new position
        pb.m_Position = vNewPosition;

        // Calculate the distance to the eye for sorting in the rasterization path
        float3 vec = vNewPosition - g_EyePosition.xyz;
        pb.m_DistanceToEye = length( vec );

        // Lerp the color based on the age
        float4 color0 = g_StartColor[ emitterIndex ];
        float4 color1 = g_EndColor[ emitterIndex ];

        float4 tintAndAlpha = lerp( color0, color1, saturate(4*fScaledLife) );
        tintAndAlpha.a = pb.m_Age <= 0 ? 0 : tintAndAlpha.a;

        // Debug view: sleeping particles are tinted magenta
        if ( g_ShowSleepingParticles && IsSleeping( emitterProperties ) )
        {
            tintAndAlpha.rgb = float3( 1, 0, 1 );
        }

        pa.m_PackedTintAndAlpha = PackFloat16( (min16float4)tintAndAlpha );

        // The emitter-based lighting models the emitter as a vertical cylinder
        float2 emitterNormal = normalize( vNewPosition.xz - g_EmitterLightingCenter[ emitterIndex ].xz );

        // Generate the lighting term for the emitter
        float emitterNdotL = saturate( dot( g_SunDirection.xz, emitterNormal ) + 0.5 );

        // Transform the velocity into view space
        float2 vsVelocity = mul( g_mView, float4( pb.m_Velocity.xyz, 0 ) ).xy;
        float viewSpaceSpeed = 10 * length( vsVelocity );

        // Streak radii, computed once and shared by the packing and the max radius below
        float2 ellipsoidRadius = calcEllipsoidRadius( radius, viewSpaceSpeed );
        float streakLength = ellipsoidRadius.y;

        // Low 16 bits: fp16 streak length. High 16 bits: emitter properties (including the sleeping flag)
        pa.m_StreakLengthAndEmitterProperties = PackFloat16( min16float2( streakLength, 0 ) );
        pa.m_StreakLengthAndEmitterProperties |= (0xffff0000 & emitterProperties);

        velocityXYEmitterNDotLAndRotation.xy = normalize( vsVelocity );
        velocityXYEmitterNDotLAndRotation.z = emitterNdotL;
        velocityXYEmitterNDotLAndRotation.w = pa.m_Rotation;

        pa.m_PackedVelocityXYEmitterNDotLAndRotation = PackFloat16( (min16float4)velocityXYEmitterNDotLAndRotation );

        // Pack the view space position and radius into a float4 buffer
        float4 viewSpacePositionAndRadius;

        viewSpacePositionAndRadius.xyz = mul( g_mView, float4( vNewPosition, 1 ) ).xyz;
        viewSpacePositionAndRadius.w = radius;

        g_PackedViewSpacePositions[ id.x ] = PackFloat16( (min16float4)viewSpacePositionAndRadius );

        // For streaked particles (the sparks), calculate the max radius in XY and store in a buffer
        if ( streaks )
        {
            g_MaxRadiusBuffer[ id.x ] = max( ellipsoidRadius.x, ellipsoidRadius.y );
        }
        else
        {
            // Not a streaked particle so will have rotation. When rotating, the particle has a max radius of the centre to the corner = sqrt( r^2 + r^2 )
            g_MaxRadiusBuffer[ id.x ] = 1.41 * radius;
        }

        // Dead particles are added to the dead list for recycling
        if ( pb.m_Age <= 0.0f || killParticle )
        {
            pb.m_Age = -1;

            // Element 0 of the dead list is its counter; entries start at element 1
            uint dstIdx = 0;
            InterlockedAdd( g_DeadList[ 0 ], 1, dstIdx );
            g_DeadList[ dstIdx + 1 ] = id.x;
        }
        else
        {
            // Alive particles are added to the alive list
            int index = 0;
            InterlockedAdd( g_AliveParticleCount[ 0 ], 1, index );
            g_IndexBuffer[ index ] = id.x;
            g_DistanceBuffer[ index ] = pb.m_DistanceToEye;

            uint dstIdx = 0;
            // 6 indices per particle billboard
            InterlockedAdd( g_DrawArgs[ 0 ].IndexCountPerInstance, 6, dstIdx );
        }

        // Write the particle data back to the global particle buffer
        g_ParticleBufferA[ id.x ] = pa;
        g_ParticleBufferB[ id.x ] = pb;
    }
}
|
||||
|
||||
|
||||
// Reset 256 particles per thread group, one thread per particle.
// Clears the particle's data and puts its index on the dead list UAV.
//
// id, globalIdx: both are bound to SV_DispatchThreadID.
[numthreads(256,1,1)]
void CS_Reset( uint3 id : SV_DispatchThreadID, uint3 globalIdx : SV_DispatchThreadID )
{
    const uint slot = globalIdx.x;

    // Zero out both halves of the particle
    g_ParticleBufferA[ id.x ] = (GPUParticlePartA)0;
    g_ParticleBufferB[ id.x ] = (GPUParticlePartB)0;

    // The first thread sets the dead list counter, which lives in element 0
    if ( slot == 0 )
    {
        g_DeadList[ 0 ] = g_MaxParticles;
    }

    // Dead list entries start at element 1
    g_DeadList[ slot + 1 ] = slot;
}
|
@ -0,0 +1,54 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
|
||||
// Particle structures
|
||||
// ===================
|
||||
|
||||
// Rendering-related half of a particle
struct GPUParticlePartA
{
    uint2 m_PackedTintAndAlpha;                         // The color and opacity, packed as fp16
    uint2 m_PackedVelocityXYEmitterNDotLAndRotation;    // Packed fp16: normalized view space velocity XY used for streak extrusion,
                                                        // the lighting term for the whole emitter in Z, the rotation angle in W

    uint  m_StreakLengthAndEmitterProperties;           // Bits  0-15: fp16 streak length
                                                        // Bits 16-23: the index of the emitter
                                                        // Bits 24-29: atlas index
                                                        // Bit     30: whether or not the emitter supports velocity-based streaks
                                                        // Bit     31: whether or not the particle is sleeping (ie, don't update position)
    float m_Rotation;                                   // Uncompressed rotation - some issues with using fp16 rotation (also, saves unpacking it)
    uint  m_CollisionCount;                             // How many times the particle has collided
    uint  m_pad;                                        // Padding
};
|
||||
|
||||
// Simulation-related half of a particle
struct GPUParticlePartB
{
    float3 m_Position;          // World space position
    float  m_Mass;              // Mass of particle

    float3 m_Velocity;          // World space velocity
    float  m_Lifespan;          // Lifespan of the particle

    float  m_DistanceToEye;     // The distance from the particle to the eye
    float  m_Age;               // The current age, counting down from lifespan to zero
    float  m_StartSize;         // The size at spawn time
    float  m_EndSize;           // The size at maximum age
};
|
@ -0,0 +1,109 @@
|
||||
//
|
||||
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
#include "Globals.h"
|
||||
|
||||
|
||||
// Per-vertex input of the scene vertex shader
struct VS_RenderSceneInput
{
    float3 f3Position   : POSITION;     // Vertex position
    float3 f3Normal     : NORMAL;       // Vertex normal
    float2 f2TexCoord   : TEXCOORD0;    // Texture coordinates
    float3 f3Tangent    : TANGENT;      // Vertex tangent
};
|
||||
|
||||
// Data interpolated from the scene vertex shader to the scene pixel shaders
struct PS_RenderSceneInput
{
    float4 f4Position   : SV_Position;  // Projected position
    float2 f2TexCoord   : TEXCOORD0;    // Texture coordinates
    float3 f3Normal     : NORMAL;       // Normalized vertex normal
    float3 f3Tangent    : TANGENT;      // Normalized vertex tangent
    float3 f3WorldPos   : TEXCOORD2;    // Position used for the view direction in the specular term
};
|
||||
|
||||
// Material textures sampled by the scene pixel shaders
Texture2D g_txDiffuse : register( t2 );   // Diffuse color; alpha is used as the specular mask
Texture2D g_txNormal  : register( t3 );   // Normal map
|
||||
|
||||
//=================================================================================================================================
// Scene vertex shader: projects the vertex and forwards the attributes needed for lighting
//=================================================================================================================================
PS_RenderSceneInput VS_RenderScene( VS_RenderSceneInput I )
{
    PS_RenderSceneInput result;

    // Pass through tex coords
    result.f2TexCoord = I.f2TexCoord;

    // Re-normalize the basis vectors before interpolation
    result.f3Normal = normalize( I.f3Normal );
    result.f3Tangent = normalize( I.f3Tangent );

    // The input position is forwarded unchanged as the world position
    result.f3WorldPos = I.f3Position;

    // Transform the position into homogeneous projection space
    result.f4Position = mul( float4( I.f3Position, 1.0f ), g_mViewProjection );

    return result;
}
|
||||
|
||||
|
||||
//=================================================================================================================================
// Scene pixel shader: normal-mapped diffuse lighting from the sun plus ambient,
// with a specular highlight masked by the diffuse texture's alpha channel
//=================================================================================================================================
float4 PS_RenderScene( PS_RenderSceneInput I ) : SV_Target0
{
    const float4 albedo = g_txDiffuse.Sample( g_samWrapLinear, I.f2TexCoord );
    const float specularMask = albedo.a;

    // Expand the normal map sample from [0, 1] to [-1, 1]
    float3 sampledNormal = g_txNormal.Sample( g_samWrapLinear, I.f2TexCoord ).xyz;
    sampledNormal *= 2.0f;
    sampledNormal -= float3( 1.0f, 1.0f, 1.0f );

    // Rotate it by the basis built from the interpolated normal and tangent
    const float3 binormal = normalize( cross( I.f3Normal, I.f3Tangent ) );
    const float3x3 basis = float3x3( binormal, I.f3Tangent, I.f3Normal );
    const float3 normal = normalize( mul( sampledNormal, basis ) );

    // Diffuse lighting
    float4 lighting = saturate( dot( normal, g_SunDirection.xyz ) ) * g_SunColor;
    lighting += g_AmbientColor;

    // Specular term from the half vector between the view and sun directions
    const float3 viewDir = normalize( g_EyePosition.xyz - I.f3WorldPos );
    const float3 halfAngle = normalize( viewDir + g_SunDirection.xyz );
    const float4 specular = pow( saturate( dot( halfAngle, normal ) ), 32 ) * g_SunColor;

    return lighting * albedo + ( specular * specularMask );
}
|
||||
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
// PS for the sky: unlit, opaque, textured
//--------------------------------------------------------------------------------------
float4 PS_Sky( PS_RenderSceneInput I ) : SV_Target
{
    // Plain texture lookup; alpha is forced to 1
    const float3 skyColor = g_txDiffuse.Sample( g_samWrapLinear, I.f2TexCoord ).xyz;

    return float4( skyColor, 1.0f );
}
|
@ -0,0 +1,26 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
// This file is shared between the HLSL and C++ code for convenience
|
||||
|
||||
// Maximum number of emitters supported
|
||||
#define NUM_EMITTERS 4
|
@ -0,0 +1,121 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
|
||||
#include "ShaderConstants.h"
|
||||
|
||||
|
||||
// The particle buffers to fill with new particles
|
||||
[[vk::binding( 0, 0 )]] RWStructuredBuffer<GPUParticlePartA> g_ParticleBufferA : register( u0 );
|
||||
[[vk::binding( 1, 0 )]] RWStructuredBuffer<GPUParticlePartB> g_ParticleBufferB : register( u1 );
|
||||
|
||||
// The dead list, so any particles that are retired this frame can be added to this list. The first element is the number of dead particles
|
||||
[[vk::binding( 2, 0 )]] RWStructuredBuffer<int> g_DeadList : register( u2 );
|
||||
|
||||
// The alive list which gets built in the simulation. The distances also get written out
|
||||
[[vk::binding( 3, 0 )]] RWStructuredBuffer<int> g_IndexBuffer : register( u3 );
|
||||
[[vk::binding( 4, 0 )]] RWStructuredBuffer<float> g_DistanceBuffer : register( u4 );
|
||||
|
||||
// The maximum radius in XY is calculated here and stored
|
||||
[[vk::binding( 5, 0 )]] RWStructuredBuffer<float> g_MaxRadiusBuffer : register( u5 );
|
||||
|
||||
// Viewspace particle positions are calculated here and stored
|
||||
[[vk::binding( 6, 0 )]] RWStructuredBuffer<uint2> g_PackedViewSpacePositions : register( u6 );
|
||||
|
||||
// The draw args for the ExecuteIndirect call need to be filled in before the rasterization path is called, so do it here
|
||||
// One indirect draw record. On DX12 the record starts with a 64-bit value (two uints);
// the remaining five fields are the indexed-draw arguments.
struct IndirectCommand
{
#ifdef API_DX12
    uint2 uav;                      // DX12 only: leading 64-bit value
#endif
    uint  IndexCountPerInstance;
    uint  InstanceCount;
    uint  StartIndexLocation;
    int   BaseVertexLocation;
    uint  StartInstanceLocation;
};
|
||||
[[vk::binding( 7, 0 )]] RWStructuredBuffer<IndirectCommand> g_DrawArgs : register( u7 );
|
||||
|
||||
[[vk::binding( 8, 0 )]] RWStructuredBuffer<uint> g_AliveParticleCount : register( u8 );
|
||||
|
||||
// The opaque scene's depth buffer read as a texture
|
||||
[[vk::binding( 9, 0 )]] Texture2D g_DepthBuffer : register( t0 );
|
||||
|
||||
// A texture filled with random values for generating some variance in our particles when we spawn them
|
||||
[[vk::binding( 10, 0 )]] Texture2D g_RandomBuffer : register( t1 );
|
||||
|
||||
|
||||
// Per-frame constant buffer
|
||||
// Simulation constants (b0). Member order and sizes must stay in step with the
// CPU-side structure that is copied into this buffer.
[[vk::binding( 11, 0 )]] cbuffer SimulationConstantBuffer : register( b0 )
{
    // Per-emitter colors and lighting centers
    float4  g_StartColor[ NUM_EMITTERS ];
    float4  g_EndColor[ NUM_EMITTERS ];
    float4  g_EmitterLightingCenter[ NUM_EMITTERS ];

    // Camera transforms
    matrix  g_mViewProjection;
    matrix  g_mView;
    matrix  g_mViewInv;
    matrix  g_mProjectionInv;

    float4  g_EyePosition;
    float4  g_SunDirection;

    uint    g_ScreenWidth;
    uint    g_ScreenHeight;
    float   g_ElapsedTime;
    float   g_CollisionThickness;

    int     g_CollideParticles;
    int     g_ShowSleepingParticles;
    int     g_EnableSleepState;
    float   g_FrameTime;

    int     g_MaxParticles;
    uint    g_Pad0;     // padding
    uint    g_Pad1;     // padding
    uint    g_Pad2;     // padding
};
|
||||
|
||||
// Per-emitter constants (b1). Member order and sizes must stay in step with the
// CPU-side structure that is copied into this buffer.
[[vk::binding( 12, 0 )]] cbuffer EmitterConstantBuffer : register( b1 )
{
    float4  g_vEmitterPosition;
    float4  g_vEmitterVelocity;
    float4  g_PositionVariance;

    int     g_MaxParticlesThisFrame;
    float   g_ParticleLifeSpan;
    float   g_StartSize;
    float   g_EndSize;

    float   g_VelocityVariance;
    float   g_Mass;
    uint    g_EmitterIndex;
    uint    g_EmitterStreaks;

    uint    g_TextureIndex;
    uint    g_pads0;    // padding
    uint    g_pads1;    // padding
    uint    g_pads2;    // padding
};
|
||||
|
||||
[[vk::binding( 13, 0 )]] SamplerState g_samWrapPoint : register( s0 );
|
@ -0,0 +1,169 @@
|
||||
// HLSL intrinsics
|
||||
// cross
|
||||
// Cross product of two min-precision 3-vectors, written out per component.
min16float3 RTGCross(min16float3 a, min16float3 b)
{
    min16float3 result;
    result.x = a.y * b.z - a.z * b.y;
    result.y = a.z * b.x - a.x * b.z;
    result.z = a.x * b.y - a.y * b.x;
    return result;
}
|
||||
|
||||
// dot
|
||||
// Dot product of two min-precision 2-vectors.
min16float RTGDot2(min16float2 a, min16float2 b)
{
    min16float sum = a.x * b.x;
    sum += a.y * b.y;
    return sum;
}
|
||||
|
||||
// Dot product of two min-precision 3-vectors (terms accumulated in x, y, z order).
min16float RTGDot3(min16float3 a, min16float3 b)
{
    min16float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}
|
||||
|
||||
// Dot product of two min-precision 4-vectors (terms accumulated in x, y, z, w order).
min16float RTGDot4(min16float4 a, min16float4 b)
{
    min16float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    sum += a.w * b.w;
    return sum;
}
|
||||
|
||||
// length
|
||||
// Euclidean length of a min-precision 2-vector.
min16float RTGLength2(min16float2 a)
{
    const min16float lengthSq = RTGDot2(a, a);
    return sqrt(lengthSq);
}
|
||||
|
||||
// Euclidean length of a min-precision 3-vector.
min16float RTGLength3(min16float3 a)
{
    const min16float lengthSq = RTGDot3(a, a);
    return sqrt(lengthSq);
}
|
||||
|
||||
// Euclidean length of a min-precision 4-vector.
min16float RTGLength4(min16float4 a)
{
    const min16float lengthSq = RTGDot4(a, a);
    return sqrt(lengthSq);
}
|
||||
|
||||
// normalize
|
||||
// Returns a / |a|; a zero-length input is returned unchanged instead of dividing by zero.
min16float2 RTGNormalize2(min16float2 a)
{
    const min16float len = RTGLength2(a);
    if (len == 0.0)
    {
        return a;
    }
    return a / len;
}
|
||||
|
||||
// Returns a / |a|; a zero-length input is returned unchanged instead of dividing by zero.
min16float3 RTGNormalize3(min16float3 a)
{
    const min16float len = RTGLength3(a);
    if (len == 0.0)
    {
        return a;
    }
    return a / len;
}
|
||||
|
||||
// Returns a / |a|; a zero-length input is returned unchanged instead of dividing by zero.
min16float4 RTGNormalize4(min16float4 a)
{
    const min16float len = RTGLength4(a);
    if (len == 0.0)
    {
        return a;
    }
    return a / len;
}
|
||||
|
||||
|
||||
// distance
|
||||
// Distance between two min-precision 2D points.
min16float RTGDistance2(min16float2 from, min16float2 to)
{
    const min16float2 delta = to - from;
    return RTGLength2(delta);
}
|
||||
|
||||
// Distance between two min-precision 3D points.
min16float RTGDistance3(min16float3 from, min16float3 to)
{
    const min16float3 delta = to - from;
    return RTGLength3(delta);
}
|
||||
|
||||
// Distance between two min-precision 4D points.
min16float RTGDistance4(min16float4 from, min16float4 to)
{
    const min16float4 delta = to - from;
    return RTGLength4(delta);
}
|
||||
|
||||
|
||||
// Packing and Unpacking
|
||||
// min16{u}int2
|
||||
// Packs two signed 16-bit values into one 32-bit int: x in bits 0-15, y in bits 16-31.
int PackInt16(min16int2 v)
{
    // Keep only the low 16 bits of x. A negative x sign-extends to 0xFFFFxxxx when
    // widened to 32 bits; without the mask those upper bits would be OR'd over y.
    uint lo = asuint(int(v.x)) & 0xFFFF;
    uint hi = asuint(int(v.y)) << 16;
    return asint(lo | hi);
}
|
||||
|
||||
// Packs two unsigned 16-bit values into one uint: x in bits 0-15, y in bits 16-31.
uint PackInt16(min16uint2 v)
{
    const uint hi = (uint)(v.y) << 16;
    return uint(v.x | hi);
}
|
||||
|
||||
// Splits a 32-bit int into two signed 16-bit values: x from bits 0-15, y from bits 16-31.
min16int2 UnpackInt16(int v)
{
    // Arithmetic right shifts sign-extend each half, so negative values round-trip
    // even when min16int is implemented at 32-bit precision.
    int lo = (v << 16) >> 16;
    int hi = v >> 16;
    return min16int2(lo, hi);
}
|
||||
|
||||
// Splits a uint into two unsigned 16-bit values: x from bits 0-15, y from bits 16-31.
min16uint2 UnpackInt16(uint v)
{
    const uint lo = v.x & 0xFFFF;
    const uint hi = v.x >> 16;
    return min16uint2(lo, hi);
}
|
||||
|
||||
// min16{u}int4
|
||||
// Packs four signed 16-bit values into an int2: (x, y) into .x and (z, w) into .y.
int2 PackInt16(min16int4 v)
{
    const int xy = PackInt16(v.xy);
    const int zw = PackInt16(v.zw);
    return int2(xy, zw);
}
|
||||
|
||||
// Packs four unsigned 16-bit values into a uint2: (x, y) into .x and (z, w) into .y.
uint2 PackInt16(min16uint4 v)
{
    const uint xy = PackInt16(v.xy);
    const uint zw = PackInt16(v.zw);
    return uint2(xy, zw);
}
|
||||
|
||||
// Splits an int2 into four signed 16-bit values: .x yields (x, y), .y yields (z, w).
min16int4 UnpackInt16(int2 v)
{
    const min16int2 xy = UnpackInt16(v.x);
    const min16int2 zw = UnpackInt16(v.y);
    return min16int4(xy, zw);
}
|
||||
|
||||
// Splits a uint2 into four unsigned 16-bit values: .x yields (x, y), .y yields (z, w).
min16uint4 UnpackInt16(uint2 v)
{
    const min16uint2 xy = UnpackInt16(v.x);
    const min16uint2 zw = UnpackInt16(v.y);
    return min16uint4(xy, zw);
}
|
||||
|
||||
// Converts one value to its half-float bit pattern via f32tof16 and returns it as a uint.
uint PackFloat16( min16float v )
{
    const uint halfBits = f32tof16( v );
    return halfBits;
}
|
||||
|
||||
// min16float2
|
||||
// Packs two values as half floats into one uint: x in bits 0-15, y in bits 16-31.
uint PackFloat16(min16float2 v)
{
    const uint2 halfBits = f32tof16(float2(v));
    const uint hi = halfBits.y << 16;
    return halfBits.x | hi;
}
|
||||
|
||||
// Expands two half floats stored in one uint (x in bits 0-15, y in bits 16-31).
min16float2 UnpackFloat16(uint a)
{
    const uint2 halfBits = uint2(a & 0xFFFF, a >> 16);
    const float2 expanded = f16tof32(halfBits);
    return min16float2(expanded);
}
|
||||
|
||||
|
||||
// min16float4
|
||||
// Packs four values as half floats into a uint2: (x, y) into .x and (z, w) into .y.
uint2 PackFloat16(min16float4 v)
{
    const uint xy = PackFloat16(v.xy);
    const uint zw = PackFloat16(v.zw);
    return uint2(xy, zw);
}
|
||||
|
||||
// Expands four half floats stored in a uint2: .x yields (x, y), .y yields (z, w).
min16float4 UnpackFloat16(uint2 v)
{
    const min16float2 xy = UnpackFloat16(v.x);
    const min16float2 zw = UnpackFloat16(v.y);
    return min16float4(xy, zw);
}
|
@ -0,0 +1,36 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
// Returns a rand()-driven value in [median - variance, median + variance].
inline float RandomVariance( float median, float variance )
{
    // rand() scaled into [0, 1]
    const float unit = static_cast<float>( rand() ) / static_cast<float>( RAND_MAX );
    const float offset = variance * unit;
    return median - variance + (2.0f * offset);
}
|
||||
|
||||
// Returns a rand()-driven value in [lowest, highest].
inline float RandomFromAndTo( float lowest, float highest )
{
    // rand() scaled into [0, 1]
    const float unit = static_cast<float>( rand() ) / static_cast<float>( RAND_MAX );
    const float span = (highest - lowest) * unit;
    return lowest + span;
}
|
@ -0,0 +1,93 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
// Implementation-agnostic particle system interface
|
||||
struct IParticleSystem
|
||||
{
|
||||
enum Flags
|
||||
{
|
||||
PF_Sort = 1 << 0, // Sort the particles
|
||||
PF_DepthCull = 1 << 1, // Do per-tile depth buffer culling
|
||||
PF_Streaks = 1 << 2, // Streak the particles based on velocity
|
||||
PF_Reactive = 1 << 3 // Particles also write to the reactive mask
|
||||
};
|
||||
|
||||
// Per-emitter parameters
|
||||
struct EmitterParams
|
||||
{
|
||||
math::Vector4 m_Position = {}; // World position of the emitter
|
||||
math::Vector4 m_Velocity = {}; // Velocity of each particle from the emitter
|
||||
math::Vector4 m_PositionVariance = {}; // Variance in position of each particle
|
||||
int m_NumToEmit = 0; // Number of particles to emit this frame
|
||||
float m_ParticleLifeSpan = 0.0f; // How long the particles should live
|
||||
float m_StartSize = 0.0f; // Size of particles at spawn time
|
||||
float m_EndSize = 0.0f; // Size of particle when they reach retirement age
|
||||
float m_Mass = 0.0f; // Mass of particle
|
||||
float m_VelocityVariance = 0.0f; // Variance in velocity of each particle
|
||||
int m_TextureIndex = 0; // Index of the texture in the atlas
|
||||
bool m_Streaks = false; // Streak the particles in the direction of travel
|
||||
};
|
||||
|
||||
struct ConstantData
|
||||
{
|
||||
math::Matrix4 m_ViewProjection = {};
|
||||
math::Matrix4 m_View = {};
|
||||
math::Matrix4 m_ViewInv = {};
|
||||
math::Matrix4 m_Projection = {};
|
||||
math::Matrix4 m_ProjectionInv = {};
|
||||
|
||||
math::Vector4 m_StartColor[ 10 ] = {};
|
||||
math::Vector4 m_EndColor[ 10 ] = {};
|
||||
math::Vector4 m_EmitterLightingCenter[ 10 ] = {};
|
||||
|
||||
math::Vector4 m_SunDirection = {};
|
||||
math::Vector4 m_SunColor = {};
|
||||
math::Vector4 m_AmbientColor = {};
|
||||
|
||||
float m_FrameTime = 0.0f;
|
||||
};
|
||||
|
||||
// Create a GPU particle system. Add more factory functions to create other types of system eg CPU-updated system
|
||||
static IParticleSystem* CreateGPUSystem( const char* particleAtlas );
|
||||
|
||||
virtual ~IParticleSystem() {}
|
||||
|
||||
#ifdef API_DX12
|
||||
virtual void Render( ID3D12GraphicsCommandList* pCommandList, DynamicBufferRing& constantBufferRing, int flags, const EmitterParams* pEmitters, int nNumEmitters, const ConstantData& constantData ) = 0;
|
||||
virtual void OnCreateDevice( Device &device, UploadHeap& uploadHeap, ResourceViewHeaps& heaps, StaticBufferPool& bufferPool, DynamicBufferRing& constantBufferRing ) = 0;
|
||||
virtual void OnResizedSwapChain( int width, int height, Texture& depthBuffer ) = 0;
|
||||
#endif
|
||||
#ifdef API_VULKAN
|
||||
virtual void Render( VkCommandBuffer commandBuffer, DynamicBufferRing& constantBufferRing, int contextFlags, const EmitterParams* pEmitters, int nNumEmitters, const ConstantData& constantData ) = 0;
|
||||
virtual void OnCreateDevice( Device &device, UploadHeap& uploadHeap, ResourceViewHeaps& heaps, StaticBufferPool& bufferPool, DynamicBufferRing& constantBufferRing, VkRenderPass renderPass ) = 0;
|
||||
virtual void OnResizedSwapChain( int width, int height, Texture& depthBuffer, VkFramebuffer frameBuffer ) = 0;
|
||||
#endif
|
||||
|
||||
virtual void OnReleasingSwapChain() = 0;
|
||||
virtual void OnDestroyDevice() = 0;
|
||||
|
||||
// Completely resets the state of all particles. Handy for changing scenes etc
|
||||
virtual void Reset() = 0;
|
||||
};
|
@ -0,0 +1,154 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "../GpuParticleShaders/ShaderConstants.h"
|
||||
#include "ParticleSystem.h"
|
||||
|
||||
|
||||
// Helper function to align values
|
||||
// Rounds value up to the next multiple of alignment. The mask arithmetic is only
// correct when alignment is a power of two.
// Declared inline because the function is defined in a header: without it, including
// the header from more than one translation unit produces duplicate-symbol link errors.
inline int align( int value, int alignment )
{
    return ( value + (alignment - 1) ) & ~(alignment - 1);
}
|
||||
|
||||
|
||||
// GPUParticle structure is split into two sections for better cache efficiency - could even be SOA but would require creating more vertex buffers.
|
||||
struct GPUParticlePartA
|
||||
{
|
||||
math::Vector4 m_params[ 2 ];
|
||||
};
|
||||
|
||||
struct GPUParticlePartB
|
||||
{
|
||||
math::Vector4 m_params[ 3 ];
|
||||
};
|
||||
|
||||
|
||||
struct SimulationConstantBuffer
|
||||
{
|
||||
math::Vector4 m_StartColor[ NUM_EMITTERS ] = {};
|
||||
math::Vector4 m_EndColor[ NUM_EMITTERS ] = {};
|
||||
|
||||
math::Vector4 m_EmitterLightingCenter[ NUM_EMITTERS ] = {};
|
||||
|
||||
math::Matrix4 m_ViewProjection = {};
|
||||
math::Matrix4 m_View = {};
|
||||
math::Matrix4 m_ViewInv = {};
|
||||
math::Matrix4 m_ProjectionInv = {};
|
||||
|
||||
math::Vector4 m_EyePosition = {};
|
||||
math::Vector4 m_SunDirection = {};
|
||||
|
||||
UINT m_ScreenWidth = 0;
|
||||
UINT m_ScreenHeight = 0;
|
||||
float m_ElapsedTime = 0.0f;
|
||||
float m_CollisionThickness = 4.0f;
|
||||
|
||||
int m_CollideParticles = 0;
|
||||
int m_ShowSleepingParticles = 0;
|
||||
int m_EnableSleepState = 0;
|
||||
float m_FrameTime = 0.0f;
|
||||
|
||||
int m_MaxParticles = 0;
|
||||
UINT m_pad01 = 0;
|
||||
UINT m_pad02 = 0;
|
||||
UINT m_pad03 = 0;
|
||||
};
|
||||
|
||||
struct EmitterConstantBuffer
|
||||
{
|
||||
math::Vector4 m_EmitterPosition = {};
|
||||
math::Vector4 m_EmitterVelocity = {};
|
||||
math::Vector4 m_PositionVariance = {};
|
||||
|
||||
int m_MaxParticlesThisFrame = 0;
|
||||
float m_ParticleLifeSpan = 0.0f;
|
||||
float m_StartSize = 0.0f;
|
||||
float m_EndSize = 0.0f;
|
||||
|
||||
float m_VelocityVariance = 0.0f;
|
||||
float m_Mass = 0.0f;
|
||||
int m_Index = 0;
|
||||
int m_Streaks = 0;
|
||||
|
||||
int m_TextureIndex = 0;
|
||||
int m_pads[ 3 ] = {};
|
||||
};
|
||||
|
||||
|
||||
// The rasterization path constant buffer
|
||||
struct RenderingConstantBuffer
|
||||
{
|
||||
math::Matrix4 m_Projection = {};
|
||||
math::Matrix4 m_ProjectionInv = {};
|
||||
math::Vector4 m_SunColor = {};
|
||||
math::Vector4 m_AmbientColor = {};
|
||||
math::Vector4 m_SunDirectionVS = {};
|
||||
UINT m_ScreenWidth = 0;
|
||||
UINT m_ScreenHeight = 0;
|
||||
UINT m_pads[ 2 ] = {};
|
||||
};
|
||||
|
||||
struct CullingConstantBuffer
|
||||
{
|
||||
math::Matrix4 m_ProjectionInv = {};
|
||||
math::Matrix4 m_Projection = {};
|
||||
|
||||
UINT m_ScreenWidth = 0;
|
||||
UINT m_ScreenHeight = 0;
|
||||
UINT m_NumTilesX = 0;
|
||||
UINT m_NumCoarseCullingTilesX = 0;
|
||||
|
||||
UINT m_NumCullingTilesPerCoarseTileX = 0;
|
||||
UINT m_NumCullingTilesPerCoarseTileY = 0;
|
||||
UINT m_pad01 = 0;
|
||||
UINT m_pad02 = 0;
|
||||
};
|
||||
|
||||
struct TiledRenderingConstantBuffer
|
||||
{
|
||||
math::Matrix4 m_ProjectionInv = {};
|
||||
math::Vector4 m_SunColor = {};
|
||||
math::Vector4 m_AmbientColor = {};
|
||||
math::Vector4 m_SunDirectionVS = {};
|
||||
|
||||
UINT m_ScreenHeight = 0;
|
||||
float m_InvScreenWidth = 0.0f;
|
||||
float m_InvScreenHeight = 0.0f;
|
||||
float m_AlphaThreshold = 0.97f;
|
||||
|
||||
UINT m_NumTilesX = 0;
|
||||
UINT m_NumCoarseCullingTilesX = 0;
|
||||
UINT m_NumCullingTilesPerCoarseTileX = 0;
|
||||
UINT m_NumCullingTilesPerCoarseTileY = 0;
|
||||
|
||||
UINT m_AlignedScreenWidth = 0;
|
||||
UINT m_pads[ 3 ] = {};
|
||||
};
|
||||
|
||||
struct QuadConstantBuffer
|
||||
{
|
||||
UINT m_AlignedScreenWidth;
|
||||
UINT m_pads[ 3 ];
|
||||
};
|
||||
|
||||
// The maximum number of supported GPU particles
|
||||
static const int g_maxParticles = 400*1024;
|
@ -0,0 +1,745 @@
|
||||
//
|
||||
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
#include "../DX12/stdafx.h"
|
||||
#include "../ParticleSystem.h"
|
||||
#include "../ParticleSystemInternal.h"
|
||||
#include "../ParticleHelpers.h"
|
||||
#include "ParallelSort.h"
|
||||
|
||||
|
||||
const D3D12_RESOURCE_STATES SHADER_READ_STATE = D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER|D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE|D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
||||
|
||||
|
||||
#pragma warning( disable : 4100 ) // disable unreference formal parameter warnings for /W4 builds
|
||||
|
||||
struct IndirectCommand
|
||||
{
|
||||
D3D12_GPU_VIRTUAL_ADDRESS uav = {};
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS drawArguments = {};
|
||||
};
|
||||
|
||||
// GPU Particle System class. Responsible for updating and rendering the particles
|
||||
class GPUParticleSystem : public IParticleSystem
|
||||
{
|
||||
public:
|
||||
|
||||
GPUParticleSystem( const char* particleAtlas );
|
||||
|
||||
private:
|
||||
|
||||
enum DepthCullingMode
|
||||
{
|
||||
DepthCullingOn,
|
||||
DepthCullingOff,
|
||||
NumDepthCullingModes
|
||||
};
|
||||
|
||||
enum StreakMode
|
||||
{
|
||||
StreaksOn,
|
||||
StreaksOff,
|
||||
NumStreakModes
|
||||
};
|
||||
|
||||
enum ReactiveMode
|
||||
{
|
||||
ReactiveOn,
|
||||
ReactiveOff,
|
||||
NumReactiveModes
|
||||
};
|
||||
|
||||
virtual ~GPUParticleSystem();
|
||||
|
||||
virtual void OnCreateDevice( Device &device, UploadHeap& uploadHeap, ResourceViewHeaps& heaps, StaticBufferPool& bufferPool, DynamicBufferRing& constantBufferRing );
|
||||
virtual void OnResizedSwapChain( int width, int height, Texture& depthBuffer );
|
||||
virtual void OnReleasingSwapChain();
|
||||
virtual void OnDestroyDevice();
|
||||
|
||||
virtual void Reset();
|
||||
|
||||
virtual void Render( ID3D12GraphicsCommandList* pCommandList, DynamicBufferRing& constantBufferRing, int flags, const EmitterParams* pEmitters, int nNumEmitters, const ConstantData& constantData );
|
||||
|
||||
void Emit( ID3D12GraphicsCommandList* pCommandList, DynamicBufferRing& constantBufferRing, int numEmitters, const EmitterParams* emitters );
|
||||
void Simulate( ID3D12GraphicsCommandList* pCommandList );
|
||||
void Sort( ID3D12GraphicsCommandList* pCommandList );
|
||||
|
||||
void FillRandomTexture( UploadHeap& uploadHeap );
|
||||
|
||||
void CreateSimulationAssets();
|
||||
void CreateRasterizedRenderingAssets();
|
||||
|
||||
Device* m_pDevice = nullptr;
|
||||
ResourceViewHeaps* m_heaps = nullptr;
|
||||
const char* m_AtlasPath = nullptr;
|
||||
|
||||
Texture m_Atlas = {};
|
||||
Texture m_ParticleBufferA = {};
|
||||
Texture m_ParticleBufferB = {};
|
||||
Texture m_PackedViewSpaceParticlePositions = {};
|
||||
Texture m_MaxRadiusBuffer = {};
|
||||
Texture m_DeadListBuffer = {};
|
||||
Texture m_AliveIndexBuffer = {};
|
||||
Texture m_AliveDistanceBuffer = {};
|
||||
Texture m_AliveCountBuffer = {};
|
||||
Texture m_RenderingBuffer = {};
|
||||
Texture m_IndirectArgsBuffer = {};
|
||||
Texture m_RandomTexture = {};
|
||||
|
||||
const int m_SimulationUAVDescriptorTableCount = 9;
|
||||
CBV_SRV_UAV m_SimulationUAVDescriptorTable = {};
|
||||
|
||||
const int m_SimulationSRVDescriptorTableCount = 2;
|
||||
CBV_SRV_UAV m_SimulationSRVDescriptorTable = {};
|
||||
|
||||
const int m_RasterizationSRVDescriptorTableCount = 6;
|
||||
CBV_SRV_UAV m_RasterizationSRVDescriptorTable = {};
|
||||
|
||||
UINT m_ScreenWidth = 0;
|
||||
UINT m_ScreenHeight = 0;
|
||||
float m_InvScreenWidth = 0.0f;
|
||||
float m_InvScreenHeight = 0.0f;
|
||||
float m_ElapsedTime = 0.0f;
|
||||
float m_AlphaThreshold = 0.97f;
|
||||
|
||||
D3D12_INDEX_BUFFER_VIEW m_IndexBuffer = {};
|
||||
ID3D12RootSignature* m_pSimulationRootSignature = nullptr;
|
||||
ID3D12RootSignature* m_pRasterizationRootSignature = nullptr;
|
||||
|
||||
ID3D12PipelineState* m_pSimulatePipeline = nullptr;
|
||||
ID3D12PipelineState* m_pEmitPipeline = nullptr;
|
||||
ID3D12PipelineState* m_pResetParticlesPipeline = nullptr;
|
||||
ID3D12PipelineState* m_pRasterizationPipelines[ NumStreakModes ][ NumReactiveModes ] = {};
|
||||
|
||||
ID3D12CommandSignature* m_commandSignature = nullptr;
|
||||
|
||||
bool m_ResetSystem = true;
|
||||
FFXParallelSort m_SortLib = {};
|
||||
|
||||
D3D12_RESOURCE_STATES m_ReadBufferStates;
|
||||
D3D12_RESOURCE_STATES m_WriteBufferStates;
|
||||
D3D12_RESOURCE_STATES m_StridedBufferStates;
|
||||
};
|
||||
|
||||
// Factory for the GPU particle system. Returns a heap-allocated object owned by the caller.
IParticleSystem* IParticleSystem::CreateGPUSystem( const char* particleAtlas )
{
    GPUParticleSystem* pSystem = new GPUParticleSystem( particleAtlas );
    return pSystem;
}
|
||||
|
||||
|
||||
// Stores the atlas path pointer; no other work is done at construction time.
GPUParticleSystem::GPUParticleSystem( const char* particleAtlas )
    : m_AtlasPath( particleAtlas )
{
}
|
||||
|
||||
|
||||
// Nothing is released here; the destructor body is intentionally empty.
GPUParticleSystem::~GPUParticleSystem()
{
}
|
||||
|
||||
|
||||
// Records the parallel sort library's work into the command list.
void GPUParticleSystem::Sort( ID3D12GraphicsCommandList* pCommandList )
{
    // NOTE: original author's remark - this causes the debug layer to lock up
    m_SortLib.Draw( pCommandList );
}
|
||||
|
||||
|
||||
void GPUParticleSystem::Reset()
|
||||
{
|
||||
m_ResetSystem = true;
|
||||
}
|
||||
|
||||
void GPUParticleSystem::Render( ID3D12GraphicsCommandList* pCommandList, DynamicBufferRing& constantBufferRing, int flags, const EmitterParams* pEmitters, int nNumEmitters, const ConstantData& constantData )
|
||||
{
|
||||
std::vector<D3D12_RESOURCE_BARRIER> barriersBeforeSimulation;
|
||||
if(m_WriteBufferStates == D3D12_RESOURCE_STATE_COMMON)
|
||||
{
|
||||
barriersBeforeSimulation.push_back(CD3DX12_RESOURCE_BARRIER::Transition(m_ParticleBufferB.GetResource(), m_WriteBufferStates, D3D12_RESOURCE_STATE_UNORDERED_ACCESS));
|
||||
barriersBeforeSimulation.push_back(CD3DX12_RESOURCE_BARRIER::Transition(m_DeadListBuffer.GetResource(), m_WriteBufferStates, D3D12_RESOURCE_STATE_UNORDERED_ACCESS));
|
||||
barriersBeforeSimulation.push_back(CD3DX12_RESOURCE_BARRIER::Transition(m_AliveDistanceBuffer.GetResource(), m_WriteBufferStates, D3D12_RESOURCE_STATE_UNORDERED_ACCESS));
|
||||
barriersBeforeSimulation.push_back(CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectArgsBuffer.GetResource(), m_WriteBufferStates, D3D12_RESOURCE_STATE_UNORDERED_ACCESS));
|
||||
m_WriteBufferStates = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
}
|
||||
|
||||
ID3D12DescriptorHeap* descriptorHeaps[] = { m_heaps->GetCBV_SRV_UAVHeap(), m_heaps->GetSamplerHeap() };
|
||||
pCommandList->SetDescriptorHeaps( _countof( descriptorHeaps ), descriptorHeaps );
|
||||
|
||||
SimulationConstantBuffer simulationConstants = {};
|
||||
|
||||
memcpy( simulationConstants.m_StartColor, constantData.m_StartColor, sizeof( simulationConstants.m_StartColor ) );
|
||||
memcpy( simulationConstants.m_EndColor, constantData.m_EndColor, sizeof( simulationConstants.m_EndColor ) );
|
||||
memcpy( simulationConstants.m_EmitterLightingCenter, constantData.m_EmitterLightingCenter, sizeof( simulationConstants.m_EmitterLightingCenter ) );
|
||||
|
||||
simulationConstants.m_ViewProjection = constantData.m_ViewProjection;
|
||||
simulationConstants.m_View = constantData.m_View;
|
||||
simulationConstants.m_ViewInv = constantData.m_ViewInv;
|
||||
simulationConstants.m_ProjectionInv = constantData.m_ProjectionInv;
|
||||
|
||||
simulationConstants.m_EyePosition = constantData.m_ViewInv.getCol3();
|
||||
simulationConstants.m_SunDirection = constantData.m_SunDirection;
|
||||
|
||||
simulationConstants.m_ScreenWidth = m_ScreenWidth;
|
||||
simulationConstants.m_ScreenHeight = m_ScreenHeight;
|
||||
simulationConstants.m_MaxParticles = g_maxParticles;
|
||||
simulationConstants.m_FrameTime = constantData.m_FrameTime;
|
||||
|
||||
math::Vector4 sunDirectionVS = constantData.m_View * constantData.m_SunDirection;
|
||||
|
||||
m_ElapsedTime += constantData.m_FrameTime;
|
||||
if ( m_ElapsedTime > 10.0f )
|
||||
m_ElapsedTime -= 10.0f;
|
||||
|
||||
simulationConstants.m_ElapsedTime = m_ElapsedTime;
|
||||
|
||||
{
|
||||
UserMarker marker( pCommandList, "simulation" );
|
||||
|
||||
void* data = nullptr;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS constantBuffer;
|
||||
constantBufferRing.AllocConstantBuffer( sizeof( simulationConstants ), &data, &constantBuffer );
|
||||
memcpy( data, &simulationConstants, sizeof( simulationConstants ) );
|
||||
|
||||
|
||||
pCommandList->SetComputeRootSignature( m_pSimulationRootSignature );
|
||||
pCommandList->SetComputeRootDescriptorTable( 0, m_SimulationUAVDescriptorTable.GetGPU() );
|
||||
pCommandList->SetComputeRootDescriptorTable( 1, m_SimulationSRVDescriptorTable.GetGPU() );
|
||||
pCommandList->SetComputeRootConstantBufferView( 2, constantBuffer );
|
||||
|
||||
barriersBeforeSimulation.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_ParticleBufferA.GetResource(), m_ReadBufferStates, D3D12_RESOURCE_STATE_UNORDERED_ACCESS ) );
|
||||
barriersBeforeSimulation.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_PackedViewSpaceParticlePositions.GetResource(), m_ReadBufferStates, D3D12_RESOURCE_STATE_UNORDERED_ACCESS ) );
|
||||
barriersBeforeSimulation.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_MaxRadiusBuffer.GetResource(), m_ReadBufferStates, D3D12_RESOURCE_STATE_UNORDERED_ACCESS ) );
|
||||
barriersBeforeSimulation.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_AliveIndexBuffer.GetResource(), m_ReadBufferStates, D3D12_RESOURCE_STATE_UNORDERED_ACCESS ) );
|
||||
barriersBeforeSimulation.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_AliveCountBuffer.GetResource(), m_ReadBufferStates, D3D12_RESOURCE_STATE_UNORDERED_ACCESS ) );
|
||||
pCommandList->ResourceBarrier( (UINT)barriersBeforeSimulation.size(), &barriersBeforeSimulation[ 0 ] );
|
||||
m_ReadBufferStates = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
|
||||
// If we are resetting the particle system, then initialize the dead list
|
||||
if ( m_ResetSystem )
|
||||
{
|
||||
pCommandList->SetPipelineState( m_pResetParticlesPipeline );
|
||||
|
||||
pCommandList->Dispatch( align( g_maxParticles, 256 ) / 256, 1, 1 );
|
||||
|
||||
std::vector<D3D12_RESOURCE_BARRIER> barriersPostReset;
|
||||
barriersPostReset.push_back( CD3DX12_RESOURCE_BARRIER::UAV( m_ParticleBufferA.GetResource() ) );
|
||||
barriersPostReset.push_back( CD3DX12_RESOURCE_BARRIER::UAV( m_ParticleBufferB.GetResource() ) );
|
||||
barriersPostReset.push_back( CD3DX12_RESOURCE_BARRIER::UAV( m_DeadListBuffer.GetResource() ) );
|
||||
pCommandList->ResourceBarrier( (UINT)barriersPostReset.size(), &barriersPostReset[ 0 ] );
|
||||
|
||||
m_ResetSystem = false;
|
||||
}
|
||||
|
||||
// Emit particles into the system
|
||||
Emit( pCommandList, constantBufferRing, nNumEmitters, pEmitters );
|
||||
|
||||
// Run the simulation for this frame
|
||||
Simulate( pCommandList );
|
||||
|
||||
|
||||
|
||||
std::vector<D3D12_RESOURCE_BARRIER> barriersAfterSimulation;
|
||||
barriersAfterSimulation.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_ParticleBufferA.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, SHADER_READ_STATE) );
|
||||
barriersAfterSimulation.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_PackedViewSpaceParticlePositions.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, SHADER_READ_STATE) );
|
||||
barriersAfterSimulation.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_MaxRadiusBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, SHADER_READ_STATE) );
|
||||
barriersAfterSimulation.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_AliveCountBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, SHADER_READ_STATE) );
|
||||
barriersAfterSimulation.push_back( CD3DX12_RESOURCE_BARRIER::UAV( m_DeadListBuffer.GetResource() ) );
|
||||
pCommandList->ResourceBarrier( (UINT)barriersAfterSimulation.size(), &barriersAfterSimulation[ 0 ] );
|
||||
}
|
||||
|
||||
// Conventional rasterization path
|
||||
{
|
||||
UserMarker marker( pCommandList, "rasterization" );
|
||||
|
||||
// Sort if requested. Not doing so results in the particles rendering out of order and not blending correctly
|
||||
if ( flags & PF_Sort )
|
||||
{
|
||||
UserMarker marker( pCommandList, "sorting" );
|
||||
|
||||
const D3D12_RESOURCE_BARRIER barriers[] =
|
||||
{
|
||||
CD3DX12_RESOURCE_BARRIER::UAV( m_AliveIndexBuffer.GetResource() ),
|
||||
CD3DX12_RESOURCE_BARRIER::UAV( m_AliveDistanceBuffer.GetResource() ),
|
||||
};
|
||||
pCommandList->ResourceBarrier( _countof( barriers ), barriers );
|
||||
|
||||
Sort( pCommandList );
|
||||
}
|
||||
|
||||
StreakMode streaks = flags & PF_Streaks ? StreaksOn : StreaksOff;
|
||||
ReactiveMode reactive = flags & PF_Reactive ? ReactiveOn : ReactiveOff;
|
||||
|
||||
RenderingConstantBuffer* cb = nullptr;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS renderingConstantBuffer;
|
||||
constantBufferRing.AllocConstantBuffer( sizeof( RenderingConstantBuffer ), (void**)&cb, &renderingConstantBuffer );
|
||||
cb->m_Projection = constantData.m_Projection;
|
||||
cb->m_ProjectionInv = simulationConstants.m_ProjectionInv;
|
||||
cb->m_SunColor = constantData.m_SunColor;
|
||||
cb->m_AmbientColor = constantData.m_AmbientColor;
|
||||
cb->m_SunDirectionVS = sunDirectionVS;
|
||||
cb->m_ScreenWidth = m_ScreenWidth;
|
||||
cb->m_ScreenHeight = m_ScreenHeight;
|
||||
|
||||
pCommandList->SetGraphicsRootSignature( m_pRasterizationRootSignature );
|
||||
pCommandList->SetGraphicsRootDescriptorTable( 0, m_RasterizationSRVDescriptorTable.GetGPU() );
|
||||
pCommandList->SetGraphicsRootConstantBufferView( 1, renderingConstantBuffer );
|
||||
pCommandList->SetGraphicsRootUnorderedAccessView( 2, m_IndirectArgsBuffer.GetResource()->GetGPUVirtualAddress() );
|
||||
pCommandList->SetPipelineState( m_pRasterizationPipelines[ streaks ][ reactive ] );
|
||||
|
||||
pCommandList->IASetIndexBuffer( &m_IndexBuffer );
|
||||
pCommandList->IASetVertexBuffers( 0, 0, nullptr );
|
||||
pCommandList->IASetPrimitiveTopology( D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST );
|
||||
|
||||
std::vector<D3D12_RESOURCE_BARRIER> barriers;
|
||||
barriers.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_AliveIndexBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, SHADER_READ_STATE) );
|
||||
barriers.push_back( CD3DX12_RESOURCE_BARRIER::Transition( m_IndirectArgsBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT ) );
|
||||
pCommandList->ResourceBarrier( (UINT)barriers.size(), &barriers[ 0 ] );
|
||||
|
||||
pCommandList->ExecuteIndirect( m_commandSignature, 1, m_IndirectArgsBuffer.GetResource(), 0, nullptr, 0 );
|
||||
|
||||
pCommandList->ResourceBarrier( 1, &CD3DX12_RESOURCE_BARRIER::Transition( m_IndirectArgsBuffer.GetResource(), D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS ) );
|
||||
}
|
||||
|
||||
m_ReadBufferStates = SHADER_READ_STATE;
|
||||
}
|
||||
|
||||
|
||||
// Creates all device-dependent state of the particle system: the particle pool and its
// bookkeeping buffers, the billboard index buffer, the random-number texture, the texture
// atlas, the simulation and rasterization pipelines, and the sort library resources.
//
// device             - device the GPU resources are created on; its address is kept in m_pDevice
// uploadHeap         - upload heap used for the random texture, the atlas and the sort library
// heaps              - descriptor heaps; its address is kept in m_heaps
// bufferPool         - static pool the billboard index buffer is allocated from
// constantBufferRing - constant buffer ring handed on to the sort library
void GPUParticleSystem::OnCreateDevice(Device &device, UploadHeap& uploadHeap, ResourceViewHeaps& heaps, StaticBufferPool& bufferPool, DynamicBufferRing& constantBufferRing )
{
    m_pDevice = &device;
    m_heaps = &heaps;

    // All tracked buffer states start out as COMMON; the buffers below are created in these states.
    m_ReadBufferStates = D3D12_RESOURCE_STATE_COMMON;
    m_WriteBufferStates = D3D12_RESOURCE_STATE_COMMON; // D3D12_RESOURCE_STATE_UNORDERED_ACCESS
    m_StridedBufferStates = D3D12_RESOURCE_STATE_COMMON;

    // Create the global particle pool. Each particle is split into two parts for better cache coherency. The first half contains the data more
    // relevant to rendering while the second half is more related to simulation
    CD3DX12_RESOURCE_DESC RDescParticlesA = CD3DX12_RESOURCE_DESC::Buffer( sizeof( GPUParticlePartA ) * g_maxParticles, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS );
    m_ParticleBufferA.InitBuffer(&device, "ParticleBufferA", &RDescParticlesA, sizeof( GPUParticlePartA ), m_ReadBufferStates);

    CD3DX12_RESOURCE_DESC RDescParticlesB = CD3DX12_RESOURCE_DESC::Buffer( sizeof( GPUParticlePartB ) * g_maxParticles, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS );
    m_ParticleBufferB.InitBuffer(&device, "ParticleBufferB", &RDescParticlesB, sizeof( GPUParticlePartB ), m_WriteBufferStates);

    // The packed view space positions of particles are cached during simulation so allocate a buffer for them
    CD3DX12_RESOURCE_DESC RDescPackedViewSpaceParticlePositions = CD3DX12_RESOURCE_DESC::Buffer( 8 * g_maxParticles, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS );
    m_PackedViewSpaceParticlePositions.InitBuffer(&device, "PackedViewSpaceParticlePositions", &RDescPackedViewSpaceParticlePositions, 8, m_ReadBufferStates);

    // The maximum radii of each particle is cached during simulation to avoid recomputing multiple times later. This is only required
    // for streaked particles as they are not round so we cache the max radius of X and Y
    CD3DX12_RESOURCE_DESC RDescMaxRadiusBuffer = CD3DX12_RESOURCE_DESC::Buffer( sizeof( float ) * g_maxParticles, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS );
    m_MaxRadiusBuffer.InitBuffer(&device, "MaxRadiusBuffer", &RDescMaxRadiusBuffer, sizeof( float ), m_ReadBufferStates);

    // The dead particle index list. Created as an append buffer
    CD3DX12_RESOURCE_DESC RDescDeadListBuffer = CD3DX12_RESOURCE_DESC::Buffer( sizeof( INT ) * ( g_maxParticles + 1 ), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS );
    m_DeadListBuffer.InitBuffer(&device, "DeadListBuffer", &RDescDeadListBuffer, sizeof( INT ), m_WriteBufferStates);

    // Create the index buffer of alive particles that is to be sorted (at least in the rasterization path).
    // For the tiled rendering path this could be just a UINT index buffer as particles are not globally sorted
    CD3DX12_RESOURCE_DESC RDescAliveIndexBuffer = CD3DX12_RESOURCE_DESC::Buffer( sizeof( int ) * g_maxParticles, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS );
    m_AliveIndexBuffer.InitBuffer(&device, "AliveIndexBuffer", &RDescAliveIndexBuffer, sizeof( int ), m_ReadBufferStates);

    CD3DX12_RESOURCE_DESC RDescAliveDistanceBuffer = CD3DX12_RESOURCE_DESC::Buffer( sizeof( float ) * g_maxParticles, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS );
    m_AliveDistanceBuffer.InitBuffer(&device, "AliveDistanceBuffer", &RDescAliveDistanceBuffer, sizeof( float ), m_WriteBufferStates);

    // Create the single element buffer which is used to store the count of alive particles
    CD3DX12_RESOURCE_DESC RDescAliveCountBuffer = CD3DX12_RESOURCE_DESC::Buffer( sizeof( UINT ), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS );
    m_AliveCountBuffer.InitBuffer(&device, "AliveCountBuffer", &RDescAliveCountBuffer, sizeof( UINT ), m_ReadBufferStates);

    // Create the buffer to store the indirect args for the ExecuteIndirect call.
    // It holds exactly one IndirectCommand.
    CD3DX12_RESOURCE_DESC desc = CD3DX12_RESOURCE_DESC::Buffer( sizeof( IndirectCommand ), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS );
    m_IndirectArgsBuffer.InitBuffer(&device, "IndirectArgsBuffer", &desc, sizeof( IndirectCommand ), m_WriteBufferStates);

    // Create the particle billboard index buffer required for the rasterization VS-only path.
    // Every particle is a quad of 4 vertices drawn as two triangles (0,1,2) and (2,1,3).
    // The CPU-side staging array is a std::vector so it is released on every exit path.
    std::vector<UINT> indices( g_maxParticles * 6 );
    UINT* ptr = indices.data();
    UINT base = 0;
    for ( int i = 0; i < g_maxParticles; i++ )
    {
        ptr[ 0 ] = base + 0;
        ptr[ 1 ] = base + 1;
        ptr[ 2 ] = base + 2;

        ptr[ 3 ] = base + 2;
        ptr[ 4 ] = base + 1;
        ptr[ 5 ] = base + 3;

        base += 4;
        ptr += 6;
    }

    bufferPool.AllocIndexBuffer( g_maxParticles * 6, sizeof( UINT ), indices.data(), &m_IndexBuffer );

    // Initialize the random numbers texture
    FillRandomTexture( uploadHeap );

    m_Atlas.InitFromFile( &device, &uploadHeap, m_AtlasPath, true );

    CreateSimulationAssets();
    CreateRasterizedRenderingAssets();

    // Create the SortLib resources
    m_SortLib.OnCreate( m_pDevice, m_heaps, &constantBufferRing, &uploadHeap, &m_AliveCountBuffer, &m_AliveDistanceBuffer, &m_AliveIndexBuffer );
}
|
||||
|
||||
void GPUParticleSystem::CreateSimulationAssets()
|
||||
{
|
||||
m_heaps->AllocCBV_SRV_UAVDescriptor( m_SimulationUAVDescriptorTableCount, &m_SimulationUAVDescriptorTable );
|
||||
|
||||
m_ParticleBufferA.CreateBufferUAV( 0, nullptr, &m_SimulationUAVDescriptorTable );
|
||||
m_ParticleBufferB.CreateBufferUAV( 1, nullptr, &m_SimulationUAVDescriptorTable );
|
||||
m_DeadListBuffer.CreateBufferUAV( 2, nullptr, &m_SimulationUAVDescriptorTable );
|
||||
m_AliveIndexBuffer.CreateBufferUAV( 3, nullptr, &m_SimulationUAVDescriptorTable );
|
||||
m_AliveDistanceBuffer.CreateBufferUAV( 4, nullptr, &m_SimulationUAVDescriptorTable );
|
||||
m_MaxRadiusBuffer.CreateBufferUAV( 5, nullptr, &m_SimulationUAVDescriptorTable );
|
||||
m_PackedViewSpaceParticlePositions.CreateBufferUAV( 6, nullptr, &m_SimulationUAVDescriptorTable );
|
||||
m_IndirectArgsBuffer.CreateBufferUAV( 7, nullptr, &m_SimulationUAVDescriptorTable );
|
||||
m_AliveCountBuffer.CreateBufferUAV( 8, nullptr, &m_SimulationUAVDescriptorTable );
|
||||
|
||||
m_heaps->AllocCBV_SRV_UAVDescriptor( m_SimulationSRVDescriptorTableCount, &m_SimulationSRVDescriptorTable );
|
||||
// depth buffer // t0
|
||||
m_RandomTexture.CreateSRV( 1, &m_SimulationSRVDescriptorTable ); // t1
|
||||
|
||||
{
|
||||
CD3DX12_DESCRIPTOR_RANGE DescRange[2] = {};
|
||||
DescRange[0].Init( D3D12_DESCRIPTOR_RANGE_TYPE_UAV, m_SimulationUAVDescriptorTableCount, 0 ); // u0 - u8
|
||||
DescRange[1].Init( D3D12_DESCRIPTOR_RANGE_TYPE_SRV, m_SimulationSRVDescriptorTableCount, 0 ); // t0 - t1
|
||||
|
||||
CD3DX12_ROOT_PARAMETER rootParamters[4] = {};
|
||||
rootParamters[0].InitAsDescriptorTable( 1, &DescRange[0], D3D12_SHADER_VISIBILITY_ALL ); // uavs
|
||||
rootParamters[1].InitAsDescriptorTable( 1, &DescRange[1], D3D12_SHADER_VISIBILITY_ALL ); // textures
|
||||
rootParamters[2].InitAsConstantBufferView( 0 ); // b0 - per frame
|
||||
rootParamters[3].InitAsConstantBufferView( 1 ); // b1 - per emitter
|
||||
|
||||
CD3DX12_STATIC_SAMPLER_DESC sampler( 0, D3D12_FILTER_MIN_MAG_MIP_POINT, D3D12_TEXTURE_ADDRESS_MODE_WRAP, D3D12_TEXTURE_ADDRESS_MODE_WRAP, D3D12_TEXTURE_ADDRESS_MODE_CLAMP );
|
||||
|
||||
CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = {};
|
||||
descRootSignature.Init( _countof( rootParamters ), rootParamters, 1, &sampler );
|
||||
|
||||
ID3DBlob *pOutBlob, *pErrorBlob = nullptr;
|
||||
D3D12SerializeRootSignature( &descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob );
|
||||
m_pDevice->GetDevice()->CreateRootSignature( 0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS( &m_pSimulationRootSignature ) );
|
||||
m_pSimulationRootSignature->SetName( L"SimulationRootSignature" );
|
||||
|
||||
pOutBlob->Release();
|
||||
if (pErrorBlob)
|
||||
pErrorBlob->Release();
|
||||
}
|
||||
|
||||
D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {};
|
||||
descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
|
||||
descPso.pRootSignature = m_pSimulationRootSignature;
|
||||
descPso.NodeMask = 0;
|
||||
|
||||
DefineList defines;
|
||||
defines["API_DX12"] = "";
|
||||
|
||||
{
|
||||
D3D12_SHADER_BYTECODE computeShader;
|
||||
CompileShaderFromFile( "ParticleSimulation.hlsl", &defines, "CS_Reset", "-T cs_6_0", &computeShader );
|
||||
|
||||
descPso.CS = computeShader;
|
||||
m_pDevice->GetDevice()->CreateComputePipelineState( &descPso, IID_PPV_ARGS( &m_pResetParticlesPipeline ) );
|
||||
m_pResetParticlesPipeline->SetName( L"ResetParticles" );
|
||||
}
|
||||
|
||||
{
|
||||
D3D12_SHADER_BYTECODE computeShader;
|
||||
CompileShaderFromFile( "ParticleSimulation.hlsl", &defines, "CS_Simulate", "-T cs_6_0", &computeShader );
|
||||
|
||||
descPso.CS = computeShader;
|
||||
m_pDevice->GetDevice()->CreateComputePipelineState( &descPso, IID_PPV_ARGS( &m_pSimulatePipeline ) );
|
||||
m_pSimulatePipeline->SetName( L"Simulation" );
|
||||
}
|
||||
|
||||
{
|
||||
D3D12_SHADER_BYTECODE computeShader;
|
||||
CompileShaderFromFile( "ParticleEmit.hlsl", &defines, "CS_Emit", "-T cs_6_0", &computeShader );
|
||||
|
||||
descPso.CS = computeShader;
|
||||
m_pDevice->GetDevice()->CreateComputePipelineState( &descPso, IID_PPV_ARGS( &m_pEmitPipeline ) );
|
||||
m_pEmitPipeline->SetName( L"Emit" );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void GPUParticleSystem::CreateRasterizedRenderingAssets()
|
||||
{
|
||||
m_heaps->AllocCBV_SRV_UAVDescriptor( m_RasterizationSRVDescriptorTableCount, &m_RasterizationSRVDescriptorTable );
|
||||
m_ParticleBufferA.CreateSRV( 0, &m_RasterizationSRVDescriptorTable );
|
||||
m_PackedViewSpaceParticlePositions.CreateSRV( 1, &m_RasterizationSRVDescriptorTable );
|
||||
m_AliveCountBuffer.CreateSRV( 2, &m_RasterizationSRVDescriptorTable );
|
||||
m_AliveIndexBuffer.CreateSRV( 3, &m_RasterizationSRVDescriptorTable );
|
||||
m_Atlas.CreateSRV( 4, &m_RasterizationSRVDescriptorTable );
|
||||
// depth texture t5
|
||||
|
||||
{
|
||||
CD3DX12_DESCRIPTOR_RANGE DescRange[1] = {};
|
||||
DescRange[0].Init( D3D12_DESCRIPTOR_RANGE_TYPE_SRV, m_RasterizationSRVDescriptorTableCount, 0 ); // t0-t5
|
||||
|
||||
CD3DX12_ROOT_PARAMETER rootParamters[3] = {};
|
||||
rootParamters[0].InitAsDescriptorTable( 1, &DescRange[0], D3D12_SHADER_VISIBILITY_ALL ); // textures
|
||||
rootParamters[1].InitAsConstantBufferView( 0 ); // b0
|
||||
rootParamters[2].InitAsUnorderedAccessView( 0 );
|
||||
|
||||
CD3DX12_STATIC_SAMPLER_DESC sampler( 0, D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT, D3D12_TEXTURE_ADDRESS_MODE_CLAMP, D3D12_TEXTURE_ADDRESS_MODE_CLAMP, D3D12_TEXTURE_ADDRESS_MODE_CLAMP );
|
||||
|
||||
CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = {};
|
||||
descRootSignature.Init( _countof( rootParamters ), rootParamters, 1, &sampler );
|
||||
|
||||
ID3DBlob *pOutBlob, *pErrorBlob = nullptr;
|
||||
D3D12SerializeRootSignature( &descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob );
|
||||
m_pDevice->GetDevice()->CreateRootSignature( 0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS( &m_pRasterizationRootSignature ) );
|
||||
m_pRasterizationRootSignature->SetName( L"RasterizationRootSignature" );
|
||||
|
||||
pOutBlob->Release();
|
||||
if (pErrorBlob)
|
||||
pErrorBlob->Release();
|
||||
}
|
||||
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC descPso = {};
|
||||
descPso.InputLayout = { nullptr, 0 };
|
||||
descPso.pRootSignature = m_pRasterizationRootSignature;
|
||||
|
||||
descPso.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
|
||||
descPso.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
descPso.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
|
||||
descPso.BlendState.IndependentBlendEnable = true;
|
||||
descPso.BlendState.RenderTarget[0].BlendEnable = true;
|
||||
descPso.BlendState.RenderTarget[2].BlendEnable = true;
|
||||
|
||||
descPso.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
|
||||
descPso.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
|
||||
descPso.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
|
||||
descPso.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
|
||||
descPso.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ZERO;
|
||||
descPso.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
||||
|
||||
descPso.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
|
||||
descPso.BlendState.RenderTarget[1].RenderTargetWriteMask = 0;
|
||||
descPso.BlendState.RenderTarget[2].RenderTargetWriteMask = 0;
|
||||
descPso.BlendState.RenderTarget[3].RenderTargetWriteMask = 0;
|
||||
|
||||
descPso.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT);
|
||||
descPso.DepthStencilState.DepthEnable = TRUE;
|
||||
descPso.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
|
||||
descPso.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL;
|
||||
descPso.SampleMask = UINT_MAX;
|
||||
descPso.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
||||
descPso.NumRenderTargets = 4;
|
||||
descPso.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT;
|
||||
descPso.RTVFormats[1] = DXGI_FORMAT_R16G16_FLOAT;
|
||||
descPso.RTVFormats[2] = DXGI_FORMAT_R8_UNORM;
|
||||
descPso.RTVFormats[3] = DXGI_FORMAT_R8_UNORM;
|
||||
descPso.DSVFormat = DXGI_FORMAT_D32_FLOAT;
|
||||
descPso.SampleDesc.Count = 1;
|
||||
descPso.NodeMask = 0;
|
||||
|
||||
for ( int i = 0; i < NumStreakModes; i++ )
|
||||
{
|
||||
for ( int j = 0; j < NumReactiveModes; j++ )
|
||||
{
|
||||
descPso.BlendState.RenderTarget[2].RenderTargetWriteMask = 0;
|
||||
|
||||
DefineList defines;
|
||||
defines["API_DX12"] = "";
|
||||
if ( i == StreaksOn )
|
||||
defines["STREAKS"] = "";
|
||||
|
||||
if ( j == ReactiveOn )
|
||||
{
|
||||
defines["REACTIVE"] = "";
|
||||
descPso.BlendState.RenderTarget[2].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED;
|
||||
}
|
||||
|
||||
D3D12_SHADER_BYTECODE vertexShader = {};
|
||||
CompileShaderFromFile( "ParticleRender.hlsl", &defines, "VS_StructuredBuffer", "-T vs_6_0", &vertexShader );
|
||||
|
||||
D3D12_SHADER_BYTECODE pixelShader = {};
|
||||
CompileShaderFromFile( "ParticleRender.hlsl", &defines, "PS_Billboard", "-T ps_6_0", &pixelShader );
|
||||
|
||||
descPso.VS = vertexShader;
|
||||
descPso.PS = pixelShader;
|
||||
m_pDevice->GetDevice()->CreateGraphicsPipelineState( &descPso, IID_PPV_ARGS( &m_pRasterizationPipelines[ i ][ j ] ) );
|
||||
}
|
||||
}
|
||||
|
||||
D3D12_INDIRECT_ARGUMENT_DESC argumentDescs[2] = {};
|
||||
argumentDescs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW;
|
||||
argumentDescs[0].UnorderedAccessView.RootParameterIndex = 2;
|
||||
argumentDescs[1].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
|
||||
D3D12_COMMAND_SIGNATURE_DESC commandSignatureDesc = {};
|
||||
commandSignatureDesc.pArgumentDescs = argumentDescs;
|
||||
commandSignatureDesc.NumArgumentDescs = _countof( argumentDescs );
|
||||
commandSignatureDesc.ByteStride = sizeof( IndirectCommand );
|
||||
|
||||
m_pDevice->GetDevice()->CreateCommandSignature( &commandSignatureDesc, m_pRasterizationRootSignature, IID_PPV_ARGS( &m_commandSignature ) );
|
||||
m_commandSignature->SetName( L"CommandSignature" );
|
||||
}
|
||||
|
||||
|
||||
void GPUParticleSystem::OnResizedSwapChain( int width, int height, Texture& depthBuffer )
|
||||
{
|
||||
m_ScreenWidth = width;
|
||||
m_ScreenHeight = height;
|
||||
m_InvScreenWidth = 1.0f / m_ScreenWidth;
|
||||
m_InvScreenHeight = 1.0f / m_ScreenHeight;
|
||||
|
||||
depthBuffer.CreateSRV( 0, &m_SimulationSRVDescriptorTable );
|
||||
depthBuffer.CreateSRV( 5, &m_RasterizationSRVDescriptorTable );
|
||||
}
|
||||
|
||||
|
||||
// Swap-chain release hook. Intentionally empty: this class releases nothing here.
void GPUParticleSystem::OnReleasingSwapChain()
{
    // nothing to do
}
|
||||
|
||||
|
||||
void GPUParticleSystem::OnDestroyDevice()
|
||||
{
|
||||
m_pDevice = nullptr;
|
||||
|
||||
m_ParticleBufferA.OnDestroy();
|
||||
m_ParticleBufferB.OnDestroy();
|
||||
m_PackedViewSpaceParticlePositions.OnDestroy();
|
||||
m_MaxRadiusBuffer.OnDestroy();
|
||||
m_DeadListBuffer.OnDestroy();
|
||||
m_AliveIndexBuffer.OnDestroy();
|
||||
m_AliveDistanceBuffer.OnDestroy();
|
||||
m_AliveCountBuffer.OnDestroy();
|
||||
m_RandomTexture.OnDestroy();
|
||||
m_Atlas.OnDestroy();
|
||||
m_IndirectArgsBuffer.OnDestroy();
|
||||
|
||||
m_pSimulatePipeline->Release();
|
||||
m_pSimulatePipeline = nullptr;
|
||||
|
||||
m_pResetParticlesPipeline->Release();
|
||||
m_pResetParticlesPipeline = nullptr;
|
||||
|
||||
m_pEmitPipeline->Release();
|
||||
m_pEmitPipeline = nullptr;
|
||||
|
||||
m_pSimulationRootSignature->Release();
|
||||
m_pSimulationRootSignature = nullptr;
|
||||
|
||||
for ( int i = 0; i < NumStreakModes; i++ )
|
||||
{
|
||||
for ( int j = 0; j < NumReactiveModes; j++ )
|
||||
{
|
||||
m_pRasterizationPipelines[ i ][ j ]->Release();
|
||||
m_pRasterizationPipelines[ i ][ j ] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
m_pRasterizationRootSignature->Release();
|
||||
m_pRasterizationRootSignature = nullptr;
|
||||
|
||||
m_commandSignature->Release();
|
||||
m_commandSignature = nullptr;
|
||||
|
||||
m_SortLib.OnDestroy();
|
||||
|
||||
m_ResetSystem = true;
|
||||
}
|
||||
|
||||
|
||||
// Per-frame emission of particles into the GPU simulation
|
||||
void GPUParticleSystem::Emit( ID3D12GraphicsCommandList* pCommandList, DynamicBufferRing& constantBufferRing, int numEmitters, const EmitterParams* emitters )
|
||||
{
|
||||
pCommandList->SetPipelineState( m_pEmitPipeline );
|
||||
|
||||
// Run CS for each emitter
|
||||
for ( int i = 0; i < numEmitters; i++ )
|
||||
{
|
||||
const EmitterParams& emitter = emitters[ i ];
|
||||
|
||||
if ( emitter.m_NumToEmit > 0 )
|
||||
{
|
||||
EmitterConstantBuffer* constants = nullptr;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS constantBuffer;
|
||||
constantBufferRing.AllocConstantBuffer( sizeof(*constants), (void**)&constants, &constantBuffer );
|
||||
constants->m_EmitterPosition = emitter.m_Position;
|
||||
constants->m_EmitterVelocity = emitter.m_Velocity;
|
||||
constants->m_MaxParticlesThisFrame = emitter.m_NumToEmit;
|
||||
constants->m_ParticleLifeSpan = emitter.m_ParticleLifeSpan;
|
||||
constants->m_StartSize = emitter.m_StartSize;
|
||||
constants->m_EndSize = emitter.m_EndSize;
|
||||
constants->m_PositionVariance = emitter.m_PositionVariance;
|
||||
constants->m_VelocityVariance = emitter.m_VelocityVariance;
|
||||
constants->m_Mass = emitter.m_Mass;
|
||||
constants->m_Index = i;
|
||||
constants->m_Streaks = emitter.m_Streaks ? 1 : 0;
|
||||
constants->m_TextureIndex = emitter.m_TextureIndex;
|
||||
pCommandList->SetComputeRootConstantBufferView( 3, constantBuffer );
|
||||
|
||||
// Dispatch enough thread groups to spawn the requested particles
|
||||
int numThreadGroups = align( emitter.m_NumToEmit, 1024 ) / 1024;
|
||||
pCommandList->Dispatch( numThreadGroups, 1, 1 );
|
||||
|
||||
pCommandList->ResourceBarrier( 1, &CD3DX12_RESOURCE_BARRIER::UAV( m_DeadListBuffer.GetResource() ) );
|
||||
}
|
||||
}
|
||||
|
||||
// RaW barriers
|
||||
pCommandList->ResourceBarrier( 1, &CD3DX12_RESOURCE_BARRIER::UAV( m_ParticleBufferA.GetResource() ) );
|
||||
pCommandList->ResourceBarrier( 1, &CD3DX12_RESOURCE_BARRIER::UAV( m_ParticleBufferB.GetResource() ) );
|
||||
}
|
||||
|
||||
|
||||
// Per-frame simulation step
|
||||
void GPUParticleSystem::Simulate( ID3D12GraphicsCommandList* pCommandList )
|
||||
{
|
||||
pCommandList->SetPipelineState( m_pSimulatePipeline );
|
||||
pCommandList->Dispatch( align( g_maxParticles, 256 ) / 256, 1, 1 );
|
||||
}
|
||||
|
||||
|
||||
// Populate a texture with random numbers (used for the emission of particles)
|
||||
void GPUParticleSystem::FillRandomTexture( UploadHeap& uploadHeap )
|
||||
{
|
||||
IMG_INFO header = {};
|
||||
header.width = 1024;
|
||||
header.height = 1024;
|
||||
header.depth = 1;
|
||||
header.arraySize = 1;
|
||||
header.mipMapCount = 1;
|
||||
header.format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
header.bitCount = 128;
|
||||
|
||||
float* values = new float[ header.width * header.height * 4 ];
|
||||
float* ptr = values;
|
||||
for ( UINT i = 0; i < header.width * header.height; i++ )
|
||||
{
|
||||
ptr[ 0 ] = RandomVariance( 0.0f, 1.0f );
|
||||
ptr[ 1 ] = RandomVariance( 0.0f, 1.0f );
|
||||
ptr[ 2 ] = RandomVariance( 0.0f, 1.0f );
|
||||
ptr[ 3 ] = RandomVariance( 0.0f, 1.0f );
|
||||
ptr += 4;
|
||||
}
|
||||
|
||||
m_RandomTexture.InitFromData(m_pDevice, "RadomTexture", uploadHeap, header, values );
|
||||
|
||||
delete[] values;
|
||||
}
|
@ -0,0 +1,524 @@
|
||||
// ParallelSort.cpp
|
||||
//
|
||||
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#define FFX_CPP
|
||||
#include "ParallelSort.h"
|
||||
#include "../FFX-ParallelSort/FFX_ParallelSort.h"
|
||||
|
||||
// Key count (400 * 1024) used by OnCreate to size the sort's temporary key/payload
// buffers and scratch buffers, and uploaded as the initial indirect key count.
static const uint32_t NumKeys = 400 * 1024;
|
||||
|
||||
|
||||
void FFXParallelSort::CompileRadixPipeline(const char* shaderFile, const DefineList* defines, const char* entryPoint, ID3D12PipelineState*& pPipeline)
|
||||
{
|
||||
std::string CompileFlags("-T cs_6_0");
|
||||
#ifdef _DEBUG
|
||||
CompileFlags += " -Zi -Od";
|
||||
#endif // _DEBUG
|
||||
|
||||
D3D12_SHADER_BYTECODE shaderByteCode = {};
|
||||
CompileShaderFromFile(shaderFile, defines, entryPoint, CompileFlags.c_str(), &shaderByteCode);
|
||||
|
||||
D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {};
|
||||
descPso.CS = shaderByteCode;
|
||||
descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
|
||||
descPso.pRootSignature = m_pFPSRootSignature;
|
||||
descPso.NodeMask = 0;
|
||||
|
||||
ThrowIfFailed(m_pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&pPipeline)));
|
||||
SetName(pPipeline, entryPoint);
|
||||
}
|
||||
|
||||
void FFXParallelSort::OnCreate(Device* pDevice, ResourceViewHeaps* pResourceViewHeaps, DynamicBufferRing* pConstantBufferRing, UploadHeap* pUploadHeap, Texture* elementCount, Texture* listA, Texture* listB)
|
||||
{
|
||||
m_pDevice = pDevice;
|
||||
m_pUploadHeap = pUploadHeap;
|
||||
m_pResourceViewHeaps = pResourceViewHeaps;
|
||||
m_pConstantBufferRing = pConstantBufferRing;
|
||||
m_SrcKeyBuffer = listA;
|
||||
m_SrcPayloadBuffer = listB;
|
||||
m_MaxNumThreadgroups = 800;
|
||||
|
||||
// Allocate UAVs to use for data
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_ElementCountSRV);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_SrcKeyUAV);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_SrcPayloadUAV);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(2, &m_DstKeyUAVTable);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(2, &m_DstPayloadUAVTable);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_FPSScratchUAV);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_FPSReducedScratchUAV);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_IndirectKeyCountsUAV);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_IndirectConstantBufferUAV);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_IndirectCountScatterArgsUAV);
|
||||
m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_IndirectReduceScanArgsUAV);
|
||||
|
||||
// The DstKey and DstPayload buffers will be used as src/dst when sorting. A copy of the
|
||||
// source key/payload will be copied into them before hand so we can keep our original values
|
||||
CD3DX12_RESOURCE_DESC ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * NumKeys, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
|
||||
m_DstKeyTempBuffer[0].InitBuffer(m_pDevice, "DstKeyTempBuf0", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COMMON);
|
||||
m_DstKeyTempBuffer[1].InitBuffer(m_pDevice, "DstKeyTempBuf1", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COMMON);
|
||||
m_DstPayloadTempBuffer[0].InitBuffer(m_pDevice, "DstPayloadTempBuf0", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COMMON);
|
||||
m_DstPayloadTempBuffer[1].InitBuffer(m_pDevice, "DstPayloadTempBuf1", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COMMON);
|
||||
{
|
||||
CD3DX12_RESOURCE_BARRIER Barriers[4] =
|
||||
{
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(m_DstKeyTempBuffer[0].GetResource(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS),
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(m_DstKeyTempBuffer[1].GetResource(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS),
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(m_DstPayloadTempBuffer[0].GetResource(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS),
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(m_DstPayloadTempBuffer[1].GetResource(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)
|
||||
};
|
||||
m_pUploadHeap->GetCommandList()->ResourceBarrier(4, Barriers);
|
||||
}
|
||||
|
||||
// Create UAVs
|
||||
listA->CreateBufferUAV(0, nullptr, &m_SrcKeyUAV);
|
||||
listB->CreateBufferUAV(0, nullptr, &m_SrcPayloadUAV);
|
||||
m_DstKeyTempBuffer[0].CreateBufferUAV(0, nullptr, &m_DstKeyUAVTable);
|
||||
m_DstPayloadTempBuffer[0].CreateBufferUAV(0, nullptr, &m_DstPayloadUAVTable);
|
||||
m_DstKeyTempBuffer[1].CreateBufferUAV(1, nullptr, &m_DstKeyUAVTable);
|
||||
m_DstPayloadTempBuffer[1].CreateBufferUAV(1, nullptr, &m_DstPayloadUAVTable);
|
||||
|
||||
elementCount->CreateSRV( 0, &m_ElementCountSRV, 0 );
|
||||
|
||||
// We are just going to fudge the indirect execution parameters for each resolution
|
||||
ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
|
||||
m_IndirectKeyCounts.InitBuffer(m_pDevice, "IndirectKeyCounts", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COMMON);
|
||||
m_IndirectKeyCounts.CreateBufferUAV(0, nullptr, &m_IndirectKeyCountsUAV);
|
||||
uint8_t* pNumKeysBuffer = m_pUploadHeap->Suballocate(sizeof(uint32_t), D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
|
||||
memcpy(pNumKeysBuffer, &NumKeys, sizeof(uint32_t) );
|
||||
m_pUploadHeap->GetCommandList()->CopyBufferRegion(m_IndirectKeyCounts.GetResource(), 0, m_pUploadHeap->GetResource(), pNumKeysBuffer - m_pUploadHeap->BasePtr(), sizeof(uint32_t));
|
||||
CD3DX12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectKeyCounts.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
m_pUploadHeap->GetCommandList()->ResourceBarrier(1, &Barrier);
|
||||
|
||||
// Allocate the scratch buffers needed for radix sort
|
||||
uint32_t scratchBufferSize;
|
||||
uint32_t reducedScratchBufferSize;
|
||||
FFX_ParallelSort_CalculateScratchResourceSize(NumKeys, scratchBufferSize, reducedScratchBufferSize);
|
||||
|
||||
ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(scratchBufferSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
|
||||
m_FPSScratchBuffer.InitBuffer(m_pDevice, "Scratch", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COMMON);
|
||||
m_FPSScratchBuffer.CreateBufferUAV(0, nullptr, &m_FPSScratchUAV);
|
||||
|
||||
ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(reducedScratchBufferSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
|
||||
m_FPSReducedScratchBuffer.InitBuffer(m_pDevice, "ReducedScratch", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COMMON);
|
||||
m_FPSReducedScratchBuffer.CreateBufferUAV(0, nullptr, &m_FPSReducedScratchUAV);
|
||||
|
||||
// Allocate the buffers for indirect execution of the algorithm
|
||||
ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(FFX_ParallelSortCB), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
|
||||
m_IndirectConstantBuffer.InitBuffer(m_pDevice, "IndirectConstantBuffer", &ResourceDesc, sizeof(FFX_ParallelSortCB), D3D12_RESOURCE_STATE_COMMON);
|
||||
m_IndirectConstantBuffer.CreateBufferUAV(0, nullptr, &m_IndirectConstantBufferUAV);
|
||||
|
||||
ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * 3, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
|
||||
m_IndirectCountScatterArgs.InitBuffer(m_pDevice, "IndirectCount_Scatter_DispatchArgs", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COMMON);
|
||||
m_IndirectCountScatterArgs.CreateBufferUAV(0, nullptr, &m_IndirectCountScatterArgsUAV);
|
||||
m_IndirectReduceScanArgs.InitBuffer(m_pDevice, "IndirectCount_Scatter_DispatchArgs", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COMMON);
|
||||
m_IndirectReduceScanArgs.CreateBufferUAV(0, nullptr, &m_IndirectReduceScanArgsUAV);
|
||||
|
||||
{
|
||||
CD3DX12_RESOURCE_BARRIER Barriers[5] =
|
||||
{
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(m_FPSScratchBuffer.GetResource(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS),
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(m_FPSReducedScratchBuffer.GetResource(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS),
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectConstantBuffer.GetResource(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS),
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectCountScatterArgs.GetResource(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS),
|
||||
CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectReduceScanArgs.GetResource(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)
|
||||
};
|
||||
m_pUploadHeap->GetCommandList()->ResourceBarrier(5, Barriers);
|
||||
}
|
||||
// Create root signature for Radix sort passes
|
||||
{
|
||||
D3D12_DESCRIPTOR_RANGE descRange[16];
|
||||
D3D12_ROOT_PARAMETER rootParams[17];
|
||||
|
||||
// Constant buffer table (always have 1)
|
||||
descRange[0] = { D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[0].Descriptor = { descRange[0].BaseShaderRegister, descRange[0].RegisterSpace };
|
||||
|
||||
// Constant buffer to setup indirect params (indirect)
|
||||
descRange[1] = { D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 1, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[1].Descriptor = { descRange[1].BaseShaderRegister, descRange[1].RegisterSpace };
|
||||
|
||||
rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[2].Constants = { 2, 0, 1 };
|
||||
|
||||
// SrcBuffer (sort or scan)
|
||||
descRange[2] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[3].DescriptorTable = { 1, &descRange[2] };
|
||||
|
||||
// ScrPayload (sort only)
|
||||
descRange[3] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 1, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[4].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[4].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[4].DescriptorTable = { 1, &descRange[3] };
|
||||
|
||||
// Scratch (sort only)
|
||||
descRange[4] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 2, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[5].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[5].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[5].DescriptorTable = { 1, &descRange[4] };
|
||||
|
||||
// Scratch (reduced)
|
||||
descRange[5] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 3, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[6].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[6].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[6].DescriptorTable = { 1, &descRange[5] };
|
||||
|
||||
// DstBuffer (sort or scan)
|
||||
descRange[6] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 4, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[7].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[7].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[7].DescriptorTable = { 1, &descRange[6] };
|
||||
|
||||
// DstPayload (sort only)
|
||||
descRange[7] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 5, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[8].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[8].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[8].DescriptorTable = { 1, &descRange[7] };
|
||||
|
||||
// ScanSrc
|
||||
descRange[8] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 6, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[9].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[9].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[9].DescriptorTable = { 1, &descRange[8] };
|
||||
|
||||
// ScanDst
|
||||
descRange[9] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 7, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[10].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[10].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[10].DescriptorTable = { 1, &descRange[9] };
|
||||
|
||||
// ScanScratch
|
||||
descRange[10] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 8, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[11].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[11].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[11].DescriptorTable = { 1, &descRange[10] };
|
||||
|
||||
// NumKeys (indirect)
|
||||
descRange[11] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 9, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[12].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[12].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[12].DescriptorTable = { 1, &descRange[11] };
|
||||
|
||||
// CBufferUAV (indirect)
|
||||
descRange[12] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 10, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[13].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[13].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[13].DescriptorTable = { 1, &descRange[12] };
|
||||
|
||||
// CountScatterArgs (indirect)
|
||||
descRange[13] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 11, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[14].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[14].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[14].DescriptorTable = { 1, &descRange[13] };
|
||||
|
||||
// ReduceScanArgs (indirect)
|
||||
descRange[14] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 12, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[15].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[15].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[15].DescriptorTable = { 1, &descRange[14] };
|
||||
|
||||
descRange[15] = { D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND };
|
||||
rootParams[16].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[16].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParams[16].DescriptorTable = { 1, &descRange[15] };
|
||||
|
||||
D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {};
|
||||
rootSigDesc.NumParameters = 17;
|
||||
rootSigDesc.pParameters = rootParams;
|
||||
rootSigDesc.NumStaticSamplers = 0;
|
||||
rootSigDesc.pStaticSamplers = nullptr;
|
||||
rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
|
||||
|
||||
ID3DBlob* pOutBlob, * pErrorBlob = nullptr;
|
||||
ThrowIfFailed(D3D12SerializeRootSignature(&rootSigDesc, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob));
|
||||
ThrowIfFailed(pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&m_pFPSRootSignature)));
|
||||
SetName(m_pFPSRootSignature, "FPS_Signature");
|
||||
|
||||
pOutBlob->Release();
|
||||
if (pErrorBlob)
|
||||
pErrorBlob->Release();
|
||||
|
||||
// Also create the command signature for the indirect version
|
||||
D3D12_INDIRECT_ARGUMENT_DESC dispatch = {};
|
||||
dispatch.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH;
|
||||
D3D12_COMMAND_SIGNATURE_DESC desc = {};
|
||||
desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS);
|
||||
desc.NodeMask = 0;
|
||||
desc.NumArgumentDescs = 1;
|
||||
desc.pArgumentDescs = &dispatch;
|
||||
|
||||
ThrowIfFailed(pDevice->GetDevice()->CreateCommandSignature(&desc, nullptr, IID_PPV_ARGS(&m_pFPSCommandSignature)));
|
||||
m_pFPSCommandSignature->SetName(L"FPS_CommandSignature");
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Create pipelines for radix sort
|
||||
{
|
||||
// Create all of the necessary pipelines for Sort and Scan
|
||||
DefineList defines;
|
||||
|
||||
// SetupIndirectParams (indirect only)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_SetupIndirectParameters", m_pFPSIndirectSetupParametersPipeline);
|
||||
|
||||
// Radix count (sum table generation)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_Count", m_pFPSCountPipeline);
|
||||
// Radix count reduce (sum table reduction for offset prescan)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_CountReduce", m_pFPSCountReducePipeline);
|
||||
// Radix scan (prefix scan)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_Scan", m_pFPSScanPipeline);
|
||||
// Radix scan add (prefix scan + reduced prefix scan addition)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_ScanAdd", m_pFPSScanAddPipeline);
|
||||
// Radix scatter (key redistribution)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_Scatter", m_pFPSScatterPipeline);
|
||||
// Radix scatter with payload (key and payload redistribution)
|
||||
defines["kRS_ValueCopy"] = std::to_string(1);
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_Scatter", m_pFPSScatterPayloadPipeline);
|
||||
}
|
||||
}
|
||||
|
||||
void FFXParallelSort::OnDestroy()
|
||||
{
|
||||
// Release radix sort indirect resources
|
||||
m_IndirectKeyCounts.OnDestroy();
|
||||
m_IndirectConstantBuffer.OnDestroy();
|
||||
m_IndirectCountScatterArgs.OnDestroy();
|
||||
m_IndirectReduceScanArgs.OnDestroy();
|
||||
m_pFPSCommandSignature->Release();
|
||||
m_pFPSIndirectSetupParametersPipeline->Release();
|
||||
|
||||
// Release radix sort algorithm resources
|
||||
m_FPSScratchBuffer.OnDestroy();
|
||||
m_FPSReducedScratchBuffer.OnDestroy();
|
||||
m_pFPSRootSignature->Release();
|
||||
m_pFPSCountPipeline->Release();
|
||||
m_pFPSCountReducePipeline->Release();
|
||||
m_pFPSScanPipeline->Release();
|
||||
m_pFPSScanAddPipeline->Release();
|
||||
m_pFPSScatterPipeline->Release();
|
||||
m_pFPSScatterPayloadPipeline->Release();
|
||||
|
||||
// Release all of our resources
|
||||
m_DstKeyTempBuffer[0].OnDestroy();
|
||||
m_DstKeyTempBuffer[1].OnDestroy();
|
||||
m_DstPayloadTempBuffer[0].OnDestroy();
|
||||
m_DstPayloadTempBuffer[1].OnDestroy();
|
||||
}
|
||||
|
||||
|
||||
// Records the radix sort of the source key/payload buffers into pCommandList.
//
// The function binds the descriptor heap and the sort root signature, then (in the
// indirect path, which is the one currently hard-wired on) runs a one-group setup
// dispatch that writes the sort constant buffer and the two indirect argument
// buffers on the GPU. It then loops over the 32 key bits in steps of
// FFX_PARALLELSORT_SORT_BITS_PER_PASS, and for every step records:
//   count -> reduce -> scan -> scan-add -> scatter
// with a UAV barrier after each stage. Keys and payloads ping-pong between the
// source buffers and the [1] temp buffers, swapping roles after each pass.
// NOTE(review): where the final sorted data lands depends on whether the number of
// passes is even or odd - confirm against FFX_PARALLELSORT_SORT_BITS_PER_PASS.
//
// Root parameter indices used below match the root signature built in OnCreate:
//   0 sort CB, 1 setup CB, 2 shift constant, 3 SrcBuffer, 4 SrcPayload, 5 Scratch,
//   6 reduced Scratch, 7 DstBuffer, 8 DstPayload, 9 ScanSrc, 10 ScanDst,
//   11 ScanScratch, 12 NumKeys, 13 CB UAV, 14 Count/Scatter args,
//   15 Reduce/Scan args, 16 element count SRV.
//
// pCommandList: command list that receives all binds, dispatches and barriers.
void FFXParallelSort::Draw(ID3D12GraphicsCommandList* pCommandList)
{
    bool bIndirectDispatch = true;

    std::string markerText = "FFXParallelSort";
    if (bIndirectDispatch) markerText += " Indirect";
    UserMarker marker(pCommandList, markerText.c_str());

    FFX_ParallelSortCB constantBufferData = { 0 };

    // Bind the descriptor heaps
    ID3D12DescriptorHeap* pDescriptorHeap = m_pResourceViewHeaps->GetCBV_SRV_UAVHeap();
    pCommandList->SetDescriptorHeaps(1, &pDescriptorHeap);

    // Bind the root signature
    pCommandList->SetComputeRootSignature(m_pFPSRootSignature);

    // Fill in the constant buffer data structure (this will be done by a shader in the indirect version)
    // Both counts are only filled in by the direct path; start them at a defined value
    // so they never hold indeterminate data in the indirect path.
    uint32_t NumThreadgroupsToRun = 0;
    uint32_t NumReducedThreadgroupsToRun = 0;
    if (!bIndirectDispatch)
    {
        uint32_t NumberOfKeys = NumKeys;
        FFX_ParallelSort_SetConstantAndDispatchData(NumberOfKeys, m_MaxNumThreadgroups, constantBufferData, NumThreadgroupsToRun, NumReducedThreadgroupsToRun);
    }
    else
    {
        struct SetupIndirectCB
        {
            uint32_t MaxThreadGroups;
        };
        SetupIndirectCB IndirectSetupCB;
        IndirectSetupCB.MaxThreadGroups = m_MaxNumThreadgroups;

        // Copy the data into the constant buffer
        D3D12_GPU_VIRTUAL_ADDRESS constantBuffer = m_pConstantBufferRing->AllocConstantBuffer(sizeof(SetupIndirectCB), &IndirectSetupCB);
        pCommandList->SetComputeRootConstantBufferView(1, constantBuffer); // SetupIndirect Constant buffer

        // Bind other buffers
        pCommandList->SetComputeRootDescriptorTable(12, m_IndirectKeyCountsUAV.GetGPU());         // Key counts
        pCommandList->SetComputeRootDescriptorTable(13, m_IndirectConstantBufferUAV.GetGPU());    // Indirect Sort Constant Buffer
        pCommandList->SetComputeRootDescriptorTable(14, m_IndirectCountScatterArgsUAV.GetGPU());  // Indirect Sort Count/Scatter Args
        pCommandList->SetComputeRootDescriptorTable(15, m_IndirectReduceScanArgsUAV.GetGPU());    // Indirect Sort Reduce/Scan Args
        pCommandList->SetComputeRootDescriptorTable(16, m_ElementCountSRV.GetGPU());              // Element count SRV

        // Dispatch
        pCommandList->SetPipelineState(m_pFPSIndirectSetupParametersPipeline);
        pCommandList->Dispatch(1, 1, 1);

        // When done, transition the args buffers to INDIRECT_ARGUMENT, and the constant buffer UAV to Constant buffer
        // (named differently from the per-pass barrier array below so neither hides the other)
        CD3DX12_RESOURCE_BARRIER setupBarriers[5];
        setupBarriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(m_IndirectCountScatterArgs.GetResource());
        setupBarriers[1] = CD3DX12_RESOURCE_BARRIER::UAV(m_IndirectReduceScanArgs.GetResource());
        setupBarriers[2] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectConstantBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
        setupBarriers[3] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectCountScatterArgs.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
        setupBarriers[4] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectReduceScanArgs.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
        pCommandList->ResourceBarrier(5, setupBarriers);
    }

    // Setup resource/UAV pairs to use during sort
    RdxDX12ResourceInfo KeySrcInfo = { m_SrcKeyBuffer->GetResource(), m_SrcKeyUAV.GetGPU(0) };
    RdxDX12ResourceInfo PayloadSrcInfo = { m_SrcPayloadBuffer->GetResource(), m_SrcPayloadUAV.GetGPU(0) };
    RdxDX12ResourceInfo KeyTmpInfo = { m_DstKeyTempBuffer[1].GetResource(), m_DstKeyUAVTable.GetGPU(1) };
    RdxDX12ResourceInfo PayloadTmpInfo = { m_DstPayloadTempBuffer[1].GetResource(), m_DstPayloadUAVTable.GetGPU(1) };
    RdxDX12ResourceInfo ScratchBufferInfo = { m_FPSScratchBuffer.GetResource(), m_FPSScratchUAV.GetGPU() };
    RdxDX12ResourceInfo ReducedScratchBufferInfo = { m_FPSReducedScratchBuffer.GetResource(), m_FPSReducedScratchUAV.GetGPU() };

    // Buffers to ping-pong between when writing out sorted values
    const RdxDX12ResourceInfo* ReadBufferInfo(&KeySrcInfo), * WriteBufferInfo(&KeyTmpInfo);
    const RdxDX12ResourceInfo* ReadPayloadBufferInfo(&PayloadSrcInfo), * WritePayloadBufferInfo(&PayloadTmpInfo);
    bool bHasPayload = true;

    // Setup barriers for the run
    CD3DX12_RESOURCE_BARRIER barriers[3];

    // Perform Radix Sort (currently only supports 32-bit key/payload sorting)
    for (uint32_t Shift = 0; Shift < 32u; Shift += FFX_PARALLELSORT_SORT_BITS_PER_PASS)
    {
        // Update the bit shift
        pCommandList->SetComputeRoot32BitConstant(2, Shift, 0);

        // Copy the data into the constant buffer
        D3D12_GPU_VIRTUAL_ADDRESS constantBuffer;
        if (bIndirectDispatch)
            constantBuffer = m_IndirectConstantBuffer.GetResource()->GetGPUVirtualAddress();
        else
            constantBuffer = m_pConstantBufferRing->AllocConstantBuffer(sizeof(FFX_ParallelSortCB), &constantBufferData);

        // Bind to root signature
        pCommandList->SetComputeRootConstantBufferView(0, constantBuffer);                      // Constant buffer
        pCommandList->SetComputeRootDescriptorTable(3, ReadBufferInfo->resourceGPUHandle);      // SrcBuffer
        pCommandList->SetComputeRootDescriptorTable(5, ScratchBufferInfo.resourceGPUHandle);    // Scratch buffer

        // Sort Count
        {
            pCommandList->SetPipelineState(m_pFPSCountPipeline);

            if (bIndirectDispatch)
            {
                pCommandList->ExecuteIndirect(m_pFPSCommandSignature, 1, m_IndirectCountScatterArgs.GetResource(), 0, nullptr, 0);
            }
            else
            {
                pCommandList->Dispatch(NumThreadgroupsToRun, 1, 1);
            }
        }

        // UAV barrier on the sum table
        barriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(ScratchBufferInfo.pResource);
        pCommandList->ResourceBarrier(1, barriers);

        pCommandList->SetComputeRootDescriptorTable(6, ReducedScratchBufferInfo.resourceGPUHandle); // Scratch reduce buffer

        // Sort Reduce
        {
            pCommandList->SetPipelineState(m_pFPSCountReducePipeline);

            if (bIndirectDispatch)
            {
                pCommandList->ExecuteIndirect(m_pFPSCommandSignature, 1, m_IndirectReduceScanArgs.GetResource(), 0, nullptr, 0);
            }
            else
            {
                pCommandList->Dispatch(NumReducedThreadgroupsToRun, 1, 1);
            }

            // UAV barrier on the reduced sum table
            barriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(ReducedScratchBufferInfo.pResource);
            pCommandList->ResourceBarrier(1, barriers);
        }

        // Sort Scan
        {
            // First do scan prefix of reduced values
            pCommandList->SetComputeRootDescriptorTable(9, ReducedScratchBufferInfo.resourceGPUHandle);
            pCommandList->SetComputeRootDescriptorTable(10, ReducedScratchBufferInfo.resourceGPUHandle);

            pCommandList->SetPipelineState(m_pFPSScanPipeline);
            if (!bIndirectDispatch)
            {
                assert(NumReducedThreadgroupsToRun < FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE && "Need to account for bigger reduced histogram scan");
            }
            pCommandList->Dispatch(1, 1, 1);

            // UAV barrier on the reduced sum table
            barriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(ReducedScratchBufferInfo.pResource);
            pCommandList->ResourceBarrier(1, barriers);

            // Next do scan prefix on the histogram with partial sums that we just did
            pCommandList->SetComputeRootDescriptorTable(9, ScratchBufferInfo.resourceGPUHandle);
            pCommandList->SetComputeRootDescriptorTable(10, ScratchBufferInfo.resourceGPUHandle);
            pCommandList->SetComputeRootDescriptorTable(11, ReducedScratchBufferInfo.resourceGPUHandle);

            pCommandList->SetPipelineState(m_pFPSScanAddPipeline);
            if (bIndirectDispatch)
            {
                pCommandList->ExecuteIndirect(m_pFPSCommandSignature, 1, m_IndirectReduceScanArgs.GetResource(), 0, nullptr, 0);
            }
            else
            {
                pCommandList->Dispatch(NumReducedThreadgroupsToRun, 1, 1);
            }
        }

        // UAV barrier on the sum table
        barriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(ScratchBufferInfo.pResource);
        pCommandList->ResourceBarrier(1, barriers);

        if (bHasPayload)
        {
            pCommandList->SetComputeRootDescriptorTable(4, ReadPayloadBufferInfo->resourceGPUHandle);   // SrcPayload
            pCommandList->SetComputeRootDescriptorTable(8, WritePayloadBufferInfo->resourceGPUHandle);  // DstPayload
        }

        pCommandList->SetComputeRootDescriptorTable(7, WriteBufferInfo->resourceGPUHandle); // DstBuffer

        // Sort Scatter
        {
            pCommandList->SetPipelineState(bHasPayload ? m_pFPSScatterPayloadPipeline : m_pFPSScatterPipeline);

            if (bIndirectDispatch)
            {
                pCommandList->ExecuteIndirect(m_pFPSCommandSignature, 1, m_IndirectCountScatterArgs.GetResource(), 0, nullptr, 0);
            }
            else
            {
                pCommandList->Dispatch(NumThreadgroupsToRun, 1, 1);
            }
        }

        // Finish doing everything and barrier for the next pass
        int numBarriers = 0;
        barriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::UAV(WriteBufferInfo->pResource);
        if (bHasPayload)
            barriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::UAV(WritePayloadBufferInfo->pResource);
        pCommandList->ResourceBarrier(numBarriers, barriers);

        // Swap read/write sources
        std::swap(ReadBufferInfo, WriteBufferInfo);
        if (bHasPayload)
            std::swap(ReadPayloadBufferInfo, WritePayloadBufferInfo);
    }

    // When we are all done, transition indirect buffers back to UAV for the next frame (if doing indirect dispatch)
    if (bIndirectDispatch)
    {
        barriers[0] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectCountScatterArgs.GetResource(), D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
        barriers[1] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectReduceScanArgs.GetResource(), D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
        barriers[2] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectConstantBuffer.GetResource(), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
        pCommandList->ResourceBarrier(3, barriers);
    }
}
|
@ -0,0 +1,102 @@
|
||||
// ParallelSort.h
|
||||
//
|
||||
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "../DX12/stdafx.h"
|
||||
|
||||
// Number of key bits handled per pass.
#define SORT_BITS_PER_PASS 4
|
||||
// Derived from SORT_BITS_PER_PASS: 1 << 4 = 16.
#define SORT_BIN_COUNT (1 << SORT_BITS_PER_PASS)
|
||||
#define THREADGROUP_SIZE 64
|
||||
#define ELEMENTS_PER_THREAD 4 // (256 / THREADGROUP_SIZE)
|
||||
#define ITEMS_PER_WI 16
|
||||
// NOTE: expands to the bare tokens `1/16` (no parentheses). In C++ that is integer
// division, so on its own it evaluates to 0, while `x * INV_ITEMS_PER_WI` parses as
// `(x * 1) / 16`. Check every use site before adding parentheses or a float literal.
#define INV_ITEMS_PER_WI 1/16
|
||||
|
||||
// Four 32-bit integers shared with a shader-side declaration of the same name.
// NOTE(review): field meanings are taken from their names only (output size and
// sort-area size, presumably in pixels) - confirm against ParallelSortVerify.hlsl.
struct ParallelSortRenderCB // If you change this, also change struct ParallelSortRenderCB in ParallelSortVerify.hlsl
{
    int32_t Width;
    int32_t Height;
    int32_t SortWidth;
    int32_t SortHeight;
};

// The HLSL copy is kept in sync by hand, so fail the build when the C++ size changes
// as a reminder to update the shader declaration as well.
static_assert(sizeof(ParallelSortRenderCB) == 4 * sizeof(int32_t),
              "ParallelSortRenderCB changed size; update struct ParallelSortRenderCB in ParallelSortVerify.hlsl to match");
|
||||
|
||||
// Convenience struct for passing resource/UAV pairs around
|
||||
typedef struct RdxDX12ResourceInfo
|
||||
{
|
||||
ID3D12Resource* pResource; ///< Pointer to the resource -- used for barriers and syncs (must NOT be nullptr)
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE resourceGPUHandle; ///< The GPU Descriptor Handle to use for binding the resource
|
||||
} RdxDX12ResourceInfo;
|
||||
|
||||
class FFXParallelSort
|
||||
{
|
||||
public:
|
||||
void OnCreate(Device* pDevice, ResourceViewHeaps* pResourceViewHeaps, DynamicBufferRing* pConstantBufferRing, UploadHeap* pUploadHeap, Texture* elementCount, Texture* listA, Texture* listB);
|
||||
void OnDestroy();
|
||||
|
||||
void Draw(ID3D12GraphicsCommandList* pCommandList);
|
||||
|
||||
private:
|
||||
|
||||
void CompileRadixPipeline(const char* shaderFile, const DefineList* defines, const char* entryPoint, ID3D12PipelineState*& pPipeline);
|
||||
|
||||
Device* m_pDevice = nullptr;
|
||||
UploadHeap* m_pUploadHeap = nullptr;
|
||||
ResourceViewHeaps* m_pResourceViewHeaps = nullptr;
|
||||
DynamicBufferRing* m_pConstantBufferRing = nullptr;
|
||||
uint32_t m_MaxNumThreadgroups = 320; // Use a generic thread group size when not on AMD hardware (taken from experiments to determine best performance threshold)
|
||||
|
||||
// Sample resources
|
||||
Texture* m_SrcKeyBuffer = nullptr;
|
||||
Texture* m_SrcPayloadBuffer = nullptr;
|
||||
CBV_SRV_UAV m_ElementCountSRV;
|
||||
CBV_SRV_UAV m_SrcKeyUAV; // 32 bit source key UAVs
|
||||
CBV_SRV_UAV m_SrcPayloadUAV; // 32 bit source payload UAVs
|
||||
|
||||
Texture m_DstKeyTempBuffer[ 2 ];
|
||||
CBV_SRV_UAV m_DstKeyUAVTable; // 32 bit destination key UAVs
|
||||
|
||||
Texture m_DstPayloadTempBuffer[ 2 ];
|
||||
CBV_SRV_UAV m_DstPayloadUAVTable; // 32 bit destination payload UAVs
|
||||
|
||||
// Resources for parallel sort algorithm
|
||||
Texture m_FPSScratchBuffer; // Sort scratch buffer
|
||||
CBV_SRV_UAV m_FPSScratchUAV; // UAV needed for sort scratch buffer
|
||||
Texture m_FPSReducedScratchBuffer; // Sort reduced scratch buffer
|
||||
CBV_SRV_UAV m_FPSReducedScratchUAV; // UAV needed for sort reduced scratch buffer
|
||||
|
||||
ID3D12RootSignature* m_pFPSRootSignature = nullptr;
|
||||
ID3D12PipelineState* m_pFPSCountPipeline = nullptr;
|
||||
ID3D12PipelineState* m_pFPSCountReducePipeline = nullptr;
|
||||
ID3D12PipelineState* m_pFPSScanPipeline = nullptr;
|
||||
ID3D12PipelineState* m_pFPSScanAddPipeline = nullptr;
|
||||
ID3D12PipelineState* m_pFPSScatterPipeline = nullptr;
|
||||
ID3D12PipelineState* m_pFPSScatterPayloadPipeline = nullptr;
|
||||
|
||||
// Resources for indirect execution of algorithm
|
||||
Texture m_IndirectKeyCounts; // Buffer to hold num keys for indirect dispatch
|
||||
CBV_SRV_UAV m_IndirectKeyCountsUAV; // UAV needed for num keys buffer
|
||||
Texture m_IndirectConstantBuffer; // Buffer to hold radix sort constant buffer data for indirect dispatch
|
||||
CBV_SRV_UAV m_IndirectConstantBufferUAV; // UAV needed for indirect constant buffer
|
||||
Texture m_IndirectCountScatterArgs; // Buffer to hold dispatch arguments used for Count/Scatter parts of the algorithm
|
||||
CBV_SRV_UAV m_IndirectCountScatterArgsUAV; // UAV needed for count/scatter args buffer
|
||||
Texture m_IndirectReduceScanArgs; // Buffer to hold dispatch arguments used for Reduce/Scan parts of the algorithm
|
||||
CBV_SRV_UAV m_IndirectReduceScanArgsUAV; // UAV needed for reduce/scan args buffer
|
||||
|
||||
ID3D12CommandSignature* m_pFPSCommandSignature;
|
||||
ID3D12PipelineState* m_pFPSIndirectSetupParametersPipeline = nullptr;
|
||||
};
|
@ -0,0 +1,179 @@
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "../vk/stdafx.h"
|
||||
|
||||
|
||||
namespace CAULDRON_VK
|
||||
{
|
||||
|
||||
// For adding markers in RGP
|
||||
class UserMarker
|
||||
{
|
||||
public:
|
||||
UserMarker(VkCommandBuffer commandBuffer, const char* name) : m_commandBuffer( commandBuffer ) { SetPerfMarkerBegin(m_commandBuffer, name); }
|
||||
~UserMarker() { SetPerfMarkerEnd(m_commandBuffer); }
|
||||
|
||||
private:
|
||||
VkCommandBuffer m_commandBuffer;
|
||||
};
|
||||
|
||||
|
||||
// Returns a size for `format`; Buffer::Init multiplies the result by its element
// count to obtain the buffer size in bytes. Defined elsewhere.
size_t FormatSize(VkFormat format);
|
||||
|
||||
|
||||
class Buffer
|
||||
{
|
||||
public:
|
||||
Buffer() {}
|
||||
virtual ~Buffer() {}
|
||||
virtual void OnDestroy()
|
||||
{
|
||||
if (m_bufferView)
|
||||
{
|
||||
vkDestroyBufferView(m_pDevice->GetDevice(), m_bufferView, nullptr);
|
||||
m_bufferView = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
if (m_buffer)
|
||||
{
|
||||
vmaDestroyBuffer(m_pDevice->GetAllocator(), m_buffer, m_alloc);
|
||||
m_buffer = VK_NULL_HANDLE;
|
||||
}
|
||||
m_pDevice = nullptr;
|
||||
m_sizeInBytes = 0;
|
||||
}
|
||||
|
||||
bool Init(Device *pDevice, int numElements, VkFormat format, const char* name)
|
||||
{
|
||||
m_pDevice = pDevice;
|
||||
m_sizeInBytes = numElements * FormatSize( format );
|
||||
VmaAllocationCreateInfo bufferAllocCreateInfo = {};
|
||||
bufferAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
bufferAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT;
|
||||
bufferAllocCreateInfo.pUserData = (void*)name;
|
||||
VmaAllocationInfo gpuAllocInfo = {};
|
||||
|
||||
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
|
||||
bufferInfo.size = m_sizeInBytes;
|
||||
bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
|
||||
|
||||
VkResult res = vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferInfo, &bufferAllocCreateInfo, &m_buffer, &m_alloc, &gpuAllocInfo);
|
||||
assert(res == VK_SUCCESS);
|
||||
SetResourceName(pDevice->GetDevice(), VK_OBJECT_TYPE_BUFFER, (uint64_t)m_buffer, name);
|
||||
|
||||
VkBufferViewCreateInfo viewInfo = {};
|
||||
viewInfo.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
|
||||
viewInfo.format = format;
|
||||
viewInfo.buffer = m_buffer;
|
||||
viewInfo.range = m_sizeInBytes;
|
||||
vkCreateBufferView(pDevice->GetDevice(), &viewInfo, nullptr, &m_bufferView);
|
||||
SetResourceName(m_pDevice->GetDevice(), VK_OBJECT_TYPE_BUFFER_VIEW, (uint64_t)m_bufferView, name);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Init(Device *pDevice, int numElements, size_t structSize, const char* name, bool indirectArgs)
|
||||
{
|
||||
m_pDevice = pDevice;
|
||||
m_sizeInBytes = numElements * structSize;
|
||||
VmaAllocationCreateInfo bufferAllocCreateInfo = {};
|
||||
bufferAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
bufferAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT;
|
||||
bufferAllocCreateInfo.pUserData = (void*)name;
|
||||
VmaAllocationInfo gpuAllocInfo = {};
|
||||
|
||||
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
|
||||
bufferInfo.size = m_sizeInBytes;
|
||||
bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
||||
if ( indirectArgs )
|
||||
bufferInfo.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
|
||||
|
||||
VkResult res = vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferInfo, &bufferAllocCreateInfo, &m_buffer, &m_alloc, &gpuAllocInfo);
|
||||
assert(res == VK_SUCCESS);
|
||||
SetResourceName(pDevice->GetDevice(), VK_OBJECT_TYPE_BUFFER, (uint64_t)m_buffer, name);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
VkBuffer& Resource() { return m_buffer; }
|
||||
|
||||
void SetDescriptorSet(int index, VkDescriptorSet descriptorSet, bool asUAV) const
|
||||
{
|
||||
VkDescriptorBufferInfo descriptorBufferInfo = {};
|
||||
descriptorBufferInfo.buffer = m_buffer;
|
||||
descriptorBufferInfo.range = m_sizeInBytes;
|
||||
|
||||
VkWriteDescriptorSet write = {};
|
||||
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
write.dstSet = descriptorSet;
|
||||
write.descriptorCount = 1;
|
||||
if ( m_bufferView )
|
||||
{
|
||||
write.descriptorType = asUAV ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
|
||||
write.pTexelBufferView = &m_bufferView;
|
||||
}
|
||||
else
|
||||
{
|
||||
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
write.pBufferInfo = &descriptorBufferInfo;
|
||||
}
|
||||
write.dstBinding = index;
|
||||
vkUpdateDescriptorSets(m_pDevice->GetDevice(), 1, &write, 0, nullptr);
|
||||
}
|
||||
|
||||
void PipelineBarrier( VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask )
|
||||
{
|
||||
VkBufferMemoryBarrier memoryBarrier = {};
|
||||
memoryBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
memoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
memoryBarrier.dstAccessMask = dstStageMask == VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT ? VK_ACCESS_INDIRECT_COMMAND_READ_BIT : VK_ACCESS_SHADER_READ_BIT;
|
||||
memoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
memoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
memoryBarrier.buffer = m_buffer;
|
||||
memoryBarrier.size = m_sizeInBytes;
|
||||
vkCmdPipelineBarrier( commandBuffer, srcStageMask, dstStageMask, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 1, &memoryBarrier, 0, nullptr );
|
||||
}
|
||||
|
||||
void AddPipelineBarrier( std::vector<VkBufferMemoryBarrier>& barrierList, VkPipelineStageFlags dstStageMask )
|
||||
{
|
||||
VkBufferMemoryBarrier memoryBarrier = {};
|
||||
memoryBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
memoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
memoryBarrier.dstAccessMask = dstStageMask == VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT ? VK_ACCESS_INDIRECT_COMMAND_READ_BIT : VK_ACCESS_SHADER_READ_BIT;
|
||||
memoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
memoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
memoryBarrier.buffer = m_buffer;
|
||||
memoryBarrier.size = m_sizeInBytes;
|
||||
barrierList.push_back( memoryBarrier );
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
Device* m_pDevice = nullptr;
|
||||
VmaAllocation m_alloc = VK_NULL_HANDLE;
|
||||
VkBuffer m_buffer = VK_NULL_HANDLE;
|
||||
size_t m_sizeInBytes = 0;
|
||||
VkBufferView m_bufferView = VK_NULL_HANDLE;
|
||||
};
|
||||
|
||||
}
|
@ -0,0 +1,944 @@
|
||||
//
|
||||
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
#include "../vk/stdafx.h"
|
||||
#include "BufferHelper.h"
|
||||
#include "../ParticleSystem.h"
|
||||
#include "../ParticleHelpers.h"
|
||||
#include "../ParticleSystemInternal.h"
|
||||
#include "ParallelSort.h"
|
||||
#include "base/ExtDebugUtils.h"
|
||||
|
||||
|
||||
// Returns the size in bytes of a single element of `format`.
// Only the formats listed below are handled; any other format triggers the
// assert and yields 0.
size_t CAULDRON_VK::FormatSize(VkFormat format)
{
    switch (format)
    {
    // Undefined format: no size.
    case VK_FORMAT_UNDEFINED:
        return 0;

    // 1 byte: one 8-bit channel.
    case VK_FORMAT_R8_SINT:
    case VK_FORMAT_R8_UINT:
        return 1;

    // 2 bytes: one 16-bit channel or two 8-bit channels.
    case VK_FORMAT_R16_SINT:
    case VK_FORMAT_R16_UINT:
    case VK_FORMAT_R8G8_SINT:
    case VK_FORMAT_R8G8_UINT:
        return 2;

    // 4 bytes: one 32-bit, two 16-bit or four 8-bit channels.
    case VK_FORMAT_R32_SINT:
    case VK_FORMAT_R32_UINT:
    case VK_FORMAT_R32_SFLOAT:
    case VK_FORMAT_R16G16_SINT:
    case VK_FORMAT_R16G16_UINT:
    case VK_FORMAT_R8G8B8A8_SINT:
    case VK_FORMAT_R8G8B8A8_UINT:
        return 4;

    // 8 bytes: two 32-bit or four 16-bit channels.
    case VK_FORMAT_R32G32_SINT:
    case VK_FORMAT_R32G32_UINT:
    case VK_FORMAT_R32G32_SFLOAT:
    case VK_FORMAT_R16G16B16A16_SINT:
    case VK_FORMAT_R16G16B16A16_UINT:
    case VK_FORMAT_R16G16B16A16_SFLOAT:
        return 8;

    // 12 bytes: three 32-bit channels.
    case VK_FORMAT_R32G32B32_SINT:
    case VK_FORMAT_R32G32B32_UINT:
    case VK_FORMAT_R32G32B32_SFLOAT:
        return 12;

    // 16 bytes: four 32-bit channels.
    case VK_FORMAT_R32G32B32A32_SINT:
    case VK_FORMAT_R32G32B32A32_UINT:
    case VK_FORMAT_R32G32B32A32_SFLOAT:
        return 16;
    }

    // Reached only for formats this helper does not know about.
    assert(0);
    return 0;
}
|
||||
|
||||
#pragma warning( disable : 4100 ) // disable unreference formal parameter warnings for /W4 builds
|
||||
|
||||
// Element type of the indirect args buffer; its size is also used as the
// stride passed to vkCmdDrawIndexedIndirect.
// NOTE(review): five 32-bit values presumably mirror the fields of
// VkDrawIndexedIndirectCommand - confirm against the shader that fills it.
struct IndirectCommand
{
    int args[ 5 ];
};
|
||||
|
||||
// GPU Particle System class. Responsible for updating and rendering the particles
|
||||
class GPUParticleSystem : public IParticleSystem
|
||||
{
|
||||
public:
|
||||
|
||||
GPUParticleSystem( const char* particleAtlas );
|
||||
|
||||
private:
|
||||
|
||||
enum DepthCullingMode
|
||||
{
|
||||
DepthCullingOn,
|
||||
DepthCullingOff,
|
||||
NumDepthCullingModes
|
||||
};
|
||||
|
||||
enum StreakMode
|
||||
{
|
||||
StreaksOn,
|
||||
StreaksOff,
|
||||
NumStreakModes
|
||||
};
|
||||
|
||||
enum ReactiveMode
|
||||
{
|
||||
ReactiveOn,
|
||||
ReactiveOff,
|
||||
NumReactiveModes
|
||||
};
|
||||
|
||||
virtual ~GPUParticleSystem();
|
||||
|
||||
virtual void OnCreateDevice( Device& device, UploadHeap& uploadHeap, ResourceViewHeaps& heaps, StaticBufferPool& bufferPool, DynamicBufferRing& constantBufferRing, VkRenderPass renderPass );
|
||||
virtual void OnResizedSwapChain( int width, int height, Texture& depthBuffer, VkFramebuffer frameBuffer );
|
||||
virtual void OnReleasingSwapChain();
|
||||
virtual void OnDestroyDevice();
|
||||
|
||||
virtual void Reset();
|
||||
|
||||
virtual void Render( VkCommandBuffer commandBuffer, DynamicBufferRing& constantBufferRing, int flags, const EmitterParams* pEmitters, int nNumEmitters, const ConstantData& constantData );
|
||||
|
||||
void Emit( VkCommandBuffer commandBuffer, DynamicBufferRing& constantBufferRing, uint32_t perFrameConstantOffset, int numEmitters, const EmitterParams* emitters );
|
||||
void Simulate( VkCommandBuffer commandBuffer );
|
||||
void Sort( VkCommandBuffer commandBuffer );
|
||||
|
||||
void FillRandomTexture( UploadHeap& uploadHeap );
|
||||
void CreateSimulationAssets( DynamicBufferRing& constantBufferRing );
|
||||
void CreateRasterizedRenderingAssets( DynamicBufferRing& constantBufferRing );
|
||||
|
||||
VkPipeline CreatePipeline( const char* filename, const char* entry, VkPipelineLayout layout, const DefineList* defines );
|
||||
|
||||
Device* m_pDevice = nullptr;
|
||||
ResourceViewHeaps* m_heaps = nullptr;
|
||||
const char* m_AtlasPath = nullptr;
|
||||
VkRenderPass m_renderPass = VK_NULL_HANDLE;
|
||||
VkFramebuffer m_frameBuffer = VK_NULL_HANDLE;
|
||||
|
||||
Texture m_Atlas = {};
|
||||
VkImageView m_AtlasSRV = {};
|
||||
Buffer m_ParticleBufferA = {};
|
||||
Buffer m_ParticleBufferB = {};
|
||||
Buffer m_PackedViewSpaceParticlePositions = {};
|
||||
Buffer m_MaxRadiusBuffer = {};
|
||||
Buffer m_DeadListBuffer = {};
|
||||
Buffer m_AliveCountBuffer = {};
|
||||
Buffer m_AliveIndexBuffer = {};
|
||||
Buffer m_AliveDistanceBuffer = {};
|
||||
Buffer m_DstAliveIndexBuffer = {}; // working memory for the Radix sorter
|
||||
Buffer m_DstAliveDistanceBuffer = {}; // working memory for the Radix sorter
|
||||
Buffer m_IndirectArgsBuffer = {};
|
||||
|
||||
Texture m_RandomTexture = {};
|
||||
VkImageView m_RandomTextureSRV = {};
|
||||
|
||||
VkImage m_DepthBuffer = {};
|
||||
VkImageView m_DepthBufferSRV = {};
|
||||
|
||||
VkDescriptorSetLayout m_SimulationDescriptorSetLayout = VK_NULL_HANDLE;
|
||||
VkDescriptorSet m_SimulationDescriptorSet = VK_NULL_HANDLE;
|
||||
|
||||
VkDescriptorSetLayout m_RasterizationDescriptorSetLayout = VK_NULL_HANDLE;
|
||||
VkDescriptorSet m_RasterizationDescriptorSet = VK_NULL_HANDLE;
|
||||
|
||||
VkSampler m_samplers[ 3 ] = {};
|
||||
|
||||
UINT m_ScreenWidth = 0;
|
||||
UINT m_ScreenHeight = 0;
|
||||
float m_InvScreenWidth = 0.0f;
|
||||
float m_InvScreenHeight = 0.0f;
|
||||
float m_ElapsedTime = 0.0f;
|
||||
float m_AlphaThreshold = 0.97f;
|
||||
|
||||
VkDescriptorBufferInfo m_IndexBuffer = {};
|
||||
|
||||
VkPipelineLayout m_SimulationPipelineLayout = VK_NULL_HANDLE;
|
||||
VkPipelineLayout m_RasterizationPipelineLayout = VK_NULL_HANDLE;
|
||||
|
||||
VkPipeline m_SimulationPipeline = VK_NULL_HANDLE;
|
||||
VkPipeline m_EmitPipeline = VK_NULL_HANDLE;
|
||||
VkPipeline m_ResetParticlesPipeline = VK_NULL_HANDLE;
|
||||
VkPipeline m_RasterizationPipelines[ NumStreakModes ][ NumReactiveModes ] = {};
|
||||
|
||||
bool m_ResetSystem = true;
|
||||
FFXParallelSort m_SortLib = {};
|
||||
};
|
||||
|
||||
|
||||
// Factory: returns a newly allocated GPU implementation of IParticleSystem
// that will load its particle atlas from `particleAtlas`.
// NOTE(review): how the returned object is meant to be released is not visible here.
IParticleSystem* IParticleSystem::CreateGPUSystem( const char* particleAtlas )
{
    IParticleSystem* pSystem = new GPUParticleSystem( particleAtlas );
    return pSystem;
}
|
||||
|
||||
|
||||
// Only stores the atlas path pointer; the atlas itself is loaded in OnCreateDevice().
GPUParticleSystem::GPUParticleSystem( const char* particleAtlas )
    : m_AtlasPath( particleAtlas )
{
}
|
||||
// Nothing is released in the destructor itself.
GPUParticleSystem::~GPUParticleSystem() = default;
|
||||
|
||||
|
||||
// Use the sort lib to perform a bitonic sort over the particle indices based on their distance from camera
|
||||
void GPUParticleSystem::Sort( VkCommandBuffer commandBuffer )
|
||||
{
|
||||
m_SortLib.Draw( commandBuffer );
|
||||
}
|
||||
|
||||
|
||||
void GPUParticleSystem::Reset()
|
||||
{
|
||||
m_ResetSystem = true;
|
||||
}
|
||||
|
||||
|
||||
// Records one frame of particle work into commandBuffer:
//   1. "simulation"    - optional reset pass, then emission and simulation (compute)
//   2. "rasterization" - optional sort (PF_Sort), then one indexed indirect draw
//
// flags         - PF_* bits; PF_Sort, PF_Streaks and PF_Reactive are read here
// pEmitters     - nNumEmitters emitter descriptions, forwarded to Emit()
// constantData  - per-frame camera, lighting and timing values
void GPUParticleSystem::Render( VkCommandBuffer commandBuffer, DynamicBufferRing& constantBufferRing, int flags, const EmitterParams* pEmitters, int nNumEmitters, const ConstantData& constantData )
{
    // Fill in the per-frame constants consumed by the simulation shaders
    SimulationConstantBuffer simulationConstants = {};

    memcpy( simulationConstants.m_StartColor, constantData.m_StartColor, sizeof( simulationConstants.m_StartColor ) );
    memcpy( simulationConstants.m_EndColor, constantData.m_EndColor, sizeof( simulationConstants.m_EndColor ) );
    memcpy( simulationConstants.m_EmitterLightingCenter, constantData.m_EmitterLightingCenter, sizeof( simulationConstants.m_EmitterLightingCenter ) );

    simulationConstants.m_ViewProjection = constantData.m_ViewProjection;
    simulationConstants.m_View = constantData.m_View;
    simulationConstants.m_ViewInv = constantData.m_ViewInv;
    simulationConstants.m_ProjectionInv = constantData.m_ProjectionInv;

    simulationConstants.m_EyePosition = constantData.m_ViewInv.getCol3();
    simulationConstants.m_SunDirection = constantData.m_SunDirection;

    simulationConstants.m_ScreenWidth = m_ScreenWidth;
    simulationConstants.m_ScreenHeight = m_ScreenHeight;
    simulationConstants.m_MaxParticles = g_maxParticles;
    simulationConstants.m_FrameTime = constantData.m_FrameTime;

    math::Vector4 sunDirectionVS = constantData.m_View * constantData.m_SunDirection;

    // Accumulated time is wrapped once it exceeds 10
    m_ElapsedTime += constantData.m_FrameTime;
    if ( m_ElapsedTime > 10.0f )
        m_ElapsedTime -= 10.0f;

    simulationConstants.m_ElapsedTime = m_ElapsedTime;

    void* data = nullptr;
    VkDescriptorBufferInfo simulationCB = {};
    constantBufferRing.AllocConstantBuffer( sizeof( simulationConstants ), &data, &simulationCB );
    memcpy( data, &simulationConstants, sizeof( simulationConstants ) );

    {
        // Two dynamic offsets: per-frame constants, then emitter constants (rebound by Emit)
        uint32_t uniformOffsets[] = { static_cast<uint32_t>( simulationCB.offset ), 0 };
        vkCmdBindDescriptorSets( commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_SimulationPipelineLayout, 0, 1, &m_SimulationDescriptorSet, _countof( uniformOffsets ), uniformOffsets );

        UserMarker simulationMarker( commandBuffer, "simulation" );

        // If we are resetting the particle system, then initialize the dead list
        if ( m_ResetSystem )
        {
            vkCmdBindPipeline( commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_ResetParticlesPipeline );

            // Dispatch a set of 1d thread groups to fill out the dead list, one thread per particle
            vkCmdDispatch( commandBuffer, align( g_maxParticles, 256 ) / 256, 1, 1 );

            // Make the reset results visible to the compute passes that follow
            std::vector<VkBufferMemoryBarrier> resetBarriers = {};
            m_ParticleBufferA.AddPipelineBarrier( resetBarriers, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
            m_ParticleBufferB.AddPipelineBarrier( resetBarriers, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
            m_DeadListBuffer.AddPipelineBarrier( resetBarriers, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
            vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, static_cast<uint32_t>( resetBarriers.size() ), resetBarriers.data(), 0, nullptr );

            m_ResetSystem = false;
        }

        // Emit particles into the system
        Emit( commandBuffer, constantBufferRing, static_cast<uint32_t>( simulationCB.offset ), nNumEmitters, pEmitters );

        // Run the simulation for this frame
        Simulate( commandBuffer );

        std::vector<VkBufferMemoryBarrier> barriersAfterSimulation = {};
        m_ParticleBufferA.AddPipelineBarrier( barriersAfterSimulation, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
        m_PackedViewSpaceParticlePositions.AddPipelineBarrier( barriersAfterSimulation, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
        m_MaxRadiusBuffer.AddPipelineBarrier( barriersAfterSimulation, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
        m_DeadListBuffer.AddPipelineBarrier( barriersAfterSimulation, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
        m_AliveCountBuffer.AddPipelineBarrier( barriersAfterSimulation, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );

        // Move the depth buffer from the shader-read layout to the read-only
        // depth/stencil layout
        VkImageMemoryBarrier depthBarrier = {};
        depthBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        depthBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
        depthBarrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
        depthBarrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        depthBarrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
        depthBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        depthBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        depthBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        depthBarrier.subresourceRange.baseMipLevel = 0;
        depthBarrier.subresourceRange.levelCount = 1;
        depthBarrier.subresourceRange.baseArrayLayer = 0;
        depthBarrier.subresourceRange.layerCount = 1;
        depthBarrier.image = m_DepthBuffer;

        vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, static_cast<uint32_t>( barriersAfterSimulation.size() ), barriersAfterSimulation.data(), 1, &depthBarrier );
    }

    {
        UserMarker rasterizationMarker( commandBuffer, "rasterization" );

        // Sort if requested. Not doing so results in the particles rendering out of order and not blending correctly
        if ( flags & PF_Sort )
        {
            UserMarker sortMarker( commandBuffer, "sorting" );

            std::vector<VkBufferMemoryBarrier> sortBarriers = {};
            m_AliveIndexBuffer.AddPipelineBarrier( sortBarriers, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
            m_AliveDistanceBuffer.AddPipelineBarrier( sortBarriers, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
            vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, static_cast<uint32_t>( sortBarriers.size() ), sortBarriers.data(), 0, nullptr );

            Sort( commandBuffer );
        }

        // The vertex stage reads the alive index list and the draw reads its
        // arguments from the indirect args buffer
        std::vector<VkBufferMemoryBarrier> drawBarriers = {};
        m_AliveIndexBuffer.AddPipelineBarrier( drawBarriers, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT );
        m_IndirectArgsBuffer.AddPipelineBarrier( drawBarriers, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT );
        vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, static_cast<uint32_t>( drawBarriers.size() ), drawBarriers.data(), 0, nullptr );

        RenderingConstantBuffer* cb = nullptr;
        VkDescriptorBufferInfo renderingCB = {};
        constantBufferRing.AllocConstantBuffer( sizeof( RenderingConstantBuffer ), (void**)&cb, &renderingCB );
        cb->m_Projection = constantData.m_Projection;
        cb->m_ProjectionInv = simulationConstants.m_ProjectionInv;
        cb->m_SunColor = constantData.m_SunColor;
        cb->m_AmbientColor = constantData.m_AmbientColor;
        cb->m_SunDirectionVS = sunDirectionVS;
        cb->m_ScreenWidth = m_ScreenWidth;
        cb->m_ScreenHeight = m_ScreenHeight;

        uint32_t uniformOffsets[1] = { static_cast<uint32_t>( renderingCB.offset ) };
        vkCmdBindDescriptorSets( commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_RasterizationPipelineLayout, 0, 1, &m_RasterizationDescriptorSet, 1, uniformOffsets );

        VkRenderPassBeginInfo renderPassBegin = {};
        renderPassBegin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
        renderPassBegin.renderPass = m_renderPass;
        renderPassBegin.framebuffer = m_frameBuffer;
        renderPassBegin.renderArea.extent.width = m_ScreenWidth;
        renderPassBegin.renderArea.extent.height = m_ScreenHeight;

        vkCmdBeginRenderPass( commandBuffer, &renderPassBegin, VK_SUBPASS_CONTENTS_INLINE );

        // Pick the pipeline variant matching the requested streak / reactive modes
        StreakMode streaks = flags & PF_Streaks ? StreaksOn : StreaksOff;
        ReactiveMode reactive = flags & PF_Reactive ? ReactiveOn : ReactiveOff;

        vkCmdBindPipeline( commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_RasterizationPipelines[ streaks ][ reactive ] );

        vkCmdBindIndexBuffer( commandBuffer, m_IndexBuffer.buffer, m_IndexBuffer.offset, VK_INDEX_TYPE_UINT32 );

        vkCmdDrawIndexedIndirect( commandBuffer, m_IndirectArgsBuffer.Resource(), 0, 1, sizeof( IndirectCommand ) );

        vkCmdEndRenderPass( commandBuffer );
    }
}
|
||||
|
||||
|
||||
// Creates every device-lifetime resource of the particle system: samplers,
// particle/sort/indirect buffers, the billboard index buffer, the random and
// atlas textures, the simulation and rasterization assets and the sort library.
void GPUParticleSystem::OnCreateDevice( Device& device, UploadHeap& uploadHeap, ResourceViewHeaps& heaps, StaticBufferPool& bufferPool, DynamicBufferRing& constantBufferRing, VkRenderPass renderPass )
{
    m_pDevice = &device;
    m_heaps = &heaps;
    m_renderPass = renderPass;

    // Samplers: [0] repeat + linear, [1] clamp-to-edge + linear, [2] repeat + nearest
    VkSamplerCreateInfo sampler = {};
    sampler.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
    sampler.minLod = 0.0f;
    sampler.maxLod = FLT_MAX;
    sampler.mipLodBias = 0.0f;
    sampler.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
    sampler.compareEnable = VK_FALSE;
    sampler.compareOp = VK_COMPARE_OP_NEVER;
    sampler.maxAnisotropy = 1.0f;
    sampler.anisotropyEnable = VK_FALSE;

    for ( int i = 0; i < 3; i++ )
    {
        const VkSamplerAddressMode addressMode = ( i == 1 ) ? VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE : VK_SAMPLER_ADDRESS_MODE_REPEAT;
        sampler.addressModeU = addressMode;
        sampler.addressModeV = addressMode;
        sampler.addressModeW = addressMode;

        const bool pointSampling = ( i == 2 );
        sampler.magFilter = pointSampling ? VK_FILTER_NEAREST : VK_FILTER_LINEAR;
        sampler.minFilter = pointSampling ? VK_FILTER_NEAREST : VK_FILTER_LINEAR;
        sampler.mipmapMode = pointSampling ? VK_SAMPLER_MIPMAP_MODE_NEAREST : VK_SAMPLER_MIPMAP_MODE_LINEAR;

        VkResult res = vkCreateSampler( m_pDevice->GetDevice(), &sampler, nullptr, &m_samplers[ i ] );
        assert( res == VK_SUCCESS );
        (void)res; // only inspected by the assert
    }

    // Create the global particle pool. Each particle is split into two parts for better cache coherency. The first half contains the data more
    // relevant to rendering while the second half is more related to simulation
    m_ParticleBufferA.Init( m_pDevice, g_maxParticles, sizeof( GPUParticlePartA ), "ParticleBufferA", false );
    m_ParticleBufferB.Init( m_pDevice, g_maxParticles, sizeof( GPUParticlePartB ), "ParticleBufferB", false );

    // The packed view space positions of particles are cached during simulation so allocate a buffer for them
    m_PackedViewSpaceParticlePositions.Init( m_pDevice, g_maxParticles, sizeof( UINT ) * 2, "PackedViewSpaceParticlePositions", false );

    // The maximum radii of each particle is cached during simulation to avoid recomputing multiple times later. This is only required
    // for streaked particles as they are not round so we cache the max radius of X and Y
    m_MaxRadiusBuffer.Init( m_pDevice, g_maxParticles, 4, "MaxRadiusBuffer", false );

    // The dead particle index list (one extra element on top of the particle indices)
    m_DeadListBuffer.Init( m_pDevice, g_maxParticles + 1, 4, "DeadListBuffer", false );

    // Create the buffer to hold the number of alive particles
    m_AliveCountBuffer.Init( m_pDevice, 1, 4, "AliveCountBuffer", false );

    // Create the index buffer of alive particles that is to be sorted (at least in the rasterization path).
    m_AliveIndexBuffer.Init( m_pDevice, g_maxParticles, 4, "AliveIndexBuffer", false );
    m_DstAliveIndexBuffer.Init( m_pDevice, g_maxParticles, 4, "DstAliveIndexBuffer", false );

    // Create the list of distances of each alive particle - used for sorting in the rasterization path.
    m_AliveDistanceBuffer.Init( m_pDevice, g_maxParticles, 4, "AliveDistanceBuffer", false );
    m_DstAliveDistanceBuffer.Init( m_pDevice, g_maxParticles, 4, "DstAliveDistanceBuffer", false );

    // Create the buffer to store the arguments of the indexed indirect draw
    m_IndirectArgsBuffer.Init( m_pDevice, 1, sizeof( IndirectCommand ), "IndirectArgsBuffer", true );

    // Create the particle billboard index buffer required for the rasterization VS-only path:
    // two triangles (0,1,2) and (2,1,3) over the four vertices of each particle.
    // The vector is only CPU-side scratch for the upload and frees itself on scope exit.
    {
        std::vector<UINT> indices( static_cast<size_t>( g_maxParticles ) * 6 );
        UINT* quad = indices.data();
        UINT base = 0;
        for ( int i = 0; i < g_maxParticles; i++ )
        {
            quad[ 0 ] = base + 0;
            quad[ 1 ] = base + 1;
            quad[ 2 ] = base + 2;

            quad[ 3 ] = base + 2;
            quad[ 4 ] = base + 1;
            quad[ 5 ] = base + 3;

            base += 4;
            quad += 6;
        }

        bufferPool.AllocBuffer( g_maxParticles * 6, sizeof( UINT ), indices.data(), &m_IndexBuffer );
    }

    // Initialize the random numbers texture
    FillRandomTexture( uploadHeap );

    // Load the particle atlas and create its view
    m_Atlas.InitFromFile( &device, &uploadHeap, m_AtlasPath, true );
    m_Atlas.CreateSRV( &m_AtlasSRV );

    CreateSimulationAssets( constantBufferRing );
    CreateRasterizedRenderingAssets( constantBufferRing );

    // Create the SortLib resources
    m_SortLib.OnCreate( &device, &heaps, &constantBufferRing, &uploadHeap, &m_AliveCountBuffer, &m_AliveDistanceBuffer, &m_AliveIndexBuffer, &m_DstAliveDistanceBuffer, &m_DstAliveIndexBuffer );
}
|
||||
|
||||
|
||||
// Compiles entry point `entry` of shader file `filename` as a compute shader
// (cs_6_0 profile) with the given defines and builds a compute pipeline for it
// using `layout`. The pipeline gets `entry` as its debug name.
VkPipeline GPUParticleSystem::CreatePipeline( const char* filename, const char* entry, VkPipelineLayout layout, const DefineList* defines )
{
    VkDevice vkDevice = m_pDevice->GetDevice();

    // Compile the shader stage
    VkPipelineShaderStageCreateInfo shaderStage = {};
    VkResult res = VKCompileFromFile( vkDevice, VK_SHADER_STAGE_COMPUTE_BIT, filename, entry, "-T cs_6_0", defines, &shaderStage );
    assert(res == VK_SUCCESS);

    // Describe and create the pipeline
    VkComputePipelineCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
    createInfo.stage = shaderStage;
    createInfo.layout = layout;

    VkPipeline result = {};
    res = vkCreateComputePipelines( vkDevice, m_pDevice->GetPipelineCache(), 1, &createInfo, nullptr, &result );
    assert(res == VK_SUCCESS);

    SetResourceName( vkDevice, VK_OBJECT_TYPE_PIPELINE, (uint64_t)result, entry );
    return result;
}
|
||||
|
||||
|
||||
// Creates everything the compute passes (reset, emit, simulate) share:
// the descriptor set layout and set, the pipeline layout, the descriptor
// writes for the particle buffers and random texture, and the three pipelines.
void GPUParticleSystem::CreateSimulationAssets( DynamicBufferRing& constantBufferRing )
{
    //  0 - g_ParticleBufferA
    //  1 - g_ParticleBufferB
    //  2 - g_DeadList
    //  3 - g_IndexBuffer
    //  4 - g_DistanceBuffer
    //  5 - g_MaxRadiusBuffer
    //  6 - g_PackedViewSpacePositions
    //  7 - g_DrawArgs
    //  8 - g_AliveParticleCount
    //  9 - g_DepthBuffer
    // 10 - g_RandomBuffer
    // 11 - PerFrameConstantBuffer
    // 12 - EmitterConstantBuffer
    // 13 - g_samWrapPoint

    std::vector<VkDescriptorSetLayoutBinding> layout_bindings( 14 );
    uint32_t binding = 0;

    // Fills `count` consecutive compute-stage bindings of the given type,
    // starting at the current value of `binding`.
    auto addBindings = [ & ]( uint32_t count, VkDescriptorType type, const VkSampler* pImmutableSamplers )
    {
        for ( uint32_t i = 0; i < count; i++ )
        {
            layout_bindings[ binding ].binding = binding;
            layout_bindings[ binding ].descriptorType = type;
            layout_bindings[ binding ].descriptorCount = 1;
            layout_bindings[ binding ].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
            layout_bindings[ binding ].pImmutableSamplers = pImmutableSamplers;
            binding++;
        }
    };

    addBindings( 9, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, nullptr );           // 0..8
    addBindings( 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, nullptr );            // 9..10
    addBindings( 2, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, nullptr );   // 11..12
    addBindings( 1, VK_DESCRIPTOR_TYPE_SAMPLER, &m_samplers[ 2 ] );         // 13, immutable sampler

    assert( binding == layout_bindings.size() );

    m_heaps->CreateDescriptorSetLayoutAndAllocDescriptorSet( &layout_bindings, &m_SimulationDescriptorSetLayout, &m_SimulationDescriptorSet );
    constantBufferRing.SetDescriptorSet( 11, sizeof( SimulationConstantBuffer ), m_SimulationDescriptorSet );
    constantBufferRing.SetDescriptorSet( 12, sizeof( EmitterConstantBuffer ), m_SimulationDescriptorSet );

    // Create pipeline layout
    //

    VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {};
    pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
    pipelineLayoutCreateInfo.setLayoutCount = 1;
    pipelineLayoutCreateInfo.pSetLayouts = &m_SimulationDescriptorSetLayout;

    VkResult res = vkCreatePipelineLayout( m_pDevice->GetDevice(), &pipelineLayoutCreateInfo, nullptr, &m_SimulationPipelineLayout );
    assert(res == VK_SUCCESS);

    // Write the storage buffers (bindings 0..8)
    m_ParticleBufferA.SetDescriptorSet( 0, m_SimulationDescriptorSet, true );
    m_ParticleBufferB.SetDescriptorSet( 1, m_SimulationDescriptorSet, true );
    m_DeadListBuffer.SetDescriptorSet( 2, m_SimulationDescriptorSet, true );
    m_AliveIndexBuffer.SetDescriptorSet( 3, m_SimulationDescriptorSet, true );
    m_AliveDistanceBuffer.SetDescriptorSet( 4, m_SimulationDescriptorSet, true );
    m_MaxRadiusBuffer.SetDescriptorSet( 5, m_SimulationDescriptorSet, true );
    m_PackedViewSpaceParticlePositions.SetDescriptorSet( 6, m_SimulationDescriptorSet, true );
    m_IndirectArgsBuffer.SetDescriptorSet( 7, m_SimulationDescriptorSet, true );
    m_AliveCountBuffer.SetDescriptorSet( 8, m_SimulationDescriptorSet, true );

    // Binding 9 (depth buffer) is not written here.
    // NOTE(review): presumably it is written once the depth buffer is known (swap chain resize) - confirm.
    // Binding 10: random texture
    SetDescriptorSet( m_pDevice->GetDevice(), 10, m_RandomTextureSRV, nullptr, m_SimulationDescriptorSet );

    // Create pipelines
    //

    DefineList defines = {};
    defines[ "API_VULKAN" ] = "";

    m_ResetParticlesPipeline = CreatePipeline( "ParticleSimulation.hlsl", "CS_Reset", m_SimulationPipelineLayout, &defines );
    m_SimulationPipeline = CreatePipeline( "ParticleSimulation.hlsl", "CS_Simulate", m_SimulationPipelineLayout, &defines );
    m_EmitPipeline = CreatePipeline( "ParticleEmit.hlsl", "CS_Emit", m_SimulationPipelineLayout, &defines );
}
|
||||
|
||||
|
||||
// Creates the Vulkan objects used to rasterize the particles: the descriptor set
// layout and descriptor set, the pipeline layout, and one graphics pipeline per
// (streak mode, reactive mode) combination stored in m_RasterizationPipelines.
//
// Descriptor set bindings:
//   0 - g_ParticleBufferA
//   1 - g_PackedViewSpacePositions
//   2 - g_NumParticlesBuffer
//   3 - g_SortedIndexBuffer
//   4 - g_ParticleTexture
//   5 - g_DepthTexture
//   6 - RenderingConstantBuffer
//   7 - g_samClampLinear
//
// constantBufferRing: ring that registers the dynamic uniform buffer at binding 6.
void GPUParticleSystem::CreateRasterizedRenderingAssets( DynamicBufferRing& constantBufferRing )
{
    // Describe the descriptor set layout
    //
    std::vector<VkDescriptorSetLayoutBinding> layout_bindings( 8 );
    for ( uint32_t i = 0; i < layout_bindings.size(); i++ )
    {
        layout_bindings[i].binding = i;
        layout_bindings[i].descriptorCount = 1;
        layout_bindings[i].pImmutableSamplers = nullptr;
    }

    // Bindings 0-3: storage buffers read by the vertex shader
    for ( uint32_t i = 0; i < 4; i++ )
    {
        layout_bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        layout_bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
    }

    // Bindings 4-5: textures sampled by the fragment shader
    for ( uint32_t i = 4; i < 6; i++ )
    {
        layout_bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
        layout_bindings[i].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
    }

    // Binding 6: per-draw constants, visible to both stages
    layout_bindings[6].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
    layout_bindings[6].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT;

    // Binding 7: immutable sampler, so it needs no descriptor write below
    layout_bindings[7].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
    layout_bindings[7].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
    layout_bindings[7].pImmutableSamplers = &m_samplers[ 1 ];

    m_heaps->CreateDescriptorSetLayoutAndAllocDescriptorSet( &layout_bindings, &m_RasterizationDescriptorSetLayout, &m_RasterizationDescriptorSet );

    // Fill in the descriptors that do not depend on the swap chain.
    // Binding 5 (depth buffer) is written by OnResizedSwapChain instead.
    m_ParticleBufferA.SetDescriptorSet( 0, m_RasterizationDescriptorSet, false );
    m_PackedViewSpaceParticlePositions.SetDescriptorSet( 1, m_RasterizationDescriptorSet, false );
    m_AliveCountBuffer.SetDescriptorSet( 2, m_RasterizationDescriptorSet, false );
    m_AliveIndexBuffer.SetDescriptorSet( 3, m_RasterizationDescriptorSet, false );
    SetDescriptorSet( m_pDevice->GetDevice(), 4, m_AtlasSRV, nullptr, m_RasterizationDescriptorSet );
    constantBufferRing.SetDescriptorSet( 6, sizeof( RenderingConstantBuffer ), m_RasterizationDescriptorSet );

    // Create pipeline layout
    //
    VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {};
    pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
    pipelineLayoutCreateInfo.setLayoutCount = 1;
    pipelineLayoutCreateInfo.pSetLayouts = &m_RasterizationDescriptorSetLayout;

    VkResult res = vkCreatePipelineLayout( m_pDevice->GetDevice(), &pipelineLayoutCreateInfo, nullptr, &m_RasterizationPipelineLayout );
    assert(res == VK_SUCCESS);

    // input assembly state and layout (no vertex attributes or bindings are declared)
    VkPipelineVertexInputStateCreateInfo vi = {};
    vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;

    VkPipelineInputAssemblyStateCreateInfo ia = {};
    ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
    ia.pNext = nullptr;
    ia.flags = 0;
    ia.primitiveRestartEnable = VK_FALSE;
    ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;

    // rasterizer state
    VkPipelineRasterizationStateCreateInfo rs = {};
    rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
    rs.pNext = nullptr;
    rs.flags = 0;
    rs.polygonMode = VK_POLYGON_MODE_FILL;
    rs.cullMode = VK_CULL_MODE_NONE;
    rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
    rs.depthClampEnable = VK_FALSE;
    rs.rasterizerDiscardEnable = VK_FALSE;
    rs.depthBiasEnable = VK_FALSE;
    rs.depthBiasConstantFactor = 0;
    rs.depthBiasClamp = 0;
    rs.depthBiasSlopeFactor = 0;
    rs.lineWidth = 1.0f;

    // Four color attachments: 0 is alpha blended, 1 and 3 are masked off, and the
    // write mask of 2 is chosen per pipeline in the loop below.
    VkPipelineColorBlendAttachmentState att_state[4] = {};
    att_state[0].colorWriteMask = 0xf;
    att_state[0].blendEnable = VK_TRUE;
    att_state[0].alphaBlendOp = VK_BLEND_OP_ADD;
    att_state[0].colorBlendOp = VK_BLEND_OP_ADD;
    att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA;
    att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
    att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
    att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
    att_state[1].colorWriteMask = 0x0;
    att_state[2].colorWriteMask = 0xf;
    att_state[3].colorWriteMask = 0x0;

    // Color blend state
    VkPipelineColorBlendStateCreateInfo cb = {};
    cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
    cb.attachmentCount = _countof(att_state);
    cb.pAttachments = att_state;
    cb.logicOpEnable = VK_FALSE;
    cb.logicOp = VK_LOGIC_OP_NO_OP;
    cb.blendConstants[0] = 1.0f;
    cb.blendConstants[1] = 1.0f;
    cb.blendConstants[2] = 1.0f;
    cb.blendConstants[3] = 1.0f;

    // Viewport and scissor are supplied at draw time
    VkDynamicState dynamicStateEnables[] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR };
    VkPipelineDynamicStateCreateInfo dynamicState = {};
    dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
    dynamicState.pNext = nullptr;
    dynamicState.pDynamicStates = dynamicStateEnables;
    dynamicState.dynamicStateCount = _countof( dynamicStateEnables );

    // view port state
    VkPipelineViewportStateCreateInfo vp = {};
    vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
    vp.viewportCount = 1;
    vp.scissorCount = 1;

    // depth stencil state: depth test on, depth writes off, stencil unused
    VkPipelineDepthStencilStateCreateInfo ds = {};
    ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
    ds.depthTestEnable = VK_TRUE;
    ds.depthWriteEnable = VK_FALSE;
    ds.depthCompareOp = VK_COMPARE_OP_GREATER_OR_EQUAL;
    ds.depthBoundsTestEnable = VK_FALSE;
    ds.stencilTestEnable = VK_FALSE;
    ds.back.failOp = VK_STENCIL_OP_KEEP;
    ds.back.passOp = VK_STENCIL_OP_KEEP;
    ds.back.compareOp = VK_COMPARE_OP_ALWAYS;
    ds.back.compareMask = 0;
    ds.back.reference = 0;
    ds.back.depthFailOp = VK_STENCIL_OP_KEEP;
    ds.back.writeMask = 0;
    ds.minDepthBounds = 0;
    ds.maxDepthBounds = 0;
    ds.front = ds.back;

    // multi sample state
    VkPipelineMultisampleStateCreateInfo ms = {};
    ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
    ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;

    // Build one pipeline per (streak mode, reactive mode) permutation
    for ( int i = 0; i < NumStreakModes; i++ )
    {
        for ( int j = 0; j < NumReactiveModes; j++ )
        {
            // Attachment 2 is only written by the REACTIVE shader permutation
            att_state[2].colorWriteMask = 0x0;

            DefineList defines;
            if ( i == StreaksOn )
                defines[ "STREAKS" ] = "";

            if ( j == ReactiveOn )
            {
                defines["REACTIVE"] = "";
                att_state[2].colorWriteMask = 0xf;
            }

            // Compile shaders
            //
            VkPipelineShaderStageCreateInfo vertexShader = {};
            res = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_VERTEX_BIT, "ParticleRender.hlsl", "VS_StructuredBuffer", "-T vs_6_0", &defines, &vertexShader );
            assert(res == VK_SUCCESS);

            // Zero-initialized like vertexShader so a failed compile never leaves
            // indeterminate data in the stage description.
            VkPipelineShaderStageCreateInfo fragmentShader = {};
            res = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_FRAGMENT_BIT, "ParticleRender.hlsl", "PS_Billboard", "-T ps_6_0", &defines, &fragmentShader );
            assert(res == VK_SUCCESS);

            VkPipelineShaderStageCreateInfo shaderStages[] = { vertexShader, fragmentShader };

            // Create pipeline
            //
            VkGraphicsPipelineCreateInfo pipeline = {};
            pipeline.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
            pipeline.layout = m_RasterizationPipelineLayout;
            pipeline.pVertexInputState = &vi;
            pipeline.pInputAssemblyState = &ia;
            pipeline.pRasterizationState = &rs;
            pipeline.pMultisampleState = &ms;
            pipeline.pColorBlendState = &cb;
            pipeline.pDynamicState = &dynamicState;
            pipeline.pViewportState = &vp;
            pipeline.pDepthStencilState = &ds;
            pipeline.pStages = shaderStages;
            pipeline.stageCount = _countof( shaderStages );
            pipeline.renderPass = m_renderPass;

            res = vkCreateGraphicsPipelines( m_pDevice->GetDevice(), m_pDevice->GetPipelineCache(), 1, &pipeline, nullptr, &m_RasterizationPipelines[ i ][ j ] );
            assert(res == VK_SUCCESS);
        }
    }
}
|
||||
|
||||
|
||||
void GPUParticleSystem::OnResizedSwapChain( int width, int height, Texture& depthBuffer, VkFramebuffer frameBuffer )
|
||||
{
|
||||
m_frameBuffer = frameBuffer;
|
||||
m_ScreenWidth = width;
|
||||
m_ScreenHeight = height;
|
||||
m_InvScreenWidth = 1.0f / m_ScreenWidth;
|
||||
m_InvScreenHeight = 1.0f / m_ScreenHeight;
|
||||
|
||||
m_DepthBuffer = depthBuffer.Resource();
|
||||
depthBuffer.CreateSRV( &m_DepthBufferSRV );
|
||||
|
||||
SetDescriptorSetForDepth( m_pDevice->GetDevice(), 9, m_DepthBufferSRV, nullptr, m_SimulationDescriptorSet );
|
||||
SetDescriptorSetForDepth( m_pDevice->GetDevice(), 5, m_DepthBufferSRV, nullptr, m_RasterizationDescriptorSet );
|
||||
}
|
||||
|
||||
|
||||
void GPUParticleSystem::OnReleasingSwapChain()
|
||||
{
|
||||
if (m_DepthBufferSRV != nullptr)
|
||||
{
|
||||
vkDestroyImageView(m_pDevice->GetDevice(), m_DepthBufferSRV, nullptr);
|
||||
m_DepthBufferSRV = {};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void GPUParticleSystem::OnDestroyDevice()
|
||||
{
|
||||
m_ParticleBufferA.OnDestroy();
|
||||
m_ParticleBufferB.OnDestroy();
|
||||
m_PackedViewSpaceParticlePositions.OnDestroy();
|
||||
m_MaxRadiusBuffer.OnDestroy();
|
||||
m_DeadListBuffer.OnDestroy();
|
||||
m_AliveDistanceBuffer.OnDestroy();
|
||||
m_AliveIndexBuffer.OnDestroy();
|
||||
m_DstAliveDistanceBuffer.OnDestroy();
|
||||
m_DstAliveIndexBuffer.OnDestroy();
|
||||
m_AliveCountBuffer.OnDestroy();
|
||||
vkDestroyImageView( m_pDevice->GetDevice(), m_RandomTextureSRV, nullptr );
|
||||
m_RandomTexture.OnDestroy();
|
||||
vkDestroyImageView( m_pDevice->GetDevice(), m_AtlasSRV, nullptr );
|
||||
m_Atlas.OnDestroy();
|
||||
m_IndirectArgsBuffer.OnDestroy();
|
||||
|
||||
vkDestroyDescriptorSetLayout( m_pDevice->GetDevice(), m_SimulationDescriptorSetLayout, nullptr );
|
||||
vkDestroyDescriptorSetLayout( m_pDevice->GetDevice(), m_RasterizationDescriptorSetLayout, nullptr );
|
||||
|
||||
vkDestroyPipeline( m_pDevice->GetDevice(), m_SimulationPipeline, nullptr );
|
||||
vkDestroyPipeline( m_pDevice->GetDevice(), m_ResetParticlesPipeline, nullptr );
|
||||
vkDestroyPipeline( m_pDevice->GetDevice(), m_EmitPipeline, nullptr );
|
||||
|
||||
for ( int i = 0; i < NumStreakModes; i++ )
|
||||
{
|
||||
for ( int j = 0; j < NumReactiveModes; j++ )
|
||||
{
|
||||
vkDestroyPipeline( m_pDevice->GetDevice(), m_RasterizationPipelines[ i ][ j ], nullptr );
|
||||
}
|
||||
}
|
||||
|
||||
vkDestroyPipelineLayout( m_pDevice->GetDevice(), m_SimulationPipelineLayout, nullptr );
|
||||
vkDestroyPipelineLayout( m_pDevice->GetDevice(), m_RasterizationPipelineLayout, nullptr );
|
||||
|
||||
m_SortLib.OnDestroy();
|
||||
|
||||
for ( int i = 0; i < _countof( m_samplers ); i++ )
|
||||
{
|
||||
vkDestroySampler( m_pDevice->GetDevice(), m_samplers[ i ], nullptr );
|
||||
}
|
||||
|
||||
m_ResetSystem = true;
|
||||
m_pDevice = nullptr;
|
||||
}
|
||||
|
||||
|
||||
// Per-frame emission of particles into the GPU simulation
|
||||
void GPUParticleSystem::Emit( VkCommandBuffer commandBuffer, DynamicBufferRing& constantBufferRing, uint32_t perFrameConstantOffset, int numEmitters, const EmitterParams* emitters )
|
||||
{
|
||||
vkCmdBindPipeline( commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_EmitPipeline );
|
||||
|
||||
// Run CS for each emitter
|
||||
for ( int i = 0; i < numEmitters; i++ )
|
||||
{
|
||||
const EmitterParams& emitter = emitters[ i ];
|
||||
|
||||
if ( emitter.m_NumToEmit > 0 )
|
||||
{
|
||||
EmitterConstantBuffer* constants = nullptr;
|
||||
VkDescriptorBufferInfo constantBuffer = {};
|
||||
constantBufferRing.AllocConstantBuffer( sizeof(*constants), (void**)&constants, &constantBuffer );
|
||||
constants->m_EmitterPosition = emitter.m_Position;
|
||||
constants->m_EmitterVelocity = emitter.m_Velocity;
|
||||
constants->m_MaxParticlesThisFrame = emitter.m_NumToEmit;
|
||||
constants->m_ParticleLifeSpan = emitter.m_ParticleLifeSpan;
|
||||
constants->m_StartSize = emitter.m_StartSize;
|
||||
constants->m_EndSize = emitter.m_EndSize;
|
||||
constants->m_PositionVariance = emitter.m_PositionVariance;
|
||||
constants->m_VelocityVariance = emitter.m_VelocityVariance;
|
||||
constants->m_Mass = emitter.m_Mass;
|
||||
constants->m_Index = i;
|
||||
constants->m_Streaks = emitter.m_Streaks ? 1 : 0;
|
||||
constants->m_TextureIndex = emitter.m_TextureIndex;
|
||||
|
||||
uint32_t uniformOffsets[] = { perFrameConstantOffset, (uint32_t)constantBuffer.offset };
|
||||
vkCmdBindDescriptorSets( commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_SimulationPipelineLayout, 0, 1, &m_SimulationDescriptorSet, _countof( uniformOffsets ), uniformOffsets );
|
||||
|
||||
// Dispatch enough thread groups to spawn the requested particles
|
||||
int numThreadGroups = align( emitter.m_NumToEmit, 1024 ) / 1024;
|
||||
vkCmdDispatch( commandBuffer, numThreadGroups, 1, 1 );
|
||||
}
|
||||
}
|
||||
|
||||
// RaW barriers
|
||||
m_ParticleBufferA.PipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
|
||||
m_ParticleBufferB.PipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
|
||||
m_DeadListBuffer.PipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT );
|
||||
}
|
||||
|
||||
|
||||
// Per-frame simulation step
|
||||
void GPUParticleSystem::Simulate( VkCommandBuffer commandBuffer )
|
||||
{
|
||||
vkCmdBindPipeline( commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_SimulationPipeline );
|
||||
vkCmdDispatch( commandBuffer, align( g_maxParticles, 256 ) / 256, 1, 1 );
|
||||
}
|
||||
|
||||
// Populate a texture with random numbers (used for the emission of particles)
|
||||
void GPUParticleSystem::FillRandomTexture( UploadHeap& uploadHeap )
|
||||
{
|
||||
IMG_INFO header = {};
|
||||
header.width = 1024;
|
||||
header.height = 1024;
|
||||
header.depth = 1;
|
||||
header.arraySize = 1;
|
||||
header.mipMapCount = 1;
|
||||
header.format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
header.bitCount = 128;
|
||||
|
||||
float* values = new float[ header.width * header.height * 4 ];
|
||||
float* ptr = values;
|
||||
for ( UINT i = 0; i < header.width * header.height; i++ )
|
||||
{
|
||||
ptr[ 0 ] = RandomVariance( 0.0f, 1.0f );
|
||||
ptr[ 1 ] = RandomVariance( 0.0f, 1.0f );
|
||||
ptr[ 2 ] = RandomVariance( 0.0f, 1.0f );
|
||||
ptr[ 3 ] = RandomVariance( 0.0f, 1.0f );
|
||||
ptr += 4;
|
||||
}
|
||||
|
||||
m_RandomTexture.InitFromData( m_pDevice, uploadHeap, header, values, "RandomTexture" );
|
||||
m_RandomTexture.CreateSRV( &m_RandomTextureSRV );
|
||||
|
||||
delete[] values;
|
||||
}
|
@ -0,0 +1,559 @@
|
||||
// ParallelSort.cpp
|
||||
//
|
||||
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#define FFX_CPP
|
||||
#include "ParallelSort.h"
|
||||
#include "../../FFX-ParallelSort/FFX_ParallelSort.h"
|
||||
|
||||
// Key count passed to the scratch-size calculation when the sort buffers are created (400 * 1024 = 409600).
static constexpr uint32_t NumKeys = 400 * 1024;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Helper for Vulkan
|
||||
// Builds a buffer memory barrier covering the first `size` bytes of `buffer`
// (offset 0), changing access from `before` to `after` with no queue family
// ownership transfer.
VkBufferMemoryBarrier BufferTransition(VkBuffer buffer, VkAccessFlags before, VkAccessFlags after, uint32_t size)
{
    // Zero-initialization leaves pNext null and offset at 0.
    VkBufferMemoryBarrier barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;

    // Region affected
    barrier.buffer = buffer;
    barrier.size = size;

    // Access transition
    barrier.srcAccessMask = before;
    barrier.dstAccessMask = after;

    // Same queue family on both sides
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

    return barrier;
}
|
||||
|
||||
|
||||
// Writes the uniform buffer described by GPUCB into `Binding` of `DescriptorSet`,
// starting at array element 0 and covering `Count` descriptors.
void FFXParallelSort::BindConstantBuffer(VkDescriptorBufferInfo& GPUCB, VkDescriptorSet& DescriptorSet, uint32_t Binding/*=0*/, uint32_t Count/*=1*/)
{
    // Zero-initialization covers pNext, dstArrayElement, pImageInfo and pTexelBufferView.
    VkWriteDescriptorSet descriptorWrite = {};
    descriptorWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
    descriptorWrite.dstSet = DescriptorSet;
    descriptorWrite.dstBinding = Binding;
    descriptorWrite.descriptorCount = Count;
    descriptorWrite.pBufferInfo = &GPUCB;

    vkUpdateDescriptorSets(m_pDevice->GetDevice(), 1, &descriptorWrite, 0, nullptr);
}
|
||||
|
||||
// Writes `Count` storage buffers from the pBuffer array into `Binding` of
// `DescriptorSet`, starting at array element 0. Each buffer is bound in full
// (offset 0, VK_WHOLE_SIZE).
void FFXParallelSort::BindUAVBuffer(VkBuffer* pBuffer, VkDescriptorSet& DescriptorSet, uint32_t Binding/*=0*/, uint32_t Count/*=1*/)
{
    // One buffer description per descriptor being written
    std::vector<VkDescriptorBufferInfo> bufferInfos(Count);
    for (uint32_t index = 0; index < Count; ++index)
    {
        bufferInfos[index].buffer = pBuffer[index];
        bufferInfos[index].offset = 0;
        bufferInfos[index].range = VK_WHOLE_SIZE;
    }

    // Zero-initialization covers pNext, dstArrayElement, pImageInfo and pTexelBufferView.
    VkWriteDescriptorSet descriptorWrite = {};
    descriptorWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    descriptorWrite.dstSet = DescriptorSet;
    descriptorWrite.dstBinding = Binding;
    descriptorWrite.descriptorCount = Count;
    descriptorWrite.pBufferInfo = bufferInfos.data();

    vkUpdateDescriptorSets(m_pDevice->GetDevice(), 1, &descriptorWrite, 0, nullptr);
}
|
||||
|
||||
|
||||
void FFXParallelSort::CompileRadixPipeline(const char* shaderFile, const DefineList* defines, const char* entryPoint, VkPipeline& pPipeline)
|
||||
{
|
||||
std::string CompileFlags("-T cs_6_0");
|
||||
#ifdef _DEBUG
|
||||
CompileFlags += " -Zi -Od";
|
||||
#endif // _DEBUG
|
||||
|
||||
VkPipelineShaderStageCreateInfo stage_create_info = { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO };
|
||||
|
||||
VkResult vkResult = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_COMPUTE_BIT, shaderFile, entryPoint, "-T cs_6_0", defines, &stage_create_info);
|
||||
stage_create_info.flags = 0;
|
||||
assert(vkResult == VK_SUCCESS);
|
||||
|
||||
VkComputePipelineCreateInfo create_info = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO };
|
||||
create_info.pNext = nullptr;
|
||||
create_info.basePipelineHandle = VK_NULL_HANDLE;
|
||||
create_info.basePipelineIndex = 0;
|
||||
create_info.flags = 0;
|
||||
create_info.layout = m_SortPipelineLayout;
|
||||
create_info.stage = stage_create_info;
|
||||
vkResult = vkCreateComputePipelines(m_pDevice->GetDevice(), VK_NULL_HANDLE, 1, &create_info, nullptr, &pPipeline);
|
||||
assert(vkResult == VK_SUCCESS);
|
||||
}
|
||||
|
||||
void FFXParallelSort::OnCreate(Device* pDevice, ResourceViewHeaps* pResourceViewHeaps, DynamicBufferRing* pConstantBufferRing, UploadHeap* pUploadHeap, Buffer* elementCount, Buffer* listA, Buffer* listB, Buffer* listA2, Buffer* listB2)
|
||||
{
|
||||
m_pDevice = pDevice;
|
||||
m_pUploadHeap = pUploadHeap;
|
||||
m_pResourceViewHeaps = pResourceViewHeaps;
|
||||
m_pConstantBufferRing = pConstantBufferRing;
|
||||
m_SrcKeyBuffer = listA;
|
||||
m_SrcPayloadBuffer = listB;
|
||||
m_DstKeyBuffer = listA2;
|
||||
m_DstPayloadBuffer = listB2;
|
||||
|
||||
m_MaxNumThreadgroups = 800;
|
||||
|
||||
VkBufferCreateInfo bufferCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
|
||||
bufferCreateInfo.pNext = nullptr;
|
||||
bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
bufferCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; // | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
|
||||
|
||||
VmaAllocationCreateInfo allocCreateInfo = {};
|
||||
allocCreateInfo.memoryTypeBits = 0;
|
||||
allocCreateInfo.pool = VK_NULL_HANDLE;
|
||||
allocCreateInfo.preferredFlags = 0;
|
||||
allocCreateInfo.requiredFlags = 0;
|
||||
allocCreateInfo.usage = VMA_MEMORY_USAGE_UNKNOWN;
|
||||
|
||||
// Allocate the scratch buffers needed for radix sort
|
||||
FFX_ParallelSort_CalculateScratchResourceSize(NumKeys, m_ScratchBufferSize, m_ReducedScratchBufferSize);
|
||||
|
||||
bufferCreateInfo.size = m_ScratchBufferSize;
|
||||
allocCreateInfo.pUserData = "Scratch";
|
||||
if (VK_SUCCESS != vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferCreateInfo, &allocCreateInfo, &m_FPSScratchBuffer, &m_FPSScratchBufferAllocation, nullptr))
|
||||
{
|
||||
Trace("Failed to create buffer for Scratch");
|
||||
}
|
||||
|
||||
bufferCreateInfo.size = m_ReducedScratchBufferSize;
|
||||
allocCreateInfo.pUserData = "ReducedScratch";
|
||||
if (VK_SUCCESS != vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferCreateInfo, &allocCreateInfo, &m_FPSReducedScratchBuffer, &m_FPSReducedScratchBufferAllocation, nullptr))
|
||||
{
|
||||
Trace("Failed to create buffer for ReducedScratch");
|
||||
}
|
||||
|
||||
// Allocate the buffers for indirect execution of the algorithm
|
||||
|
||||
bufferCreateInfo.size = sizeof(uint32_t) * 3;
|
||||
bufferCreateInfo.usage = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
allocCreateInfo.pUserData = "IndirectCount_Scatter_DispatchArgs";
|
||||
if (VK_SUCCESS != vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferCreateInfo, &allocCreateInfo, &m_IndirectCountScatterArgs, &m_IndirectCountScatterArgsAllocation, nullptr))
|
||||
{
|
||||
Trace("Failed to create buffer for IndirectCount_Scatter_DispatchArgs");
|
||||
}
|
||||
|
||||
allocCreateInfo.pUserData = "IndirectReduceScanArgs";
|
||||
if (VK_SUCCESS != vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferCreateInfo, &allocCreateInfo, &m_IndirectReduceScanArgs, &m_IndirectReduceScanArgsAllocation, nullptr))
|
||||
{
|
||||
Trace("Failed to create buffer for IndirectCount_Scatter_DispatchArgs");
|
||||
}
|
||||
|
||||
bufferCreateInfo.size = sizeof(FFX_ParallelSortCB);
|
||||
bufferCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
allocCreateInfo.pUserData = "IndirectConstantBuffer";
|
||||
if (VK_SUCCESS != vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferCreateInfo, &allocCreateInfo, &m_IndirectConstantBuffer, &m_IndirectConstantBufferAllocation, nullptr))
|
||||
{
|
||||
Trace("Failed to create buffer for IndirectConstantBuffer");
|
||||
}
|
||||
|
||||
// Create Pipeline layout for Sort pass
|
||||
{
|
||||
// Create binding for Radix sort passes
|
||||
VkDescriptorSetLayoutBinding layout_bindings_set_0[] = {
|
||||
{ 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr } // Constant buffer table
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutBinding layout_bindings_set_1[] = {
|
||||
{ 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr } // Constant buffer to setup indirect params (indirect)
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutBinding layout_bindings_set_InputOutputs[] = {
|
||||
{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // SrcBuffer (sort)
|
||||
{ 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // DstBuffer (sort)
|
||||
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // ScrPayload (sort only)
|
||||
{ 3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // DstPayload (sort only)
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutBinding layout_bindings_set_Scan[] = {
|
||||
{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // ScanSrc
|
||||
{ 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // ScanDst
|
||||
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // ScanScratch
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutBinding layout_bindings_set_Scratch[] = {
|
||||
{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // Scratch (sort only)
|
||||
{ 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // Scratch (reduced)
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutBinding layout_bindings_set_Indirect[] = {
|
||||
{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // NumKeys (indirect)
|
||||
{ 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // CBufferUAV (indirect)
|
||||
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr }, // CountScatterArgs (indirect)
|
||||
{ 3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr } // ReduceScanArgs (indirect)
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
|
||||
descriptor_set_layout_create_info.pNext = nullptr;
|
||||
descriptor_set_layout_create_info.flags = 0;
|
||||
descriptor_set_layout_create_info.pBindings = layout_bindings_set_0;
|
||||
descriptor_set_layout_create_info.bindingCount = 1;
|
||||
VkResult vkResult = vkCreateDescriptorSetLayout(m_pDevice->GetDevice(), &descriptor_set_layout_create_info, nullptr, &m_SortDescriptorSetLayoutConstants);
|
||||
assert(vkResult == VK_SUCCESS);
|
||||
bool bDescriptorAlloc = true;
|
||||
bDescriptorAlloc &= m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutConstants, &m_SortDescriptorSetConstants[0]);
|
||||
bDescriptorAlloc &= m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutConstants, &m_SortDescriptorSetConstants[1]);
|
||||
bDescriptorAlloc &= m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutConstants, &m_SortDescriptorSetConstants[2]);
|
||||
assert(bDescriptorAlloc == true);
|
||||
|
||||
descriptor_set_layout_create_info.pBindings = layout_bindings_set_1;
|
||||
descriptor_set_layout_create_info.bindingCount = 1;
|
||||
vkResult = vkCreateDescriptorSetLayout(m_pDevice->GetDevice(), &descriptor_set_layout_create_info, nullptr, &m_SortDescriptorSetLayoutConstantsIndirect);
|
||||
assert(vkResult == VK_SUCCESS);
|
||||
bDescriptorAlloc &= m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutConstantsIndirect, &m_SortDescriptorSetConstantsIndirect[0]);
|
||||
bDescriptorAlloc &= m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutConstantsIndirect, &m_SortDescriptorSetConstantsIndirect[1]);
|
||||
bDescriptorAlloc &= m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutConstantsIndirect, &m_SortDescriptorSetConstantsIndirect[2]);
|
||||
assert(bDescriptorAlloc == true);
|
||||
|
||||
descriptor_set_layout_create_info.pBindings = layout_bindings_set_InputOutputs;
|
||||
descriptor_set_layout_create_info.bindingCount = 4;
|
||||
vkResult = vkCreateDescriptorSetLayout(m_pDevice->GetDevice(), &descriptor_set_layout_create_info, nullptr, &m_SortDescriptorSetLayoutInputOutputs);
|
||||
assert(vkResult == VK_SUCCESS);
|
||||
bDescriptorAlloc = m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutInputOutputs, &m_SortDescriptorSetInputOutput[0]);
|
||||
assert(bDescriptorAlloc == true);
|
||||
bDescriptorAlloc = m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutInputOutputs, &m_SortDescriptorSetInputOutput[1]);
|
||||
assert(bDescriptorAlloc == true);
|
||||
|
||||
descriptor_set_layout_create_info.pBindings = layout_bindings_set_Scan;
|
||||
descriptor_set_layout_create_info.bindingCount = 3;
|
||||
vkResult = vkCreateDescriptorSetLayout(m_pDevice->GetDevice(), &descriptor_set_layout_create_info, nullptr, &m_SortDescriptorSetLayoutScan);
|
||||
assert(vkResult == VK_SUCCESS);
|
||||
bDescriptorAlloc = m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutScan, &m_SortDescriptorSetScanSets[0]);
|
||||
assert(bDescriptorAlloc == true);
|
||||
bDescriptorAlloc = m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutScan, &m_SortDescriptorSetScanSets[1]);
|
||||
assert(bDescriptorAlloc == true);
|
||||
|
||||
descriptor_set_layout_create_info.pBindings = layout_bindings_set_Scratch;
|
||||
descriptor_set_layout_create_info.bindingCount = 2;
|
||||
vkResult = vkCreateDescriptorSetLayout(m_pDevice->GetDevice(), &descriptor_set_layout_create_info, nullptr, &m_SortDescriptorSetLayoutScratch);
|
||||
assert(vkResult == VK_SUCCESS);
|
||||
bDescriptorAlloc = m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutScratch, &m_SortDescriptorSetScratch);
|
||||
assert(bDescriptorAlloc == true);
|
||||
|
||||
descriptor_set_layout_create_info.pBindings = layout_bindings_set_Indirect;
|
||||
descriptor_set_layout_create_info.bindingCount = 4;
|
||||
vkResult = vkCreateDescriptorSetLayout(m_pDevice->GetDevice(), &descriptor_set_layout_create_info, nullptr, &m_SortDescriptorSetLayoutIndirect);
|
||||
assert(vkResult == VK_SUCCESS);
|
||||
bDescriptorAlloc = m_pResourceViewHeaps->AllocDescriptor(m_SortDescriptorSetLayoutIndirect, &m_SortDescriptorSetIndirect);
|
||||
assert(bDescriptorAlloc == true);
|
||||
|
||||
// Create constant range representing our static constant
|
||||
VkPushConstantRange constant_range;
|
||||
constant_range.stageFlags = VK_SHADER_STAGE_ALL;
|
||||
constant_range.offset = 0;
|
||||
constant_range.size = 4;
|
||||
|
||||
// Create the pipeline layout (Root signature)
|
||||
VkPipelineLayoutCreateInfo layout_create_info = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
|
||||
layout_create_info.pNext = nullptr;
|
||||
layout_create_info.flags = 0;
|
||||
layout_create_info.setLayoutCount = 6;
|
||||
VkDescriptorSetLayout layouts[] = { m_SortDescriptorSetLayoutConstants, m_SortDescriptorSetLayoutConstantsIndirect, m_SortDescriptorSetLayoutInputOutputs,
|
||||
m_SortDescriptorSetLayoutScan, m_SortDescriptorSetLayoutScratch, m_SortDescriptorSetLayoutIndirect };
|
||||
layout_create_info.pSetLayouts = layouts;
|
||||
layout_create_info.pushConstantRangeCount = 1;
|
||||
layout_create_info.pPushConstantRanges = &constant_range;
|
||||
VkResult bCreatePipelineLayout = vkCreatePipelineLayout(m_pDevice->GetDevice(), &layout_create_info, nullptr, &m_SortPipelineLayout);
|
||||
assert(bCreatePipelineLayout == VK_SUCCESS);
|
||||
}
|
||||
|
||||
// Create Pipeline layout for Render of RadixBuffer info
|
||||
{
|
||||
// Create binding for Radix sort passes
|
||||
VkDescriptorSetLayoutBinding layout_bindings_set_0[] = {
|
||||
{ 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr } // Constant buffer table
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutBinding layout_bindings_set_1[] = {
|
||||
{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr } // Sort Buffer
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutBinding layout_bindings_set_2[] = {
|
||||
{ 0, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_ALL, nullptr } // ValidationTexture
|
||||
};
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Create pipelines for radix sort
|
||||
{
|
||||
// Create all of the necessary pipelines for Sort and Scan
|
||||
DefineList defines;
|
||||
defines[ "API_VULKAN" ] = "";
|
||||
|
||||
// SetupIndirectParams (indirect only)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_SetupIndirectParameters", m_FPSIndirectSetupParametersPipeline);
|
||||
|
||||
// Radix count (sum table generation)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_Count", m_FPSCountPipeline);
|
||||
// Radix count reduce (sum table reduction for offset prescan)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_CountReduce", m_FPSCountReducePipeline);
|
||||
// Radix scan (prefix scan)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_Scan", m_FPSScanPipeline);
|
||||
// Radix scan add (prefix scan + reduced prefix scan addition)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_ScanAdd", m_FPSScanAddPipeline);
|
||||
// Radix scatter (key redistribution)
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_Scatter", m_FPSScatterPipeline);
|
||||
// Radix scatter with payload (key and payload redistribution)
|
||||
defines["kRS_ValueCopy"] = std::to_string(1);
|
||||
CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_Scatter", m_FPSScatterPayloadPipeline);
|
||||
}
|
||||
|
||||
// Do binding setups
|
||||
{
|
||||
VkBuffer BufferMaps[4];
|
||||
|
||||
// Map inputs/outputs
|
||||
BufferMaps[0] = m_SrcKeyBuffer->Resource();
|
||||
BufferMaps[1] = m_DstKeyBuffer->Resource();
|
||||
BufferMaps[2] = m_SrcPayloadBuffer->Resource();
|
||||
BufferMaps[3] = m_DstPayloadBuffer->Resource();
|
||||
BindUAVBuffer(BufferMaps, m_SortDescriptorSetInputOutput[0], 0, 4);
|
||||
|
||||
BufferMaps[0] = m_DstKeyBuffer->Resource();
|
||||
BufferMaps[1] = m_SrcKeyBuffer->Resource();
|
||||
BufferMaps[2] = m_DstPayloadBuffer->Resource();
|
||||
BufferMaps[3] = m_SrcPayloadBuffer->Resource();
|
||||
BindUAVBuffer(BufferMaps, m_SortDescriptorSetInputOutput[1], 0, 4);
|
||||
|
||||
// Map scan sets (reduced, scratch)
|
||||
BufferMaps[0] = BufferMaps[1] = m_FPSReducedScratchBuffer;
|
||||
BindUAVBuffer(BufferMaps, m_SortDescriptorSetScanSets[0], 0, 2);
|
||||
|
||||
BufferMaps[0] = BufferMaps[1] = m_FPSScratchBuffer;
|
||||
BufferMaps[2] = m_FPSReducedScratchBuffer;
|
||||
BindUAVBuffer(BufferMaps, m_SortDescriptorSetScanSets[1], 0, 3);
|
||||
|
||||
// Map Scratch areas (fixed)
|
||||
BufferMaps[0] = m_FPSScratchBuffer;
|
||||
BufferMaps[1] = m_FPSReducedScratchBuffer;
|
||||
BindUAVBuffer(BufferMaps, m_SortDescriptorSetScratch, 0, 2);
|
||||
|
||||
// Map indirect buffers
|
||||
elementCount->SetDescriptorSet( 0, m_SortDescriptorSetIndirect, false );
|
||||
BufferMaps[0] = m_IndirectConstantBuffer;
|
||||
BufferMaps[1] = m_IndirectCountScatterArgs;
|
||||
BufferMaps[2] = m_IndirectReduceScanArgs;
|
||||
BindUAVBuffer(BufferMaps, m_SortDescriptorSetIndirect, 1, 3);
|
||||
}
|
||||
}
|
||||
|
||||
void FFXParallelSort::OnDestroy()
|
||||
{
|
||||
// Release radix sort indirect resources
|
||||
vmaDestroyBuffer(m_pDevice->GetAllocator(), m_IndirectConstantBuffer, m_IndirectConstantBufferAllocation);
|
||||
vmaDestroyBuffer(m_pDevice->GetAllocator(), m_IndirectCountScatterArgs, m_IndirectCountScatterArgsAllocation);
|
||||
vmaDestroyBuffer(m_pDevice->GetAllocator(), m_IndirectReduceScanArgs, m_IndirectReduceScanArgsAllocation);
|
||||
vkDestroyPipeline(m_pDevice->GetDevice(), m_FPSIndirectSetupParametersPipeline, nullptr);
|
||||
|
||||
// Release radix sort algorithm resources
|
||||
vmaDestroyBuffer(m_pDevice->GetAllocator(), m_FPSScratchBuffer, m_FPSScratchBufferAllocation);
|
||||
vmaDestroyBuffer(m_pDevice->GetAllocator(), m_FPSReducedScratchBuffer, m_FPSReducedScratchBufferAllocation);
|
||||
|
||||
vkDestroyPipelineLayout(m_pDevice->GetDevice(), m_SortPipelineLayout, nullptr);
|
||||
vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_SortDescriptorSetLayoutConstants, nullptr);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetConstants[0]);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetConstants[1]);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetConstants[2]);
|
||||
vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_SortDescriptorSetLayoutConstantsIndirect, nullptr);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetConstantsIndirect[0]);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetConstantsIndirect[1]);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetConstantsIndirect[2]);
|
||||
vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_SortDescriptorSetLayoutInputOutputs, nullptr);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetInputOutput[0]);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetInputOutput[1]);
|
||||
|
||||
vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_SortDescriptorSetLayoutScan, nullptr);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetScanSets[0]);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetScanSets[1]);
|
||||
|
||||
vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_SortDescriptorSetLayoutScratch, nullptr);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetScratch);
|
||||
|
||||
vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_SortDescriptorSetLayoutIndirect, nullptr);
|
||||
m_pResourceViewHeaps->FreeDescriptor(m_SortDescriptorSetIndirect);
|
||||
|
||||
vkDestroyPipeline(m_pDevice->GetDevice(), m_FPSCountPipeline, nullptr);
|
||||
vkDestroyPipeline(m_pDevice->GetDevice(), m_FPSCountReducePipeline, nullptr);
|
||||
vkDestroyPipeline(m_pDevice->GetDevice(), m_FPSScanPipeline, nullptr);
|
||||
vkDestroyPipeline(m_pDevice->GetDevice(), m_FPSScanAddPipeline, nullptr);
|
||||
vkDestroyPipeline(m_pDevice->GetDevice(), m_FPSScatterPipeline, nullptr);
|
||||
vkDestroyPipeline(m_pDevice->GetDevice(), m_FPSScatterPayloadPipeline, nullptr);
|
||||
}
|
||||
|
||||
|
||||
// Records one complete indirect radix sort into commandList.
//
// The recording has three phases:
//   1. A single-group dispatch of the indirect-setup pipeline, followed by a barrier on the
//      indirect constant buffer and the two indirect-argument buffers.
//   2. For every group of FFX_PARALLELSORT_SORT_BITS_PER_PASS key bits (covering 32 bits):
//      Count -> Reduce -> Scan -> ScanAdd -> Scatter, with the input/output descriptor set
//      swapped after each pass.
//   3. A barrier returning the indirect buffers to shader read/write access for the next frame.
void FFXParallelSort::Draw(VkCommandBuffer commandList)
{
    // Access mask used for every buffer the sort shaders both read and write
    const VkAccessFlags shaderReadWrite = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;

    // Binds one descriptor set of the sort pipeline layout at the given set index
    auto bindSet = [this, commandList](uint32_t setIndex, const VkDescriptorSet& descriptorSet)
    {
        vkCmdBindDescriptorSets(commandList, VK_PIPELINE_BIND_POINT_COMPUTE, m_SortPipelineLayout, setIndex, 1, &descriptorSet, 0, nullptr);
    };

    // Binds a compute pipeline
    auto bindPipeline = [commandList](VkPipeline pipeline)
    {
        vkCmdBindPipeline(commandList, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
    };

    // Records a buffer-only pipeline barrier (all commands -> all commands)
    auto recordBufferBarriers = [commandList](uint32_t barrierCount, const VkBufferMemoryBarrier* pBarriers)
    {
        vkCmdPipelineBarrier(commandList, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, barrierCount, pBarriers, 0, nullptr);
    };

    // UAV-style barrier on the sum table
    auto scratchBarrier = [&]()
    {
        const VkBufferMemoryBarrier barrier = BufferTransition(m_FPSScratchBuffer, shaderReadWrite, shaderReadWrite, m_ScratchBufferSize);
        recordBufferBarriers(1, &barrier);
    };

    // UAV-style barrier on the reduced sum table
    auto reducedScratchBarrier = [&]()
    {
        const VkBufferMemoryBarrier barrier = BufferTransition(m_FPSReducedScratchBuffer, shaderReadWrite, shaderReadWrite, m_ReducedScratchBufferSize);
        recordBufferBarriers(1, &barrier);
    };

    // Selects which of the three per-frame constant descriptor sets gets updated this call
    static uint32_t frameCount = 0;
    ++frameCount;
    const uint32_t frameSlot = frameCount % 3;

    SetPerfMarkerBegin(commandList, "FFXParallelSortIndirect");

    // Key/payload buffers to ping-pong between as sorted values are written out
    VkBuffer* pReadKeys     = &m_SrcKeyBuffer->Resource();
    VkBuffer* pWriteKeys    = &m_DstKeyBuffer->Resource();
    VkBuffer* pReadPayload  = &m_SrcPayloadBuffer->Resource();
    VkBuffer* pWritePayload = &m_DstPayloadBuffer->Resource();
    const bool bHasPayload  = true;

    // Phase 1: let the GPU fill in the sort constants and the indirect dispatch arguments
    {
        struct SetupIndirectCB
        {
            uint32_t MaxThreadGroups;
        };
        SetupIndirectCB setupConstants;
        setupConstants.MaxThreadGroups = m_MaxNumThreadgroups;

        // Upload the constants and point this frame's descriptor set at them
        VkDescriptorBufferInfo setupConstantBuffer = m_pConstantBufferRing->AllocConstantBuffer(sizeof(SetupIndirectCB), static_cast<void*>(&setupConstants));
        BindConstantBuffer(setupConstantBuffer, m_SortDescriptorSetConstantsIndirect[frameSlot]);

        // Dispatch
        bindSet(1, m_SortDescriptorSetConstantsIndirect[frameSlot]);
        bindSet(5, m_SortDescriptorSetIndirect);
        bindPipeline(m_FPSIndirectSetupParametersPipeline);
        vkCmdDispatch(commandList, 1, 1, 1);

        // When done, transition the args buffers to indirect-argument reads and the constant
        // buffer from read/write to read-only shader access
        const VkBufferMemoryBarrier setupBarriers[] = {
            BufferTransition(m_IndirectCountScatterArgs, shaderReadWrite, shaderReadWrite, sizeof(uint32_t) * 3),
            BufferTransition(m_IndirectReduceScanArgs, shaderReadWrite, shaderReadWrite, sizeof(uint32_t) * 3),
            BufferTransition(m_IndirectConstantBuffer, shaderReadWrite, VK_ACCESS_SHADER_READ_BIT, sizeof(FFX_ParallelSortCB)),
            BufferTransition(m_IndirectCountScatterArgs, shaderReadWrite, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, sizeof(uint32_t) * 3),
            BufferTransition(m_IndirectReduceScanArgs, shaderReadWrite, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, sizeof(uint32_t) * 3),
        };
        recordBufferBarriers(5, setupBarriers);
    }

    // Bind the scratch descriptor set
    bindSet(4, m_SortDescriptorSetScratch);

    // Expose the GPU-written constant buffer through this frame's constants set and bind it
    {
        VkDescriptorBufferInfo sortConstantBuffer = { m_IndirectConstantBuffer, 0, VK_WHOLE_SIZE };
        BindConstantBuffer(sortConstantBuffer, m_SortDescriptorSetConstants[frameSlot]);
    }
    bindSet(0, m_SortDescriptorSetConstants[frameSlot]);

    // Phase 2: perform the radix sort (currently only supports 32-bit key/payload sorting)
    uint32_t inputSet = 0;
    for (uint32_t bitShift = 0; bitShift < 32u; bitShift += FFX_PARALLELSORT_SORT_BITS_PER_PASS)
    {
        // Update the bit shift
        vkCmdPushConstants(commandList, m_SortPipelineLayout, VK_SHADER_STAGE_ALL, 0, 4, &bitShift);

        // Bind input/output for this pass
        bindSet(2, m_SortDescriptorSetInputOutput[inputSet]);

        // Sort Count
        bindPipeline(m_FPSCountPipeline);
        vkCmdDispatchIndirect(commandList, m_IndirectCountScatterArgs, 0);
        scratchBarrier();

        // Sort Reduce
        bindPipeline(m_FPSCountReducePipeline);
        vkCmdDispatchIndirect(commandList, m_IndirectReduceScanArgs, 0);
        reducedScratchBarrier();

        // Sort Scan: first the prefix scan of the reduced values...
        bindSet(3, m_SortDescriptorSetScanSets[0]);
        bindPipeline(m_FPSScanPipeline);
        vkCmdDispatch(commandList, 1, 1, 1);
        reducedScratchBarrier();

        // ...then the prefix scan on the histogram, adding the partial sums just computed
        bindSet(3, m_SortDescriptorSetScanSets[1]);
        bindPipeline(m_FPSScanAddPipeline);
        vkCmdDispatchIndirect(commandList, m_IndirectReduceScanArgs, 0);
        scratchBarrier();

        // Sort Scatter
        bindPipeline(bHasPayload ? m_FPSScatterPayloadPipeline : m_FPSScatterPipeline);
        vkCmdDispatchIndirect(commandList, m_IndirectCountScatterArgs, 0);

        // Finish doing everything and barrier the freshly written buffers for the next pass
        VkBufferMemoryBarrier outputBarriers[2];
        uint32_t outputBarrierCount = 0;
        outputBarriers[outputBarrierCount++] = BufferTransition(*pWriteKeys, shaderReadWrite, shaderReadWrite, sizeof(uint32_t) * NumKeys);
        if (bHasPayload)
        {
            outputBarriers[outputBarrierCount++] = BufferTransition(*pWritePayload, shaderReadWrite, shaderReadWrite, sizeof(uint32_t) * NumKeys);
        }
        recordBufferBarriers(outputBarrierCount, outputBarriers);

        // Swap read/write sources
        std::swap(pReadKeys, pWriteKeys);
        if (bHasPayload)
        {
            std::swap(pReadPayload, pWritePayload);
        }
        inputSet ^= 1u;
    }

    // Phase 3: transition the indirect buffers back to shader read/write for the next frame
    {
        const VkBufferMemoryBarrier resetBarriers[] = {
            BufferTransition(m_IndirectConstantBuffer, VK_ACCESS_SHADER_READ_BIT, shaderReadWrite, sizeof(FFX_ParallelSortCB)),
            BufferTransition(m_IndirectCountScatterArgs, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, shaderReadWrite, sizeof(uint32_t) * 3),
            BufferTransition(m_IndirectReduceScanArgs, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, shaderReadWrite, sizeof(uint32_t) * 3),
        };
        recordBufferBarriers(3, resetBarriers);
    }

    // Close out the perf capture
    SetPerfMarkerEnd(commandList);
}
|
@ -0,0 +1,101 @@
|
||||
// ParallelSort.h
|
||||
//
|
||||
// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "../vk/stdafx.h"
|
||||
#include "BufferHelper.h"
|
||||
|
||||
|
||||
// CPU-side mirror of struct ParallelSortRenderCB in ParallelSortVerify.hlsl.
// Any change to the fields or their order must be made in both places.
struct ParallelSortRenderCB
{
    int32_t Width;
    int32_t Height;
    int32_t SortWidth;
    int32_t SortHeight;
};
|
||||
|
||||
|
||||
class FFXParallelSort
|
||||
{
|
||||
public:
|
||||
void OnCreate(Device* pDevice, ResourceViewHeaps* pResourceViewHeaps, DynamicBufferRing* pConstantBufferRing, UploadHeap* pUploadHeap, Buffer* elementCount, Buffer* listA, Buffer* listB, Buffer* listA2, Buffer* listB2);
|
||||
void OnDestroy();
|
||||
|
||||
void Draw(VkCommandBuffer commandList);
|
||||
|
||||
private:
|
||||
void CompileRadixPipeline(const char* shaderFile, const DefineList* defines, const char* entryPoint, VkPipeline& pPipeline);
|
||||
void BindConstantBuffer(VkDescriptorBufferInfo& GPUCB, VkDescriptorSet& DescriptorSet, uint32_t Binding = 0, uint32_t Count = 1);
|
||||
void BindUAVBuffer(VkBuffer* pBuffer, VkDescriptorSet& DescriptorSet, uint32_t Binding = 0, uint32_t Count = 1);
|
||||
|
||||
|
||||
Device* m_pDevice = nullptr;
|
||||
UploadHeap* m_pUploadHeap = nullptr;
|
||||
ResourceViewHeaps* m_pResourceViewHeaps = nullptr;
|
||||
DynamicBufferRing* m_pConstantBufferRing = nullptr;
|
||||
uint32_t m_MaxNumThreadgroups = 800;
|
||||
|
||||
uint32_t m_ScratchBufferSize;
|
||||
uint32_t m_ReducedScratchBufferSize;
|
||||
|
||||
Buffer* m_SrcKeyBuffer = nullptr;
|
||||
Buffer* m_SrcPayloadBuffer = nullptr;
|
||||
|
||||
Buffer* m_DstKeyBuffer = nullptr;
|
||||
Buffer* m_DstPayloadBuffer = nullptr;
|
||||
|
||||
VkBuffer m_FPSScratchBuffer; // Sort scratch buffer
|
||||
VmaAllocation m_FPSScratchBufferAllocation;
|
||||
|
||||
VkBuffer m_FPSReducedScratchBuffer; // Sort reduced scratch buffer
|
||||
VmaAllocation m_FPSReducedScratchBufferAllocation;
|
||||
|
||||
VkDescriptorSetLayout m_SortDescriptorSetLayoutConstants;
|
||||
VkDescriptorSet m_SortDescriptorSetConstants[3];
|
||||
VkDescriptorSetLayout m_SortDescriptorSetLayoutConstantsIndirect;
|
||||
VkDescriptorSet m_SortDescriptorSetConstantsIndirect[3];
|
||||
|
||||
VkDescriptorSetLayout m_SortDescriptorSetLayoutInputOutputs;
|
||||
VkDescriptorSetLayout m_SortDescriptorSetLayoutScan;
|
||||
VkDescriptorSetLayout m_SortDescriptorSetLayoutScratch;
|
||||
VkDescriptorSetLayout m_SortDescriptorSetLayoutIndirect;
|
||||
|
||||
VkDescriptorSet m_SortDescriptorSetInputOutput[2];
|
||||
VkDescriptorSet m_SortDescriptorSetScanSets[2];
|
||||
VkDescriptorSet m_SortDescriptorSetScratch;
|
||||
VkDescriptorSet m_SortDescriptorSetIndirect;
|
||||
VkPipelineLayout m_SortPipelineLayout;
|
||||
|
||||
VkPipeline m_FPSCountPipeline;
|
||||
VkPipeline m_FPSCountReducePipeline;
|
||||
VkPipeline m_FPSScanPipeline;
|
||||
VkPipeline m_FPSScanAddPipeline;
|
||||
VkPipeline m_FPSScatterPipeline;
|
||||
VkPipeline m_FPSScatterPayloadPipeline;
|
||||
|
||||
// Resources for indirect execution of algorithm
|
||||
VkBuffer m_IndirectConstantBuffer; // Buffer to hold radix sort constant buffer data for indirect dispatch
|
||||
VmaAllocation m_IndirectConstantBufferAllocation;
|
||||
VkBuffer m_IndirectCountScatterArgs; // Buffer to hold dispatch arguments used for Count/Scatter parts of the algorithm
|
||||
VmaAllocation m_IndirectCountScatterArgsAllocation;
|
||||
VkBuffer m_IndirectReduceScanArgs; // Buffer to hold dispatch arguments used for Reduce/Scan parts of the algorithm
|
||||
VmaAllocation m_IndirectReduceScanArgsAllocation;
|
||||
|
||||
VkPipeline m_FPSIndirectSetupParametersPipeline;
|
||||
};
|
@ -0,0 +1,294 @@
|
||||
// FidelityFX Super Resolution Sample
|
||||
//
|
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#include "AnimatedTexture.h"
|
||||
|
||||
|
||||
struct ConstantBuffer
|
||||
{
|
||||
math::Matrix4 currentViewProj;
|
||||
math::Matrix4 previousViewProj;
|
||||
float jitterCompensation[ 2 ];
|
||||
float scrollFactor;
|
||||
float rotationFactor;
|
||||
int mode;
|
||||
int pads[3];
|
||||
};
|
||||
|
||||
|
||||
void AnimatedTextures::OnCreate( Device& device, UploadHeap& uploadHeap, StaticBufferPool& bufferPool, VkRenderPass renderPass, ResourceViewHeaps& resourceViewHeaps, DynamicBufferRing& constantBufferRing )
|
||||
{
|
||||
m_pDevice = &device;
|
||||
m_constantBufferRing = &constantBufferRing;
|
||||
|
||||
VkSamplerCreateInfo sampler = {};
|
||||
sampler.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
|
||||
sampler.magFilter = VK_FILTER_LINEAR;
|
||||
sampler.minFilter = VK_FILTER_LINEAR;
|
||||
sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
||||
sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
|
||||
sampler.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
|
||||
sampler.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
|
||||
sampler.minLod = -1000;
|
||||
sampler.maxLod = 1000;
|
||||
sampler.maxAnisotropy = 16.0f;
|
||||
VkResult res = vkCreateSampler( device.GetDevice(), &sampler, nullptr, &m_sampler);
|
||||
assert(res == VK_SUCCESS);
|
||||
|
||||
// Compile shaders
|
||||
//
|
||||
DefineList attributeDefines;
|
||||
|
||||
VkPipelineShaderStageCreateInfo vertexShader;
|
||||
res = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_VERTEX_BIT, "AnimatedTexture.hlsl", "VSMain", "-T vs_6_0", &attributeDefines, &vertexShader);
|
||||
assert(res == VK_SUCCESS);
|
||||
|
||||
VkPipelineShaderStageCreateInfo fragmentShader;
|
||||
res = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_FRAGMENT_BIT, "AnimatedTexture.hlsl", "PSMain", "-T ps_6_0", &attributeDefines, &fragmentShader);
|
||||
assert(res == VK_SUCCESS);
|
||||
|
||||
std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
|
||||
shaderStages.push_back(vertexShader);
|
||||
shaderStages.push_back(fragmentShader);
|
||||
|
||||
std::vector<VkDescriptorSetLayoutBinding> layoutBindings(3);
|
||||
layoutBindings[0].binding = 0;
|
||||
layoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
|
||||
layoutBindings[0].descriptorCount = 1;
|
||||
layoutBindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
layoutBindings[0].pImmutableSamplers = nullptr;
|
||||
|
||||
layoutBindings[1].binding = 1;
|
||||
layoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
|
||||
layoutBindings[1].descriptorCount = 1;
|
||||
layoutBindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
layoutBindings[1].pImmutableSamplers = nullptr;
|
||||
|
||||
layoutBindings[2].binding = 2;
|
||||
layoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
|
||||
layoutBindings[2].descriptorCount = 1;
|
||||
layoutBindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
layoutBindings[2].pImmutableSamplers = &m_sampler;
|
||||
|
||||
for (int i = 0; i < _countof(m_descriptorSets);i++)
|
||||
{
|
||||
resourceViewHeaps.CreateDescriptorSetLayoutAndAllocDescriptorSet( &layoutBindings, &m_descriptorSetLayout, &m_descriptorSets[i] );
|
||||
constantBufferRing.SetDescriptorSet( 0, sizeof( ConstantBuffer ), m_descriptorSets[i] );
|
||||
}
|
||||
|
||||
// Create pipeline layout
|
||||
//
|
||||
VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {};
|
||||
pPipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
|
||||
pPipelineLayoutCreateInfo.setLayoutCount = 1;
|
||||
pPipelineLayoutCreateInfo.pSetLayouts = &m_descriptorSetLayout;
|
||||
|
||||
res = vkCreatePipelineLayout(m_pDevice->GetDevice(), &pPipelineLayoutCreateInfo, NULL, &m_pipelineLayout);
|
||||
assert(res == VK_SUCCESS);
|
||||
|
||||
VkPipelineVertexInputStateCreateInfo vi = {};
|
||||
vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
|
||||
|
||||
VkPipelineInputAssemblyStateCreateInfo ia = {};
|
||||
ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
|
||||
ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
|
||||
// rasterizer state
|
||||
VkPipelineRasterizationStateCreateInfo rs = {};
|
||||
rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
|
||||
rs.polygonMode = VK_POLYGON_MODE_FILL;
|
||||
rs.cullMode = VK_CULL_MODE_NONE;
|
||||
rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
|
||||
rs.lineWidth = 1.0f;
|
||||
|
||||
VkPipelineColorBlendAttachmentState att_state[4] = {};
|
||||
att_state[0].colorWriteMask = 0xf;
|
||||
att_state[0].blendEnable = VK_FALSE;
|
||||
att_state[0].alphaBlendOp = VK_BLEND_OP_ADD;
|
||||
att_state[0].colorBlendOp = VK_BLEND_OP_ADD;
|
||||
att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA;
|
||||
att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
|
||||
att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE;
|
||||
att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
|
||||
|
||||
att_state[1].colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT;
|
||||
att_state[2].colorWriteMask = 0x0;
|
||||
att_state[3].colorWriteMask = VK_COLOR_COMPONENT_R_BIT;
|
||||
|
||||
// Color blend state
|
||||
|
||||
VkPipelineColorBlendStateCreateInfo cb = {};
|
||||
cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
|
||||
cb.attachmentCount = _countof(att_state);
|
||||
cb.pAttachments = att_state;
|
||||
cb.logicOpEnable = VK_FALSE;
|
||||
cb.logicOp = VK_LOGIC_OP_NO_OP;
|
||||
cb.blendConstants[0] = 1.0f;
|
||||
cb.blendConstants[1] = 1.0f;
|
||||
cb.blendConstants[2] = 1.0f;
|
||||
cb.blendConstants[3] = 1.0f;
|
||||
|
||||
std::vector<VkDynamicState> dynamicStateEnables = {
|
||||
VK_DYNAMIC_STATE_VIEWPORT,
|
||||
VK_DYNAMIC_STATE_SCISSOR,
|
||||
VK_DYNAMIC_STATE_BLEND_CONSTANTS
|
||||
};
|
||||
VkPipelineDynamicStateCreateInfo dynamicState = {};
|
||||
dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
|
||||
dynamicState.pNext = NULL;
|
||||
dynamicState.pDynamicStates = dynamicStateEnables.data();
|
||||
dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size();
|
||||
|
||||
// view port state
|
||||
|
||||
VkPipelineViewportStateCreateInfo vp = {};
|
||||
vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
|
||||
vp.viewportCount = 1;
|
||||
vp.scissorCount = 1;
|
||||
|
||||
// depth stencil state
|
||||
|
||||
VkPipelineDepthStencilStateCreateInfo ds = {};
|
||||
ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
|
||||
ds.depthTestEnable = VK_TRUE;
|
||||
ds.depthWriteEnable = VK_TRUE;
|
||||
ds.depthCompareOp = VK_COMPARE_OP_GREATER_OR_EQUAL;
|
||||
ds.back.failOp = VK_STENCIL_OP_KEEP;
|
||||
ds.back.passOp = VK_STENCIL_OP_KEEP;
|
||||
ds.back.compareOp = VK_COMPARE_OP_ALWAYS;
|
||||
ds.back.depthFailOp = VK_STENCIL_OP_KEEP;
|
||||
ds.front = ds.back;
|
||||
|
||||
// multi sample state
|
||||
|
||||
VkPipelineMultisampleStateCreateInfo ms = {};
|
||||
ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
|
||||
ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
|
||||
|
||||
// create pipeline
|
||||
|
||||
VkGraphicsPipelineCreateInfo pipeline = {};
|
||||
pipeline.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
|
||||
pipeline.layout = m_pipelineLayout;
|
||||
pipeline.pVertexInputState = &vi;
|
||||
pipeline.pInputAssemblyState = &ia;
|
||||
pipeline.pRasterizationState = &rs;
|
||||
pipeline.pColorBlendState = &cb;
|
||||
pipeline.pMultisampleState = &ms;
|
||||
pipeline.pDynamicState = &dynamicState;
|
||||
pipeline.pViewportState = &vp;
|
||||
pipeline.pDepthStencilState = &ds;
|
||||
pipeline.pStages = shaderStages.data();
|
||||
pipeline.stageCount = (uint32_t)shaderStages.size();
|
||||
pipeline.renderPass = renderPass;
|
||||
pipeline.subpass = 0;
|
||||
|
||||
res = vkCreateGraphicsPipelines(m_pDevice->GetDevice(), device.GetPipelineCache(), 1, &pipeline, NULL, &m_pipelines[0]);
|
||||
assert(res == VK_SUCCESS);
|
||||
SetResourceName(m_pDevice->GetDevice(), VK_OBJECT_TYPE_PIPELINE, (uint64_t)m_pipelines[0], "AT pipeline with comp");
|
||||
|
||||
att_state[3].colorWriteMask = 0;
|
||||
res = vkCreateGraphicsPipelines(m_pDevice->GetDevice(), device.GetPipelineCache(), 1, &pipeline, NULL, &m_pipelines[1]);
|
||||
assert(res == VK_SUCCESS);
|
||||
SetResourceName(m_pDevice->GetDevice(), VK_OBJECT_TYPE_PIPELINE, (uint64_t)m_pipelines[1], "AT pipeline no comp");
|
||||
|
||||
UINT indices[6] = { 0, 1, 2, 2, 1, 3 };
|
||||
bufferPool.AllocBuffer( _countof( indices ), sizeof( UINT ), indices, &m_indexBuffer );
|
||||
|
||||
m_textures[0].InitFromFile( &device, &uploadHeap, "..\\media\\lion.jpg", true );
|
||||
m_textures[1].InitFromFile( &device, &uploadHeap, "..\\media\\checkerboard.dds", true );
|
||||
m_textures[2].InitFromFile( &device, &uploadHeap, "..\\media\\composition_text.dds", true );
|
||||
|
||||
for ( int i = 0; i < _countof( m_textures ); i++ )
|
||||
{
|
||||
m_textures[ i ].CreateSRV( &m_textureSRVs[i] );
|
||||
SetDescriptorSet( m_pDevice->GetDevice(), 1, m_textureSRVs[i], nullptr, m_descriptorSets[i] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void AnimatedTextures::OnDestroy()
|
||||
{
|
||||
vkDestroySampler(m_pDevice->GetDevice(), m_sampler, nullptr);
|
||||
m_sampler = VK_NULL_HANDLE;
|
||||
|
||||
for ( int i = 0; i < _countof( m_textures ); i++ )
|
||||
{
|
||||
vkDestroyImageView(m_pDevice->GetDevice(), m_textureSRVs[i], nullptr);
|
||||
m_textureSRVs[i] = VK_NULL_HANDLE;
|
||||
|
||||
m_textures[i].OnDestroy();
|
||||
}
|
||||
|
||||
for ( int i = 0; i < _countof( m_pipelines ); i++ )
|
||||
{
|
||||
vkDestroyPipeline( m_pDevice->GetDevice(), m_pipelines[i], nullptr );
|
||||
m_pipelines[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
vkDestroyPipelineLayout( m_pDevice->GetDevice(), m_pipelineLayout, nullptr );
|
||||
m_pipelineLayout = VK_NULL_HANDLE;
|
||||
|
||||
vkDestroyDescriptorSetLayout( m_pDevice->GetDevice(), m_descriptorSetLayout, nullptr );
|
||||
m_descriptorSetLayout = VK_NULL_HANDLE;
|
||||
|
||||
m_pDevice = nullptr;
|
||||
}
|
||||
|
||||
|
||||
// Advances the animation state and records the draw of the two animated
// billboards into `commandList`.
//
//   commandList     - command buffer the bind/draw commands are recorded into.
//   frameTime       - time step added to the animation timers
//                     (presumably seconds - confirm against the caller).
//   speed           - multiplier applied to the scroll and rotation rates
//                     (the texture flip timer is not affected by it).
//   compositionMask - selects m_pipelines[0] when true, m_pipelines[1] otherwise.
//   camera          - supplies current and previous view/projection matrices.
//
// If no constant buffer memory can be obtained from the ring, the animation
// state is still advanced but nothing is recorded for this frame.
void AnimatedTextures::Render( VkCommandBuffer commandList, float frameTime, float speed, bool compositionMask, const Camera& camera )
{
    m_scrollFactor += frameTime * 1.0f * speed;
    m_rotationFactor += frameTime * 2.0f * speed;
    m_flipTimer += frameTime * 1.0f;

    // Keep the scroll offset bounded.
    if ( m_scrollFactor > 10.0f )
        m_scrollFactor -= 10.0f;

    const float twoPI = 6.283185307179586476925286766559f;

    // Keep the rotation angle bounded to roughly one turn.
    if ( m_rotationFactor > twoPI )
        m_rotationFactor -= twoPI;

    // The texture index steps up once per ~3 units of the flip timer and is
    // clamped to the last texture; the timer restarts once it passes 9.
    int textureIndex = min( (int)floorf( m_flipTimer * 0.33333f ), _countof( m_textures ) - 1 );
    if ( m_flipTimer > 9.0f )
        m_flipTimer = 0.0f;

    VkDescriptorBufferInfo cb = {};
    ConstantBuffer* constantBuffer = nullptr;
    m_constantBufferRing->AllocConstantBuffer( sizeof(*constantBuffer), (void**)&constantBuffer, &cb );

    // The allocation result used to be ignored; if the ring could not provide
    // memory the pointer is still null and must not be written through.
    if ( constantBuffer == nullptr )
        return;

    constantBuffer->currentViewProj = camera.GetProjection() * camera.GetView();
    constantBuffer->previousViewProj = camera.GetPrevProjection() * camera.GetPrevView();

    // Difference between the previous and current projection's third-column
    // x/y terms; the pixel shader adds it to the motion vectors.
    constantBuffer->jitterCompensation[0] = camera.GetPrevProjection().getCol2().getX() - camera.GetProjection().getCol2().getX();
    constantBuffer->jitterCompensation[1] = camera.GetPrevProjection().getCol2().getY() - camera.GetProjection().getCol2().getY();
    constantBuffer->scrollFactor = m_scrollFactor;
    constantBuffer->rotationFactor = m_rotationFactor;
    constantBuffer->mode = textureIndex;

    // The constant buffer is addressed through a dynamic offset into the ring.
    uint32_t uniformOffsets[] = { (uint32_t)cb.offset };
    vkCmdBindDescriptorSets( commandList, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &m_descriptorSets[textureIndex], _countof( uniformOffsets ), uniformOffsets );
    vkCmdBindPipeline( commandList, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelines[compositionMask ? 0 : 1] );
    vkCmdBindIndexBuffer( commandList, m_indexBuffer.buffer, m_indexBuffer.offset, VK_INDEX_TYPE_UINT32 );

    // 6 indices (one quad), 2 instances (one per billboard).
    vkCmdDrawIndexed( commandList, 6, 2, 0, 0, 0 );
}
|
@ -0,0 +1,57 @@
|
||||
// FidelityFX Super Resolution Sample
|
||||
//
|
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
|
||||
#pragma once
|
||||
#include "stdafx.h"
|
||||
|
||||
|
||||
class AnimatedTextures
|
||||
{
|
||||
public:
|
||||
|
||||
AnimatedTextures() {}
|
||||
virtual ~AnimatedTextures() {}
|
||||
|
||||
void OnCreate( Device& device, UploadHeap& uploadHeap, StaticBufferPool& bufferPool, VkRenderPass renderPass, ResourceViewHeaps& resourceViewHeaps, DynamicBufferRing& constantBufferRing );
|
||||
void OnDestroy();
|
||||
|
||||
void Render( VkCommandBuffer commandList, float frameTime, float speed, bool compositionMask, const Camera& camera );
|
||||
|
||||
private:
|
||||
|
||||
Device* m_pDevice = nullptr;
|
||||
DynamicBufferRing* m_constantBufferRing = nullptr;
|
||||
|
||||
VkDescriptorSetLayout m_descriptorSetLayout = VK_NULL_HANDLE;
|
||||
VkDescriptorSet m_descriptorSets[3] = {};
|
||||
VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE;
|
||||
VkPipeline m_pipelines[2] = {};
|
||||
VkDescriptorBufferInfo m_indexBuffer = {};
|
||||
|
||||
Texture m_textures[3] = {};
|
||||
VkImageView m_textureSRVs[3] = {};
|
||||
VkSampler m_sampler = VK_NULL_HANDLE;
|
||||
|
||||
float m_scrollFactor = 0.0f;
|
||||
float m_rotationFactor = 0.0f;
|
||||
float m_flipTimer = 0.0f;
|
||||
};
|
@ -0,0 +1,128 @@
|
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
// Per-draw constants (binding 0, set 0).
[[vk::binding( 0, 0 )]] cbuffer cb : register(b0)
{
    float4x4    g_CurrentViewProjection;        // view-projection used for this frame's positions
    float4x4    g_PreviousViewProjection;       // view-projection used for last frame's positions
    float2      g_CameraJitterCompensation;     // added to the motion vectors in PSMain
    float       g_ScrollFactor;                 // UV scroll offset
    float       g_RotationFactor;               // UV rotation angle passed to sincos
    int         g_Mode;                         // mode 2 switches every billboard to horizontal scrolling
    int         pad0, pad1, pad2;               // unused padding
}

// Texture sampled by the billboards and the sampler used to read it.
[[vk::binding( 1, 0 )]] Texture2D       g_Texture : register(t0);
[[vk::binding( 2, 0 )]] SamplerState    g_Sampler : register(s0);
|
||||
|
||||
// Vertex shader output / pixel shader input.
struct VERTEX_OUT
{
    float4 CurrentPosition  : TEXCOORD0;    // clip-space position from the current view-projection
    float4 PreviousPosition : TEXCOORD1;    // clip-space position from the previous view-projection
    float3 TexCoord         : TEXCOORD2;    // xy = texture coordinates, z = instance (billboard) index
    float4 Position         : SV_POSITION;
};
|
||||
|
||||
|
||||
// Builds one billboard corner from the vertex id; the instance id selects
// which of the two billboards (and which UV orientation) is produced.
VERTEX_OUT VSMain( uint vertexId : SV_VertexID, uint instanceId : SV_InstanceID )
{
    // Quad corners in the XY plane.
    const float2 corners[ 4 ] =
    {
        float2( -1,  1 ),
        float2(  1,  1 ),
        float2( -1, -1 ),
        float2(  1, -1 ),
    };

    const float2 corner = corners[ vertexId ];
    const bool isFirstBillboard = ( instanceId == 0 );

    // Map the corner from [-1,1] to texture space; the first billboard uses a negated U scale.
    const float2 texCoord = (corner+1)*float2( isFirstBillboard ? -0.5 : 0.5, -0.5 );

    // Place the quad in the world; the two instances differ only in Z.
    float4 worldPosition = float4( corner, 0.0, 1.0 );
    worldPosition.xyz += isFirstBillboard ? float3( -13, 1.5, 2 ) : float3( -13, 1.5, -2 );

    VERTEX_OUT result = (VERTEX_OUT)0;

    result.CurrentPosition = mul( g_CurrentViewProjection, worldPosition );
    result.PreviousPosition = mul( g_PreviousViewProjection, worldPosition );
    result.Position = result.CurrentPosition;

    result.TexCoord.xy = texCoord;
    result.TexCoord.z = instanceId;

    return result;
}
|
||||
|
||||
// Render targets written by PSMain.
struct Output
{
    float4 finalColor                        : SV_TARGET0;  // billboard colour
    float2 motionVectors                     : SV_TARGET1;  // screen-space motion of the billboard surface
    float  upscaleReactive                   : SV_TARGET2;  // reactive mask value
    float  upscaleTransparencyAndComposition : SV_TARGET3;  // transparency-and-composition mask value
};
|
||||
|
||||
|
||||
// Returns the animated colour for a billboard.
//   - Billboard 0, or any billboard when g_Mode == 2: scrolling UVs
//     (horizontal only in mode 2, diagonal otherwise).
//   - Billboard 1 otherwise: rotating UVs.
//   - Anything else: opaque white.
// Alpha is always 1.
float4 TextureLookup( int billboardIndex, float2 uv0 )
{
    const bool horizontalOnly = ( g_Mode == 2 );

    if ( horizontalOnly || billboardIndex == 0 )
    {
        // Scrolling
        const float verticalScroll = horizontalOnly ? 0.0 : 0.5*g_ScrollFactor;
        const float2 scrolledUV = uv0 + float2( -g_ScrollFactor, verticalScroll );

        return float4( g_Texture.SampleLevel( g_Sampler, scrolledUV, 0 ).rgb, 1 );
    }

    if ( billboardIndex == 1 )
    {
        // Rotated UVs
        float sinAngle, cosAngle;
        sincos( g_RotationFactor, sinAngle, cosAngle );
        const float2x2 rotation = { float2( cosAngle, sinAngle ), float2( -sinAngle, cosAngle ) };

        const float2 rotatedUV = mul( rotation, uv0-float2( 0.5, -0.5) );

        return float4( g_Texture.SampleLevel( g_Sampler, rotatedUV, 0 ).rgb, 1 );
    }

    return float4( 1, 1, 1, 1 );
}
|
||||
|
||||
|
||||
// Shades a billboard pixel: animated colour, motion vectors derived from the
// current and previous clip-space positions, and the upscaler mask values.
Output PSMain( VERTEX_OUT input )
{
    Output result = (Output)0;

    result.finalColor = TextureLookup( (int)input.TexCoord.z, input.TexCoord.xy );

    // Perspective divide for both frames, then previous minus current plus the
    // jitter compensation, scaled by (0.5, -0.5).
    const float2 previousNdc = input.PreviousPosition.xy / input.PreviousPosition.w;
    const float2 currentNdc = input.CurrentPosition.xy / input.CurrentPosition.w;
    result.motionVectors = previousNdc - currentNdc + g_CameraJitterCompensation;
    result.motionVectors *= float2(0.5f, -0.5f);

    // Nothing to write to the reactive mask. Color writes are off on this target anyway.
    result.upscaleReactive = 0;

    // Write a value into here to indicate the depth and motion vectors are as expected
    // for a static object, but the surface contents are changing.
    result.upscaleTransparencyAndComposition = 1;

    return result;
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue