// This file is part of the FidelityFX SDK. // // Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions : // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#include "ffx_blur_resources.h"

#if defined(FFX_GPU)
#include "ffx_core.h"

#ifndef FFX_PREFER_WAVE64
#define FFX_PREFER_WAVE64
#endif // #ifndef FFX_PREFER_WAVE64

#pragma warning(disable: 3205)  // conversion from larger type to smaller

// Constant buffer and its accessor.
// ImageSize() reads cbBLUR, so it is only defined when the constant buffer is
// bound (same guarding scheme as the SRV/UAV callbacks further down).
#if defined(FFX_BLUR_BIND_CB_BLUR)
layout (set = 0, binding = FFX_BLUR_BIND_CB_BLUR, std140) uniform cbBLUR_t
{
    FfxInt32x2 imageSize;
} cbBLUR;

// Returns the imageSize member of the cbBLUR constant buffer.
FfxInt32x2 ImageSize()
{
    return cbBLUR.imageSize;
}
#endif // #if defined(FFX_BLUR_BIND_CB_BLUR)

// SRVs
#if defined FFX_BLUR_BIND_SRV_INPUT_SRC
layout (set = 0, binding = FFX_BLUR_BIND_SRV_INPUT_SRC) uniform texture2D r_input_src;
#endif

// UAV declarations
#if defined FFX_BLUR_BIND_UAV_OUTPUT
layout (set = 0, binding = FFX_BLUR_BIND_UAV_OUTPUT, rgba32f) uniform image2D rw_output;
#endif

// FFX_BLUR_OPTION_KERNEL_DIMENSION to be defined by the client application
// App should define e.g the following for 5x5 blur:
// #define FFX_BLUR_OPTION_KERNEL_DIMENSION 5
#ifndef FFX_BLUR_OPTION_KERNEL_DIMENSION
#error Please define FFX_BLUR_OPTION_KERNEL_DIMENSION
#endif

// FFX_BLUR_KERNEL_RANGE is center + half width of the kernel
//
// consider a blur kernel 5x5 - '*' indicates the center of the kernel
// FFX_BLUR_OPTION_KERNEL_DIMENSION=5
// |---------------|
// x   x   x   x   x
// x   x   x   x   x
// x   x   x*  x   x
// x   x   x   x   x
// x   x   x   x   x
//
//
// as separate 1D kernels
//
// x   x   x*  x   x
//         |-------|
//        FFX_BLUR_KERNEL_RANGE
//
#define FFX_BLUR_KERNEL_RANGE        (((FFX_BLUR_OPTION_KERNEL_DIMENSION - 1) / 2) + 1)
#define FFX_BLUR_KERNEL_RANGE_MINUS1 (FFX_BLUR_KERNEL_RANGE - 1)

// Weight tables below exist only for FFX_BLUR_KERNEL_RANGE in [2, 11]. Fail here
// with a clear message instead of leaving kernel_weights without an initializer.
#if (FFX_BLUR_KERNEL_RANGE < 2) || (FFX_BLUR_KERNEL_RANGE > 11)
#error FFX_BLUR_OPTION_KERNEL_DIMENSION is not supported: pregenerated kernels cover the odd sizes 3 to 21.
#endif

//
// FFX-Blur Callback definitions
//
#if FFX_HALF
#define FFX_BLUR_KERNEL_TYPE FfxFloat16
#else
#define FFX_BLUR_KERNEL_TYPE FfxFloat32
#endif

// Returns one weight of the pregenerated 1D Gaussian kernel selected at compile time.
//
// The table is chosen by FFX_BLUR_OPTION_KERNEL_PERMUTATION (0, 1 or 2 -> sigma 1.6,
// 2.8 or 4) and by FFX_BLUR_KERNEL_RANGE (2 to 11). Each table stores
// FFX_BLUR_KERNEL_RANGE weights, ordered from the centre tap (index 0) outwards.
//
// iKernelIndex: index into the selected table. It is not range-checked here, so the
//               caller must keep it within [0, FFX_BLUR_KERNEL_RANGE).
FFX_BLUR_KERNEL_TYPE FfxBlurLoadKernelWeight(FfxInt32 iKernelIndex)
{
    // GAUSSIAN BLUR 1D KERNELS
    //
    //----------------------------------------------------------------------------------------------------------------------------------
    //----------------------------------------------------------------------------------------------------------------------------------
    // Kernel Size: [3, 21]: odd numbers
    // Kernels are pregenerated using three different sigma values.
    // Larger sigmas are better for larger kernels.
    const FFX_BLUR_KERNEL_TYPE kernel_weights[FFX_BLUR_KERNEL_RANGE] =
#if FFX_BLUR_OPTION_KERNEL_PERMUTATION == 0
    // Sigma: 1.6
    #if FFX_BLUR_KERNEL_RANGE == 2
    {
        FFX_BLUR_KERNEL_TYPE(0.3765770884), FFX_BLUR_KERNEL_TYPE(0.3117114558)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 3
    {
        FFX_BLUR_KERNEL_TYPE(0.2782163289), FFX_BLUR_KERNEL_TYPE(0.230293397), FFX_BLUR_KERNEL_TYPE(0.1305984385)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 4
    {
        FFX_BLUR_KERNEL_TYPE(0.2525903052), FFX_BLUR_KERNEL_TYPE(0.2090814714), FFX_BLUR_KERNEL_TYPE(0.1185692428),
        FFX_BLUR_KERNEL_TYPE(0.0460541333)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 5
    {
        FFX_BLUR_KERNEL_TYPE(0.2465514351), FFX_BLUR_KERNEL_TYPE(0.2040828004), FFX_BLUR_KERNEL_TYPE(0.115734517),
        FFX_BLUR_KERNEL_TYPE(0.0449530818), FFX_BLUR_KERNEL_TYPE(0.0119538834)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 6
    {
        FFX_BLUR_KERNEL_TYPE(0.245483563), FFX_BLUR_KERNEL_TYPE(0.2031988699), FFX_BLUR_KERNEL_TYPE(0.1152332436),
        FFX_BLUR_KERNEL_TYPE(0.0447583794), FFX_BLUR_KERNEL_TYPE(0.0119021083), FFX_BLUR_KERNEL_TYPE(0.0021656173)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 7
    {
        FFX_BLUR_KERNEL_TYPE(0.2453513488), FFX_BLUR_KERNEL_TYPE(0.2030894296), FFX_BLUR_KERNEL_TYPE(0.1151711805),
        FFX_BLUR_KERNEL_TYPE(0.0447342732), FFX_BLUR_KERNEL_TYPE(0.011895698), FFX_BLUR_KERNEL_TYPE(0.0021644509),
        FFX_BLUR_KERNEL_TYPE(0.0002692935)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 8
    {
        FFX_BLUR_KERNEL_TYPE(0.2453401155), FFX_BLUR_KERNEL_TYPE(0.2030801313), FFX_BLUR_KERNEL_TYPE(0.1151659074),
        FFX_BLUR_KERNEL_TYPE(0.044732225), FFX_BLUR_KERNEL_TYPE(0.0118951533), FFX_BLUR_KERNEL_TYPE(0.0021643518),
        FFX_BLUR_KERNEL_TYPE(0.0002692811), FFX_BLUR_KERNEL_TYPE(2.28922E-05)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 9
    {
        FFX_BLUR_KERNEL_TYPE(0.2453394635), FFX_BLUR_KERNEL_TYPE(0.2030795916), FFX_BLUR_KERNEL_TYPE(0.1151656014),
        FFX_BLUR_KERNEL_TYPE(0.0447321061), FFX_BLUR_KERNEL_TYPE(0.0118951217), FFX_BLUR_KERNEL_TYPE(0.0021643461),
        FFX_BLUR_KERNEL_TYPE(0.0002692804), FFX_BLUR_KERNEL_TYPE(2.28922E-05), FFX_BLUR_KERNEL_TYPE(1.3287E-06)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 10
    {
        FFX_BLUR_KERNEL_TYPE(0.2453394377), FFX_BLUR_KERNEL_TYPE(0.2030795703), FFX_BLUR_KERNEL_TYPE(0.1151655892),
        FFX_BLUR_KERNEL_TYPE(0.0447321014), FFX_BLUR_KERNEL_TYPE(0.0118951205), FFX_BLUR_KERNEL_TYPE(0.0021643458),
        FFX_BLUR_KERNEL_TYPE(0.0002692804), FFX_BLUR_KERNEL_TYPE(2.28922E-05), FFX_BLUR_KERNEL_TYPE(1.3287E-06),
        FFX_BLUR_KERNEL_TYPE(5.26E-08)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 11
    {
        FFX_BLUR_KERNEL_TYPE(0.2453394371), FFX_BLUR_KERNEL_TYPE(0.2030795697), FFX_BLUR_KERNEL_TYPE(0.1151655889),
        FFX_BLUR_KERNEL_TYPE(0.0447321013), FFX_BLUR_KERNEL_TYPE(0.0118951204), FFX_BLUR_KERNEL_TYPE(0.0021643458),
        FFX_BLUR_KERNEL_TYPE(0.0002692804), FFX_BLUR_KERNEL_TYPE(2.28922E-05), FFX_BLUR_KERNEL_TYPE(1.3287E-06),
        FFX_BLUR_KERNEL_TYPE(5.26E-08), FFX_BLUR_KERNEL_TYPE(1.4E-09)
    };
    #endif
#elif FFX_BLUR_OPTION_KERNEL_PERMUTATION == 1
    // Sigma: 2.8
    #if FFX_BLUR_KERNEL_RANGE == 2
    {
        FFX_BLUR_KERNEL_TYPE(0.3474999743), FFX_BLUR_KERNEL_TYPE(0.3262500129)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 3
    {
        FFX_BLUR_KERNEL_TYPE(0.2256541468), FFX_BLUR_KERNEL_TYPE(0.2118551763), FFX_BLUR_KERNEL_TYPE(0.1753177504)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 4
    {
        FFX_BLUR_KERNEL_TYPE(0.1796953063), FFX_BLUR_KERNEL_TYPE(0.1687067636), FFX_BLUR_KERNEL_TYPE(0.1396108926),
        FFX_BLUR_KERNEL_TYPE(0.1018346906)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 5
    {
        FFX_BLUR_KERNEL_TYPE(0.1588894947), FFX_BLUR_KERNEL_TYPE(0.1491732476), FFX_BLUR_KERNEL_TYPE(0.1234462081),
        FFX_BLUR_KERNEL_TYPE(0.0900438796), FFX_BLUR_KERNEL_TYPE(0.0578919173)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 6
    {
        FFX_BLUR_KERNEL_TYPE(0.1491060676), FFX_BLUR_KERNEL_TYPE(0.1399880866), FFX_BLUR_KERNEL_TYPE(0.1158451582),
        FFX_BLUR_KERNEL_TYPE(0.0844995374), FFX_BLUR_KERNEL_TYPE(0.054327293), FFX_BLUR_KERNEL_TYPE(0.0307868909)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 7
    {
        FFX_BLUR_KERNEL_TYPE(0.1446570603), FFX_BLUR_KERNEL_TYPE(0.1358111404), FFX_BLUR_KERNEL_TYPE(0.1123885856),
        FFX_BLUR_KERNEL_TYPE(0.0819782513), FFX_BLUR_KERNEL_TYPE(0.0527062824), FFX_BLUR_KERNEL_TYPE(0.0298682757),
        FFX_BLUR_KERNEL_TYPE(0.0149189344)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 8
    {
        FFX_BLUR_KERNEL_TYPE(0.1427814521), FFX_BLUR_KERNEL_TYPE(0.1340502275), FFX_BLUR_KERNEL_TYPE(0.110931367),
        FFX_BLUR_KERNEL_TYPE(0.0809153299), FFX_BLUR_KERNEL_TYPE(0.0520228983), FFX_BLUR_KERNEL_TYPE(0.0294810068),
        FFX_BLUR_KERNEL_TYPE(0.0147254971), FFX_BLUR_KERNEL_TYPE(0.0064829474)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 9
    {
        FFX_BLUR_KERNEL_TYPE(0.1420666821), FFX_BLUR_KERNEL_TYPE(0.1333791663), FFX_BLUR_KERNEL_TYPE(0.1103760399),
        FFX_BLUR_KERNEL_TYPE(0.0805102644), FFX_BLUR_KERNEL_TYPE(0.0517624694), FFX_BLUR_KERNEL_TYPE(0.0293334236),
        FFX_BLUR_KERNEL_TYPE(0.0146517806), FFX_BLUR_KERNEL_TYPE(0.0064504935), FFX_BLUR_KERNEL_TYPE(0.0025030212)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 10
    {
        FFX_BLUR_KERNEL_TYPE(0.1418238658), FFX_BLUR_KERNEL_TYPE(0.1331511984), FFX_BLUR_KERNEL_TYPE(0.1101873883),
        FFX_BLUR_KERNEL_TYPE(0.0803726585), FFX_BLUR_KERNEL_TYPE(0.0516739985), FFX_BLUR_KERNEL_TYPE(0.0292832877),
        FFX_BLUR_KERNEL_TYPE(0.0146267382), FFX_BLUR_KERNEL_TYPE(0.0064394685), FFX_BLUR_KERNEL_TYPE(0.0024987432),
        FFX_BLUR_KERNEL_TYPE(0.0008545858)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 11
    {
        FFX_BLUR_KERNEL_TYPE(0.1417508359), FFX_BLUR_KERNEL_TYPE(0.1330826344), FFX_BLUR_KERNEL_TYPE(0.1101306491),
        FFX_BLUR_KERNEL_TYPE(0.0803312719), FFX_BLUR_KERNEL_TYPE(0.0516473898), FFX_BLUR_KERNEL_TYPE(0.0292682088),
        FFX_BLUR_KERNEL_TYPE(0.0146192064), FFX_BLUR_KERNEL_TYPE(0.0064361526), FFX_BLUR_KERNEL_TYPE(0.0024974565),
        FFX_BLUR_KERNEL_TYPE(0.0008541457), FFX_BLUR_KERNEL_TYPE(0.0002574667)
    };
    #endif
#elif FFX_BLUR_OPTION_KERNEL_PERMUTATION == 2
    // Sigma: 4
    #if FFX_BLUR_KERNEL_RANGE == 2
    {
        FFX_BLUR_KERNEL_TYPE(0.3402771036), FFX_BLUR_KERNEL_TYPE(0.3298614482)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 3
    {
        FFX_BLUR_KERNEL_TYPE(0.2125433723), FFX_BLUR_KERNEL_TYPE(0.2060375614), FFX_BLUR_KERNEL_TYPE(0.1876907525)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 4
    {
        FFX_BLUR_KERNEL_TYPE(0.1608542243), FFX_BLUR_KERNEL_TYPE(0.1559305837), FFX_BLUR_KERNEL_TYPE(0.1420455978),
        FFX_BLUR_KERNEL_TYPE(0.1215967064)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 5
    {
        FFX_BLUR_KERNEL_TYPE(0.1345347233), FFX_BLUR_KERNEL_TYPE(0.1304167051), FFX_BLUR_KERNEL_TYPE(0.1188036266),
        FFX_BLUR_KERNEL_TYPE(0.1017006505), FFX_BLUR_KERNEL_TYPE(0.0818116562)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 6
    {
        FFX_BLUR_KERNEL_TYPE(0.1197258568), FFX_BLUR_KERNEL_TYPE(0.1160611281), FFX_BLUR_KERNEL_TYPE(0.1057263555),
        FFX_BLUR_KERNEL_TYPE(0.090505984), FFX_BLUR_KERNEL_TYPE(0.0728062644), FFX_BLUR_KERNEL_TYPE(0.0550373395)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 7
    {
        FFX_BLUR_KERNEL_TYPE(0.1110429695), FFX_BLUR_KERNEL_TYPE(0.1076440182), FFX_BLUR_KERNEL_TYPE(0.0980587551),
        FFX_BLUR_KERNEL_TYPE(0.0839422118), FFX_BLUR_KERNEL_TYPE(0.0675261302), FFX_BLUR_KERNEL_TYPE(0.0510458624),
        FFX_BLUR_KERNEL_TYPE(0.0362615375)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 8
    {
        FFX_BLUR_KERNEL_TYPE(0.1059153311), FFX_BLUR_KERNEL_TYPE(0.1026733334), FFX_BLUR_KERNEL_TYPE(0.0935306896),
        FFX_BLUR_KERNEL_TYPE(0.0800660068), FFX_BLUR_KERNEL_TYPE(0.0644079717), FFX_BLUR_KERNEL_TYPE(0.0486887143),
        FFX_BLUR_KERNEL_TYPE(0.0345870861), FFX_BLUR_KERNEL_TYPE(0.0230885324)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 9
    {
        FFX_BLUR_KERNEL_TYPE(0.1029336421), FFX_BLUR_KERNEL_TYPE(0.0997829119), FFX_BLUR_KERNEL_TYPE(0.0908976484),
        FFX_BLUR_KERNEL_TYPE(0.0778120183), FFX_BLUR_KERNEL_TYPE(0.0625947824), FFX_BLUR_KERNEL_TYPE(0.0473180477),
        FFX_BLUR_KERNEL_TYPE(0.0336134033), FFX_BLUR_KERNEL_TYPE(0.0224385526), FFX_BLUR_KERNEL_TYPE(0.0140758142)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 10
    {
        FFX_BLUR_KERNEL_TYPE(0.1012533395), FFX_BLUR_KERNEL_TYPE(0.0981540422), FFX_BLUR_KERNEL_TYPE(0.089413823),
        FFX_BLUR_KERNEL_TYPE(0.0765418045), FFX_BLUR_KERNEL_TYPE(0.0615729768), FFX_BLUR_KERNEL_TYPE(0.0465456216),
        FFX_BLUR_KERNEL_TYPE(0.0330646936), FFX_BLUR_KERNEL_TYPE(0.0220722627), FFX_BLUR_KERNEL_TYPE(0.0138460388),
        FFX_BLUR_KERNEL_TYPE(0.0081620671)
    };
    #elif FFX_BLUR_KERNEL_RANGE == 11
    {
        FFX_BLUR_KERNEL_TYPE(0.1003459368), FFX_BLUR_KERNEL_TYPE(0.0972744146), FFX_BLUR_KERNEL_TYPE(0.0886125226),
        FFX_BLUR_KERNEL_TYPE(0.0758558594), FFX_BLUR_KERNEL_TYPE(0.0610211779), FFX_BLUR_KERNEL_TYPE(0.0461284934),
        FFX_BLUR_KERNEL_TYPE(0.0327683775), FFX_BLUR_KERNEL_TYPE(0.0218744576), FFX_BLUR_KERNEL_TYPE(0.0137219546),
        FFX_BLUR_KERNEL_TYPE(0.008088921), FFX_BLUR_KERNEL_TYPE(0.0044808529)
    };
    #endif
#else
#error FFX_BLUR_OPTION_KERNEL_PERMUTATION is not a valid value.
#endif // FFX_BLUR_OPTION_KERNEL_PERMUTATIONs

    return kernel_weights[iKernelIndex];
}

#if FFX_HALF

#if defined (FFX_BLUR_BIND_UAV_OUTPUT)
// Writes color to rw_output at outPxCoord; the fourth component is written as 1.
void FfxBlurStoreOutput(FfxInt32x2 outPxCoord, FfxFloat16x3 color)
{
    imageStore(rw_output, outPxCoord, FfxFloat16x4(color, 1));
}
#endif // #if defined (FFX_BLUR_BIND_UAV_OUTPUT)

#if defined (FFX_BLUR_BIND_SRV_INPUT_SRC)
// Fetches the texel at inPxCoord from level 0 of r_input_src and returns its
// rgb channels converted to FfxFloat16x3.
FfxFloat16x3 FfxBlurLoadInput(FfxInt16x2 inPxCoord)
{
    return FfxFloat16x3(texelFetch(r_input_src, inPxCoord, 0).rgb);
}
#endif // #if defined FFX_BLUR_BIND_SRV_INPUT_SRC

#else // FFX_HALF

#if defined (FFX_BLUR_BIND_UAV_OUTPUT)
// Writes color to rw_output at outPxCoord; the fourth component is written as 1.
void FfxBlurStoreOutput(FfxInt32x2 outPxCoord, FfxFloat32x3 color)
{
    imageStore(rw_output, outPxCoord, FfxFloat32x4(color, 1));
}
#endif // #if defined FFX_BLUR_BIND_UAV_OUTPUT

// DXIL generates load/sync/store blocks for each channel, ticket open: https://ontrack-internal.amd.com/browse/SWDEV-303837
// this is 10x times slower!!!
//void Blur_StoreOutput(FfxInt32x2 outPxCoord, FfxFloat32x3 color) { texColorOutput[outPxCoord].rgb = color; }

#if defined (FFX_BLUR_BIND_SRV_INPUT_SRC)
// Fetches the texel at inPxCoord from level 0 of r_input_src and returns its rgb channels.
FfxFloat32x3 FfxBlurLoadInput(FfxInt32x2 inPxCoord)
{
    return texelFetch(r_input_src, inPxCoord, 0).rgb;
}
#endif // #if defined FFX_BLUR_BIND_SRV_INPUT_SRC

#endif // !FFX_HALF

#endif // #if defined(FFX_GPU)