From 42aaf444ff9d38f283ce93871ce65e89dd35940f Mon Sep 17 00:00:00 2001 From: ecker Date: Mon, 20 Apr 2026 17:33:22 -0500 Subject: [PATCH] lots of changes (physics tweaks, actually use dynamic buffers for UBOs, perf bottleneck fixed, bug fixes, etc) --- bin/data/config.json | 8 +- bin/data/entities/prop.json | 4 +- bin/data/scenes/scene.json | 4 +- bin/data/shaders/graph/cull/comp.glsl | 224 +----------------- engine/inc/uf/ext/reactphysics/reactphysics.h | 2 + engine/inc/uf/ext/vulkan/buffer.h | 17 +- engine/inc/uf/ext/vulkan/graphic.h | 11 +- engine/inc/uf/ext/vulkan/rendermode.h | 11 +- engine/inc/uf/ext/vulkan/shader.h | 3 +- engine/inc/uf/ext/vulkan/swapchain.h | 5 +- engine/inc/uf/ext/vulkan/vk.h | 1 + engine/inc/uf/utils/math/physics/impl.h | 49 +++- engine/inc/uf/utils/thread/thread.h | 8 +- engine/src/engine/ext/ext.cpp | 7 +- engine/src/engine/ext/light/behavior.cpp | 3 +- engine/src/engine/ext/scene/behavior.cpp | 8 +- engine/src/engine/graph/graph.cpp | 6 +- engine/src/engine/object/behavior.cpp | 1 - engine/src/engine/scene/scene.cpp | 3 +- engine/src/ext/vulkan/buffer.cpp | 66 +++--- engine/src/ext/vulkan/device.cpp | 2 +- engine/src/ext/vulkan/graphic.cpp | 111 +++++---- engine/src/ext/vulkan/rendermode.cpp | 35 +-- engine/src/ext/vulkan/rendermodes/base.cpp | 130 +++++----- .../src/ext/vulkan/rendermodes/deferred.cpp | 56 ++--- .../ext/vulkan/rendermodes/rendertarget.cpp | 44 ++-- engine/src/ext/vulkan/shader.cpp | 23 +- engine/src/ext/vulkan/swapchain.cpp | 4 +- engine/src/ext/vulkan/texture.cpp | 2 +- engine/src/ext/vulkan/vulkan.cpp | 25 +- engine/src/utils/math/physics.cpp | 9 +- engine/src/utils/math/physics/bvh.inl | 110 +++++---- engine/src/utils/math/physics/helpers.inl | 23 +- engine/src/utils/math/physics/impl.cpp | 96 +++----- engine/src/utils/math/physics/integration.inl | 21 +- engine/src/utils/math/physics/plane.inl | 2 +- engine/src/utils/math/physics/solvers.inl | 32 +-- engine/src/utils/thread/thread.cpp | 47 ++-- 38 files changed, 
568 insertions(+), 645 deletions(-) diff --git a/bin/data/config.json b/bin/data/config.json index e93dd336..64d48ef8 100644 --- a/bin/data/config.json +++ b/bin/data/config.json @@ -9,8 +9,8 @@ "max": 32, "shadows": { "enabled": true, - "update": 4, - "max": 8, + "update": 8, + "max": 32, "samples": 2 }, "bloom": { @@ -101,7 +101,7 @@ "experimental": { "rebuild on tick begin": false, "batch queue submissions": true, - "dedicated thread": false, + "dedicated thread": false, // mostly works "memory budget": false, "register render modes": true, "skip render on rebuild": false @@ -115,7 +115,7 @@ "pipelines": { "deferred": true, "gui": true, - "vsync": false, // vsync on vulkan side rather than engine-side + "vsync": true, // vsync on vulkan side rather than engine-side "hdr": true, "vxgi": true, "culling": true, diff --git a/bin/data/entities/prop.json b/bin/data/entities/prop.json index 0fb2fcdc..130962ac 100644 --- a/bin/data/entities/prop.json +++ b/bin/data/entities/prop.json @@ -8,8 +8,8 @@ "physics": { "mass": 0, "inertia": false, - // "type": "bounding box" - "type": "mesh" + "type": "bounding box" + // "type": "mesh" } } } \ No newline at end of file diff --git a/bin/data/scenes/scene.json b/bin/data/scenes/scene.json index d7b75053..dd219ad7 100644 --- a/bin/data/scenes/scene.json +++ b/bin/data/scenes/scene.json @@ -37,7 +37,7 @@ "dialogue": "/gui/dialogue/main.json" }, "light": { - "enabled": true, + "0-enabled": true, "ambient": [ 0.1, 0.1, 0.1 ], @@ -49,7 +49,7 @@ "size": 8, "smoothness": 0.5 }, - "shadows": { + "0-shadows": { "enabled": true } }, diff --git a/bin/data/shaders/graph/cull/comp.glsl b/bin/data/shaders/graph/cull/comp.glsl index 345d6d13..d941df4b 100644 --- a/bin/data/shaders/graph/cull/comp.glsl +++ b/bin/data/shaders/graph/cull/comp.glsl @@ -5,7 +5,7 @@ #extension GL_EXT_samplerless_texture_functions : enable layout (constant_id = 0) const uint PASSES = 6; -layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +layout 
(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; #define COMPUTE 1 #define QUERY_MIPMAPS 1 @@ -90,14 +90,9 @@ bool frustumCull( uint id ) { if ( drawCommand.indices == 0 || drawCommand.vertices == 0 ) return false; - bool visible = false; + bool visible = true; for ( uint pass = 0; pass < PushConstant.passes; ++pass ) { -#if 0 - vec4 sphere = aabbToSphere( instance.bounds ); - vec3 center = vec3( camera.viewport[pass].view * object.model * vec4( ) ); -#else mat4 mat = camera.viewport[pass].projection * camera.viewport[pass].view * object.model; - #if 1 vec4 planes[6]; { for (int i = 0; i < 3; ++i) for (int j = 0; j < 2; ++j) { @@ -108,48 +103,18 @@ bool frustumCull( uint id ) { planes[i*2+j] = normalizePlane( planes[i*2+j] ); } } + bool insideFrustum = true; for ( uint p = 0; p < 6; ++p ) { float d = max(instance.bounds.min.x * planes[p].x, instance.bounds.max.x * planes[p].x) + max(instance.bounds.min.y * planes[p].y, instance.bounds.max.y * planes[p].y) + max(instance.bounds.min.z * planes[p].z, instance.bounds.max.z * planes[p].z); - if ( d > -planes[p].w ) return true; + + if (d < -planes[p].w) { + visible = false; + break; + } } - #else - vec4 corners[8] = { - vec4( instance.bounds.min.x, instance.bounds.min.y, instance.bounds.min.z, 1.0 ), - vec4( instance.bounds.max.x, instance.bounds.min.y, instance.bounds.min.z, 1.0 ), - vec4( instance.bounds.max.x, instance.bounds.max.y, instance.bounds.min.z, 1.0 ), - vec4( instance.bounds.min.x, instance.bounds.max.y, instance.bounds.min.z, 1.0 ), - - vec4( instance.bounds.min.x, instance.bounds.min.y, instance.bounds.max.z, 1.0 ), - vec4( instance.bounds.max.x, instance.bounds.min.y, instance.bounds.max.z, 1.0 ), - vec4( instance.bounds.max.x, instance.bounds.max.y, instance.bounds.max.z, 1.0 ), - vec4( instance.bounds.min.x, instance.bounds.max.y, instance.bounds.max.z, 1.0 ), - }; - vec4 planes[6]; { - #pragma unroll 3 - for (int i = 0; i < 3; ++i) - #pragma unroll 2 - for (int j = 0; j < 2; ++j) { - 
planes[i*2+j].x = mat[0][3] + (j == 0 ? mat[0][i] : -mat[0][i]); - planes[i*2+j].y = mat[1][3] + (j == 0 ? mat[1][i] : -mat[1][i]); - planes[i*2+j].z = mat[2][3] + (j == 0 ? mat[2][i] : -mat[2][i]); - planes[i*2+j].w = mat[3][3] + (j == 0 ? mat[3][i] : -mat[3][i]); - planes[i*2+j] = normalizePlane( planes[i*2+j] ); - } - } - #pragma unroll 8 - for ( uint p = 0; p < 8; ++p ) corners[p] = mat * corners[p]; - #pragma unroll 6 - for ( uint p = 0; p < 6; ++p ) { - #pragma unroll 8 - for ( uint q = 0; q < 8; ++q ) { - if ( dot( corners[q], planes[p] ) > 0 ) return true; - } - return false; - } - #endif -#endif + if ( !visible ) break; } return visible; } @@ -163,12 +128,11 @@ bool occlusionCull( uint id ) { bool visible = true; for ( uint pass = 0; pass < PushConstant.passes; ++pass ) { -#if 1 vec4 aabb; vec4 sphere = aabbToSphere( instance.bounds ); vec3 center = (camera.viewport[pass].view * object.model * vec4(sphere.xyz, 1)).xyz; float radius = (object.model * vec4(sphere.w, 0, 0, 0)).x; - // center.y *= -1; + mat4 proj = camera.viewport[pass].projection; float znear = proj[3][2]; float P00 = proj[0][0]; @@ -197,87 +161,6 @@ bool occlusionCull( uint id ) { //if the depth of the sphere is in front of the depth pyramid value, then the object is visible visible = visible && depthSphere >= depth - DEPTH_BIAS; } - -#else - mat4 mat = camera.viewport[pass].projection * camera.viewport[pass].view * object.model; - vec3 boundsSize = instance.bounds.max - instance.bounds.min; - vec3 points[8] = { - instance.bounds.min.xyz, - instance.bounds.min.xyz + vec3(boundsSize.x,0,0), - instance.bounds.min.xyz + vec3(0, boundsSize.y,0), - instance.bounds.min.xyz + vec3(0, 0, boundsSize.z), - instance.bounds.min.xyz + vec3(boundsSize.xy,0), - instance.bounds.min.xyz + vec3(0, boundsSize.yz), - instance.bounds.min.xyz + vec3(boundsSize.x, 0, boundsSize.z), - instance.bounds.min.xyz + boundsSize.xyz, - }; - vec2 minXY = vec2(1); - vec2 maxXY = vec2(0); - - float minZ = 1; - float maxZ = 0; 
- - #pragma unroll 8 - for ( uint i = 0; i < 8; ++i ) { - vec4 clip = mat * vec4( points[i], 1 ); - clip.xyz /= clip.w; - clip.xy = clip.xy * 0.5 + 0.5; - - minXY.x = min(minXY.x, clip.x); - minXY.y = min(minXY.y, clip.y); - - maxXY.x = max(maxXY.x, clip.x); - maxXY.y = max(maxXY.y, clip.y); - - #if INVERSE - clip.z = 1.0 - clip.z; - maxZ = max(maxZ, clip.z); - #else - minZ = min(minZ, clip.z); - #endif - } - - if ( maxXY.x <= 0 || maxXY.y <= 0 ) return false; - if ( minXY.x >= 1 || minXY.y >= 1 ) return false; - - ivec2 depthSize = textureSize( samplerDepth, 0 ); - float mips = mipLevels( depthSize ); - - vec4 uv = vec4(minXY, maxXY); - - ivec2 clipSize = ivec2(maxXY - minXY) * depthSize; - float mip = mipLevels( clipSize ); - mip = clamp( mip, 0, mips ); - if ( mip == 0 ) { - mip = 1; - } else { - float lower = max(mip - 1, 0); - float scale = exp2(-lower); - vec2 a = floor(uv.xy * scale); - vec2 b = ceil(uv.zw * scale); - vec2 dims = b - a; - - // Use the lower level if we only touch <= 2 texels in both dimensions - if (dims.x <= 2 && dims.y <= 2) mip = lower; - } - - float depths[4] = { - textureLod( samplerDepth, uv.xy, mip ).r, - textureLod( samplerDepth, uv.zy, mip ).r, - textureLod( samplerDepth, uv.xw, mip ).r, - textureLod( samplerDepth, uv.zw, mip ).r, - }; - #if INVERSE - float minDepth = 1.0 - min(min(min(depths[0], depths[1]), depths[2]), depths[3]); - #else - float maxDepth = max(max(max(depths[0], depths[1]), depths[2]), depths[3]); - #endif - - instances[drawCommand.instanceID].bounds.padding1 = minZ; - instances[drawCommand.instanceID].bounds.padding2 = maxDepth; - - return minZ <= maxDepth; -#endif } return visible; } @@ -288,90 +171,5 @@ void main() { bool visible = frustumCull( gID ); // if ( visible ) visible = occlusionCull( gID ); -// bool visible = occlusionCull( gID ); drawCommands[gID].instances = visible ? 
1 : 0; -} - - -/* - Frustum frustum; - for (int i = 0; i < 3; ++i) - for (int j = 0; j < 2; ++j) { - frustum.planes[i*2+j].x = mat[0][3] + (j == 0 ? mat[0][i] : -mat[0][i]); - frustum.planes[i*2+j].y = mat[1][3] + (j == 0 ? mat[1][i] : -mat[1][i]); - frustum.planes[i*2+j].z = mat[2][3] + (j == 0 ? mat[2][i] : -mat[2][i]); - frustum.planes[i*2+j].w = mat[3][3] + (j == 0 ? mat[3][i] : -mat[3][i]); - frustum.planes[i*2+j]*= length(frustum.planes[i*2+j].xyz); - } - for ( uint i = 0; i < 6; ++i ) { - vec4 plane = frustum.planes[i]; - float d = dot(instance.bounds.center, plane.xyz); - float r = dot(instance.bounds.extent, abs(plane.xyz)); - bool inside = d + r > -plane.w; - if ( !inside ) return 0; - } - return true; -*/ -/* - vec4 plane; - vec4 center = vec4( (max + min) * 0.5, 1 ); - vec4 extent = vec4( (max - min) * 0.5, 1 ); - center = mat * center; - extent = mat * extent; - center.xyz /= center.w; - extent.xyz /= extent.w; - for (int i = 0; i < 4; ++i ) plane[i] = mat[i][3] + mat[i][0]; // left - visible = dot(center.xyz + extent.xyz * sign(plane.xyz), plane.xyz ) > -plane.w; - if ( visible ) return true; - - for (int i = 0; i < 4; ++i ) plane[i] = mat[i][3] - mat[i][0]; // right - visible = dot(center.xyz + extent.xyz * sign(plane.xyz), plane.xyz ) > -plane.w; - if ( visible ) return true; - - for (int i = 0; i < 4; ++i ) plane[i] = mat[i][3] + mat[i][1]; // bottom - visible = dot(center.xyz + extent.xyz * sign(plane.xyz), plane.xyz ) > -plane.w; - if ( visible ) return true; - - for (int i = 0; i < 4; ++i ) plane[i] = mat[i][3] - mat[i][1]; // top - visible = dot(center.xyz + extent.xyz * sign(plane.xyz), plane.xyz ) > -plane.w; - if ( visible ) return true; - - for (int i = 0; i < 4; ++i ) plane[i] = mat[i][3] + mat[i][2]; // near - visible = dot(center.xyz + extent.xyz * sign(plane.xyz), plane.xyz ) > -plane.w; - if ( visible ) return true; - - for (int i = 0; i < 4; ++i ) plane[i] = mat[i][3] - mat[i][2]; // far - visible = dot(center.xyz + extent.xyz * 
sign(plane.xyz), plane.xyz ) > -plane.w; - if ( visible ) return true; - -*/ -/* - for ( uint p = 0; p < 8; ++p ) { - vec4 t = corners[p]; - float w = abs(t.w); - visible = -w <= t.x && t.x <= w && -w <= t.y && t.y <= w && 0 <= t.z && t.z <= w; // && -w <= t.z && t.z <= w; - } -*/ -/* -mat4 convert( mat4 proj ) { - float f = -proj[1][1]; - float raidou = f / proj[0][0]; - float zNear = proj[3][2]; - float zFar = 32; - - float range = zNear - zFar; - - float Sx = f * raidou; - float Sy = f; - float Sz = (-zNear - zFar) / range; - float Pz = 2 * zFar * zNear / range; - - mat4 new = mat4(1.0); - new[0][0] = Sx; - new[1][1] = -Sy; - new[2][2] = Sz; - new[3][2] = Pz; - new[2][3] = 1; - return new; -} -*/ \ No newline at end of file +} \ No newline at end of file diff --git a/engine/inc/uf/ext/reactphysics/reactphysics.h b/engine/inc/uf/ext/reactphysics/reactphysics.h index f6b05545..533b49b3 100644 --- a/engine/inc/uf/ext/reactphysics/reactphysics.h +++ b/engine/inc/uf/ext/reactphysics/reactphysics.h @@ -90,6 +90,8 @@ namespace ext { void UF_API terminate( uf::Object& ); extern UF_API float timescale; + extern UF_API bool async; + extern UF_API bool interpolate; extern UF_API bool shared; extern UF_API bool globalStorage; diff --git a/engine/inc/uf/ext/vulkan/buffer.h b/engine/inc/uf/ext/vulkan/buffer.h index e22dde1b..f1f15a0b 100644 --- a/engine/inc/uf/ext/vulkan/buffer.h +++ b/engine/inc/uf/ext/vulkan/buffer.h @@ -18,8 +18,9 @@ namespace ext { 0 }; VkDeviceSize alignment = 0; - size_t address = {}; + mutable size_t address = {}; void* mapped = nullptr; + int32_t count = 1; VkBufferUsageFlags usage = 0; VkMemoryPropertyFlags memoryProperties = 0; @@ -29,25 +30,19 @@ namespace ext { void* map( VkDeviceSize size = VK_WHOLE_SIZE, VkDeviceSize offset = 0 ); void unmap(); - // void* map( VkDeviceSize size = VK_WHOLE_SIZE, VkDeviceSize offset = 0 ) const; - // void unmap() const; - // VkResult bind( VkDeviceSize offset = 0 ); - // void copyTo( void* data, VkDeviceSize size 
); - // VkResult flush( VkDeviceSize size = VK_WHOLE_SIZE, VkDeviceSize offset = 0 ) const; - // VkResult invalidate( VkDeviceSize size = VK_WHOLE_SIZE, VkDeviceSize offset = 0 ); - void updateDescriptor( VkDeviceSize size = VK_WHOLE_SIZE, VkDeviceSize offset = 0 ); void allocate( VkBufferCreateInfo ); - uint64_t getAddress(); uint64_t getAddress() const; + + VkDeviceSize getLength() const; // returns the aligned length for the entire buffer + VkDeviceSize getOffset( size_t = 0 ) const; // returns the offset / stride / length of one object within the buffer - // RAII ~Buffer(); void initialize( ext::vulkan::Device& device, size_t = {} ); void initialize( const void*, VkDeviceSize, VkBufferUsageFlags, VkMemoryPropertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, bool = VK_DEFAULT_STAGE_BUFFERS ); - bool update( const void*, VkDeviceSize, bool = VK_DEFAULT_STAGE_BUFFERS ) const; + bool update( const void*, VkDeviceSize, bool = VK_DEFAULT_STAGE_BUFFERS ) const; // returns true if a reallocation occurred (to signal rebuilding command buffers) void destroy(bool = VK_DEFAULT_DEFER_BUFFER_DESTROY); void swap( Buffer& ); diff --git a/engine/inc/uf/ext/vulkan/graphic.h b/engine/inc/uf/ext/vulkan/graphic.h index ebc09cb0..01f5ac15 100644 --- a/engine/inc/uf/ext/vulkan/graphic.h +++ b/engine/inc/uf/ext/vulkan/graphic.h @@ -38,12 +38,14 @@ namespace ext { void initialize( const Graphic& graphic, const GraphicDescriptor& descriptor ); void update( const Graphic& graphic ); void update( const Graphic& graphic, const GraphicDescriptor& descriptor ); - void record( const Graphic& graphic, VkCommandBuffer, size_t = 0, size_t = 0 ) const; - void record( const Graphic& graphic, const GraphicDescriptor& descriptor, VkCommandBuffer, size_t = 0, size_t = 0 ) const; + void record( const Graphic& graphic, VkCommandBuffer, size_t = 0, size_t = 0, size_t = 0 ) const; + void record( const Graphic& graphic, const GraphicDescriptor& descriptor, VkCommandBuffer, size_t = 0, size_t = 0, size_t = 
0 ) const; void destroy(); uf::stl::vector getShaders( uf::stl::vector& ); uf::stl::vector getShaders( const uf::stl::vector& ) const; + + void collectBuffers( const Shader& shader, const RenderMode& renderMode, const Graphic& graphic, const std::function& lambda ) const; }; struct UF_API Material { @@ -114,8 +116,9 @@ namespace ext { void updatePipelines(); - void record( VkCommandBuffer commandBuffer, size_t pass = 0, size_t draw = 0 ) const; - void record( VkCommandBuffer commandBuffer, const GraphicDescriptor& descriptor, size_t pass = 0, size_t draw = 0 ) const; + void record( VkCommandBuffer commandBuffer, size_t pass = 0, size_t draw = 0, size_t offset = 0 ) const; + void record( VkCommandBuffer commandBuffer, const GraphicDescriptor& descriptor, size_t pass = 0, size_t draw = 0, size_t offset = 0 ) const; + }; } } \ No newline at end of file diff --git a/engine/inc/uf/ext/vulkan/rendermode.h b/engine/inc/uf/ext/vulkan/rendermode.h index 39928edf..73a4a96a 100644 --- a/engine/inc/uf/ext/vulkan/rendermode.h +++ b/engine/inc/uf/ext/vulkan/rendermode.h @@ -7,6 +7,14 @@ #include #include +#define VK_COMMAND_BUFFER_CALLBACK( pass, commandBuffer, i, f ) {\ + auto it = commandBufferCallbacks.find(pass);\ + if ( it != commandBufferCallbacks.end() ) {\ + commandBufferCallbacks[pass]( commandBuffer, i );\ + f;\ + }\ +} + namespace ext { namespace vulkan { struct Graphic; @@ -54,8 +62,9 @@ namespace ext { Device* device = VK_NULL_HANDLE; RenderTarget renderTarget; - VkSemaphore renderCompleteSemaphore; + uf::stl::vector renderCompleteSemaphores; uf::stl::vector fences; + uf::renderer::QueueEnum queueEnum = {}; typedef uf::stl::vector commands_container_t; std::thread::id mostRecentCommandPoolId; diff --git a/engine/inc/uf/ext/vulkan/shader.h b/engine/inc/uf/ext/vulkan/shader.h index 040a1466..9dc48def 100644 --- a/engine/inc/uf/ext/vulkan/shader.h +++ b/engine/inc/uf/ext/vulkan/shader.h @@ -124,6 +124,8 @@ namespace ext { uf::stl::vector attachments; uf::stl::vector 
buffers; } aliases; + + uf::stl::vector dynamicRanges; } metadata; ext::vulkan::userdata_t specializationConstants; @@ -177,7 +179,6 @@ namespace ext { void setSpecializationConstants( const uf::stl::unordered_map& values ); void setDescriptorCounts( const uf::stl::unordered_map& values ); - /* uf::Serializer getUniformJson( const uf::stl::string& name, bool cache = true ); bool updateUniform( const uf::stl::string& name, const ext::json::Value& payload ); diff --git a/engine/inc/uf/ext/vulkan/swapchain.h b/engine/inc/uf/ext/vulkan/swapchain.h index 1cf721ca..a8d8e215 100644 --- a/engine/inc/uf/ext/vulkan/swapchain.h +++ b/engine/inc/uf/ext/vulkan/swapchain.h @@ -13,11 +13,10 @@ namespace ext { bool initialized = false; uint32_t buffers = {}; - VkSemaphore presentCompleteSemaphore; - VkSemaphore renderCompleteSemaphore; + uf::stl::vector presentCompleteSemaphores; // helpers - VkResult acquireNextImage( uint32_t* imageIndex, VkSemaphore ); + VkResult acquireNextImage( uint32_t* imageIndex, VkSemaphore, VkFence = nullptr ); VkResult queuePresent( VkQueue queue, uint32_t imageIndex, VkSemaphore waitSemaphore = VK_NULL_HANDLE ); // RAII diff --git a/engine/inc/uf/ext/vulkan/vk.h b/engine/inc/uf/ext/vulkan/vk.h index 5ef31176..0daea033 100644 --- a/engine/inc/uf/ext/vulkan/vk.h +++ b/engine/inc/uf/ext/vulkan/vk.h @@ -22,6 +22,7 @@ #define VK_DEFAULT_STAGE_BUFFERS ext::vulkan::settings::defaultStageBuffers #define VK_DEFAULT_DEFER_BUFFER_DESTROY ext::vulkan::settings::defaultDeferBufferDestroy #define VK_DEFAULT_COMMAND_BUFFER_IMMEDIATE ext::vulkan::settings::defaultCommandBufferImmediate +#define VK_UBO_USE_N_BUFFERS 1 namespace ext { namespace vulkan { diff --git a/engine/inc/uf/utils/math/physics/impl.h b/engine/inc/uf/utils/math/physics/impl.h index 87396b96..eb1a9a86 100644 --- a/engine/inc/uf/utils/math/physics/impl.h +++ b/engine/inc/uf/utils/math/physics/impl.h @@ -227,6 +227,50 @@ namespace pod { pod::BVH::pairs_t pairs; }; + struct PhysicsSettings { + bool 
warmupSolver = true; // cache manifold data to warm up the solver + bool blockContactSolver = true; // use BlockNxN solvers (where N = number of contacts for a manifold) + bool psgContactSolver = true; // use PSG contact solver + bool useGjk = false; // currently don't have a way to broadphase mesh => narrowphase tri via GJK + bool fixedStep = true; // run physics simulation with a fixed delta time (with accumulation), rather than rely on actual engine deltatime + uint32_t substeps = 4; // number of substeps per frame tick + uint32_t reserveCount = 32; // amount of elements to reserve for vectors used in this system, to-do: have it tie to a memory pool allocator + + // increasing these make things lag for reasons I can imagine why + uint32_t broadphaseBvhCapacity = 4; // number of bodies per leaf node + uint32_t meshBvhCapacity = 4; // number of triangles per leaf node + + // additionally flattens a BVH for linear iteration, rather than a recursive / stack-based traversal + bool flattenBvhBodies = true; + bool flattenBvhMeshes = true; + + // use surface area heuristics for building the BVH, rather than naive splits + bool useBvhSahBodies = true; // it actually seems slower to use these...... 
+ bool useBvhSahMeshes = true; + + bool useSplitBvhs = true; // creates separate BVHs for static / dynamic objects + + // to-do: find possibly better values for this + uint32_t solverIterations = 10; + float baumgarteCorrectionPercent = 0.4f; + float baumgarteCorrectionSlop = 0.01f; + + uf::stl::unordered_map manifoldsCache; + uint32_t manifoldCacheLifetime = 6; // to-do: find a good value for this + + uint32_t frameCounter = 0; + + // to-do: tweak this to not be annoying + pod::BVH::UpdatePolicy bvhUpdatePolicy = { + .displacementThreshold = 0.25f, + .overlapThreshold = 2.0f, + .dirtyRatioThreshold = 0.3f, + .maxFramesBeforeRebuild = 60, // * 10, // 10 seconds + }; + + float groundedThreshold = 0.7f; // threshold before marking a body as grounded + }; + struct World { uf::stl::vector bodies; @@ -240,11 +284,14 @@ namespace uf { namespace physics { namespace impl { extern UF_API float timescale; + extern UF_API bool async; + extern UF_API bool interpolate; extern UF_API bool shared; extern UF_API bool globalStorage; extern UF_API pod::World world; + extern UF_API pod::PhysicsSettings settings; void UF_API initialize(); void UF_API initialize( uf::Object& ); @@ -272,7 +319,7 @@ namespace uf { void UF_API updateInertia( pod::PhysicsBody& body ); void UF_API applyForce( pod::PhysicsBody& body, const pod::Vector3f& force ); - void UF_API applyForceAtPoint( pod::PhysicsBody body, const pod::Vector3f& force, const pod::Vector3f& point ); + void UF_API applyForceAtPoint( pod::PhysicsBody& body, const pod::Vector3f& force, const pod::Vector3f& point ); void UF_API applyImpulse( pod::PhysicsBody& body, const pod::Vector3f& impulse ); void UF_API applyTorque( pod::PhysicsBody& body, const pod::Vector3f& torque ); diff --git a/engine/inc/uf/utils/thread/thread.h b/engine/inc/uf/utils/thread/thread.h index e8b24cfb..198217c5 100644 --- a/engine/inc/uf/utils/thread/thread.h +++ b/engine/inc/uf/utils/thread/thread.h @@ -40,6 +40,7 @@ namespace pod { std::condition_variable 
queued; std::condition_variable finished; } conditions; + std::thread thread; pod::Thread::queue_t queue; @@ -47,6 +48,7 @@ namespace pod { uf::Timer timer; uint affinity = 0; + std::atomic pending{0}; struct UF_API Tasks { uf::stl::string name = uf::thread::workerThreadName; @@ -112,7 +114,11 @@ namespace uf { // schedules to named thread inline void queue( const uf::stl::string& name, const pod::Thread::function_t& fun ) { return uf::thread::queue( uf::thread::get(name), fun ); } inline void add( const uf::stl::string& name, const pod::Thread::function_t& fun ) { return uf::thread::add( uf::thread::get(name), fun ); } - + + /* + template + inline void queue( const uf::stl::string& name, const F& fun ) { return uf::thread::queue( uf::thread::get(name), [=](){ fun(); } ); } + */ void UF_API process( pod::Thread& ); void UF_API wait( pod::Thread& ); diff --git a/engine/src/engine/ext/ext.cpp b/engine/src/engine/ext/ext.cpp index 94052e3e..52b7cfef 100644 --- a/engine/src/engine/ext/ext.cpp +++ b/engine/src/engine/ext/ext.cpp @@ -753,6 +753,8 @@ void UF_API uf::initialize() { } void UF_API uf::tick() { + ++uf::time::frame; + #if 1 if ( /*global*/::sceneTransition.phase >= 0 ) { auto target = /*global*/::sceneTransition.payload["scene"].as(); @@ -837,13 +839,14 @@ void UF_API uf::tick() { lMetadata["light"]["color"][2] = (rand() % 100) / 100.0; } auto& sMetadata = scene.getComponent(); - sMetadata["light"]["should"] = true; + sMetadata["light"]["enabled"] = true; } } } #endif /* Update physics timer */ { - // uf::physics::tick(); + // to-do: add setting to either run in main thread or defer to a background thread + uf::physics::tick(); } /* Update entities */ { uf::scene::tick(); diff --git a/engine/src/engine/ext/light/behavior.cpp b/engine/src/engine/ext/light/behavior.cpp index dbb745b9..11f32424 100644 --- a/engine/src/engine/ext/light/behavior.cpp +++ b/engine/src/engine/ext/light/behavior.cpp @@ -40,6 +40,7 @@ void ext::LightBehavior::initialize( uf::Object& 
self ) { if ( ++::roundRobin.current >= ::roundRobin.lights.size() ) ::roundRobin.current = 0; }); } + /* if ( !metadataJson["light"]["bias"]["shader"].is() ) metadataJson["light"]["bias"]["shader"] = 0.000000005f; */ @@ -56,7 +57,7 @@ void ext::LightBehavior::initialize( uf::Object& self ) { #if UF_USE_OPENGL metadataJson["light"]["shadows"] = false; #endif - if ( !sceneMetadataJson["lights"]["shadows"]["enabled"].as(true) ) { + if ( !sceneMetadataJson["light"]["shadows"]["enabled"].as(true) ) { metadataJson["light"]["shadows"] = false; } if ( metadataJson["light"]["shadows"].as() ) { diff --git a/engine/src/engine/ext/scene/behavior.cpp b/engine/src/engine/ext/scene/behavior.cpp index e86d412c..ca9ebffd 100644 --- a/engine/src/engine/ext/scene/behavior.cpp +++ b/engine/src/engine/ext/scene/behavior.cpp @@ -767,8 +767,7 @@ void ext::ExtSceneBehavior::destroy( uf::Object& self ) { } } void ext::ExtSceneBehavior::Metadata::serialize( uf::Object& self, uf::Serializer& serializer ) { - serializer["light"]["should"] = /*this->*/light.enabled; - + serializer["light"]["enabled"] = /*this->*/light.enabled; serializer["light"]["ambient"] = uf::vector::encode( /*this->*/light.ambient ); serializer["light"]["exposure"] = /*this->*/light.exposure; serializer["light"]["gamma"] = /*this->*/light.gamma; @@ -834,7 +833,7 @@ void ext::ExtSceneBehavior::Metadata::deserialize( uf::Object& self, uf::Seriali /*this->*/shadow.update = serializer["light"]["shadows"]["update"].as(/*this->*/shadow.update); /*this->*/shadow.typeMap = serializer["light"]["shadows"]["map type"].as(/*this->*/shadow.typeMap); - /*this->*/light.enabled = serializer["light"]["enabled"].as(/*this->*/light.enabled) && serializer["light"]["should"].as(/*this->*/light.enabled); + /*this->*/light.enabled = serializer["light"]["enabled"].as(/*this->*/light.enabled) && serializer["light"]["enabled"].as(/*this->*/light.enabled); /*this->*/light.max = serializer["light"]["max"].as(/*this->*/light.max); 
/*this->*/light.ambient = uf::vector::decode( serializer["light"]["ambient"], /*this->*/light.ambient); @@ -1228,6 +1227,9 @@ void ext::ExtSceneBehavior::bindBuffers( uf::Object& self, uf::renderer::Graphic auto& shader = graphic.material.getShader(shaderType, shaderPipeline); if ( !shader.hasUniform("UBO") ) return; //UF_MSG_DEBUG( "{}: {} {} // {}", uf::string::toString( self ), shaderType, shaderPipeline, uf::string::toString( uf::scene::getCurrentScene() ) ); + +// if ( controller.getName() == "Player" ) UF_MSG_DEBUG("frame={}, camera={}", uf::time::frame, uf::matrix::toString( uniforms.matrices[0].view )); + shader.updateBuffer( (const void*) &uniforms, sizeof(uniforms), shader.getUniformBuffer("UBO") ); bool shouldUpdate2 = !uf::matrix::equals( uniforms.matrices[0].view, previousUniforms.matrices[0].view, 0.0001f ); diff --git a/engine/src/engine/graph/graph.cpp b/engine/src/engine/graph/graph.cpp index b0f23702..8f5c8c67 100644 --- a/engine/src/engine/graph/graph.cpp +++ b/engine/src/engine/graph/graph.cpp @@ -1494,7 +1494,9 @@ void uf::graph::render( uf::Object& object ) { } void uf::graph::render( pod::Graph::Storage& storage ) { auto* renderMode = uf::renderer::getCurrentRenderMode(); - + + if ( renderMode->getName() == "Gui" ) return; + auto& scene = uf::scene::getCurrentScene(); auto& controller = scene.getController(); auto& camera = scene.getCamera( controller ); @@ -1509,6 +1511,8 @@ void uf::graph::render( pod::Graph::Storage& storage ) { } #endif +// if ( controller.getName() == "Player" ) UF_MSG_DEBUG("frame={}, camera={}, renderMode={}, {}", uf::time::frame, uf::matrix::toString( viewport.matrices[0].view ), renderMode->getName(), renderMode->getType() ); + storage.buffers.camera.update( (const void*) &viewport, sizeof(pod::Camera::Viewports) ); #if UF_USE_VULKAN diff --git a/engine/src/engine/object/behavior.cpp b/engine/src/engine/object/behavior.cpp index c26da4d0..1824226e 100644 --- a/engine/src/engine/object/behavior.cpp +++ 
b/engine/src/engine/object/behavior.cpp @@ -140,7 +140,6 @@ void uf::ObjectBehavior::initialize( uf::Object& self ) { pod::Vector3f min = uf::vector::decode( metadataJsonPhysics["min"], pod::Vector3f{-0.5f, -0.5f, -0.5f} ); pod::Vector3f max = uf::vector::decode( metadataJsonPhysics["max"], pod::Vector3f{0.5f, 0.5f, 0.5f} ); - UF_MSG_DEBUG("entity={}, min={}, max={}", uf::string::toString( *this ), uf::vector::toString( min ), uf::vector::toString( max )); #if UF_USE_REACTPHYSICS auto center = ( max + min ) * 0.5f; if ( metadataJsonPhysics["recenter"].as(true) ) offset = (center - transform.position); diff --git a/engine/src/engine/scene/scene.cpp b/engine/src/engine/scene/scene.cpp index 5fbdcc66..d6c355da 100644 --- a/engine/src/engine/scene/scene.cpp +++ b/engine/src/engine/scene/scene.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include UF_OBJECT_REGISTER_BEGIN(uf::Scene) @@ -268,7 +269,7 @@ void uf::scene::tick() { auto& scene = uf::scene::getCurrentScene(); auto/*&*/ graph = scene.getGraph(true); - uf::physics::tick( scene ); +// uf::physics::tick( scene ); #if !UF_SCENE_GLOBAL_GRAPH auto& metadata = scene.getComponent(); diff --git a/engine/src/ext/vulkan/buffer.cpp b/engine/src/ext/vulkan/buffer.cpp index a8e7d1a1..a07e6e7d 100644 --- a/engine/src/ext/vulkan/buffer.cpp +++ b/engine/src/ext/vulkan/buffer.cpp @@ -44,37 +44,13 @@ void ext::vulkan::Buffer::aliasBuffer( const ext::vulkan::Buffer& buffer ) { void* ext::vulkan::Buffer::map( VkDeviceSize size, VkDeviceSize offset ) { if ( !mapped ) VK_CHECK_RESULT(vmaMapMemory( allocator, allocation, &mapped )); - return mapped; + return static_cast(mapped) + offset;; } void ext::vulkan::Buffer::unmap() { if ( !mapped ) return; vmaUnmapMemory( allocator, allocation ); mapped = nullptr; } -/* -void* ext::vulkan::Buffer::map( VkDeviceSize size, VkDeviceSize offset ) const { - void* mapped{}; - VK_CHECK_RESULT(vmaMapMemory( allocator, allocation, &mapped )); - return mapped; -} -void 
ext::vulkan::Buffer::unmap() const { - vmaUnmapMemory( allocator, allocation ); -} -VkResult ext::vulkan::Buffer::bind( VkDeviceSize offset ) { - return VK_SUCCESS; -} -VkResult ext::vulkan::Buffer::flush( VkDeviceSize size, VkDeviceSize offset ) const { - return VK_SUCCESS; -} - -VkResult ext::vulkan::Buffer::invalidate( VkDeviceSize size, VkDeviceSize offset ) { - return VK_SUCCESS; -} -void ext::vulkan::Buffer::copyTo( void* data, VkDeviceSize size ) { - assert(mapped); - memcpy(mapped, data, size); -} -*/ void ext::vulkan::Buffer::updateDescriptor( VkDeviceSize size, VkDeviceSize offset ) { descriptor.offset = offset; @@ -95,7 +71,7 @@ void ext::vulkan::Buffer::allocate( VkBufferCreateInfo bufferCreateInfo ) { VK_REGISTER_HANDLE( buffer ); } -size_t ext::vulkan::Buffer::getAddress() { +size_t ext::vulkan::Buffer::getAddress() const { // if ( !(usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) ) UF_MSG_DEBUG("CALLING GETADDRESS ON BUFFER WITHOUT ADDRESS BIT: {}", fmt::ptr(this->buffer)); if ( this->address ) return this->address; @@ -104,14 +80,12 @@ size_t ext::vulkan::Buffer::getAddress() { info.buffer = buffer; return (this->address = vkGetBufferDeviceAddressKHR(this->device ? *this->device : ext::vulkan::device, &info)); } -size_t ext::vulkan::Buffer::getAddress() const { -// if ( !(usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) ) UF_MSG_DEBUG("CALLING GETADDRESS ON BUFFER WITHOUT ADDRESS BIT: {}", fmt::ptr(this->buffer)); - if ( this->address ) return this->address; - VkBufferDeviceAddressInfoKHR info{}; - info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; - info.buffer = buffer; - return vkGetBufferDeviceAddressKHR(this->device ? 
*this->device : ext::vulkan::device, &info); +size_t ext::vulkan::Buffer::getLength( ) const { + return allocationInfo.size; +} +size_t ext::vulkan::Buffer::getOffset( size_t i ) const { + return this->getLength() / this->count * i; } ext::vulkan::Buffer::~Buffer() { @@ -147,16 +121,28 @@ void ext::vulkan::Buffer::initialize( const void* data, VkDeviceSize length, VkB if ( !device ) device = &ext::vulkan::device; if ( stage ) usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; // implicitly set properties -// if ( usage != VK_BUFFER_USAGE_TRANSFER_SRC_BIT ) usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + // assume all UBOs are dynamic + auto totalLength = length; +#if VK_UBO_USE_N_BUFFERS + if ( usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ) { + this->count = ext::vulkan::swapchain.buffers; + this->alignment = device->properties.limits.minUniformBufferOffsetAlignment; + totalLength = ALIGNED_SIZE( length, this->alignment ) * this->count; + } +#endif VK_CHECK_RESULT(device->createBuffer( nullptr, - length, + totalLength, usage, memoryProperties, *this )); + if ( length != totalLength ) { + this->updateDescriptor( length, 0 ); + } + if ( data && length ) update( data, length, stage ); /* @@ -187,6 +173,13 @@ bool ext::vulkan::Buffer::update( const void* data, VkDeviceSize length, bool st if ( !length ) return false; if ( !buffer ) return false; + VkDeviceSize offset = 0; +#if VK_UBO_USE_N_BUFFERS + if ( this->count == ext::vulkan::swapchain.buffers ) { + offset = this->getOffset( states::currentBuffer ); + } +#endif + // to-do: fix this because it's a thorn in my side when a mesh needs to update if ( length > allocationInfo.size ) { UF_MSG_WARNING("Buffer update of {} exceeds buffer size of {}", length, allocationInfo.size); @@ -206,7 +199,7 @@ bool ext::vulkan::Buffer::update( const void* data, VkDeviceSize length, bool st if ( !data ) return false; if ( !stage ) { auto* self = const_cast(this); - void* map = self->map(); + void* map = self->map(length, offset); 
memcpy(map, data, length); self->unmap(); return false; @@ -224,6 +217,7 @@ bool ext::vulkan::Buffer::update( const void* data, VkDeviceSize length, bool st auto commandBuffer = device->fetchCommandBuffer(QueueEnum::TRANSFER); // waits on finish VkBufferCopy region = {}; region.size = length; + region.dstOffset = offset; device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "copyBuffer" ); vkCmdCopyBuffer(commandBuffer, staging.buffer, buffer, 1, ®ion); device->flushCommandBuffer(commandBuffer); diff --git a/engine/src/ext/vulkan/device.cpp b/engine/src/ext/vulkan/device.cpp index 037afbfc..ad8ac3f9 100644 --- a/engine/src/ext/vulkan/device.cpp +++ b/engine/src/ext/vulkan/device.cpp @@ -984,7 +984,7 @@ void ext::vulkan::Device::initialize() { } auto& deviceInfo = deviceInfos[bestDeviceIndex]; this->physicalDevice = deviceInfo.handle; - VK_VALIDATION_MESSAGE("Usind device #{}: (score: {} | device ID: {} | vendor ID: {} | API version: {} | driver version: {})", bestDeviceIndex, deviceInfo.properties.deviceName, deviceInfo.score, deviceInfo.properties.deviceID, deviceInfo.properties.vendorID, deviceInfo.properties.apiVersion, deviceInfo.properties.driverVersion ); + VK_VALIDATION_MESSAGE("Using device #{}: (score: {} | device ID: {} | vendor ID: {} | API version: {} | driver version: {})", bestDeviceIndex, deviceInfo.properties.deviceName, deviceInfo.score, deviceInfo.properties.deviceID, deviceInfo.properties.vendorID, deviceInfo.properties.apiVersion, deviceInfo.properties.driverVersion ); /* VK_VALIDATION_MESSAGE("Using device #" << bestDeviceIndex << " (" "score: " << deviceInfo.score << " | " diff --git a/engine/src/ext/vulkan/graphic.cpp b/engine/src/ext/vulkan/graphic.cpp index dd0bbc95..8388fffb 100644 --- a/engine/src/ext/vulkan/graphic.cpp +++ b/engine/src/ext/vulkan/graphic.cpp @@ -393,12 +393,18 @@ PIPELINE_INITIALIZATION_INVALID: }); return; } -void ext::vulkan::Pipeline::record( const Graphic& graphic, VkCommandBuffer commandBuffer, size_t 
pass, size_t draw ) const { - return record( graphic, descriptor, commandBuffer, pass, draw ); +void ext::vulkan::Pipeline::record( const Graphic& graphic, VkCommandBuffer commandBuffer, size_t pass, size_t draw, size_t offset ) const { + return record( graphic, descriptor, commandBuffer, pass, draw, offset ); } -void ext::vulkan::Pipeline::record( const Graphic& graphic, const GraphicDescriptor& descriptor, VkCommandBuffer commandBuffer, size_t pass, size_t draw ) const { +void ext::vulkan::Pipeline::record( const Graphic& graphic, const GraphicDescriptor& descriptor, VkCommandBuffer commandBuffer, size_t pass, size_t draw, size_t offset ) const { auto shaders = getShaders( graphic.material.shaders ); + // create dynamic offset ranges + static thread_local uf::stl::vector dynamicOffsets; + dynamicOffsets.clear(); + + RenderMode& renderMode = ext::vulkan::getRenderMode(descriptor.renderMode, true); + bool bound = false; for ( auto* shader : shaders ) { // compute shaders @@ -435,13 +441,23 @@ void ext::vulkan::Pipeline::record( const Graphic& graphic, const GraphicDescrip vkCmdPushConstants( commandBuffer, pipelineLayout, shader->descriptor.stage, 0, size, data ); } } + + dynamicOffsets.insert( dynamicOffsets.end(), shader->metadata.dynamicRanges.begin(), shader->metadata.dynamicRanges.end() ); + } + + for ( auto& dynamicOffset : dynamicOffsets ) { + dynamicOffset *= offset; } // no matching bind point for shaders, skip if ( !bound ) return; // Bind descriptor sets describing shader binding points - vkCmdBindDescriptorSets(commandBuffer, (VkPipelineBindPoint)descriptor.bind.point, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); +#if VK_UBO_USE_N_BUFFERS + vkCmdBindDescriptorSets(commandBuffer, (VkPipelineBindPoint) descriptor.bind.point, pipelineLayout, 0, 1, &descriptorSet, dynamicOffsets.size(), dynamicOffsets.data()); +#else + vkCmdBindDescriptorSets(commandBuffer, (VkPipelineBindPoint) descriptor.bind.point, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); 
+#endif // Bind the rendering pipeline // The pipeline (state object) contains all states of the rendering pipeline, binding it will set all the states specified at pipeline creation time vkCmdBindPipeline(commandBuffer, (VkPipelineBindPoint)descriptor.bind.point, pipeline); @@ -516,50 +532,10 @@ void ext::vulkan::Pipeline::update( const Graphic& graphic, const GraphicDescrip auto& infos = INFOS.emplace_back(); uf::stl::vector types; - // add aliased-by-name buffers - for ( auto& descriptor : shader->metadata.aliases.buffers ) { - auto matches = uf::string::match(descriptor.name, R"(/^(.+?)\[(\d+)\]$/)"); - auto name = matches.size() == 2 ? matches[0] : descriptor.name; - auto view = matches.size() == 2 ? stoi(matches[1]) : -1; - const ext::vulkan::Buffer* buffer = &descriptor.fallback; - if ( descriptor.renderMode ) { - if ( descriptor.renderMode->hasBuffer(name) ) - buffer = &descriptor.renderMode->getBuffer(name); - } else if ( renderMode.hasBuffer(name) ) { - buffer = &renderMode.getBuffer(name); - } - - if ( !buffer ) continue; - - if ( buffer->usage & uf::renderer::enums::Buffer::UNIFORM ) infos.uniform.emplace_back(buffer->descriptor); - if ( buffer->usage & uf::renderer::enums::Buffer::STORAGE ) infos.storage.emplace_back(buffer->descriptor); - } - #if 0 - // add per-rendermode buffers - for ( auto& buffer : renderMode.buffers ) { + this->collectBuffers( *shader, renderMode, graphic, [&]( const Buffer& buffer ){ if ( buffer.usage & uf::renderer::enums::Buffer::UNIFORM ) infos.uniform.emplace_back(buffer.descriptor); if ( buffer.usage & uf::renderer::enums::Buffer::STORAGE ) infos.storage.emplace_back(buffer.descriptor); - // if ( buffer.usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR ) infos.accelerationStructure.emplace_back(buffer.descriptor); - } - #endif - // add per-shader buffers - for ( auto& buffer : shader->buffers ) { - if ( buffer.usage & uf::renderer::enums::Buffer::UNIFORM ) infos.uniform.emplace_back(buffer.descriptor); - if ( 
buffer.usage & uf::renderer::enums::Buffer::STORAGE ) infos.storage.emplace_back(buffer.descriptor); - // if ( buffer.usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR ) infos.accelerationStructure.emplace_back(buffer.descriptor); - } - // add per-pipeline buffers - for ( auto& buffer : this->buffers ) { - if ( buffer.usage & uf::renderer::enums::Buffer::UNIFORM ) infos.uniform.emplace_back(buffer.descriptor); - if ( buffer.usage & uf::renderer::enums::Buffer::STORAGE ) infos.storage.emplace_back(buffer.descriptor); - // if ( buffer.usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR ) infos.accelerationStructure.emplace_back(buffer.descriptor); - } - // add per-graphics buffers - for ( auto& buffer : graphic.buffers ) { - if ( buffer.usage & uf::renderer::enums::Buffer::UNIFORM ) infos.uniform.emplace_back(buffer.descriptor); - if ( buffer.usage & uf::renderer::enums::Buffer::STORAGE ) infos.storage.emplace_back(buffer.descriptor); - // if ( buffer.usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR ) infos.accelerationStructure.emplace_back(buffer.descriptor); - } + } ); if ( descriptor.subpass < renderTarget.passes.size() ) { auto& subpass = renderTarget.passes[descriptor.subpass]; @@ -796,7 +772,8 @@ void ext::vulkan::Pipeline::update( const Graphic& graphic, const GraphicDescrip )); samplerInfo += layout.descriptorCount; } break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: { UF_ASSERT_BREAK_MSG( uniformBufferInfo != infos.uniform.end(), "Filename: {}\tCount: {}", shader->filename, layout.descriptorCount ) writeDescriptorSets.emplace_back(ext::vulkan::initializers::writeDescriptorSet( descriptorSet, @@ -807,7 +784,8 @@ void ext::vulkan::Pipeline::update( const Graphic& graphic, const GraphicDescrip )); uniformBufferInfo += layout.descriptorCount; } break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { + case 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { UF_ASSERT_BREAK_MSG( storageBufferInfo != infos.storage.end(), "Filename: {}\tCount: {}", shader->filename, layout.descriptorCount ) writeDescriptorSets.emplace_back(ext::vulkan::initializers::writeDescriptorSet( descriptorSet, @@ -908,6 +886,35 @@ PIPELINE_UPDATE_INVALID: }); return; } +void ext::vulkan::Pipeline::collectBuffers( const Shader& shader, const RenderMode& renderMode, const Graphic& graphic, const std::function& lambda ) const { + // add aliased-by-name buffers + for ( auto& descriptor : shader.metadata.aliases.buffers ) { + auto matches = uf::string::match(descriptor.name, R"(/^(.+?)\[(\d+)\]$/)"); + auto name = matches.size() == 2 ? matches[0] : descriptor.name; + auto view = matches.size() == 2 ? stoi(matches[1]) : -1; + const ext::vulkan::Buffer* buffer = &descriptor.fallback; + if ( descriptor.renderMode ) { + if ( descriptor.renderMode->hasBuffer(name) ) + buffer = &descriptor.renderMode->getBuffer(name); + } else if ( renderMode.hasBuffer(name) ) { + buffer = &renderMode.getBuffer(name); + } + + if ( !buffer ) continue; + + lambda( *buffer ); + } +#if 0 + // add per-rendermode buffers + for ( auto& buffer : renderMode.buffers ) lambda( buffer ); +#endif + // add per-shader buffers + for ( auto& buffer : shader.buffers ) lambda( buffer ); + // add per-pipeline buffers + for ( auto& buffer : this->buffers ) lambda( buffer ); + // add per-graphics buffers + for ( auto& buffer : graphic.buffers ) lambda( buffer ); +} void ext::vulkan::Pipeline::destroy() { if ( aliased ) return; @@ -1826,10 +1833,10 @@ const ext::vulkan::Pipeline& ext::vulkan::Graphic::getPipeline( const GraphicDes void ext::vulkan::Graphic::updatePipelines() { for ( auto pair : this->pipelines ) pair.second.update( *this ); } -void ext::vulkan::Graphic::record( VkCommandBuffer commandBuffer, size_t pass, size_t draw ) const { - return this->record( commandBuffer, descriptor, pass, draw ); +void 
ext::vulkan::Graphic::record( VkCommandBuffer commandBuffer, size_t pass, size_t draw, size_t offset ) const { + return this->record( commandBuffer, descriptor, pass, draw, offset ); } -void ext::vulkan::Graphic::record( VkCommandBuffer commandBuffer, const GraphicDescriptor& descriptor, size_t pass, size_t draw ) const { +void ext::vulkan::Graphic::record( VkCommandBuffer commandBuffer, const GraphicDescriptor& descriptor, size_t pass, size_t draw, size_t offset ) const { if ( !process ) return; if ( !this->hasPipeline( descriptor ) ) { VK_DEBUG_VALIDATION_MESSAGE(this << ": has no valid pipeline ({} {})", descriptor.renderMode, descriptor.renderTarget); @@ -1842,7 +1849,7 @@ void ext::vulkan::Graphic::record( VkCommandBuffer commandBuffer, const GraphicD return; } if ( !pipeline.metadata.process ) return; - pipeline.record(*this, descriptor, commandBuffer, pass, draw); + pipeline.record(*this, descriptor, commandBuffer, pass, draw, offset); auto shaders = pipeline.getShaders( material.shaders ); for ( auto* shader : shaders ) { diff --git a/engine/src/ext/vulkan/rendermode.cpp b/engine/src/ext/vulkan/rendermode.cpp index fe94d063..99b54969 100644 --- a/engine/src/ext/vulkan/rendermode.cpp +++ b/engine/src/ext/vulkan/rendermode.cpp @@ -221,9 +221,9 @@ ext::vulkan::GraphicDescriptor ext::vulkan::RenderMode::bindGraphicDescriptor( c } void ext::vulkan::RenderMode::createCommandBuffers() { - this->execute = true; - - uf::stl::vector graphics; + static thread_local uf::stl::vector graphics; + graphics.clear(); + auto& scene = uf::scene::getCurrentScene(); auto/*&*/ graph = scene.getGraph(); for ( auto entity : graph ) { @@ -243,6 +243,7 @@ void ext::vulkan::RenderMode::createCommandBuffers() { this->mostRecentCommandPoolId = std::this_thread::get_id(); this->rebuild = false; this->rerecord = false; + this->execute = true; } ext::vulkan::RenderMode::commands_container_t& ext::vulkan::RenderMode::getCommands( std::thread::id id ) { bool exists = this->commands.has(id); 
//this->commands.count(id) > 0; @@ -251,7 +252,7 @@ ext::vulkan::RenderMode::commands_container_t& ext::vulkan::RenderMode::getComma commands.resize( swapchain.buffers ); VkCommandBufferAllocateInfo cmdBufAllocateInfo = ext::vulkan::initializers::commandBufferAllocateInfo( - device->getCommandPool(this->getType() == "Compute" ? QueueEnum::COMPUTE : QueueEnum::GRAPHICS), + device->getCommandPool(this->queueEnum), VK_COMMAND_BUFFER_LEVEL_PRIMARY, static_cast(commands.size()) ); @@ -277,7 +278,6 @@ void ext::vulkan::RenderMode::cleanupAllCommands() { for ( auto& pair : container ) { if ( pair.second.empty() ) continue; - auto queueEnum = this->getType() == "Compute" ? QueueEnum::COMPUTE : QueueEnum::GRAPHICS; VkQueue queue = device->getQueue( queueEnum, pair.first ); VkResult res = vkWaitForFences( *device, fences.size(), fences.data(), VK_TRUE, VK_DEFAULT_FENCE_TIMEOUT ); VK_CHECK_QUEUE_CHECKPOINT( queue, res ); @@ -299,7 +299,6 @@ void ext::vulkan::RenderMode::cleanupCommands( std::thread::id id ) { if ( pair.first == id ) continue; if ( pair.second.empty() ) continue; - auto queueEnum = this->getType() == "Compute" ? 
QueueEnum::COMPUTE : QueueEnum::GRAPHICS; VkQueue queue = device->getQueue( queueEnum, pair.first ); VkResult res = vkWaitForFences( *device, fences.size(), fences.data(), VK_TRUE, VK_DEFAULT_FENCE_TIMEOUT ); VK_CHECK_QUEUE_CHECKPOINT( queue, res ); @@ -319,8 +318,6 @@ void ext::vulkan::RenderMode::createCommandBuffers( const uf::stl::vectorexecute = true; - uf::stl::vector graphics; auto& scene = uf::scene::getCurrentScene(); auto/*&*/ graph = scene.getGraph(); @@ -333,6 +330,7 @@ void ext::vulkan::RenderMode::bindPipelines() { } this->synchronize(); this->bindPipelines( graphics ); + this->execute = true; } void ext::vulkan::RenderMode::bindPipelines( const uf::stl::vector& graphics ) { //lockMutex(); @@ -382,14 +380,16 @@ void ext::vulkan::RenderMode::initialize( Device& device ) { // this->width = 0; //ext::vulkan::width; // this->height = 0; //ext::vulkan::height; - if ( this->scale == 0 ) this->scale = 1; - + if ( this->scale == 0 ) this->scale = 1; { if ( this->width > 0 ) renderTarget.width = this->width; if ( this->height > 0 ) renderTarget.height = this->height; if ( this->scale > 0 ) renderTarget.scale = this->scale; } + // set enum type + this->queueEnum = this->getType() == "Compute" ? 
QueueEnum::COMPUTE : QueueEnum::GRAPHICS; + // Set sync objects { // Fences (Used to check draw command buffer completion) @@ -403,7 +403,8 @@ void ext::vulkan::RenderMode::initialize( Device& device ) { VK_REGISTER_HANDLE( fence ); } // Set sync objects - { + for ( auto i = 0; i < ext::vulkan::swapchain.buffers; ++i ) { + auto& renderCompleteSemaphore = renderCompleteSemaphores.emplace_back(); // Semaphores (Used for correct command ordering) VkSemaphoreCreateInfo semaphoreCreateInfo = {}; semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; @@ -425,8 +426,10 @@ void ext::vulkan::RenderMode::initialize( Device& device ) { void ext::vulkan::RenderMode::tick() { if ( ext::vulkan::states::resized || uf::renderer::states::rebuild || rebuild ) { + if ( device ) vkDeviceWaitIdle(*device); cleanupAllCommands(); } + this->synchronize(); if ( metadata.limiter.frequency > 0 ) { @@ -445,26 +448,28 @@ void ext::vulkan::RenderMode::render() { } void ext::vulkan::RenderMode::destroy() { + if ( device ) vkDeviceWaitIdle(*device); this->synchronize(); renderTarget.destroy(); for ( auto& pair : this->commands.container() ) { if ( !pair.second.empty() ) { - vkFreeCommandBuffers( *device, device->getCommandPool(this->getType() == "Compute" ? 
QueueEnum::COMPUTE : QueueEnum::GRAPHICS, pair.first), static_cast(pair.second.size()), pair.second.data()); + vkFreeCommandBuffers( *device, device->getCommandPool(this->queueEnum, pair.first), static_cast(pair.second.size()), pair.second.data()); } pair.second.clear(); } - if ( renderCompleteSemaphore != VK_NULL_HANDLE ) { + for ( auto& renderCompleteSemaphore : renderCompleteSemaphores ) { vkDestroySemaphore( *device, renderCompleteSemaphore, nullptr); VK_UNREGISTER_HANDLE( renderCompleteSemaphore ); - renderCompleteSemaphore = VK_NULL_HANDLE; } for ( auto& fence : fences ) { vkDestroyFence( *device, fence, nullptr); VK_UNREGISTER_HANDLE( fence ); } + + renderCompleteSemaphores.clear(); fences.clear(); blitter.destroy(); ext::vulkan::Buffers::destroy(); @@ -474,9 +479,9 @@ void ext::vulkan::RenderMode::synchronize( uint64_t timeout ) { if ( fences.empty() ) return; lockMutex(); - auto queueEnum = this->getType() == "Compute" ? QueueEnum::COMPUTE : QueueEnum::GRAPHICS; VkQueue queue = device->getQueue( queueEnum, this->mostRecentCommandPoolId ); VkResult res = vkWaitForFences( *device, fences.size(), fences.data(), VK_TRUE, timeout ); +// VkResult res = vkWaitForFences(*device, 1, &fences[states::currentBuffer], VK_TRUE, timeout); VK_CHECK_QUEUE_CHECKPOINT( queue, res ); unlockMutex(); diff --git a/engine/src/ext/vulkan/rendermodes/base.cpp b/engine/src/ext/vulkan/rendermodes/base.cpp index 09e8dc28..6b992d49 100644 --- a/engine/src/ext/vulkan/rendermodes/base.cpp +++ b/engine/src/ext/vulkan/rendermodes/base.cpp @@ -90,10 +90,10 @@ void ext::vulkan::BaseRenderMode::createCommandBuffers( const uf::stl::vectorUF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "setImageLayout" ); vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); } // pre-renderpass commands - if ( commandBufferCallbacks.count(CALLBACK_BEGIN) > 0 ) { + 
VK_COMMAND_BUFFER_CALLBACK( CALLBACK_BEGIN, commandBuffer, frame, { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "callback[begin]" ); - commandBufferCallbacks[CALLBACK_BEGIN]( commandBuffer, i ); - } + } ); device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::BEGIN, "renderPass[begin]" ); vkCmdBeginRenderPass(commandBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); @@ -151,17 +150,16 @@ void ext::vulkan::BaseRenderMode::createCommandBuffers( const uf::stl::vectorgetName() ) continue; ext::vulkan::GraphicDescriptor descriptor = blitter.descriptor; // bindGraphicDescriptor(blitter.descriptor, currentSubpass); device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, ::fmt::format("blitter[{}: {}]", layer->getName(), layer->getType()) ); - blitter.record(commandBuffer, descriptor); + blitter.record(commandBuffer, descriptor, 0, 0, frame); } } device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::END, "renderPass[end]" ); vkCmdEndRenderPass(commandBuffer); // post-renderpass commands - if ( commandBufferCallbacks.count(CALLBACK_END) > 0 ) { + VK_COMMAND_BUFFER_CALLBACK( CALLBACK_END, commandBuffer, frame, { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "callback[end]" ); - commandBufferCallbacks[CALLBACK_END]( commandBuffer, i ); - } + } ); // need to transfer it back, if they differ if ( ext::vulkan::device.queueFamilyIndices.graphics != ext::vulkan::device.queueFamilyIndices.present ) { @@ -169,18 +167,18 @@ void ext::vulkan::BaseRenderMode::createCommandBuffers( const uf::stl::vectorUF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "setImageLayout" ); vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); @@ -205,10 +203,13 @@ void ext::vulkan::BaseRenderMode::render() { // if ( ext::vulkan::renderModes.size() > 1 ) return; // if ( 
ext::vulkan::renderModes.back() != this ) return; + if ( this->commands.container().empty() ) return; + //lockMutex( this->mostRecentCommandPoolId ); auto& commands = getCommands( this->mostRecentCommandPoolId ); + // Get next image in the swap chain (back/front buffer) - VK_CHECK_RESULT(swapchain.acquireNextImage(&states::currentBuffer, swapchain.presentCompleteSemaphore)); + VK_CHECK_RESULT(swapchain.acquireNextImage(&states::currentBuffer, swapchain.presentCompleteSemaphores[0])); // Use a fence to wait until the command buffer has finished execution before using it again VK_CHECK_RESULT(vkWaitForFences(*device, 1, &fences[states::currentBuffer], VK_TRUE, VK_DEFAULT_FENCE_TIMEOUT)); @@ -219,32 +220,39 @@ void ext::vulkan::BaseRenderMode::render() { // The submit info structure specifices a command buffer queue submission batch VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submitInfo.pWaitDstStageMask = waitStageMask; // Pointer to the list of pipeline stages that the semaphore waits will occur at - submitInfo.pWaitSemaphores = &swapchain.presentCompleteSemaphore; // Semaphore(s) to wait upon before the submitted command buffer starts executing - submitInfo.waitSemaphoreCount = 1; // One wait semaphore - submitInfo.pSignalSemaphores = &renderCompleteSemaphore; // Semaphore(s) to be signaled when command buffers have completed - submitInfo.signalSemaphoreCount = 1; // One signal semaphore - submitInfo.pCommandBuffers = &commands[states::currentBuffer]; // Command buffers(s) to execute in this batch (submission) + submitInfo.pWaitDstStageMask = waitStageMask; // Pointer to the list of pipeline stages that the semaphore waits will occur at + submitInfo.pWaitSemaphores = &swapchain.presentCompleteSemaphores[0]; // Semaphore(s) to wait upon before the submitted command buffer starts executing + submitInfo.waitSemaphoreCount = 1; // One wait semaphore + submitInfo.pSignalSemaphores = &renderCompleteSemaphores[states::currentBuffer]; 
// Semaphore(s) to be signaled when command buffers have completed + submitInfo.signalSemaphoreCount = 1; // One signal semaphore + submitInfo.pCommandBuffers = &commands[states::currentBuffer]; // Command buffers(s) to execute in this batch (submission) submitInfo.commandBufferCount = 1; // Submit to the graphics queue passing a wait fence -// VK_CHECK_RESULT(vkQueueSubmit( device->getQueue( QueueEnum::GRAPHICS ), 1, &submitInfo, fences[states::currentBuffer])); +#if 1 + VK_CHECK_RESULT(vkQueueSubmit( device->getQueue( QueueEnum::GRAPHICS ), 1, &submitInfo, fences[states::currentBuffer])); +#else { VkQueue queue = device->getQueue( QueueEnum::GRAPHICS ); VkResult res = vkQueueSubmit( queue, 1, &submitInfo, fences[states::currentBuffer]); VK_CHECK_QUEUE_CHECKPOINT( queue, res ); } +#endif // Present the current buffer to the swap chain // Pass the semaphore signaled by the command buffer submission from the submit info as the wait semaphore for swap chain presentation // This ensures that the image is not presented to the windowing system until all commands have been submitted - VK_CHECK_RESULT(swapchain.queuePresent(device->getQueue( QueueEnum::PRESENT ), states::currentBuffer, renderCompleteSemaphore)); -// VK_CHECK_RESULT(vkQueueWaitIdle(device->getQueue( QueueEnum::PRESENT ))); + VK_CHECK_RESULT(swapchain.queuePresent(device->getQueue( QueueEnum::PRESENT ), states::currentBuffer, renderCompleteSemaphores[states::currentBuffer])); + +#if 1 + //VK_CHECK_RESULT(vkQueueWaitIdle(device->getQueue( QueueEnum::PRESENT ))); +#else { VkQueue queue = device->getQueue( QueueEnum::PRESENT ); VkResult res = vkQueueWaitIdle(device->getQueue( QueueEnum::PRESENT )); VK_CHECK_QUEUE_CHECKPOINT( queue, res ); } +#endif this->executed = true; @@ -276,7 +284,7 @@ void ext::vulkan::BaseRenderMode::initialize( Device& device ) { // uint32_t height = windowSize.y; //this->height > 0 ? 
this->height : windowSize.y; size_t attachmentIndex = 0; - for ( size_t i = 0; i < ext::vulkan::swapchain.buffers; ++i ) { + for ( size_t frame = 0; frame < ext::vulkan::swapchain.buffers; ++frame ) { VkImageViewCreateInfo colorAttachmentView = {}; colorAttachmentView.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; colorAttachmentView.pNext = NULL; @@ -294,20 +302,20 @@ void ext::vulkan::BaseRenderMode::initialize( Device& device ) { colorAttachmentView.subresourceRange.layerCount = 1; colorAttachmentView.viewType = VK_IMAGE_VIEW_TYPE_2D; colorAttachmentView.flags = 0; - colorAttachmentView.image = images[i]; + colorAttachmentView.image = images[frame]; - VK_CHECK_RESULT(vkCreateImageView( device, &colorAttachmentView, nullptr, &renderTarget.attachments[i].view)); - VK_REGISTER_HANDLE( renderTarget.attachments[i].view ); + VK_CHECK_RESULT(vkCreateImageView( device, &colorAttachmentView, nullptr, &renderTarget.attachments[frame].view)); + VK_REGISTER_HANDLE( renderTarget.attachments[frame].view ); - renderTarget.attachments[i].descriptor.format = ext::vulkan::settings::formats::color; - // renderTarget.attachments[i].descriptor.layout = VK_IMAGE_LAYOUT_UNDEFINED; - renderTarget.attachments[i].descriptor.layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - renderTarget.attachments[i].descriptor.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - renderTarget.attachments[i].descriptor.aliased = true; - renderTarget.attachments[i].image = images[i]; - renderTarget.attachments[i].mem = VK_NULL_HANDLE; + renderTarget.attachments[frame].descriptor.format = ext::vulkan::settings::formats::color; + // renderTarget.attachments[frame].descriptor.layout = VK_IMAGE_LAYOUT_UNDEFINED; + renderTarget.attachments[frame].descriptor.layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + renderTarget.attachments[frame].descriptor.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + renderTarget.attachments[frame].descriptor.aliased = true; + renderTarget.attachments[frame].image = images[frame]; + 
renderTarget.attachments[frame].mem = VK_NULL_HANDLE; - metadata.attachments["color["+std::to_string((int) i)+"]"] = attachmentIndex++; + metadata.attachments["color["+std::to_string((int) frame)+"]"] = attachmentIndex++; } { // Create depth @@ -523,10 +531,10 @@ void ext::vulkan::BaseRenderMode::initialize( Device& device ) { { // Create a frame buffer for every image in the swapchain renderTarget.framebuffers.resize(images.size()); - for (size_t i = 0; i < renderTarget.framebuffers.size(); i++) + for (size_t frame = 0; frame < renderTarget.framebuffers.size(); frame++) { std::array attachments; - attachments[0] = renderTarget.attachments[i].view; // Color attachment is the view of the swapchain image + attachments[0] = renderTarget.attachments[frame].view; // Color attachment is the view of the swapchain image attachments[1] = renderTarget.attachments[metadata.attachments["depth"]].view; // Depth/Stencil attachment is the same for all frame buffers VkFramebufferCreateInfo frameBufferCreateInfo = {}; @@ -539,32 +547,32 @@ void ext::vulkan::BaseRenderMode::initialize( Device& device ) { frameBufferCreateInfo.height = height; frameBufferCreateInfo.layers = 1; // Create the framebuffer - VK_CHECK_RESULT(vkCreateFramebuffer( device, &frameBufferCreateInfo, nullptr, &renderTarget.framebuffers[i])); - VK_REGISTER_HANDLE(renderTarget.framebuffers[i]); + VK_CHECK_RESULT(vkCreateFramebuffer( device, &frameBufferCreateInfo, nullptr, &renderTarget.framebuffers[frame])); + VK_REGISTER_HANDLE(renderTarget.framebuffers[frame]); } } #if 0 if ( true ) { auto commandBuffer = device.fetchCommandBuffer(uf::renderer::QueueEnum::TRANSFER); - for ( size_t i = 0; i < images.size(); ++i ) { + for ( size_t frame = 0; frame < images.size(); ++frame ) { VkImageMemoryBarrier imageMemoryBarrier = {}; imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; imageMemoryBarrier.srcAccessMask = 0; imageMemoryBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - 
imageMemoryBarrier.oldLayout = renderTarget.attachments[i].descriptor.layout; + imageMemoryBarrier.oldLayout = renderTarget.attachments[frame].descriptor.layout; imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // ext::vulkan::device.queueFamilyIndices.present; imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // ext::vulkan::device.queueFamilyIndices.graphics; - imageMemoryBarrier.image = renderTarget.attachments[i].image; + imageMemoryBarrier.image = renderTarget.attachments[frame].image; imageMemoryBarrier.subresourceRange.baseMipLevel = 0; imageMemoryBarrier.subresourceRange.levelCount = 1; imageMemoryBarrier.subresourceRange.baseArrayLayer = 0; imageMemoryBarrier.subresourceRange.layerCount = 1; imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - renderTarget.attachments[i].descriptor.layout = imageMemoryBarrier.newLayout; + renderTarget.attachments[frame].descriptor.layout = imageMemoryBarrier.newLayout; vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); } @@ -597,15 +605,16 @@ void ext::vulkan::BaseRenderMode::initialize( Device& device ) { renderTarget.initialize( device ); */ // Set sync objects - { + for ( auto i = 0; i < ext::vulkan::swapchain.buffers; ++i ) { + auto& presentCompleteSemaphore = swapchain.presentCompleteSemaphores.emplace_back(); // Semaphores (Used for correct command ordering) VkSemaphoreCreateInfo semaphoreCreateInfo = {}; semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; semaphoreCreateInfo.pNext = nullptr; // Semaphore used to ensures that image presentation is complete before starting to submit again - VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &swapchain.presentCompleteSemaphore)); - 
VK_REGISTER_HANDLE(swapchain.presentCompleteSemaphore); + VK_CHECK_RESULT(vkCreateSemaphore(device, &semaphoreCreateInfo, nullptr, &presentCompleteSemaphore)); + VK_REGISTER_HANDLE(presentCompleteSemaphore); } } @@ -616,11 +625,11 @@ void ext::vulkan::BaseRenderMode::destroy() { renderTarget.renderPass = VK_NULL_HANDLE; } - for ( uint32_t i = 0; i < renderTarget.framebuffers.size(); i++ ) { - if ( renderTarget.framebuffers[i] != VK_NULL_HANDLE ) { - vkDestroyFramebuffer( *device, renderTarget.framebuffers[i], nullptr ); - VK_UNREGISTER_HANDLE( renderTarget.framebuffers[i] ); - renderTarget.framebuffers[i] = VK_NULL_HANDLE; + for ( uint32_t frame = 0; frame < renderTarget.framebuffers.size(); frame++ ) { + if ( renderTarget.framebuffers[frame] != VK_NULL_HANDLE ) { + vkDestroyFramebuffer( *device, renderTarget.framebuffers[frame], nullptr ); + VK_UNREGISTER_HANDLE( renderTarget.framebuffers[frame] ); + renderTarget.framebuffers[frame] = VK_NULL_HANDLE; } } for ( auto& attachment : renderTarget.attachments ) { @@ -650,10 +659,11 @@ void ext::vulkan::BaseRenderMode::destroy() { ext::vulkan::RenderMode::destroy(); - if ( swapchain.presentCompleteSemaphore != VK_NULL_HANDLE ) { - vkDestroySemaphore( *device, swapchain.presentCompleteSemaphore, nullptr); - VK_UNREGISTER_HANDLE( swapchain.presentCompleteSemaphore ); + for ( auto& presentCompleteSemaphore : swapchain.presentCompleteSemaphores ) { + vkDestroySemaphore( *device, presentCompleteSemaphore, nullptr); + VK_UNREGISTER_HANDLE( presentCompleteSemaphore ); } + swapchain.presentCompleteSemaphores.clear(); } ext::vulkan::GraphicDescriptor ext::vulkan::BaseRenderMode::bindGraphicDescriptor( const ext::vulkan::GraphicDescriptor& reference, size_t pass ) { diff --git a/engine/src/ext/vulkan/rendermodes/deferred.cpp b/engine/src/ext/vulkan/rendermodes/deferred.cpp index fc99b7b7..2b9d47f4 100644 --- a/engine/src/ext/vulkan/rendermodes/deferred.cpp +++ b/engine/src/ext/vulkan/rendermodes/deferred.cpp @@ -610,26 +610,30 @@ 
VkSubmitInfo ext::vulkan::DeferredRenderMode::queue() { // The submit info structure specifices a command buffer queue submission batch VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submitInfo.pWaitDstStageMask = waitStageMask; // Pointer to the list of pipeline stages that the semaphore waits will occur at - submitInfo.pWaitSemaphores = &swapchain.presentCompleteSemaphore; // Semaphore(s) to wait upon before the submitted command buffer starts executing - submitInfo.waitSemaphoreCount = 1; // One wait semaphore - submitInfo.pSignalSemaphores = &renderCompleteSemaphore; // Semaphore(s) to be signaled when command buffers have completed - submitInfo.signalSemaphoreCount = 1; // One signal semaphore - submitInfo.pCommandBuffers = &commands[states::currentBuffer]; // Command buffers(s) to execute in this batch (submission) + submitInfo.pWaitDstStageMask = waitStageMask; // Pointer to the list of pipeline stages that the semaphore waits will occur at + submitInfo.pWaitSemaphores = &swapchain.presentCompleteSemaphores[states::currentBuffer]; // Semaphore(s) to wait upon before the submitted command buffer starts executing + submitInfo.waitSemaphoreCount = 1; // One wait semaphore + submitInfo.pSignalSemaphores = &renderCompleteSemaphores[states::currentBuffer]; // Semaphore(s) to be signaled when command buffers have completed + submitInfo.signalSemaphoreCount = 1; // One signal semaphore + submitInfo.pCommandBuffers = &commands[states::currentBuffer]; // Command buffers(s) to execute in this batch (submission) submitInfo.commandBufferCount = 1; return submitInfo; } void ext::vulkan::DeferredRenderMode::render() { // if ( this->executed ) return; - if ( commandBufferCallbacks.count(EXECUTE_BEGIN) > 0 ) commandBufferCallbacks[EXECUTE_BEGIN]( VkCommandBuffer{}, 0 ); - //lockMutex( this->mostRecentCommandPoolId ); + if ( this->commands.container().empty() ) return; + auto& commands = getCommands( this->mostRecentCommandPoolId ); + + 
VK_COMMAND_BUFFER_CALLBACK( EXECUTE_BEGIN, VkCommandBuffer{}, 0, {} ); // Submit commands // Use a fence to ensure that command buffer has finished executing before using it again + /* VK_CHECK_RESULT(vkWaitForFences( *device, 1, &fences[states::currentBuffer], VK_TRUE, VK_DEFAULT_FENCE_TIMEOUT )); VK_CHECK_RESULT(vkResetFences( *device, 1, &fences[states::currentBuffer] )); + */ VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; @@ -643,9 +647,9 @@ void ext::vulkan::DeferredRenderMode::render() { // VK_CHECK_RESULT(vkQueueSubmit(device->getQueue( QueueEnum::GRAPHICS ), 1, &submitInfo, fences[states::currentBuffer])); VkQueue queue = device->getQueue( QueueEnum::GRAPHICS ); - VkResult res = vkQueueSubmit( queue, 1, &submitInfo, fences[states::currentBuffer]); + VkResult res = vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE/*fences[states::currentBuffer]*/); VK_CHECK_QUEUE_CHECKPOINT( queue, res ); - if ( commandBufferCallbacks.count(EXECUTE_END) > 0 ) commandBufferCallbacks[EXECUTE_END]( VkCommandBuffer{}, 0 ); + VK_COMMAND_BUFFER_CALLBACK( EXECUTE_END, VkCommandBuffer{}, 0, {} ); this->executed = true; //unlockMutex( this->mostRecentCommandPoolId ); @@ -701,8 +705,8 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto } } bool shouldRecord = true; // ( settings::pipelines::rt && !uf::config["engine"]["scenes"]["rt"]["full"].as() ) || !settings::pipelines::rt; - for (size_t i = 0; i < commands.size(); ++i) { - auto commandBuffer = commands[i]; + for (size_t frame = 0; frame < commands.size(); ++frame) { + auto commandBuffer = commands[frame]; VK_CHECK_RESULT( vkBeginCommandBuffer(commandBuffer, &cmdBufInfo) ); device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::BEGIN, "begin" ); @@ -718,7 +722,7 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto renderPassBeginInfo.clearValueCount = clearValues.size(); renderPassBeginInfo.pClearValues = &clearValues[0]; 
renderPassBeginInfo.renderPass = renderTarget.renderPass; - renderPassBeginInfo.framebuffer = renderTarget.framebuffers[i]; + renderPassBeginInfo.framebuffer = renderTarget.framebuffers[frame]; // Update dynamic viewport state VkViewport viewport = {}; @@ -780,15 +784,14 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto descriptor.bind.point = VK_PIPELINE_BIND_POINT_COMPUTE; } device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, ::fmt::format("graphic[{}]", pipeline) ); - graphic->record( commandBuffer, descriptor, 0, metadata.eyes ); + graphic->record( commandBuffer, descriptor, 0, metadata.eyes, frame ); } } // pre-renderpass commands - if ( commandBufferCallbacks.count(CALLBACK_BEGIN) > 0 ) { + VK_COMMAND_BUFFER_CALLBACK( CALLBACK_BEGIN, commandBuffer, frame, { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "callback[begin]" ); - commandBufferCallbacks[CALLBACK_BEGIN]( commandBuffer, i ); - } + } ); device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::BEGIN, "renderPass[begin]" ) ; vkCmdBeginRenderPass(commandBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); @@ -803,7 +806,7 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto if ( graphic->descriptor.renderMode != this->getName() ) continue; ext::vulkan::GraphicDescriptor descriptor = bindGraphicDescriptor(graphic->descriptor, currentSubpass); device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, ::fmt::format("graphic[{}]", currentDraw) ); - graphic->record( commandBuffer, descriptor, eye, currentDraw++ ); + graphic->record( commandBuffer, descriptor, eye, currentDraw++, frame ); } if ( eye + 1 < metadata.eyes ) { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "nextSubpass" ); @@ -821,7 +824,7 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto descriptor.subpass = currentSubpass; descriptor.bind.point = 
VK_PIPELINE_BIND_POINT_GRAPHICS; device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "deferred" ); - blitter.record(commandBuffer, descriptor, eye, currentDraw++); + blitter.record(commandBuffer, descriptor, eye, currentDraw++, frame); } if ( eye + 1 < metadata.eyes ) { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "nextSubpass" ); @@ -849,7 +852,7 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto // dispatch compute shader device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "deferred" ); - blitter.record(commandBuffer, descriptor, 0, 0); + blitter.record(commandBuffer, descriptor, 0, 0, frame); // transition attachments back to shader read layouts device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "setImageLayout" ); @@ -877,15 +880,15 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto auto& attachmentScratch = this->getAttachment("scratch"); // pingpong device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "bloom[1]" ); - blitter.record(commandBuffer, descriptor, 0, 1); + blitter.record( commandBuffer, descriptor, 0, 1 ); cmdImageBarrier( commandBuffer, attachmentScratch.image, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL ); device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "bloom[2]" ); - blitter.record(commandBuffer, descriptor, 0, 2); + blitter.record( commandBuffer, descriptor, 0, 2 ); cmdImageBarrier( commandBuffer, attachmentBright.image, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL ); device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "bloom[3]" ); - blitter.record(commandBuffer, descriptor, 0, 3); + blitter.record( commandBuffer, descriptor, 0, 3 ); // transition attachments back to shader read layouts device->UF_CHECKPOINT_MARK( commandBuffer, 
pod::Checkpoint::GENERIC, "setImageLayout" ); @@ -934,7 +937,7 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto if ( descriptor.bind.width < 1 ) descriptor.bind.width = 1; if ( descriptor.bind.height < 1 ) descriptor.bind.height = 1; - blitter.record(commandBuffer, descriptor, 0, i); + blitter.record(commandBuffer, descriptor, 0, i, frame); vkCmdPipelineBarrier( commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_FLAGS_NONE, 1, &memoryBarrier, 0, NULL, 0, NULL ); } @@ -945,10 +948,9 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto } #endif // post-renderpass commands - if ( commandBufferCallbacks.count(CALLBACK_END) > 0 ) { + VK_COMMAND_BUFFER_CALLBACK( CALLBACK_END, commandBuffer, frame, { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "callback[end]" ); - commandBufferCallbacks[CALLBACK_END]( commandBuffer, i ); - } + } ); #if 0 if ( this->hasAttachment("depth") ) { diff --git a/engine/src/ext/vulkan/rendermodes/rendertarget.cpp b/engine/src/ext/vulkan/rendermodes/rendertarget.cpp index 9200e19c..409fc360 100644 --- a/engine/src/ext/vulkan/rendermodes/rendertarget.cpp +++ b/engine/src/ext/vulkan/rendermodes/rendertarget.cpp @@ -419,15 +419,19 @@ void ext::vulkan::RenderTargetRenderMode::destroy() { void ext::vulkan::RenderTargetRenderMode::render() { // if ( this->executed ) return; - - if ( commandBufferCallbacks.count(EXECUTE_BEGIN) > 0 ) commandBufferCallbacks[EXECUTE_BEGIN]( VkCommandBuffer{}, 0 ); + if ( this->commands.container().empty() ) return; //lockMutex( this->mostRecentCommandPoolId ); auto& commands = getCommands( this->mostRecentCommandPoolId ); + + VK_COMMAND_BUFFER_CALLBACK( EXECUTE_BEGIN, VkCommandBuffer{}, 0, {} ); + // Submit commands // Use a fence to ensure that command buffer has finished executing before using it again + /* VK_CHECK_RESULT(vkWaitForFences( *device, 1, &fences[states::currentBuffer], VK_TRUE, 
VK_DEFAULT_FENCE_TIMEOUT )); VK_CHECK_RESULT(vkResetFences( *device, 1, &fences[states::currentBuffer] )); + */ VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; @@ -441,10 +445,9 @@ void ext::vulkan::RenderTargetRenderMode::render() { // VK_CHECK_RESULT(vkQueueSubmit(device->getQueue( QueueEnum::GRAPHICS ), 1, &submitInfo, fences[states::currentBuffer])); VkQueue queue = device->getQueue( QueueEnum::GRAPHICS ); - VkResult res = vkQueueSubmit( queue, 1, &submitInfo, fences[states::currentBuffer]); + VkResult res = vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE/*fences[states::currentBuffer]*/); VK_CHECK_QUEUE_CHECKPOINT( queue, res ); - - if ( commandBufferCallbacks.count(EXECUTE_END) > 0 ) commandBufferCallbacks[EXECUTE_END]( VkCommandBuffer{}, 0 ); + VK_COMMAND_BUFFER_CALLBACK( EXECUTE_END, VkCommandBuffer{}, 0, {} ); this->executed = true; //unlockMutex( this->mostRecentCommandPoolId ); @@ -481,8 +484,8 @@ void ext::vulkan::RenderTargetRenderMode::createCommandBuffers( const uf::stl::v } auto& commands = getCommands(); - for (size_t i = 0; i < commands.size(); ++i) { - auto& commandBuffer = commands[i]; + for (size_t frame = 0; frame < commands.size(); ++frame) { + auto& commandBuffer = commands[frame]; VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &cmdBufInfo)); device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::BEGIN, "begin" ); { @@ -497,7 +500,7 @@ void ext::vulkan::RenderTargetRenderMode::createCommandBuffers( const uf::stl::v renderPassBeginInfo.clearValueCount = clearValues.size(); renderPassBeginInfo.pClearValues = &clearValues[0]; renderPassBeginInfo.renderPass = renderTarget.renderPass; - renderPassBeginInfo.framebuffer = renderTarget.framebuffers[i]; + renderPassBeginInfo.framebuffer = renderTarget.framebuffers[frame]; // Update dynamic viewport state VkViewport viewport = {}; @@ -517,7 +520,7 @@ void ext::vulkan::RenderTargetRenderMode::createCommandBuffers( const uf::stl::v size_t currentPass = 0; 
// - // this->pipelineBarrier( commands[i], 1 ); + // this->pipelineBarrier( commands[frame], 1 ); // VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL @@ -540,10 +543,9 @@ void ext::vulkan::RenderTargetRenderMode::createCommandBuffers( const uf::stl::v #endif // pre-renderpass commands - if ( commandBufferCallbacks.count(CALLBACK_BEGIN) > 0 ) { + VK_COMMAND_BUFFER_CALLBACK( CALLBACK_BEGIN, commandBuffer, frame, { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "callback[begin]" ); - commandBufferCallbacks[CALLBACK_BEGIN]( commandBuffer, i ); - } + } ); if ( this->getName() == "Compute" ) { for ( auto graphic : graphics ) { @@ -573,7 +575,7 @@ void ext::vulkan::RenderTargetRenderMode::createCommandBuffers( const uf::stl::v UF_MSG_DEBUG("Aux pipeline: {}", pipeline); } device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, ::fmt::format("graphic[{}]", pipeline) ); - graphic->record( commandBuffer, descriptor, 0, metadata.type == uf::renderer::settings::pipelines::names::vxgi ? 0 : MIN(subpasses,6) ); + graphic->record( commandBuffer, descriptor, 0, metadata.type == uf::renderer::settings::pipelines::names::vxgi ? 
0 : MIN(subpasses,6), frame ); } } @@ -587,12 +589,13 @@ void ext::vulkan::RenderTargetRenderMode::createCommandBuffers( const uf::stl::v if ( graphic->descriptor.renderMode != this->getTarget() ) continue; ext::vulkan::GraphicDescriptor descriptor = bindGraphicDescriptor(graphic->descriptor, currentPass); device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, ::fmt::format("graphic[{}]", currentDraw) ); - graphic->record( commandBuffer, descriptor, currentPass, currentDraw++ ); + graphic->record( commandBuffer, descriptor, currentPass, currentDraw++, frame ); } - if ( commandBufferCallbacks.count( currentPass ) > 0 ) { - commandBufferCallbacks[currentPass]( commandBuffer, i ); + + VK_COMMAND_BUFFER_CALLBACK( currentPass, commandBuffer, frame, { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, ::fmt::format("callback[{}]", currentPass) ); - } + } ); + if ( currentPass + 1 < subpasses ) { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "nextSubpass" ); vkCmdNextSubpass(commandBuffer, VK_SUBPASS_CONTENTS_INLINE); @@ -604,12 +607,11 @@ void ext::vulkan::RenderTargetRenderMode::createCommandBuffers( const uf::stl::v // post-renderpass commands - if ( commandBufferCallbacks.count(CALLBACK_END) > 0 ) { + VK_COMMAND_BUFFER_CALLBACK( CALLBACK_END, commandBuffer, frame, { device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::GENERIC, "callback[end]" ); - commandBufferCallbacks[CALLBACK_END]( commandBuffer, i ); - } + } ); - // this->pipelineBarrier( commands[i], 1 ); + // this->pipelineBarrier( commands[frame], 1 ); } device->UF_CHECKPOINT_MARK( commandBuffer, pod::Checkpoint::END, "end" ); VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); diff --git a/engine/src/ext/vulkan/shader.cpp b/engine/src/ext/vulkan/shader.cpp index 4d16f496..fedc8af2 100644 --- a/engine/src/ext/vulkan/shader.cpp +++ b/engine/src/ext/vulkan/shader.cpp @@ -484,18 +484,17 @@ void ext::vulkan::Shader::initialize( ext::vulkan::Device& 
device, const uf::stl etype, }; } break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: { size_t bufferSize = comp.get_declared_struct_size(base_type); if ( bufferSize <= 0 ) break; if ( bufferSize > device.properties.limits.maxUniformBufferRange ) { VK_DEBUG_VALIDATION_MESSAGE("Invalid uniform buffer length of " << bufferSize << " for shader " << filename); bufferSize = device.properties.limits.maxUniformBufferRange; } - size_t misalignment = bufferSize % device.properties.limits.minStorageBufferOffsetAlignment; - if ( misalignment != 0 ) { - VK_DEBUG_VALIDATION_MESSAGE("Invalid uniform buffer alignment of " << misalignment << " for shader " << filename << ", correcting..."); - bufferSize += misalignment; - } + + bufferSize = ALIGNED_SIZE( bufferSize, device.properties.limits.minUniformBufferOffsetAlignment ); + { VK_DEBUG_VALIDATION_MESSAGE("Uniform size of " << bufferSize << " for shader " << filename); // auto& uniform = uniforms.emplace_back(); @@ -518,14 +517,20 @@ void ext::vulkan::Shader::initialize( ext::vulkan::Device& device, const uf::stl binding, bufferSize, }; + + if ( descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ) { + metadata.dynamicRanges.emplace_back( bufferSize ); + } } break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { // generate definition to JSON #if UF_SHADER_PARSE_AS_JSON { metadata.json["definitions"]["storage"][name]["name"] = name; metadata.json["definitions"]["storage"][name]["index"] = index; metadata.json["definitions"]["storage"][name]["binding"] = binding; + // metadata.json["definitions"]["storage"][name]["size"] = bufferSize; metadata.json["definitions"]["storage"][name]["members"] = parseMembers(resource.type_id); } #endif @@ -567,7 +572,11 @@ void ext::vulkan::Shader::initialize( ext::vulkan::Device& device, const uf::stl 
LOOP_RESOURCES( storage_images, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ); LOOP_RESOURCES( separate_samplers, VK_DESCRIPTOR_TYPE_SAMPLER ); LOOP_RESOURCES( subpass_inputs, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ); + #if VK_UBO_USE_N_BUFFERS + LOOP_RESOURCES( uniform_buffers, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ); + #else LOOP_RESOURCES( uniform_buffers, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ); + #endif LOOP_RESOURCES( storage_buffers, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ); LOOP_RESOURCES( acceleration_structures, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR ); #undef LOOP_RESOURCES diff --git a/engine/src/ext/vulkan/swapchain.cpp b/engine/src/ext/vulkan/swapchain.cpp index 7911bb3a..4e1bc19c 100644 --- a/engine/src/ext/vulkan/swapchain.cpp +++ b/engine/src/ext/vulkan/swapchain.cpp @@ -5,10 +5,10 @@ #include #include -VkResult ext::vulkan::Swapchain::acquireNextImage( uint32_t* imageIndex, VkSemaphore presentCompleteSemaphore ) { +VkResult ext::vulkan::Swapchain::acquireNextImage( uint32_t* imageIndex, VkSemaphore presentCompleteSemaphore, VkFence acquireFence ) { // By setting timeout to UINT64_MAX we will always wait until the next image has been acquired or an actual error is thrown // With that we don't have to handle VK_NOT_READY - return vkAcquireNextImageKHR( *device, swapChain, VK_DEFAULT_FENCE_TIMEOUT, presentCompleteSemaphore, (VkFence) nullptr, imageIndex ); + return vkAcquireNextImageKHR( *device, swapChain, VK_DEFAULT_FENCE_TIMEOUT, presentCompleteSemaphore, acquireFence, imageIndex ); } VkResult ext::vulkan::Swapchain::queuePresent( VkQueue queue, uint32_t imageIndex, VkSemaphore waitSemaphore ) { diff --git a/engine/src/ext/vulkan/texture.cpp b/engine/src/ext/vulkan/texture.cpp index b9f8506a..28ff0ba7 100644 --- a/engine/src/ext/vulkan/texture.cpp +++ b/engine/src/ext/vulkan/texture.cpp @@ -515,7 +515,7 @@ void ext::vulkan::Texture::fromBuffers( vkGetPhysicalDeviceFormatProperties(device.physicalDevice, format, &formatProperties); if 
(!(formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) { this->mips = 1; - VK_VALIDATION_MESSAGE("Texture image format {} does not support linear blitting", format); + // VK_VALIDATION_MESSAGE("Texture image format {} does not support linear blitting", format); } } diff --git a/engine/src/ext/vulkan/vulkan.cpp b/engine/src/ext/vulkan/vulkan.cpp index d8d9c5e0..52640966 100644 --- a/engine/src/ext/vulkan/vulkan.cpp +++ b/engine/src/ext/vulkan/vulkan.cpp @@ -476,7 +476,7 @@ void ext::vulkan::initialize( bool soft ) { auto tasks = uf::thread::schedule( settings::invariant::multithreadedRecording ); for ( auto& renderMode : renderModes ) { if ( !renderMode ) continue; - tasks.queue([&]{ + tasks.queue([renderMode]{ if ( settings::invariant::individualPipelines ) renderMode->bindPipelines(); renderMode->createCommandBuffers(); }); @@ -523,11 +523,11 @@ void ext::vulkan::tick() { auto tasks = uf::thread::schedule( settings::invariant::multithreadedRecording ); for ( auto& renderMode : renderModes ) { if ( !renderMode || (renderMode->executed && !renderMode->execute) ) continue; - if ( ext::vulkan::states::rebuild || renderMode->rebuild ) tasks.queue([&]{ + if ( ext::vulkan::states::rebuild || renderMode->rebuild ) tasks.queue([renderMode]{ if ( settings::invariant::individualPipelines ) renderMode->bindPipelines(); renderMode->createCommandBuffers(); }); - else if ( renderMode->rerecord ) tasks.queue([&]{ + else if ( renderMode->rerecord ) tasks.queue([renderMode]{ renderMode->createCommandBuffers(); }); } @@ -597,9 +597,11 @@ void ext::vulkan::render() { else submitsGraphics.emplace_back(submitInfo); renderMode->executed = true; -// tasks.queue([&]{ +// tasks.queue([renderMode]{ ext::vulkan::setCurrentRenderMode(renderMode); - uf::scene::render(); + if ( renderMode->getType() != "Swapchain" ) { + uf::scene::render(); + } ext::vulkan::setCurrentRenderMode(NULL); // }); } @@ -617,9 +619,11 @@ void ext::vulkan::render() { // stuff we 
can't batch for ( auto renderMode : specialRenderModes ) { ext::vulkan::setCurrentRenderMode(renderMode); - uf::scene::render(); + if ( renderMode->getType() != "Swapchain" ) { + uf::scene::render(); + } #if UF_USE_FFX_FSR - if ( renderMode->getName() == "Swapchain" && settings::pipelines::fsr && ext::fsr::initialized ) { + if ( renderMode->getType() == "Swapchain" && settings::pipelines::fsr && ext::fsr::initialized ) { ext::fsr::tick(); ext::fsr::render(); } @@ -637,15 +641,16 @@ void ext::vulkan::render() { if ( !renderMode || !renderMode->execute || !renderMode->metadata.limiter.execute ) continue; #if UF_USE_FFX_FSR - if ( renderMode->getName() == "Swapchain" && settings::pipelines::fsr && ext::fsr::initialized ) { + if ( renderMode->getType() == "Swapchain" && settings::pipelines::fsr && ext::fsr::initialized ) { ext::fsr::tick(); ext::fsr::render(); } #endif ext::vulkan::setCurrentRenderMode(renderMode); - uf::graph::render(); - uf::scene::render(); + if ( renderMode->getType() != "Swapchain" ) { + uf::scene::render(); + } renderMode->render(); ext::vulkan::setCurrentRenderMode(NULL); diff --git a/engine/src/utils/math/physics.cpp b/engine/src/utils/math/physics.cpp index c074e98b..37725002 100644 --- a/engine/src/utils/math/physics.cpp +++ b/engine/src/utils/math/physics.cpp @@ -22,8 +22,6 @@ void uf::physics::tick( ) { return uf::physics::tick( uf::scene::getCurrentScene() ); } void uf::physics::tick( uf::Object& scene ) { - ++uf::physics::time::frame; - uf::physics::time::previous = uf::physics::time::current; uf::physics::time::current = uf::physics::time::timer.elapsed(); @@ -31,7 +29,12 @@ void uf::physics::tick( uf::Object& scene ) { if ( uf::physics::time::delta > uf::physics::time::clamp ) { uf::physics::time::delta = uf::physics::time::clamp; } - uf::physics::impl::tick( scene, uf::physics::time::delta ); + + if ( uf::physics::impl::async ) { + uf::thread::queue( "Physics", [&](){ uf::physics::impl::tick( scene, uf::physics::time::delta ); }); + } 
else { + uf::physics::impl::tick( scene, uf::physics::time::delta ); + } } void uf::physics::terminate( ) { return uf::physics::terminate( uf::scene::getCurrentScene() ); diff --git a/engine/src/utils/math/physics/bvh.inl b/engine/src/utils/math/physics/bvh.inl index d96e71d8..42c0e823 100644 --- a/engine/src/utils/math/physics/bvh.inl +++ b/engine/src/utils/math/physics/bvh.inl @@ -77,7 +77,6 @@ namespace { constexpr auto numBins = 16; static thread_local Bin bins[numBins]; - for ( auto i = 0; i < numBins; i++ ) bins[i].count = 0; auto extent = bound.max - bound.min; auto bestAxis = -1, bestSplit = -1; @@ -85,6 +84,10 @@ namespace { for ( auto axis = 0; axis < 3; ++axis ) { if ( extent[axis] < EPS(1e-6f) ) continue; + for ( auto i = 0; i < numBins; i++ ) { + bins[i].count = 0; + bins[i].bounds = {}; + } float minC = bound.min[axis]; float maxC = bound.max[axis]; @@ -203,10 +206,10 @@ namespace { if ( bvh.indices.empty() ) return; // inserted nothing // recursively build BVH from indices - if ( ::useBvhSahBodies ) ::buildBVHNode_SAH( bvh, bounds, 0, bvh.indices.size(), capacity ); + if ( uf::physics::impl::settings.useBvhSahBodies ) ::buildBVHNode_SAH( bvh, bounds, 0, bvh.indices.size(), capacity ); else ::buildBVHNode( bvh, bounds, 0, bvh.indices.size(), capacity ); // flatten if requested - if ( ::flattenBvhBodies ) ::flattenBVH( bvh, 0 ); + if ( uf::physics::impl::settings.flattenBvhBodies ) ::flattenBVH( bvh, 0 ); // mark as clean bvh.dirty = false; @@ -245,10 +248,10 @@ namespace { } // recursively build BVH from indices - if ( ::useBvhSahMeshes ) ::buildBVHNode_SAH( bvh, bounds, 0, bvh.indices.size(), capacity ); + if ( uf::physics::impl::settings.useBvhSahMeshes ) ::buildBVHNode_SAH( bvh, bounds, 0, bvh.indices.size(), capacity ); else ::buildBVHNode( bvh, bounds, 0, bvh.indices.size(), capacity ); // flatten if requested - if ( ::flattenBvhMeshes ) ::flattenBVH( bvh, 0 ); + if ( uf::physics::impl::settings.flattenBvhMeshes ) ::flattenBVH( bvh, 0 ); // mark 
as clean bvh.dirty = false; @@ -316,7 +319,7 @@ namespace { // update leaf bounds uf::stl::vector leaves; - leaves.reserve(::reserveCount); + leaves.reserve(uf::physics::impl::settings.reserveCount); for ( auto i = 0; i < bvh.nodes.size(); i++ ) { if ( bvh.nodes[i].getCount() == 0 ) continue; leaves.emplace_back(i); @@ -333,7 +336,7 @@ namespace { } // update internal nodes bottom-up - for ( pod::BVH::index_t i = (pod::BVH::index_t) bvh.nodes.size() - 1; i >= 0; i-- ) { + for ( int64_t i = (int64_t) bvh.nodes.size() - 1; i >= 0; i-- ) { auto& node = bvh.nodes[i]; auto& bound = bvh.bounds[i]; // internal node @@ -534,7 +537,7 @@ namespace { if ( !bvh.flattened.empty() ) return ::queryFlatOverlaps( bvh, outPairs ); if ( bvh.nodes.empty() ) return; - outPairs.reserve(::reserveCount); + outPairs.reserve(uf::physics::impl::settings.reserveCount); ::traverseBVH( bvh, 0, outPairs ); } @@ -542,7 +545,7 @@ namespace { if ( !bvhA.flattened.empty() && !bvhB.flattened.empty() ) return ::queryFlatOverlaps( bvhA, bvhB, outPairs ); if ( bvhA.nodes.empty() || bvhB.nodes.empty() ) return; - outPairs.reserve(::reserveCount); + outPairs.reserve(uf::physics::impl::settings.reserveCount); ::traverseNodePair(bvhA, 0, bvhB, 0, outPairs); } } @@ -554,7 +557,7 @@ namespace { if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, bounds, outIndices ); - outIndices.reserve(::reserveCount); + outIndices.reserve(uf::physics::impl::settings.reserveCount); static thread_local uf::stl::stack stack; //stack.clear(); // there is no stack.clear(), and the stack should already be cleared by the end of this function @@ -581,7 +584,7 @@ namespace { void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& outIndices, pod::BVH::index_t nodeID ) { if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, bounds, outIndices ); - if ( nodeID == 0 ) outIndices.reserve(::reserveCount); + if ( nodeID == 0 ) outIndices.reserve(uf::physics::impl::settings.reserveCount); const auto& node = 
bvh.nodes[nodeID]; if ( node.isAsleep() || !::aabbOverlap( bounds, bvh.bounds[nodeID] ) ) return; @@ -601,7 +604,7 @@ namespace { if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, ray, outIndices, maxDist ); if ( bvh.nodes.empty() ) return; - outIndices.reserve(::reserveCount); + outIndices.reserve(uf::physics::impl::settings.reserveCount); static thread_local uf::stl::stack stack; //stack.clear(); // there is no stack.clear(), and the stack should already be cleared by the end of this function @@ -627,7 +630,7 @@ namespace { void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& outIndices, pod::BVH::index_t nodeID, float maxDist ) { if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, ray, outIndices, maxDist ); - if ( nodeID == 0 ) outIndices.reserve(::reserveCount); + if ( nodeID == 0 ) outIndices.reserve(uf::physics::impl::settings.reserveCount); const auto& node = bvh.nodes[nodeID]; float tMin, tMax; @@ -649,63 +652,81 @@ namespace { namespace { void queryFlatOverlaps( const pod::BVH& bvh, pod::BVH::pairs_t& outPairs ) { auto& nodes = bvh.flattened; + auto& bounds = bvh.flatBounds; auto& indices = bvh.indices; - outPairs.reserve(::reserveCount); + if ( nodes.empty() ) return; + outPairs.reserve( uf::physics::impl::settings.reserveCount ); - for ( auto i = 0; i < nodes.size(); ++i ) { - const auto& nodeA = nodes[i]; + for ( pod::BVH::index_t a = 0; a < nodes.size(); ++a ) { + const auto& nodeA = nodes[a]; if ( nodeA.getCount() <= 0 || nodeA.isAsleep() ) continue; - for ( auto j = i + 1; j < nodes.size(); ++j ) { - const auto& nodeB = nodes[j]; - if ( nodeB.getCount() <= 0 || nodeB.isAsleep() ) continue; + const auto& boundsA = bounds[a]; + pod::BVH::index_t b = a + 1; + while ( b < nodes.size() ) { + const auto& nodeB = nodes[b]; - if ( !::aabbOverlap( bvh.flatBounds[i], bvh.flatBounds[j] ) ) continue; + if ( nodeB.isAsleep() || !::aabbOverlap( boundsA, bounds[b] ) ) { + b = nodeB.skipIndex; + continue; + } - for ( auto ia = 0; 
ia < nodeA.getCount(); ++ia ) { - for ( auto ib = 0; ib < nodeB.getCount(); ++ib ) { - auto indexA = indices[nodeA.start + ia]; - auto indexB = indices[nodeB.start + ib]; + if ( nodeB.getCount() > 0 ) { + for ( pod::BVH::index_t ia = 0; ia < nodeA.getCount(); ++ia ) { + for ( pod::BVH::index_t ib = 0; ib < nodeB.getCount(); ++ib ) { + auto indexA = indices[nodeA.start + ia]; + auto indexB = indices[nodeB.start + ib]; - if ( indexA == indexB ) continue; - if ( indexA > indexB ) std::swap( indexA, indexB ); + if ( indexA == indexB ) continue; + if ( indexA > indexB ) std::swap(indexA, indexB); - outPairs.emplace( indexA, indexB ); + outPairs.emplace( indexA, indexB ); + } } } + ++b; } } } void queryFlatOverlaps( const pod::BVH& bvhA, const pod::BVH& bvhB, pod::BVH::pairs_t& outPairs ) { auto& nodesA = bvhA.flattened; + auto& boundsA = bvhA.flatBounds; auto& indicesA = bvhA.indices; - + auto& nodesB = bvhB.flattened; + auto& boundsB = bvhB.flatBounds; auto& indicesB = bvhB.indices; if ( nodesA.empty() || nodesB.empty() ) return; - - outPairs.reserve(::reserveCount); + outPairs.reserve(uf::physics::impl::settings.reserveCount); - for ( auto i = 0; i < nodesA.size(); ++i ) { - const auto& nodeA = nodesA[i]; + for ( pod::BVH::index_t a = 0; a < nodesA.size(); ++a ) { + const auto& nodeA = nodesA[a]; if ( nodeA.getCount() <= 0 || nodeA.isAsleep() ) continue; - for ( auto j = 0; j < nodesB.size(); ++j ) { - const auto& nodeB = nodesB[j]; - if ( nodeB.getCount() <= 0 || nodeB.isAsleep() ) continue; + const auto& bA = boundsA[a]; - if ( !::aabbOverlap( bvhA.flatBounds[i], bvhB.flatBounds[j] ) ) continue; + pod::BVH::index_t b = 0; + while ( b < nodesB.size() ) { + const auto& nodeB = nodesB[b]; - for ( auto ia = 0; ia < nodeA.getCount(); ++ia ) { - for (auto ib = 0; ib < nodeB.getCount(); ++ib ) { - auto indexA = indicesA[nodeA.start + ia]; - auto indexB = indicesB[nodeB.start + ib]; + if ( nodeB.isAsleep() || !::aabbOverlap(bA, boundsB[b]) ) { + b = nodeB.skipIndex; + 
continue; + } - outPairs.emplace( indexA, indexB ); + if ( nodeB.getCount() > 0 ) { + for ( pod::BVH::index_t ia = 0; ia < nodeA.getCount(); ++ia ) { + for ( pod::BVH::index_t ib = 0; ib < nodeB.getCount(); ++ib ) { + auto indexA = indicesA[nodeA.start + ia]; + auto indexB = indicesB[nodeB.start + ib]; + + outPairs.emplace(indexA, indexB); + } } } + ++b; } } } @@ -714,7 +735,7 @@ namespace { auto& nodes = bvh.flattened; auto& indices = bvh.indices; - outIndices.reserve(::reserveCount); + outIndices.reserve(uf::physics::impl::settings.reserveCount); pod::BVH::index_t idx = 0; while ( idx < nodes.size() ) { @@ -738,7 +759,7 @@ namespace { auto& nodes = bvh.flattened; auto& indices = bvh.indices; - outIndices.reserve(::reserveCount); + outIndices.reserve(uf::physics::impl::settings.reserveCount); pod::BVH::index_t idx = 0; while ( idx < nodes.size() ) { @@ -802,7 +823,8 @@ namespace { } // map root to island index - uf::stl::unordered_map rootToIsland; + static thread_local uf::stl::unordered_map rootToIsland; + rootToIsland.clear(); islands.clear(); islands.reserve(bodies.size()); diff --git a/engine/src/utils/math/physics/helpers.inl b/engine/src/utils/math/physics/helpers.inl index eb0e8bcf..3105fd65 100644 --- a/engine/src/utils/math/physics/helpers.inl +++ b/engine/src/utils/math/physics/helpers.inl @@ -61,9 +61,10 @@ namespace { uint64_t lhs = reinterpret_cast(&a); uint64_t rhs = reinterpret_cast(&b); if (lhs > rhs) std::swap(lhs, rhs); - - lhs ^= rhs + 0x9e3779b97f4a7c15 + (lhs << 6) + (lhs >> 2); - return lhs; + size_t seed = 0; + seed ^= std::hash{}(lhs) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= std::hash{}(rhs) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; } // marks a body as asleep @@ -78,6 +79,7 @@ namespace { } void updateActivity( pod::PhysicsBody& body, float dt ) { // reset grounded state + bool wasGrounded = body.activity.grounded; body.activity.grounded = false; // already asleep @@ -91,7 +93,7 @@ namespace { if ( linSpeed < 
pod::Activity::linearSleepEpsilon && angSpeed < pod::Activity::angularSleepEpsilon ) { body.activity.sleepTimer += dt; float threshold = pod::Activity::sleepThreshold; - if ( body.activity.grounded ) threshold *= 0.25f; + if ( wasGrounded ) threshold *= 0.25f; if ( body.activity.sleepTimer > threshold ) ::sleepBody( body ); } // body is moving, reset timer @@ -242,19 +244,24 @@ namespace { // check against AB float t = std::clamp(uf::vector::dot( ao, ab ) / d00, 0.0f, 1.0f); auto pAB = a + (ab * t); - float distAB = uf::vector::dot( pAB, pAB ); + float distAB = uf::vector::dot( p - pAB, p - pAB ); + //float distAB = uf::vector::dot( pAB, pAB ); // check against AC float t2 = std::clamp(uf::vector::dot( ao, ac ) / d11, 0.0f, 1.0f); auto pAC = a + (ac * t2); - float distAC = uf::vector::dot( pAC, pAC ); + float distAC = uf::vector::dot( p - pAC, p - pAC ); + //float distAC = uf::vector::dot( pAC, pAC ); // check against BC auto bc = c - b; float d22 = uf::vector::dot( bc, bc ); - float t3 = std::clamp(uf::vector::dot( -b, bc ) / d22, 0.0f, 1.0f); + float t3 = std::clamp(uf::vector::dot( p - b, bc ) / d22, 0.0f, 1.0f); + //float t3 = std::clamp(uf::vector::dot( -b, bc ) / d22, 0.0f, 1.0f); + auto pBC = b + ( bc * t3 ); - float distBC = uf::vector::dot( pBC, pBC ); + float distBC = uf::vector::dot( p - pBC, p - pBC ); + //float distBC = uf::vector::dot( pBC, pBC ); // pick closest edge/vertex if ( distAB <= distAC && distAB <= distBC ) return { 1.0f - t, t, 0.0f }; diff --git a/engine/src/utils/math/physics/impl.cpp b/engine/src/utils/math/physics/impl.cpp index ec8ccb77..7ecf208c 100644 --- a/engine/src/utils/math/physics/impl.cpp +++ b/engine/src/utils/math/physics/impl.cpp @@ -4,50 +4,6 @@ #include #include -namespace { - bool warmupSolver = true; // cache manifold data to warm up the solver - bool blockContactSolver = true; // use BlockNxN solvers (where N = number of contacts for a manifold) - bool psgContactSolver = true; // use PSG contact solver - bool useGjk = 
false; // currently don't have a way to broadphase mesh => narrowphase tri via GJK - bool fixedStep = false; // run physics simulation with a fixed delta time (with accumulation), rather than rely on actual engine deltatime - uint32_t substeps = 4; // number of substeps per frame tick - uint32_t reserveCount = 32; // amount of elements to reserve for vectors used in this system, to-do: have it tie to a memory pool allocator - - // increasing these make things lag for reasons I can imagine why - uint32_t broadphaseBvhCapacity = 4; // number of bodies per leaf node - uint32_t meshBvhCapacity = 4; // number of triangles per leaf node - - // additionally flattens a BVH for linear iteration, rather than a recursive / stack-based traversal - bool flattenBvhBodies = true; - bool flattenBvhMeshes = true; - - // use surface area heuristics for building the BVH, rather than naive splits - bool useBvhSahBodies = true; // it actually seems slower to use these...... - bool useBvhSahMeshes = true; - - bool useSplitBvhs = true; // creates separate BVHs for static / dynamic objects - - // to-do: find possibly better values for this - uint32_t solverIterations = 10; - float baumgarteCorrectionPercent = 0.4f; - float baumgarteCorrectionSlop = 0.01f; - - uf::stl::unordered_map manifoldsCache; - uint32_t manifoldCacheLifetime = 6; // to-do: find a good value for this - - uint32_t frameCounter = 0; - - // to-do: tweak this to not be annoying - pod::BVH::UpdatePolicy bvhUpdatePolicy = { - .displacementThreshold = 0.25f, - .overlapThreshold = 2.0f, - .dirtyRatioThreshold = 0.3f, - .maxFramesBeforeRebuild = 60, // * 10, // 10 seconds - }; - - float groundedThreshold = 0.7f; // threshold before marking a body as grounded -} - #define EPS(x) 1.0e-6f #define EPS2 (EPS(1.0e-6) * EPS(1.0e-6)) #define ASSERT_COLLIDER_TYPES( A, B ) UF_ASSERT( a.collider.type == pod::ShapeType::A && b.collider.type == pod::ShapeType::B ); @@ -67,8 +23,12 @@ namespace { #include "integration.inl" #include 
"solvers.inl" -// unused, as these are from reactphysics +pod::PhysicsSettings uf::physics::impl::settings; + float uf::physics::impl::timescale = 1.0f / 60.0f; +bool uf::physics::impl::async = false; + +// unused, as these are from reactphysics bool uf::physics::impl::interpolate = false; bool uf::physics::impl::shared = false; bool uf::physics::impl::globalStorage = false; @@ -90,9 +50,8 @@ void uf::physics::impl::tick( uf::Object& object, float dt ) { uf::physics::impl::tick( object.getComponent(), dt ); } void uf::physics::impl::tick( pod::World& world, float dt ) { - if ( !::fixedStep ) { - - if ( ::substeps > 0 ) uf::physics::impl::substep( world, dt, ::substeps ); + if ( !uf::physics::impl::settings.fixedStep ) { + if ( uf::physics::impl::settings.substeps > 0 ) uf::physics::impl::substep( world, dt, uf::physics::impl::settings.substeps ); else uf::physics::impl::step( world, dt ); return; @@ -101,7 +60,7 @@ void uf::physics::impl::tick( pod::World& world, float dt ) { static float accumulator = 0; accumulator += dt; while ( accumulator >= uf::physics::impl::timescale ) { - if ( ::substeps > 0 ) uf::physics::impl::substep( world, uf::physics::impl::timescale, ::substeps ); + if ( uf::physics::impl::settings.substeps > 0 ) uf::physics::impl::substep( world, uf::physics::impl::timescale, uf::physics::impl::settings.substeps ); else uf::physics::impl::step( world, uf::physics::impl::timescale ); accumulator -= uf::physics::impl::timescale; } @@ -130,20 +89,20 @@ void uf::physics::impl::step( pod::World& world, float dt ) { if ( bodies.empty() ) return; - ++::frameCounter; + ++uf::physics::impl::settings.frameCounter; for ( auto* body : bodies ) { ::integrate( *body, dt ); } // rebuild static bvh if dirty - if ( staticBvh.dirty && ::useSplitBvhs ) { - ::buildBroadphaseBVH( staticBvh, bodies, ::broadphaseBvhCapacity, ::useSplitBvhs, true ); // (re)build + if ( staticBvh.dirty && uf::physics::impl::settings.useSplitBvhs ) { + ::buildBroadphaseBVH( staticBvh, 
bodies, uf::physics::impl::settings.broadphaseBvhCapacity, uf::physics::impl::settings.useSplitBvhs, true ); // (re)build } - switch ( ::decideBVHUpdate( dynamicBvh, bodies, ::bvhUpdatePolicy, ::frameCounter ) ) { + switch ( ::decideBVHUpdate( dynamicBvh, bodies, uf::physics::impl::settings.bvhUpdatePolicy, uf::physics::impl::settings.frameCounter ) ) { case pod::BVH::UpdatePolicy::Decision::REBUILD: { - ::buildBroadphaseBVH( dynamicBvh, bodies, ::broadphaseBvhCapacity, ::useSplitBvhs, false ); // (re)build + ::buildBroadphaseBVH( dynamicBvh, bodies, uf::physics::impl::settings.broadphaseBvhCapacity, uf::physics::impl::settings.useSplitBvhs, false ); // (re)build } break; case pod::BVH::UpdatePolicy::Decision::REFIT: { ::refitBVH( dynamicBvh, bodies ); // refit @@ -157,7 +116,7 @@ void uf::physics::impl::step( pod::World& world, float dt ) { // query for overlaps pod::BVH::pairs_t pairs; ::queryOverlaps( dynamicBvh, pairs ); - if ( ::useSplitBvhs ) { + if ( uf::physics::impl::settings.useSplitBvhs ) { ::queryOverlaps( dynamicBvh, staticBvh, pairs ); } @@ -165,13 +124,14 @@ void uf::physics::impl::step( pod::World& world, float dt ) { uf::stl::vector islands; ::buildIslands( pairs, bodies, islands ); - if ( ::warmupSolver ) ::prepareManifoldCache( ::manifoldsCache, islands, bodies ); + if ( uf::physics::impl::settings.warmupSolver ) ::prepareManifoldCache( uf::physics::impl::settings.manifoldsCache, islands, bodies ); // iterate islands #pragma omp parallel for schedule(dynamic) for ( auto& island : islands ) { - uf::stl::vector manifolds; - manifolds.reserve(::reserveCount); + static thread_local uf::stl::vector manifolds; + manifolds.clear(); + manifolds.reserve(uf::physics::impl::settings.reserveCount); // sleeping island, skip if ( !::updateIsland( island, bodies, dt ) ) continue; @@ -190,9 +150,9 @@ void uf::physics::impl::step( pod::World& world, float dt ) { for ( auto& c : manifold.points ) c.normal = ::orientNormalToAB( a, b, c.normal ); } // retrieve 
accumulated impulses - if ( ::warmupSolver ) { - auto it = ::manifoldsCache.find( ::makePairKey( a, b ) ); - if ( it != ::manifoldsCache.end() ) ::retrieveContacts( manifold, it->second ); + if ( uf::physics::impl::settings.warmupSolver ) { + auto it = uf::physics::impl::settings.manifoldsCache.find( ::makePairKey( a, b ) ); + if ( it != uf::physics::impl::settings.manifoldsCache.end() ) ::retrieveContacts( manifold, it->second ); } // merge similar contacts from a mesh to ensure continuity if ( a.collider.type == pod::ShapeType::MESH || b.collider.type == pod::ShapeType::MESH ) { @@ -207,7 +167,7 @@ void uf::physics::impl::step( pod::World& world, float dt ) { if ( b.activity.awake && !a.activity.awake ) ::wakeBody( a ); // mark as grounded for ( auto& c : manifold.points ) { - if ( std::fabs(uf::vector::dot(c.normal, pod::Vector3f{0,1,0})) > ::groundedThreshold ) { + if ( std::fabs(uf::vector::dot(c.normal, pod::Vector3f{0,1,0})) > uf::physics::impl::settings.groundedThreshold ) { // only mark if contact point is below body if ( c.point.y < getPosition(a).y ) a.activity.grounded = true; if ( c.point.y < getPosition(b).y ) b.activity.grounded = true; @@ -223,12 +183,12 @@ void uf::physics::impl::step( pod::World& world, float dt ) { // do position correction ::solvePositions( manifolds, dt ); // cache manifold positions - if ( ::warmupSolver ) { - ::updateManifoldCache( manifolds, ::manifoldsCache ); + if ( uf::physics::impl::settings.warmupSolver ) { + ::updateManifoldCache( manifolds, uf::physics::impl::settings.manifoldsCache ); } } - if ( ::warmupSolver ) ::pruneManifoldCache( ::manifoldsCache ); + if ( uf::physics::impl::settings.warmupSolver ) ::pruneManifoldCache( uf::physics::impl::settings.manifoldsCache ); for ( auto* b : bodies ) { if ( b->isStatic ) continue; @@ -381,7 +341,7 @@ void uf::physics::impl::applyForce( pod::PhysicsBody& body, const pod::Vector3f& if ( body.isStatic ) return; ::wakeBody( body ); body.forceAccumulator += force; } -void 
uf::physics::impl::applyForceAtPoint( pod::PhysicsBody body, const pod::Vector3f& force, const pod::Vector3f& point ) { +void uf::physics::impl::applyForceAtPoint( pod::PhysicsBody& body, const pod::Vector3f& force, const pod::Vector3f& point ) { if ( body.isStatic ) return; ::wakeBody( body ); // linear force body.forceAccumulator += force; @@ -483,7 +443,7 @@ pod::PhysicsBody& uf::physics::impl::create( pod::World& world, uf::Object& obje body.collider.mesh.bvh = new pod::BVH; auto& bvh = *body.collider.mesh.bvh; - ::buildMeshBVH( bvh, mesh, ::meshBvhCapacity ); + ::buildMeshBVH( bvh, mesh, uf::physics::impl::settings.meshBvhCapacity ); body.bounds = ::computeAABB( body ); uf::physics::impl::updateInertia( body ); @@ -562,7 +522,7 @@ pod::RayQuery uf::physics::impl::rayCast( const pod::Ray& ray, const pod::World& static thread_local uf::stl::vector candidates; candidates.clear(); ::queryBVH( dynamicBvh, ray, candidates ); - if ( ::useSplitBvhs ) ::queryBVH( staticBvh, ray, candidates ); + if ( uf::physics::impl::settings.useSplitBvhs ) ::queryBVH( staticBvh, ray, candidates ); for ( auto i : candidates ) { auto* b = bodies[i]; diff --git a/engine/src/utils/math/physics/integration.inl b/engine/src/utils/math/physics/integration.inl index 9c31f009..2f49430b 100644 --- a/engine/src/utils/math/physics/integration.inl +++ b/engine/src/utils/math/physics/integration.inl @@ -24,11 +24,15 @@ namespace { void applyImpulseTo( pod::PhysicsBody& a, pod::PhysicsBody& b, const pod::Vector3f& rA, const pod::Vector3f& rB, const pod::Vector3f& impulse ) { if ( !a.isStatic ) { a.velocity -= impulse * a.inverseMass; - a.angularVelocity -= (uf::vector::cross(rA, impulse)) * a.inverseInertiaTensor; + //a.angularVelocity -= (uf::vector::cross(rA, impulse)) * a.inverseInertiaTensor; + pod::Matrix3f invIa = computeWorldInverseInertia( a ); + a.angularVelocity -= uf::matrix::multiply( invIa, uf::vector::cross(rA, impulse) ); } if ( !b.isStatic ) { b.velocity += impulse * b.inverseMass; 
- b.angularVelocity += (uf::vector::cross(rB, impulse)) * b.inverseInertiaTensor; + //b.angularVelocity += (uf::vector::cross(rB, impulse)) * b.inverseInertiaTensor; + pod::Matrix3f invIb = computeWorldInverseInertia( b ); + b.angularVelocity += uf::matrix::multiply( invIb, uf::vector::cross(rB, impulse) ); } } @@ -65,7 +69,7 @@ namespace { } bool generateContacts( pod::PhysicsBody& a, pod::PhysicsBody& b, pod::Manifold& manifold, float dt ) { - if ( ::useGjk ) return generateContactsGjk( a, b, manifold, dt ); + if ( uf::physics::impl::settings.useGjk ) return generateContactsGjk( a, b, manifold, dt ); ::bindManifold( a, b, manifold, dt ); #define CHECK_CONTACT( A, B, fun )\ @@ -111,7 +115,8 @@ namespace { void reduceContacts( pod::Manifold& manifold ) { if ( manifold.points.size() <= 4 ) return; - uf::stl::vector result; + static thread_local uf::stl::vector result; + result.clear(); result.reserve(4); for ( auto& c : manifold.points ) { @@ -200,7 +205,7 @@ namespace { // prune points that are too old for ( auto it = manifold.points.begin(); it != manifold.points.end(); ) { - if ( it->lifetime > ::manifoldCacheLifetime ) it = manifold.points.erase(it); + if ( it->lifetime > uf::physics::impl::settings.manifoldCacheLifetime ) it = manifold.points.erase(it); else ++it; } @@ -251,11 +256,11 @@ namespace { // baumgarte position correction void positionCorrection( pod::PhysicsBody& a, pod::PhysicsBody& b, const pod::Contact& contact ) { - if ( ::baumgarteCorrectionPercent <= 0 ) return; + if ( uf::physics::impl::settings.baumgarteCorrectionPercent <= 0 ) return; if ( a.isStatic && b.isStatic ) return; // penetration depth beyond slop - float penetration = std::max( contact.penetration - ::baumgarteCorrectionSlop, 0.0f ); + float penetration = std::max( contact.penetration - uf::physics::impl::settings.baumgarteCorrectionSlop, 0.0f ); if ( penetration <= 0.0f ) return; // compute correction magnitude @@ -265,7 +270,7 @@ namespace { if ( totalInvMass <= EPS(1e-8f) ) 
return; // apply correction vector - pod::Vector3f correction = contact.normal * (penetration / totalInvMass) * ::baumgarteCorrectionPercent; + pod::Vector3f correction = contact.normal * (penetration / totalInvMass) * uf::physics::impl::settings.baumgarteCorrectionPercent; if ( !a.isStatic ) a.transform->position -= correction * invMassA; if ( !b.isStatic ) b.transform->position += correction * invMassB; diff --git a/engine/src/utils/math/physics/plane.inl b/engine/src/utils/math/physics/plane.inl index 18779e7e..7f624663 100644 --- a/engine/src/utils/math/physics/plane.inl +++ b/engine/src/utils/math/physics/plane.inl @@ -37,7 +37,7 @@ namespace { if ( dist > r ) return false; float penetration = r - dist; - auto contact = center - normal * dist - normal * penetration; + auto contact = center - normal * r; manifold.points.emplace_back(pod::Contact{ contact, normal, penetration }); return true; diff --git a/engine/src/utils/math/physics/solvers.inl b/engine/src/utils/math/physics/solvers.inl index c8862a99..d6bd6e81 100644 --- a/engine/src/utils/math/physics/solvers.inl +++ b/engine/src/utils/math/physics/solvers.inl @@ -26,7 +26,7 @@ namespace { // normal impulse scalar float jn = -(1.0f + e) * velAlongNormal; jn /= invMassN; - if ( ::warmupSolver ) { + if ( uf::physics::impl::settings.warmupSolver ) { float jnOld = contact.accumulatedNormalImpulse; float jnNew = std::max(0.0f, jnOld + jn); float jnDelta = jnNew - jnOld; @@ -57,7 +57,7 @@ namespace { if ( std::fabs(jt) > jn * mu_s) jt = -jn * mu_d; // dynamic friction: resist sliding proportionally - if ( ::warmupSolver ) { + if ( uf::physics::impl::settings.warmupSolver ) { float maxFriction = mu_s * contact.accumulatedNormalImpulse; float jtOld = contact.accumulatedTangentImpulse; float jtNew = std::max(-maxFriction, std::min(jtOld + jt, maxFriction)); @@ -83,6 +83,9 @@ namespace { // precompute inverse masses float invMassA = ( a.isStatic ? 0.0f : a.inverseMass ); float invMassB = ( b.isStatic ? 
0.0f : b.inverseMass ); + + pod::Matrix3f invIa = computeWorldInverseInertia( a ); + pod::Matrix3f invIb = computeWorldInverseInertia( b ); auto pA = ::getPosition( a, true ); auto pB = ::getPosition( b, true ); @@ -90,12 +93,11 @@ namespace { for ( auto i = 0; i < N; i++ ) { pod::Vector3f rA_i = manifold.points[i].point - pA; pod::Vector3f rB_i = manifold.points[i].point - pB; - + pod::Vector3f n_i = manifold.points[i].normal; + for ( auto j = 0; j < N; j++ ) { pod::Vector3f rA_j = manifold.points[j].point - pA; pod::Vector3f rB_j = manifold.points[j].point - pB; - - pod::Vector3f n_i = manifold.points[i].normal; pod::Vector3f n_j = manifold.points[j].normal; float termLinear = (invMassA + invMassB) * uf::vector::dot(n_i, n_j); @@ -104,8 +106,6 @@ namespace { pod::Vector3f raXnj = uf::vector::cross(rA_j, n_j); pod::Vector3f rbXnj = uf::vector::cross(rB_j, n_j); - pod::Matrix3f invIa = computeWorldInverseInertia( a ); - pod::Matrix3f invIb = computeWorldInverseInertia( b ); pod::Vector3f Ia_raXnj = uf::matrix::multiply( invIa, raXnj ); pod::Vector3f Ib_rbXnj = uf::matrix::multiply( invIb, rbXnj ); @@ -126,7 +126,7 @@ namespace { for ( auto i = 0; i < N; i++ ) { float vRel = uf::vector::dot( relVelLinear, manifold.points[i].normal ); - float penetrationBias = std::max( manifold.points[i].penetration - ::baumgarteCorrectionSlop, 0.0f ) * ( ::baumgarteCorrectionPercent / dt ); + float penetrationBias = std::max( manifold.points[i].penetration - uf::physics::impl::settings.baumgarteCorrectionSlop, 0.0f ) * ( uf::physics::impl::settings.baumgarteCorrectionPercent / dt ); float cDot = vRel + penetrationBias; rhs[i] = (cDot < 0.0f) ? 
-cDot : 0.0f; @@ -141,7 +141,7 @@ namespace { float vRel = uf::vector::dot((vB - vA), contact.normal); // penetration bias with clamp - float penetrationBias = std::max(contact.penetration - ::baumgarteCorrectionSlop, 0.0f) * (::baumgarteCorrectionPercent / dt); + float penetrationBias = std::max(contact.penetration - uf::physics::impl::settings.baumgarteCorrectionSlop, 0.0f) * (uf::physics::impl::settings.baumgarteCorrectionPercent / dt); penetrationBias = std::min(penetrationBias, 2.0f / dt); // clamp float maxPenetrationRecovery = 2.0f; // limit to 2 units per second @@ -214,7 +214,7 @@ namespace { // restitution bias + baumgarte float e = std::min( a.material.restitution, b.material.restitution ); - float penetrationBias = std::max( c.penetration - ::baumgarteCorrectionSlop, 0.0f ) * (::baumgarteCorrectionPercent / dt); + float penetrationBias = std::max( c.penetration - uf::physics::impl::settings.baumgarteCorrectionSlop, 0.0f ) * (uf::physics::impl::settings.baumgarteCorrectionPercent / dt); cc.bias = (vn < -1.0f ? -e * vn : 0.0f) + penetrationBias; // effective mass (normal) @@ -232,6 +232,7 @@ namespace { cc.effectiveMassT = ( Kt > 0.0f ) ? 
( 1.0f / Kt ) : 0.0f; // warm start + #if 1 cc.accumulatedNormalImpulse = c.accumulatedNormalImpulse; cc.accumulatedTangentImpulse = c.accumulatedTangentImpulse; @@ -239,10 +240,11 @@ namespace { pod::Vector3f P = cc.normal * cc.accumulatedNormalImpulse + cc.tangent * cc.accumulatedTangentImpulse; ::applyImpulseTo(a, b, cc.rA, cc.rB, P); + #endif } // iterative PGS - for ( auto iter = 0; iter < ::solverIterations; iter++ ) { + for ( auto iter = 0; iter < uf::physics::impl::settings.solverIterations; iter++ ) { for ( auto i = 0; i < count; i++ ) { auto& cc = cache[i]; @@ -280,18 +282,18 @@ namespace { } void resolveManifold( pod::PhysicsBody& a, pod::PhysicsBody& b, pod::Manifold& manifold, float dt ) { - if ( ::blockContactSolver ) { + if ( uf::physics::impl::settings.blockContactSolver ) { if ( manifold.points.size() == 2 ) return ::block2x2Solver( a, b, manifold, dt ); if ( manifold.points.size() == 3 ) return ::block3x3Solver( a, b, manifold, dt ); if ( manifold.points.size() == 4 ) return ::block4x4Solver( a, b, manifold, dt ); } - if ( ::psgContactSolver ) return ::blockPGSSolver( a, b, manifold, dt ); + if ( uf::physics::impl::settings.psgContactSolver ) return ::blockPGSSolver( a, b, manifold, dt ); for ( auto& contact : manifold.points ) ::iterativeImpulseSolver( a, b, contact, dt ); } void solveContacts( uf::stl::vector& manifolds, float dt ) { - if ( ::warmupSolver ) for ( auto& manifold : manifolds ) ::warmupManifold( *manifold.a, *manifold.b, manifold, dt ); - for ( auto i = 0; i < ::solverIterations; ++i ) for ( auto& manifold : manifolds ) ::resolveManifold( *manifold.a, *manifold.b, manifold, dt ); + if ( uf::physics::impl::settings.warmupSolver ) for ( auto& manifold : manifolds ) ::warmupManifold( *manifold.a, *manifold.b, manifold, dt ); + for ( auto i = 0; i < uf::physics::impl::settings.solverIterations; ++i ) for ( auto& manifold : manifolds ) ::resolveManifold( *manifold.a, *manifold.b, manifold, dt ); } void solvePositions( uf::stl::vector& 
manifolds, float dt, uint32_t iterations = 2 ) { diff --git a/engine/src/utils/thread/thread.cpp b/engine/src/utils/thread/thread.cpp index 84a4937e..d3339c40 100644 --- a/engine/src/utils/thread/thread.cpp +++ b/engine/src/utils/thread/thread.cpp @@ -37,11 +37,6 @@ void uf::thread::tick( pod::Thread& thread ) { thread.timer.start(); while ( thread.running ) { - std::unique_lock lock(*thread.mutex); - thread.conditions.queued.wait(lock, [&]{ - return (!thread.container.empty() || !thread.queue.empty()) || !thread.running; - }); - uf::thread::process( thread ); if ( thread.limiter > 0 ) { @@ -57,7 +52,7 @@ void uf::thread::tick( pod::Thread& thread ) { pod::Thread& uf::thread::fetchWorker( const uf::stl::string& name ) { static int current = 0; - static int limit = uf::thread::workers; + int limit = uf::thread::workers; int tries = 0; while ( tries++ < limit ) { @@ -153,12 +148,25 @@ void uf::thread::queue( pod::Thread& thread, const pod::Thread::function_t& func if ( thread.mutex != NULL ) thread.mutex->lock(); thread.queue.emplace( function ); thread.conditions.queued.notify_one(); + thread.pending.fetch_add(1); if ( thread.mutex != NULL ) thread.mutex->unlock(); } -void uf::thread::process( pod::Thread& thread ) { if ( !uf::thread::has(uf::thread::uid(thread)) )return; //ops - while ( !thread.queue.empty() ) { - auto& function = thread.queue.front(); - if ( function ) +void uf::thread::process( pod::Thread& thread ) { if ( !uf::thread::has(uf::thread::uid(thread)) ) return; // ops + pod::Thread::queue_t local_queue; + pod::Thread::container_t local_container; + + { + std::unique_lock lock(*thread.mutex); + thread.conditions.queued.wait(lock, [&]{ + return (!thread.container.empty() || !thread.queue.empty()) || !thread.running; + }); + + if ( !thread.running ) return; + std::swap( local_queue, thread.queue ); + } + + while ( !local_queue.empty() ) { + auto& function = local_queue.front(); #if UF_EXCEPTIONS try { #endif @@ -168,10 +176,17 @@ void 
uf::thread::process( pod::Thread& thread ) { if ( !uf::thread::has(uf::thre UF_MSG_ERROR("Thread {} (UID: {}) caught exception: {}", thread.name, thread.uid, e.what()); } #endif - thread.queue.pop(); + + local_queue.pop(); + thread.pending.fetch_sub(1); } - for ( auto function : thread.container ) { - if ( function ) + + { + std::unique_lock lock(*thread.mutex); + local_container = thread.container; + } + + for ( auto& function : local_container ) { #if UF_EXCEPTIONS try { #endif @@ -182,7 +197,11 @@ void uf::thread::process( pod::Thread& thread ) { if ( !uf::thread::has(uf::thre } #endif } - thread.conditions.finished.notify_one(); + + { + std::lock_guard lock(*thread.mutex); + thread.conditions.finished.notify_all(); + } } void uf::thread::wait( pod::Thread& thread ) { if ( thread.mutex != NULL ) {