From 89ca3efb3edcc16346d1ce05bfb9e550c4da80c6 Mon Sep 17 00:00:00 2001 From: ecker Date: Mon, 4 May 2026 21:14:04 -0500 Subject: [PATCH] rewrote meshopt (again) to actually perform vertex optimizations, and fixed LOD generation), fixed LOD level calculation (because Vulkan's -Y ruined things), some tweaks and fixes to the memory pool / allocator (it will still segfault on termination because of how things are ordered) --- bin/data/config.json | 6 +- bin/data/entities/prop.json | 2 +- .../sourceengine/base_sourceengine.json | 8 +- .../scenes/sourceengine/mds_mcdonalds.json | 6 +- bin/data/shaders/common/structs.h | 4 +- bin/data/shaders/graph/cull/comp.glsl | 22 +- engine/inc/uf/macros.h | 6 + engine/inc/uf/utils/memory/allocator.h | 177 ++------- engine/inc/uf/utils/memory/pool.h | 10 +- engine/inc/uf/utils/memory/pool.inl | 4 +- engine/inc/uf/utils/mesh/mesh.h | 4 +- engine/src/engine/ext/ext.cpp | 19 +- engine/src/engine/ext/player/behavior.cpp | 2 +- engine/src/engine/graph/decode.cpp | 4 +- engine/src/engine/graph/encode.cpp | 2 + engine/src/engine/graph/graph.cpp | 31 +- engine/src/engine/scene/scene.cpp | 7 +- engine/src/ext/gltf/gltf.cpp | 11 +- engine/src/ext/meshopt/meshopt.cpp | 361 +++++++++++------- engine/src/ext/vulkan/graphic.cpp | 3 +- engine/src/ext/vulkan/rendermode.cpp | 3 +- engine/src/ext/xatlas/xatlas.cpp | 2 +- .../src/utils/math/physics/broadphase/bvh.cpp | 8 +- .../utils/math/physics/broadphase/island.cpp | 4 +- engine/src/utils/math/physics/impl.cpp | 6 +- engine/src/utils/math/physics/integration.cpp | 6 +- .../utils/math/physics/narrowphase/hull.cpp | 6 +- .../utils/math/physics/narrowphase/mesh.cpp | 18 +- engine/src/utils/memory/allocator.cpp | 43 ++- engine/src/utils/memory/pool.cpp | 26 +- engine/src/utils/thread/thread.cpp | 14 +- 31 files changed, 410 insertions(+), 415 deletions(-) diff --git a/bin/data/config.json b/bin/data/config.json index 8b32cd6c..077990bb 100644 --- a/bin/data/config.json +++ b/bin/data/config.json @@ -352,7 +352,7 @@ "enabled": true, // needs to be kept on for GC "subPools": true, "alignment": 64, - "override": false, + "override": true, "size": "512 MiB", "pools": { "entity": "128 MiB", @@ -363,11 +363,11 @@ "render modes": { "gui": true, "deferred": true }, "limiters": { "deltaTime": 5, - "framerate": 300 // "auto" // for some reason drops to 60 + "framerate": "auto" // "auto" // for some reason drops to 60 }, "threads": { "workers" : "auto", - "frame limiter": 0 // "auto" + "frame limiter": "auto" }, "debug": { "framerate": { diff --git a/bin/data/entities/prop.json b/bin/data/entities/prop.json index 5795599b..a4dd27e8 100644 --- a/bin/data/entities/prop.json +++ b/bin/data/entities/prop.json @@ -6,7 +6,7 @@ "metadata": { "holdable": true, "physics": { - "mass": 100, + "mass": 0, "inertia": false, "type": "bounding box" // "type": "mesh" diff --git a/bin/data/scenes/sourceengine/base_sourceengine.json b/bin/data/scenes/sourceengine/base_sourceengine.json index 8fb73c7e..a6f23b9f 100644 --- a/bin/data/scenes/sourceengine/base_sourceengine.json +++ b/bin/data/scenes/sourceengine/base_sourceengine.json @@ -4,7 +4,7 @@ "graph": { "renderer": { "separate": false }, "exporter": { - "optimize": "tagged" + "optimize": { "simplify": 0, "lods": true, "print": true } }, "baking": { "enabled": true }, "tags": { @@ -12,13 +12,11 @@ "worldspawn": { "physics": { "type": "mesh", "static": true, "mass": 0 }, "grid": { "size": [8,1,8], "epsilon": 0.001, "cleanup": true, "print": true, "clip": true }, - "optimize mesh": { "simplify": 0, "lods": true, "print": true }, "unwrap mesh": true }, "worldspawn_skybox": { "physics": { "type": "aabb", "static": true, "mass": 0 }, "grid": { "size": [8,1,8], "epsilon": 0.001, "cleanup": true, "print": true, "clip": true }, - "optimize mesh": { "simplify": 0, "lods": true, "print": true }, "unwrap mesh": true }, "info_player_spawn": { "action": "attach", "filename": "./player.json", "transform": { "orientation": [ 0, 1, 0, 0 ] } }, @@ -42,8 +40,8 @@ "/^prop_door_/": { "action": "load", "payload": { "import": "/door.json", "metadata": { "angle":-1.570795, "normal": [-1,0,0] } } }, "/^prop_static/": { /*"action": "load", "payload": { "import": "/prop.json", "metadata": { "physics": { "gravity": [ 0, 0, 0 ] } } }*/ }, "/^prop_dynamic/": { /*"action": "load", "payload": { "import": "/prop.json", "metadata": { "physics": { "gravity": [ 0, 0, 0 ] } } }*/ }, - "/^func_physbox/": { "action": "load", "payload": { "import": "/prop.json" }, "optimize mesh": { "simplify": 0, "lods": true, "print": true } }, - "/^prop_physics/": { "action": "load", "payload": { "import": "/prop.json" }, "optimize mesh": { "simplify": 0, "lods": true, "print": true } }, + "/^func_physbox/": { "action": "load", "payload": { "import": "/prop.json" } }, + "/^prop_physics/": { "action": "load", "payload": { "import": "/prop.json" } }, "/^tools\\/toolsnodraw/": { "material": { "base": [ 1.0, 1.0, 1.0, 0.0 ] } } } diff --git a/bin/data/scenes/sourceengine/mds_mcdonalds.json b/bin/data/scenes/sourceengine/mds_mcdonalds.json index faef9f93..f7c3e224 100644 --- a/bin/data/scenes/sourceengine/mds_mcdonalds.json +++ b/bin/data/scenes/sourceengine/mds_mcdonalds.json @@ -2,8 +2,8 @@ "import": "./base_sourceengine.json", "assets": [ // { "filename": "./models/mds_mcdonalds.glb" } - { "filename": "./models/mds_mcdonalds/graph.json" }, - { "filename": "/burger.json", "delay": 1 } + { "filename": "./models/mds_mcdonalds/graph.json" } + // { "filename": "/burger.json", "delay": 1 } ], "metadata": { "graph": { @@ -15,7 +15,7 @@ "func_door_rotating_5568": { "action": "load", "payload": { "import": "/door.json", "metadata": { "angle":-1.570795, "normal": [1,0,0] } } }, "func_door_rotating_5584": { "action": "load", "payload": { "import": "/door.json", "metadata": { "angle":-1.570795, "normal": [1,0,0] } } }, - "prop_physics_override_5813": { "action": "load", "payload": { "import": "/physics_prop.json" } }, + // "prop_physics_override_5813": { "action": "load", "payload": { "import": "/physics_prop.json" } }, // regex matches "/^prop_physics_[^o]/": { "action": "load", "payload": { "import": "/prop.json" } }, diff --git a/bin/data/shaders/common/structs.h b/bin/data/shaders/common/structs.h index a38437d0..143aedd4 100644 --- a/bin/data/shaders/common/structs.h +++ b/bin/data/shaders/common/structs.h @@ -84,7 +84,7 @@ struct DrawCommand { uint indices; // triangle count uint instances; // instance count uint indexID; // starting triangle position - int vertexID; // starting vertex position + uint vertexID; // starting vertex position uint instanceID; // starting instance position float padding1; // @@ -101,7 +101,9 @@ struct Bounds { struct LOD { uint indices; + uint vertexID; uint indexID; + uint vertices; }; struct LODMetadata { diff --git a/bin/data/shaders/graph/cull/comp.glsl b/bin/data/shaders/graph/cull/comp.glsl index 931f234f..a5ac8a25 100644 --- a/bin/data/shaders/graph/cull/comp.glsl +++ b/bin/data/shaders/graph/cull/comp.glsl @@ -182,23 +182,25 @@ void main() { #endif #if LODS if ( isVisible ) { - vec3 viewCenter = (camera.viewport[0].view * vec4(worldCenter, 1.0)).xyz; + vec3 viewCenter = (camera.viewport[0].view * vec4(worldCenter, 1.0)).xyz; + float dist = length(viewCenter); + float P11 = abs(camera.viewport[0].projection[1][1]); + float projectedSize = (worldRadius * P11) / max(dist, 0.001); - float P11 = camera.viewport[0].projection[1][1]; - - float screenRadius = (worldRadius * P11) / max(abs(viewCenter.z), 0.001); - - uint lodLevel = 0; - if ( screenRadius < 0.5 ) lodLevel = 1; - if ( screenRadius < 0.2 ) lodLevel = 2; - if ( screenRadius < 0.05 ) lodLevel = 3; - lodLevel = min(lodLevel, MAX_LODS - 1); + uint lodLevel = 0; + if ( projectedSize < 0.20 ) lodLevel = 1; + if ( projectedSize < 0.08 ) lodLevel = 2; + if ( projectedSize < 0.02 ) lodLevel = 3; + lodLevel = min(lodLevel, MAX_LODS - 1); + lodLevel = 3; LOD lod = lodMetadata[drawCommand.instanceID].levels[lodLevel]; if ( lod.indices > 0 ) { drawCommands[gID].indices = lod.indices; drawCommands[gID].indexID = lod.indexID; + drawCommands[gID].vertexID = lod.vertexID; + drawCommands[gID].vertices = lod.vertices; } } #endif diff --git a/engine/inc/uf/macros.h b/engine/inc/uf/macros.h index 84e3898f..2b7f5005 100644 --- a/engine/inc/uf/macros.h +++ b/engine/inc/uf/macros.h @@ -143,6 +143,12 @@ #define UF_MSG(...) {} #endif +#if 1 + #define STATIC_THREAD_LOCAL(T, name) T name; +#else + #define STATIC_THREAD_LOCAL(T, name) static thread_local T name; name.clear(); +#endif + #ifndef UF_DEBUG #define UF_DEBUG 1 #endif diff --git a/engine/inc/uf/utils/memory/allocator.h b/engine/inc/uf/utils/memory/allocator.h index d3f827c8..52f615aa 100644 --- a/engine/inc/uf/utils/memory/allocator.h +++ b/engine/inc/uf/utils/memory/allocator.h @@ -1,9 +1,9 @@ #pragma once #include +#include #include -// #include -// #include +#include #define UF_MEMORYPOOL_USE_STL_ALLOCATOR 1 @@ -11,178 +11,59 @@ namespace uf { namespace allocator { extern UF_API bool override; - void* UF_API allocate( size_t n ); - void UF_API deallocate( void* p, size_t = 0 ); - + void* UF_API allocate( size_t n); + void UF_API deallocate( void* p, size_t n = 0 ); + void* UF_API malloc_m( size_t n ); - void UF_API free_m( void* p, size_t = 0 ); - - /* - template - struct Use : std::true_type {}; - - template - struct Use> : std::false_type {}; - */ + void UF_API free_m( void* p, size_t n = 0 ); } - template struct Allocator { typedef T value_type; - Allocator () = default; - template constexpr Allocator (const Allocator &) noexcept {} + Allocator() = default; + template constexpr Allocator( const Allocator& ) noexcept {} - T* allocate(size_t n) noexcept { - // n *= sizeof(T); - // if ( !uf::allocator::Use::value ) return static_cast( uf::allocator::malloc_m( n ) ); - // return static_cast( uf::allocator::allocate( n ) ); - return static_cast( uf::allocator::allocate( n * sizeof(T) ) ); + T* allocate( size_t n ) { + void* p = uf::allocator::allocate( n * sizeof(T) ); + if ( !p ) throw std::bad_alloc(); + return static_cast(p); } - void deallocate(T* p, size_t n) noexcept { - // if ( !uf::allocator::Use::value ) return uf::allocator::free_m(p); - uf::allocator::deallocate( p, n ); + + void deallocate( T* p, size_t n ) noexcept { + uf::allocator::deallocate( p, n * sizeof(T) ); } }; template - bool operator==(const uf::Allocator &, const uf::Allocator &) { return true; } + bool operator==( const uf::Allocator&, const uf::Allocator& ) { return true; } template - bool operator!=(const uf::Allocator &, const uf::Allocator &) { return false; } + bool operator!=( const uf::Allocator&, const uf::Allocator& ) { return false; } + // will never ever use the pool template struct Mallocator { typedef T value_type; - Mallocator () = default; - template constexpr Mallocator (const Mallocator &) noexcept {} + Mallocator() = default; + template constexpr Mallocator( const Mallocator& ) noexcept {} - T* allocate(size_t n) noexcept { - // n *= sizeof(T); - return static_cast( uf::allocator::malloc_m( n * sizeof(T) ) ); + T* allocate( size_t n ) { + void* p = uf::allocator::malloc_m( n * sizeof(T) ); + if ( !p ) throw std::bad_alloc(); + return static_cast( p ); } + void deallocate(T* p, size_t n) noexcept { - uf::allocator::free_m( p, n ); + uf::allocator::free_m( p, n * sizeof(T) ); } }; - - template - bool operator==(const uf::Mallocator &, const uf::Mallocator &) { return true; } template - bool operator!=(const uf::Mallocator &, const uf::Mallocator &) { return false; } -} - - -/* - template - class Allocator { - public: - using value_type = T; - -// template struct rebind {typedef Allocator other;}; -// using pointer = value_type*; -// using const_pointer = typename std::pointer_traits::template rebind; -// using void_pointer = typename std::pointer_traits::template rebind; -// using const_void_pointer = typename std::pointer_traits::template rebind; -// using difference_type = typename std::pointer_traits::difference_type; -// using size_type = std::make_unsigned_t; - - Allocator() noexcept {} // not required, unless used - template Allocator(Allocator const&) noexcept {} - - value_type* allocate( size_t n ); - void deallocate( value_type* p, size_t = 0 ) noexcept; - -// value_type* -// allocate(size_t n, const_void_pointer) { -// return allocate(n); -// } - -// template void construct(U* p, Args&& ...args) { -// ::new(p) U(std::forward(args)...); -// } - -// template void destroy(U* p) noexcept { -// p->~U(); -// } - -// size_t max_size() const noexcept { return std::numeric_limits::max(); } -// Allocator select_on_container_copy_construction() const { return *this; } - -// using propagate_on_container_copy_assignment = std::false_type; -// using propagate_on_container_move_assignment = std::false_type; -// using propagate_on_container_swap = std::false_type; -// using is_always_equal = std::is_empty; - - }; + bool operator==(const uf::Mallocator&, const uf::Mallocator&) { return true; } template - bool operator==(uf::Allocator const&, uf::Allocator const&) noexcept { - return true; - } - - template - bool operator!=(uf::Allocator const& x, uf::Allocator const& y) noexcept { - return !(x == y); - } -} - -namespace uf { - template - class Mallocator { - public: - using value_type = T; - -// template struct rebind {typedef Mallocator other;}; -// using pointer = value_type*; -// using const_pointer = typename std::pointer_traits::template rebind; -// using void_pointer = typename std::pointer_traits::template rebind; -// using const_void_pointer = typename std::pointer_traits::template rebind; -// using difference_type = typename std::pointer_traits::difference_type; -// using size_type = std::make_unsigned_t; - - Mallocator() noexcept {} // not required, unless used - template Mallocator(Mallocator const&) noexcept {} - - value_type* allocate( size_t n ); - void deallocate( value_type* p, size_t = 0 ) noexcept; - -// value_type* -// allocate(size_t n, const_void_pointer) { -// return allocate(n); -// } - -// template void construct(U* p, Args&& ...args) { -// ::new(p) U(std::forward(args)...); -// } - -// template void destroy(U* p) noexcept { -// p->~U(); -// } - -// size_t max_size() const noexcept { return std::numeric_limits::max(); } -// Mallocator select_on_container_copy_construction() const { return *this; } - -// using propagate_on_container_copy_assignment = std::false_type; -// using propagate_on_container_move_assignment = std::false_type; -// using propagate_on_container_swap = std::false_type; -// using is_always_equal = std::is_empty; - - }; - - template - bool operator==(uf::Mallocator const&, uf::Mallocator const&) noexcept { - return true; - } - - template - bool operator!=(uf::Mallocator const& x, uf::Mallocator const& y) noexcept { - return !(x == y); - } -} - -#include "allocator.inl" -*/ \ No newline at end of file + bool operator!=(const uf::Mallocator&, const uf::Mallocator&) { return false; } +} \ No newline at end of file diff --git a/engine/inc/uf/utils/memory/pool.h b/engine/inc/uf/utils/memory/pool.h index 582f68a1..5a91fef3 100644 --- a/engine/inc/uf/utils/memory/pool.h +++ b/engine/inc/uf/utils/memory/pool.h @@ -60,11 +60,11 @@ namespace pod { } buddy; } state; - typedef std::vector> allocations_t; #if UF_MEMORYPOOL_MUTEX std::mutex mutex; #endif #if UF_MEMORYPOOL_STORE_ORPHANS + typedef std::vector> allocations_t; allocations_t orphaned; #endif }; @@ -88,11 +88,11 @@ namespace uf { pod::Allocation UF_API allocate( pod::MemoryPool&, size_t, size_t alignment = uf::memoryPool::alignment ); void* UF_API alloc( pod::MemoryPool&, size_t, size_t alignment = uf::memoryPool::alignment ); - pod::Allocation& UF_API fetch( pod::MemoryPool&, void*, size_t = 0 ); + // pod::Allocation& UF_API fetch( pod::MemoryPool&, void*, size_t = 0 ); bool UF_API exists( pod::MemoryPool&, void*, size_t = 0 ); bool UF_API free( pod::MemoryPool&, void*, size_t = 0 ); - const pod::MemoryPool::allocations_t& UF_API allocations( const pod::MemoryPool& ); + // const pod::MemoryPool::allocations_t& UF_API allocations( const pod::MemoryPool& ); template T& alloc( pod::MemoryPool&, const T& = T()/*, size_t alignment = uf::memoryPool::alignment*/ ); template pod::Allocation allocate( pod::MemoryPool&, const T& = T()/*, size_t alignment = uf::memoryPool::alignment*/ ); @@ -120,11 +120,11 @@ namespace uf { // inline void* alloc( size_t size, void* data = NULL/*, size_t alignment = uf::memoryPool::alignment*/ ); inline pod::Allocation allocate( size_t size/*, size_t alignment = uf::memoryPool::alignment*/ ); inline void* alloc( size_t size/*, size_t alignment = uf::memoryPool::alignment*/ ); - inline pod::Allocation& fetch( void* data, size_t size = 0 ); + // inline pod::Allocation& fetch( void* data, size_t size = 0 ); inline bool exists( void* data, size_t size = 0 ); inline bool free( void* data, size_t size ); - inline const pod::MemoryPool::allocations_t& allocations() const; + // inline const pod::MemoryPool::allocations_t& allocations() const; inline pod::MemoryPool& data(); inline const pod::MemoryPool& data() const; diff --git a/engine/inc/uf/utils/memory/pool.inl b/engine/inc/uf/utils/memory/pool.inl index 186d96df..8fb58e1e 100644 --- a/engine/inc/uf/utils/memory/pool.inl +++ b/engine/inc/uf/utils/memory/pool.inl @@ -51,11 +51,11 @@ void uf::MemoryPool::destroy() { return uf::memoryPool::destroy( m_pod ); } //void* uf::MemoryPool::alloc( size_t size, void* data/*, size_t alignment*/ ) { return uf::memoryPool::alloc( m_pod, data, size/*, alignment*/ ); } pod::Allocation uf::MemoryPool::allocate( size_t size/*, size_t alignment*/ ) { return uf::memoryPool::allocate( m_pod, size/*, alignment*/ ); } void* uf::MemoryPool::alloc( size_t size/*, size_t alignment*/ ) { return uf::memoryPool::alloc( m_pod, size/*, alignment*/ ); } -pod::Allocation& uf::MemoryPool::fetch( void* data, size_t size ) { return uf::memoryPool::fetch( m_pod, data, size ); } +//pod::Allocation& uf::MemoryPool::fetch( void* data, size_t size ) { return uf::memoryPool::fetch( m_pod, data, size ); } bool uf::MemoryPool::exists( void* data, size_t size ) { return uf::memoryPool::exists( m_pod, data, size ); } bool uf::MemoryPool::free( void* data, size_t size ) { return uf::memoryPool::free( m_pod, data, size ); } -const pod::MemoryPool::allocations_t& uf::MemoryPool::allocations() const { return uf::memoryPool::allocations( m_pod ); } +//const pod::MemoryPool::allocations_t& uf::MemoryPool::allocations() const { return uf::memoryPool::allocations( m_pod ); } inline pod::MemoryPool& uf::MemoryPool::data() { return m_pod; } inline const pod::MemoryPool& uf::MemoryPool::data() const { return m_pod; } diff --git a/engine/inc/uf/utils/mesh/mesh.h b/engine/inc/uf/utils/mesh/mesh.h index 203c3239..8bc984c6 100644 --- a/engine/inc/uf/utils/mesh/mesh.h +++ b/engine/inc/uf/utils/mesh/mesh.h @@ -65,7 +65,7 @@ namespace pod { alignas(4) uint32_t indices = 0; // triangle count alignas(4) uint32_t instances = 0; // instance count alignas(4) uint32_t indexID = 0; // starting triangle position - alignas(4) int32_t vertexID = 0; // starting vertex position + alignas(4) uint32_t vertexID = 0; // starting vertex position alignas(4) uint32_t instanceID = 0; // starting instance position // extra data for padding alignas(4) uint32_t auxID = 0; // used for storing which grid this belongs to when slicing, otherwise unused @@ -77,7 +77,9 @@ namespace pod { struct UF_API LODMetadata { struct Level { alignas(4) uint32_t indices = 0; + alignas(4) uint32_t vertexID = 0; alignas(4) uint32_t indexID = 0; + alignas(4) uint32_t vertices = 0; } levels[4]; }; diff --git a/engine/src/engine/ext/ext.cpp b/engine/src/engine/ext/ext.cpp index 9faaf3de..120551a0 100644 --- a/engine/src/engine/ext/ext.cpp +++ b/engine/src/engine/ext/ext.cpp @@ -390,18 +390,18 @@ void UF_API uf::initialize() { if ( size <= 0 || uf::memoryPool::subPool ) { { size_t size = deduceSize( configMemoryPoolJson["pools"]["component"] ); - UF_MSG_DEBUG("Requesting {} bytes for component memory pool: {}", (int) size, (void*) &uf::component::memoryPool); uf::component::memoryPool.initialize( size ); + UF_MSG_DEBUG("Requested {} bytes for component memory pool: {}", (int) size, uf::component::memoryPool.data().memory); } { size_t size = deduceSize( configMemoryPoolJson["pools"]["userdata"] ); - UF_MSG_DEBUG("Requesting {} bytes for userdata memory pool: {}", (int) size, (void*) &uf::userdata::memoryPool); uf::userdata::memoryPool.initialize( size ); + UF_MSG_DEBUG("Requested {} bytes for userdata memory pool: {}", (int) size, uf::userdata::memoryPool.data().memory); } { size_t size = deduceSize( configMemoryPoolJson["pools"]["entity"] ); - UF_MSG_DEBUG("Requesting {} bytes for entity memory pool: {}", (int) size, (void*) &uf::Entity::memoryPool); uf::Entity::memoryPool.initialize( size, pod::MemoryPool::Strategy::POOL, sizeof(uf::Entity) ); + UF_MSG_DEBUG("Requested {} bytes for entity memory pool: {}", (int) size, uf::Entity::memoryPool.data().memory); } } uf::allocator::override = configMemoryPoolJson["override"].as( uf::allocator::override ); @@ -873,7 +873,7 @@ void UF_API uf::tick() { #if UF_ENV_DREAMCAST DC_STATS(); #endif - #if UF_THREAD_METRICS + #if 0 && UF_THREAD_METRICS auto metrics = uf::thread::collectStats(); for ( auto& [ name, stats ] : metrics ) UF_MSG_DEBUG("Thread {}: active={}, idle={}, total={}, tasks={}", name, std::get<0>(stats), std::get<1>(stats), std::get<2>(stats), std::get<3>(stats) ); #endif @@ -993,9 +993,6 @@ void UF_API uf::terminate() { { uf::scene::destroy(); } - /* Kill physics */ { - // uf::physics::terminate(); - } /* Garbage collection */ if ( /*global*/::config.engine.gc.enabled ) { size_t collected = uf::instantiator::collect( /*global*/::config.engine.gc.mode ); if ( collected > 0 ) { @@ -1018,6 +1015,14 @@ void UF_API uf::terminate() { } #endif + /* Destroy memory pools */ { + uf::component::memoryPool.destroy(); + uf::userdata::memoryPool.destroy(); + uf::Entity::memoryPool.destroy(); + + uf::memoryPool::global.destroy(); // should probably leave this to be statically destructed + } + /* Print system stats */ { /*global*/::times.total.time = /*global*/::times.sys.elapsed().asDouble(); UF_MSG_DEBUG("System: Total Time: {} | Total Frames: {} | Average FPS: {}", /*global*/::times.total.time, /*global*/::times.total.frames, /*global*/::times.total.frames / /*global*/::times.total.time); diff --git a/engine/src/engine/ext/player/behavior.cpp b/engine/src/engine/ext/player/behavior.cpp index c8eb78c3..e942ebf2 100644 --- a/engine/src/engine/ext/player/behavior.cpp +++ b/engine/src/engine/ext/player/behavior.cpp @@ -447,7 +447,7 @@ void ext::PlayerBehavior::tick( uf::Object& self ) { if ( stats.walking ) { float factor = stats.floored ? 1.0f : speed.air; if ( stats.noclipped ) { - physicsBody.velocity += target * speed.move * ONE_OVER_SIXTY; + physicsBody.velocity += target * speed.move * 50 * ONE_OVER_SIXTY; } else { physicsBody.velocity += target * std::clamp( speed.move * factor - uf::vector::dot( physicsBody.velocity, target ), 0.0f, speed.move * 10 * ONE_OVER_SIXTY /*uf::physics::time::delta*/ ); } diff --git a/engine/src/engine/graph/decode.cpp b/engine/src/engine/graph/decode.cpp index 9bb25d2f..b78a8ec7 100644 --- a/engine/src/engine/graph/decode.cpp +++ b/engine/src/engine/graph/decode.cpp @@ -14,7 +14,7 @@ #if UF_USE_OPENGL #define UF_GRAPH_LOAD_MULTITHREAD 0 #else - #define UF_GRAPH_LOAD_MULTITHREAD 0 + #define UF_GRAPH_LOAD_MULTITHREAD 1 #endif #define UF_GRAPH_EXTENDED 1 @@ -190,6 +190,8 @@ namespace { ext::json::forEach( json, [&]( size_t i, ext::json::Value& value ){ lodMetadata.levels[i].indices = value["indices"].as( lodMetadata.levels[i].indices ); lodMetadata.levels[i].indexID = value["indexID"].as( lodMetadata.levels[i].indexID ); + lodMetadata.levels[i].vertices = value["vertices"].as( lodMetadata.levels[i].vertices ); + lodMetadata.levels[i].vertexID = value["vertexID"].as( lodMetadata.levels[i].vertexID ); }); return lodMetadata; } diff --git a/engine/src/engine/graph/encode.cpp b/engine/src/engine/graph/encode.cpp index 36513fa5..be626e01 100644 --- a/engine/src/engine/graph/encode.cpp +++ b/engine/src/engine/graph/encode.cpp @@ -142,6 +142,8 @@ namespace { auto& value = json.emplace_back(); value["indices"] = lodMetadata.levels[i].indices; value["indexID"] = lodMetadata.levels[i].indexID; + value["vertexID"] = lodMetadata.levels[i].vertexID; + value["vertices"] = lodMetadata.levels[i].vertices; } return json; } diff --git a/engine/src/engine/graph/graph.cpp b/engine/src/engine/graph/graph.cpp index d1e6e706..dc445a3e 100644 --- a/engine/src/engine/graph/graph.cpp +++ b/engine/src/engine/graph/graph.cpp @@ -23,11 +23,7 @@ #define UF_DEBUG_TIMER_MULTITRACE_END(...) #endif -#if UF_USE_OPENGL - #define UF_GRAPH_SPARSE_READ_MESH 1 -#else - #define UF_GRAPH_SPARSE_READ_MESH 1 -#endif +#define UF_GRAPH_SPARSE_READ_MESH 1 #define UF_GRAPH_EXTENDED 1 namespace { @@ -1401,21 +1397,19 @@ void uf::graph::tick( uf::Object& object ) { bool uf::graph::tick( pod::Graph::Storage& storage ) { bool rebuild = false; - static thread_local uf::stl::vector instances; - static thread_local uf::stl::vector instanceAddresses; - static thread_local uf::stl::vector lodMetadata; - static thread_local uf::stl::vector joints; - static thread_local uf::stl::vector objects; - static thread_local uf::stl::vector materials; - static thread_local uf::stl::vector textures; - static thread_local uf::stl::vector drawCommands; + STATIC_THREAD_LOCAL(uf::stl::vector, instances); + STATIC_THREAD_LOCAL(uf::stl::vector, instanceAddresses); + STATIC_THREAD_LOCAL(uf::stl::vector, lodMetadata); + STATIC_THREAD_LOCAL(uf::stl::vector, joints); + STATIC_THREAD_LOCAL(uf::stl::vector, objects); + STATIC_THREAD_LOCAL(uf::stl::vector, materials); + STATIC_THREAD_LOCAL(uf::stl::vector, textures); + STATIC_THREAD_LOCAL(uf::stl::vector, drawCommands); - joints.clear(); for ( auto& key : storage.joints.keys ) { joints.insert( joints.end(), storage.joints.map[key].begin(), storage.joints.map[key].end() ); } - objects.clear(); for ( auto& key : storage.objects.keys ) { auto& entity = *storage.entities.map[key]; auto& object = storage.objects.map[key]; @@ -1437,10 +1431,6 @@ bool uf::graph::tick( pod::Graph::Storage& storage ) { rebuild = storage.buffers.object.update( (const void*) objects.data(), objects.size() * sizeof(pod::Instance::Object) ) || rebuild; if ( ::newGraphAdded ) { - drawCommands.clear(); - instances.clear(); - lodMetadata.clear(); - for ( auto& key : storage.primitives.keys ) { for ( auto& primitive : storage.primitives[key] ) { drawCommands.emplace_back( primitive.drawCommand ); @@ -1449,15 +1439,12 @@ bool uf::graph::tick( pod::Graph::Storage& storage ) { } } - instanceAddresses.clear(); for ( auto& key : storage.instanceAddresses.keys ) { instanceAddresses.insert( instanceAddresses.end(), storage.instanceAddresses.map[key].begin(), storage.instanceAddresses.map[key].end() ); } - textures.clear(); for ( auto& key : storage.textures.keys ) textures.emplace_back( storage.textures.map[key] ); - materials.clear(); for ( auto& key : storage.materials.keys ) materials.emplace_back( storage.materials.map[key] ); rebuild = storage.buffers.instance.update( (const void*) instances.data(), instances.size() * sizeof(pod::Instance) ) || rebuild; diff --git a/engine/src/engine/scene/scene.cpp b/engine/src/engine/scene/scene.cpp index 66fee98a..3bd0d9f4 100644 --- a/engine/src/engine/scene/scene.cpp +++ b/engine/src/engine/scene/scene.cpp @@ -208,15 +208,14 @@ void uf::scene::unloadScene() { uf::Scene* current = uf::scene::scenes.back(); current->queueDeletion(); - // destroy phyiscs state + // destroy graph if ( current->hasComponent() ) { uf::graph::destroy( current->getComponent() ); } - #if 0 - if ( current->hasComponent() ) { + // destroy physics state + if ( current->hasComponent() ) { uf::physics::destroy( *current ); } - #endif // mark rendermodes as disabled immediately auto graph = current->getGraph(true); diff --git a/engine/src/ext/gltf/gltf.cpp b/engine/src/ext/gltf/gltf.cpp index 8f8e4962..80a82df7 100644 --- a/engine/src/ext/gltf/gltf.cpp +++ b/engine/src/ext/gltf/gltf.cpp @@ -296,7 +296,7 @@ void ext::gltf::load( pod::Graph& graph, const uf::stl::string& filename, const meshgrid.metadata = value["grid"]; }); - #if UF_USE_MESHOPT + #if 0 && UF_USE_MESHOPT // cleanup if blender's exporter is poopy if ( graph.metadata["exporter"]["optimize"].as(false) || graph.metadata["exporter"]["optimize"].as("") == "tagged" ) { if ( graph.metadata["exporter"]["optimize"].as("") == "tagged" ) { @@ -518,7 +518,7 @@ void ext::gltf::load( pod::Graph& graph, const uf::stl::string& filename, const #endif #if UF_USE_MESHOPT // cleanup if blender's exporter is poopy - if ( graph.metadata["exporter"]["optimize"].as(false) || graph.metadata["exporter"]["optimize"].as("") == "tagged" ) { + if ( graph.metadata["exporter"]["optimize"].as(false) || graph.metadata["exporter"]["optimize"].as("") == "tagged" || ext::json::isObject( graph.metadata["exporter"]["optimize"] ) ) { UF_MSG_DEBUG( "Optimizing meshes..." ); for ( auto& keyName : graph.meshes ) { size_t level = SIZE_MAX; @@ -544,6 +544,11 @@ void ext::gltf::load( pod::Graph& graph, const uf::stl::string& filename, const }); if ( !should ) continue; + } else if ( ext::json::isObject( graph.metadata["exporter"]["optimize"] ) ) { + level = graph.metadata["exporter"]["optimize"]["level"].as( level ); + simplify = graph.metadata["exporter"]["optimize"]["simplify"].as( simplify ); + print = graph.metadata["exporter"]["optimize"]["print"].as( print ); + lods = graph.metadata["exporter"]["optimize"]["lods"].as( lods ); } auto& mesh = storage.meshes[keyName]; @@ -559,7 +564,7 @@ void ext::gltf::load( pod::Graph& graph, const uf::stl::string& filename, const } else { UF_MSG_DEBUG("Generated {} LODs: {}", factors.size() - 1, keyName); auto& primitives = storage.primitives[keyName]; - UF_ASSERT( primitives.size() == lodMetadata.size() ); + UF_ASSERT( primitives.size() == lodMetadata.size() ); for ( auto i = 0; i < primitives.size(); ++i ) primitives[i].lod = lodMetadata[i]; } } diff --git a/engine/src/ext/meshopt/meshopt.cpp b/engine/src/ext/meshopt/meshopt.cpp index f0acbf01..fbddc9c1 100644 --- a/engine/src/ext/meshopt/meshopt.cpp +++ b/engine/src/ext/meshopt/meshopt.cpp @@ -1,102 +1,162 @@ #include + #if UF_USE_MESHOPT #include #include +namespace { + uint32_t readIndex(const uint8_t* ptr, size_t index, size_t size) { + switch (size) { + case 1: return ((const uint8_t*)ptr)[index]; + case 2: return ((const uint16_t*)ptr)[index]; + case 4: return ((const uint32_t*)ptr)[index]; + default: return 0; + } + } + void writeIndex(uint8_t* ptr, size_t index, size_t size, uint32_t value) { + switch (size) { + case 1: ((uint8_t*)ptr)[index] = (uint8_t)value; break; + case 2: ((uint16_t*)ptr)[index] = (uint16_t)value; break; + case 4: ((uint32_t*)ptr)[index] = (uint32_t)value; break; + } + } +} + bool ext::meshopt::optimize( uf::Mesh& mesh, float simplify, size_t o, bool verbose ) { if ( mesh.isInterleaved() ) { - UF_MSG_ERROR("Optimization of interleaved meshes is currently not supported. Consider optimizing on meshlets."); + UF_MSG_ERROR("Optimization of interleaved meshes is currently not supported."); return false; } mesh.updateDescriptor(); const auto& views = mesh.buffer_views; - if ( views.empty() ) { - UF_MSG_ERROR("No buffer views found. Cannot optimize per-submesh."); - return false; - } + if ( views.empty() ) return false; - uf::stl::vector optIndices; pod::DrawCommand* drawCommands = mesh.indirect.count > 0 ? (pod::DrawCommand*) mesh.getBuffer(mesh.indirect).data() : nullptr; + const uint8_t* srcIndexData = mesh.index.count > 0 ? mesh.getBuffer(mesh.index).data() : nullptr; + + uf::stl::vector outIndices; + uf::stl::vector> outVertices(mesh.vertex.attributes.size()); + + uf::Mesh::Attribute positionAttribute; + for ( auto& attr : mesh.vertex.attributes ) if ( attr.descriptor.name == "position" ) positionAttribute = attr; for ( size_t viewIdx = 0; viewIdx < views.size(); ++viewIdx ) { const auto& view = views[viewIdx]; - auto& indicesView = view["index"]; - auto& positionsView = view["position"]; + uint32_t cmdIdx = view.indirectIndex; - if ( !indicesView.valid() || !positionsView.valid() ) continue; + uint32_t srcVertexOffset = view.vertex.first; + uint32_t srcVertexCount = view.vertex.count; + uint32_t srcIndexOffset = view.index.first; + uint32_t srcIndexCount = view.index.count; - size_t indicesCount = view.index.count; - size_t vertexCount = view.vertex.count; + if ( srcIndexCount == 0 ) continue; - uf::stl::vector submeshIndices(indicesCount); - for ( size_t i = 0; i < indicesCount; ++i ) { - size_t global_i = view.index.first + i; - switch ( indicesView.attribute.descriptor.size ) { - case 1: submeshIndices[i] = indicesView.get(global_i)[0]; break; - case 2: submeshIndices[i] = indicesView.get(global_i)[0]; break; - case 4: submeshIndices[i] = indicesView.get(global_i)[0]; break; + // retrieve indices + uf::stl::vector localIndices(srcIndexCount); + if ( srcIndexData ) { + for ( size_t i = 0; i < srcIndexCount; ++i ) { + localIndices[i] = readIndex(srcIndexData, srcIndexOffset + i, mesh.index.size); } + } else { + for ( size_t i = 0; i < srcIndexCount; ++i ) localIndices[i] = i; } - meshopt_optimizeVertexCache(&submeshIndices[0], &submeshIndices[0], indicesCount, mesh.vertex.count); + // setup streams + uf::stl::vector streams; + for ( auto& attr : mesh.vertex.attributes ) { + const uint8_t* basePtr = (const uint8_t*)attr.pointer + srcVertexOffset * attr.stride; + streams.emplace_back({ basePtr, attr.descriptor.size, attr.stride }); + } - meshopt_optimizeOverdraw( - &submeshIndices[0], - &submeshIndices[0], - indicesCount, - (const float*) positionsView.data(), - mesh.vertex.count, - positionsView.stride(), - 1.05f + // deduplicate vertices + uf::stl::vector remap(srcVertexCount); + size_t uniqueVertices = meshopt_generateVertexRemapMulti( + remap.data(), localIndices.data(), srcIndexCount, + srcVertexCount, streams.data(), streams.size() ); + meshopt_remapIndexBuffer(localIndices.data(), localIndices.data(), srcIndexCount, remap.data()); + // copy position data + uf::stl::vector tempPositions(uniqueVertices * positionAttribute.stride); + const uint8_t* srcPositions = (const uint8_t*)positionAttribute.pointer + srcVertexOffset * positionAttribute.stride; + meshopt_remapVertexBuffer(tempPositions.data(), srcPositions, srcVertexCount, positionAttribute.stride, remap.data()); + + // optimize cache + overdray + meshopt_optimizeVertexCache(localIndices.data(), localIndices.data(), srcIndexCount, uniqueVertices); + meshopt_optimizeOverdraw(localIndices.data(), localIndices.data(), srcIndexCount, (const float*)tempPositions.data(), uniqueVertices, positionAttribute.stride, 1.05f); + + // simplify + size_t optimizedIndexCount = srcIndexCount; if ( 0.0f < simplify && simplify < 1.0f ) { - uf::stl::vector indicesSimplified(indicesCount); - - float targetError = FLT_MAX; // 1e-2f / simplify; + uf::stl::vector simplified(srcIndexCount); + float targetError = 1e-2f / simplify; float realError = 0.0f; - size_t realIndices = meshopt_simplify( - &indicesSimplified[0], - &submeshIndices[0], - indicesCount, - (const float*) positionsView.data(), - mesh.vertex.count, - positionsView.stride(), - indicesCount * simplify, - targetError, - 0, &realError + optimizedIndexCount = meshopt_simplify( + simplified.data(), localIndices.data(), srcIndexCount, + (const float*)tempPositions.data(), uniqueVertices, positionAttribute.stride, + srcIndexCount * simplify, targetError, meshopt_SimplifyLockBorder, &realError ); - if ( verbose ) { - UF_MSG_DEBUG("[View {} Simplified] indices: {} -> {} | error: {} -> {}", viewIdx, indicesCount, realIndices, targetError, realError); - } - - indicesCount = realIndices; - submeshIndices.swap(indicesSimplified); - submeshIndices.resize(indicesCount); + if ( verbose ) UF_MSG_DEBUG("[View {}] Simplified: {} -> {}", viewIdx, srcIndexCount, optimizedIndexCount); + localIndices.swap(simplified); + localIndices.resize(optimizedIndexCount); } - size_t newIndexStart = optIndices.size(); - optIndices.insert(optIndices.end(), submeshIndices.begin(), submeshIndices.end()); + // optimize for vertex fetch + uf::stl::vector fetchRemap(uniqueVertices); + size_t finalVertices = meshopt_optimizeVertexFetchRemap(fetchRemap.data(), localIndices.data(), optimizedIndexCount, uniqueVertices); + meshopt_remapIndexBuffer(localIndices.data(), localIndices.data(), optimizedIndexCount, fetchRemap.data()); + // store to output buffer + uint32_t outVertexOffset = outVertices[0].size() / mesh.vertex.attributes[0].stride; + uint32_t outIndexOffset = outIndices.size(); + + for ( size_t i = 0; i < localIndices.size(); ++i ) outIndices.emplace_back( localIndices[i] ); + + // remap buffers + for ( size_t a = 0; a < mesh.vertex.attributes.size(); ++a ) { + auto& attr = mesh.vertex.attributes[a]; + const uint8_t* basePtr = (const uint8_t*) attr.pointer + srcVertexOffset * attr.stride; + + // double remap: source -> unique -> final + uf::stl::vector tempBuf(uniqueVertices * attr.stride); + meshopt_remapVertexBuffer(tempBuf.data(), basePtr, srcVertexCount, attr.stride, remap.data()); + + uf::stl::vector finalBuf(finalVertices * attr.stride); + meshopt_remapVertexBuffer(finalBuf.data(), tempBuf.data(), uniqueVertices, attr.stride, fetchRemap.data()); + + outVertices[a].insert(outVertices[a].end(), finalBuf.begin(), finalBuf.end()); + } + + // update indirect buffer if ( drawCommands ) { - drawCommands[view.indirectIndex].indexID = newIndexStart; - drawCommands[view.indirectIndex].indices = indicesCount; + drawCommands[cmdIdx].indexID = outIndexOffset; + drawCommands[cmdIdx].indices = optimizedIndexCount; + drawCommands[cmdIdx].vertexID = outVertexOffset; + drawCommands[cmdIdx].vertices = finalVertices; } } - mesh.index.count = optIndices.size(); - mesh.resizeIndices( mesh.index.count ); + // apply index buffer (if missing) + if ( mesh.index.attributes.empty() ) { + mesh.bindIndex(); + mesh.bind(mesh, mesh.isInterleaved()); + } - uint8_t* dstPointer = (uint8_t*) mesh.getBuffer(mesh.index).data(); - for ( size_t i = 0; i < optIndices.size(); ++i ) { - switch ( mesh.index.size ) { - case 1: (( uint8_t*) dstPointer)[i] = (uint8_t) optIndices[i]; break; - case 2: ((uint16_t*) dstPointer)[i] = (uint16_t) optIndices[i]; break; - case 4: ((uint32_t*) dstPointer)[i] = (uint32_t) optIndices[i]; break; - } + // write indices to buffer + mesh.index.count = outIndices.size(); + mesh.resizeIndices(mesh.index.count); + uint8_t* dstIdx = mesh.getBuffer(mesh.index).data(); + for ( size_t i = 0; i < outIndices.size(); ++i ) writeIndex(dstIdx, i, mesh.index.size, outIndices[i]); + + // write vertices to buffer + mesh.vertex.count = outVertices[0].size() / mesh.vertex.attributes[0].stride; + for ( size_t a = 0; a < mesh.vertex.attributes.size(); ++a ) { + auto& attr = mesh.vertex.attributes[a]; + mesh.buffers[attr.buffer].swap(outVertices[a]); + attr.pointer = mesh.buffers[attr.buffer].data(); } mesh.updateDescriptor(); @@ -128,103 +188,128 @@ uf::stl::vector ext::meshopt::generateLODs( uf::Mesh& mesh, co mesh.updateDescriptor(); const auto& views = mesh.buffer_views; - if ( views.empty() ) return lodMetadata; + if ( views.empty() || lodFactors.empty() ) return lodMetadata; size_t numLODs = std::min(lodFactors.size(), (size_t)4); lodMetadata.resize(mesh.indirect.count); - uf::stl::vector> lodBlocks(numLODs); pod::DrawCommand* drawCommands = mesh.indirect.count > 0 ? (pod::DrawCommand*) mesh.getBuffer(mesh.indirect).data() : nullptr; + if ( !drawCommands ) return lodMetadata; + // store LOD0 as-is + uf::stl::vector outIndices(mesh.index.count); + const uint8_t* srcIndexData = mesh.getBuffer(mesh.index).data(); + for ( size_t i = 0; i < mesh.index.count; ++i ) { + outIndices[i] = readIndex(srcIndexData, i, mesh.index.size); + } + // write LOD0 data for ( size_t viewIdx = 0; viewIdx < views.size(); ++viewIdx ) { - const auto& view = views[viewIdx]; - uint32_t cmdIdx = view.indirectIndex; - - auto& indicesView = view["index"]; - auto& positionsView = view["position"]; - - size_t baseIndicesCount = view.index.count; - uf::stl::vector baseIndices(baseIndicesCount); - - for ( size_t i = 0; i < baseIndicesCount; ++i ) { - size_t global_i = view.index.first + i; - switch ( indicesView.attribute.descriptor.size ) { - case 1: baseIndices[i] = indicesView.get(global_i)[0]; break; - case 2: baseIndices[i] = indicesView.get(global_i)[0]; break; - case 4: baseIndices[i] = indicesView.get(global_i)[0]; break; - } - } - - meshopt_optimizeVertexCache(&baseIndices[0], &baseIndices[0], baseIndicesCount, mesh.vertex.count); - - size_t previousIndicesCount = baseIndicesCount; - for ( size_t lodIdx = 0; lodIdx < numLODs; ++lodIdx ) { - float simplify = lodFactors[lodIdx]; - uf::stl::vector lodIndices = baseIndices; - size_t currentIndicesCount = baseIndicesCount; - - if ( simplify < 1.0f ) { - float targetError = FLT_MAX; // 1e-2f / simplify; - float realError = 0.0f; - currentIndicesCount = meshopt_simplify( - &lodIndices[0], &baseIndices[0], baseIndicesCount, - (const float*)positionsView.data(0), mesh.vertex.count, positionsView.stride(), - baseIndicesCount * simplify, targetError, - 0, &realError - ); - - if ( previousIndicesCount == currentIndicesCount ) { - continue; - } - previousIndicesCount = currentIndicesCount; - - if ( verbose ) { - UF_MSG_DEBUG("[View {} Simplified LOD {}] indices: {} -> {} | error: {} -> {}", viewIdx, lodIdx, baseIndicesCount, currentIndicesCount, targetError, realError); - } - - - lodIndices.resize(currentIndicesCount); - } - - lodMetadata[cmdIdx].levels[lodIdx].indexID = lodBlocks[lodIdx].size(); - lodMetadata[cmdIdx].levels[lodIdx].indices = currentIndicesCount; - - lodBlocks[lodIdx].insert(lodBlocks[lodIdx].end(), lodIndices.begin(), lodIndices.end()); - } + uint32_t cmdIdx = views[viewIdx].indirectIndex; + auto& cmd = drawCommands[cmdIdx]; + lodMetadata[cmdIdx].levels[0].indexID = cmd.indexID; + lodMetadata[cmdIdx].levels[0].indices = cmd.indices; + lodMetadata[cmdIdx].levels[0].vertexID = cmd.vertexID; + lodMetadata[cmdIdx].levels[0].vertices = cmd.vertices; } - uf::stl::vector unifiedIndices; - size_t currentGlobalOffset = 0; + // copy position attribute + int posAttrIdx = -1; + uf::stl::vector> outVertices(mesh.vertex.attributes.size()); + for ( size_t a = 0; a < mesh.vertex.attributes.size(); ++a ) { + auto& attr = mesh.vertex.attributes[a]; + if ( attr.descriptor.name == "position" ) posAttrIdx = a; + + auto& buf = mesh.buffers[attr.buffer]; + outVertices[a].assign(buf.begin(), buf.end()); + } + + + // generate LOD1=>N + for ( size_t lodIdx = 1; lodIdx < numLODs; ++lodIdx ) { + float simplify = lodFactors[lodIdx]; - for ( size_t lodIdx = 0; lodIdx < numLODs; ++lodIdx ) { for ( size_t viewIdx = 0; viewIdx < views.size(); ++viewIdx ) { uint32_t cmdIdx = views[viewIdx].indirectIndex; - lodMetadata[cmdIdx].levels[lodIdx].indexID += currentGlobalOffset; - if ( lodIdx == 0 && drawCommands ) { - drawCommands[cmdIdx].indexID = lodMetadata[cmdIdx].levels[0].indexID; - drawCommands[cmdIdx].indices = lodMetadata[cmdIdx].levels[0].indices; + // source from LOD0 + auto& cmd0 = lodMetadata[cmdIdx].levels[0]; + size_t previousIndicesCount = lodMetadata[cmdIdx].levels[lodIdx - 1].indices; + + uf::stl::vector baseIndices(cmd0.indices); + for ( size_t i = 0; i < cmd0.indices; ++i ) baseIndices[i] = outIndices[cmd0.indexID + i]; + + // generate LOD + if ( 0.0f < simplify && simplify < 1.0f ) { + float targetError = 1e-2f / simplify; + float realError = 0.0f; + size_t currentIndicesCount = cmd0.indices; + uf::stl::vector lodIndices = baseIndices; + + const float* basePositions = (const float*) (outVertices[posAttrIdx].data() + cmd0.vertexID * mesh.vertex.attributes[posAttrIdx].stride); + + currentIndicesCount = meshopt_simplify( + lodIndices.data(), baseIndices.data(), cmd0.indices, + basePositions, cmd0.vertices, mesh.vertex.attributes[posAttrIdx].stride, + cmd0.indices * simplify, targetError, meshopt_SimplifyLockBorder, &realError + ); + + // couldn't simplify further, use previous LOD + if ( currentIndicesCount == previousIndicesCount ) { + lodMetadata[cmdIdx].levels[lodIdx] = lodMetadata[cmdIdx].levels[lodIdx - 1]; + continue; + } + + if ( verbose ) UF_MSG_DEBUG("[View {}] LOD {}: {} -> {}", viewIdx, lodIdx, cmd0.indices, currentIndicesCount); + + lodIndices.resize(currentIndicesCount); + + // optimize and pack vertices for this specific LOD + uf::stl::vector fetchRemap(cmd0.vertices); + size_t uniqueVertices = meshopt_optimizeVertexFetchRemap(fetchRemap.data(), lodIndices.data(), currentIndicesCount, cmd0.vertices); + meshopt_remapIndexBuffer(lodIndices.data(), lodIndices.data(), currentIndicesCount, fetchRemap.data()); + + // record the new offsets appended at the end of the global buffers + uint32_t lodVertexOffset = outVertices[0].size() / mesh.vertex.attributes[0].stride; + uint32_t lodIndexOffset = outIndices.size(); + + lodMetadata[cmdIdx].levels[lodIdx].indexID = lodIndexOffset; + lodMetadata[cmdIdx].levels[lodIdx].indices = currentIndicesCount; + lodMetadata[cmdIdx].levels[lodIdx].vertexID = lodVertexOffset; + lodMetadata[cmdIdx].levels[lodIdx].vertices = uniqueVertices; + + // append indices + for ( size_t i = 0; i < currentIndicesCount; ++i ) outIndices.emplace_back(lodIndices[i]); + // append vertices + for ( size_t a = 0; a < mesh.vertex.attributes.size(); ++a ) { + auto& attr = mesh.vertex.attributes[a]; + const uint8_t* srcPtr = outVertices[a].data() + cmd0.vertexID * attr.stride; + + uf::stl::vector packed(uniqueVertices * attr.stride); + meshopt_remapVertexBuffer(packed.data(), srcPtr, cmd0.vertices, attr.stride, fetchRemap.data()); + outVertices[a].insert(outVertices[a].end(), packed.begin(), packed.end()); + } + } else { + // no simplification, just use LOD0 (shouldn't happen) + lodMetadata[cmdIdx].levels[lodIdx] = lodMetadata[cmdIdx].levels[0]; } } - - unifiedIndices.insert(unifiedIndices.end(), lodBlocks[lodIdx].begin(), lodBlocks[lodIdx].end()); - currentGlobalOffset = unifiedIndices.size(); } - mesh.index.count = unifiedIndices.size(); - mesh.resizeIndices( mesh.index.count ); - uint8_t* dstPointer = (uint8_t*) mesh.getBuffer(mesh.index).data(); - for ( size_t i = 0; i < unifiedIndices.size(); ++i ) { - switch ( mesh.index.size ) { - case 1: (( uint8_t*) dstPointer)[i] = (uint8_t) unifiedIndices[i]; break; - case 2: ((uint16_t*) dstPointer)[i] = (uint16_t) unifiedIndices[i]; break; - case 4: ((uint32_t*) dstPointer)[i] = (uint32_t) unifiedIndices[i]; break; - } + // write indices to mesh + mesh.index.count = outIndices.size(); + mesh.resizeIndices(mesh.index.count); + uint8_t* dstIdx = mesh.getBuffer(mesh.index).data(); + for ( size_t i = 0; i < outIndices.size(); ++i ) writeIndex(dstIdx, i, mesh.index.size, outIndices[i]); + + // write vertices to mesh + mesh.vertex.count = outVertices[0].size() / mesh.vertex.attributes[0].stride; + for ( size_t a = 0; a < mesh.vertex.attributes.size(); ++a ) { + auto& attr = mesh.vertex.attributes[a]; + mesh.buffers[attr.buffer].swap(outVertices[a]); + attr.pointer = mesh.buffers[attr.buffer].data(); } mesh.updateDescriptor(); - return lodMetadata; } - -#endif \ No newline at end of file +#endif diff --git a/engine/src/ext/vulkan/graphic.cpp b/engine/src/ext/vulkan/graphic.cpp index b85ba3e4..71047a40 100644 --- a/engine/src/ext/vulkan/graphic.cpp +++ b/engine/src/ext/vulkan/graphic.cpp @@ -399,8 +399,7 @@ void ext::vulkan::Pipeline::record( const Graphic& graphic, const GraphicDescrip auto shaders = getShaders( graphic.material.shaders, descriptor.pipeline ); // create dynamic offset ranges - static thread_local uf::stl::vector dynamicOffsets; - dynamicOffsets.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, dynamicOffsets); RenderMode& renderMode = ext::vulkan::getRenderMode(descriptor.renderMode, true); diff --git a/engine/src/ext/vulkan/rendermode.cpp b/engine/src/ext/vulkan/rendermode.cpp index 7d1514af..516efba7 100644 --- a/engine/src/ext/vulkan/rendermode.cpp +++ b/engine/src/ext/vulkan/rendermode.cpp @@ -221,8 +221,7 @@ ext::vulkan::GraphicDescriptor ext::vulkan::RenderMode::bindGraphicDescriptor( c } void ext::vulkan::RenderMode::createCommandBuffers() { - static thread_local uf::stl::vector graphics; - graphics.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, graphics); auto& scene = uf::scene::getCurrentScene(); auto/*&*/ graph = scene.getGraph(); diff --git a/engine/src/ext/xatlas/xatlas.cpp b/engine/src/ext/xatlas/xatlas.cpp index 4c790fbe..e3b3c6ba 100644 --- a/engine/src/ext/xatlas/xatlas.cpp +++ b/engine/src/ext/xatlas/xatlas.cpp @@ -2,7 +2,7 @@ #if UF_USE_XATLAS #include -#define UF_XATLAS_UNWRAP_MULTITHREAD 0 // prone to crashing +#define UF_XATLAS_UNWRAP_MULTITHREAD 1 // prone to crashing size_t ext::xatlas::unwrap( pod::Graph& graph ) { struct Entry { diff --git a/engine/src/utils/math/physics/broadphase/bvh.cpp b/engine/src/utils/math/physics/broadphase/bvh.cpp index e5950ada..330c32e9 100644 --- a/engine/src/utils/math/physics/broadphase/bvh.cpp +++ b/engine/src/utils/math/physics/broadphase/bvh.cpp @@ -68,7 +68,7 @@ pod::BVH::index_t impl::buildBVHNode_SAH( pod::BVH& bvh, const uf::stl::vector

> stack; - stack.clear(); + STATIC_THREAD_LOCAL(pod::BVH::pairs_t, stack); stack.emplace_back(0, 0); while ( !stack.empty() ) { @@ -879,8 +878,7 @@ void impl::queryFlatOverlaps( const pod::BVH& bvhA, const pod::BVH& bvhB, const if ( nodesA.empty() || nodesB.empty() ) return; outPairs.reserve(uf::physics::settings.reserveCount); - static thread_local uf::stl::vector> stack; - stack.clear(); + STATIC_THREAD_LOCAL(pod::BVH::pairs_t, stack); stack.emplace_back(0, 0); while ( !stack.empty() ) { diff --git a/engine/src/utils/math/physics/broadphase/island.cpp b/engine/src/utils/math/physics/broadphase/island.cpp index 2ff37c12..e88a4319 100644 --- a/engine/src/utils/math/physics/broadphase/island.cpp +++ b/engine/src/utils/math/physics/broadphase/island.cpp @@ -48,8 +48,8 @@ void impl::buildIslands( const pod::BVH::pairs_t& pairs, const uf::stl::vector

rootToIsland; - rootToIsland.clear(); + typedef uf::stl::unordered_map map_t; + STATIC_THREAD_LOCAL(map_t, rootToIsland); islands.clear(); islands.reserve(bodies.size()); diff --git a/engine/src/utils/math/physics/impl.cpp b/engine/src/utils/math/physics/impl.cpp index 9f82cf17..ca3edd30 100644 --- a/engine/src/utils/math/physics/impl.cpp +++ b/engine/src/utils/math/physics/impl.cpp @@ -133,8 +133,7 @@ void uf::physics::step( pod::World& world, float dt ) { // iterate islands #pragma omp parallel for schedule(dynamic) for ( auto& island : islands ) { - static thread_local uf::stl::vector manifolds; - manifolds.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, manifolds); manifolds.reserve(uf::physics::settings.reserveCount); // sleeping island, skip (asleep islands shouldn't ever be in here) @@ -533,8 +532,7 @@ pod::RayQuery uf::physics::rayCast( const pod::Ray& ray, const pod::World& world auto& staticBvh = world.staticBvh; auto& bodies = world.bodies; - static thread_local uf::stl::vector candidates; - candidates.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, candidates); impl::queryBVH( dynamicBvh, ray, candidates ); if ( uf::physics::settings.useSplitBvhs ) impl::queryBVH( staticBvh, ray, candidates ); diff --git a/engine/src/utils/math/physics/integration.cpp b/engine/src/utils/math/physics/integration.cpp index dae49c8d..3ba40165 100644 --- a/engine/src/utils/math/physics/integration.cpp +++ b/engine/src/utils/math/physics/integration.cpp @@ -152,8 +152,7 @@ bool impl::similarContact( const pod::Contact& a, const pod::Contact& b, float d void impl::reduceContacts( pod::Manifold& manifold ) { if ( manifold.points.size() <= 4 ) return; - static thread_local uf::stl::vector result; - result.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, result); result.reserve(4); for ( auto& c : manifold.points ) { @@ -184,8 +183,7 @@ void impl::reduceContacts( pod::Manifold& manifold ) { } void impl::mergeContacts( pod::Manifold& manifold ) { - static thread_local uf::stl::vector result; - result.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, result); result.reserve(4); for ( auto& c : manifold.points ) { diff --git a/engine/src/utils/math/physics/narrowphase/hull.cpp b/engine/src/utils/math/physics/narrowphase/hull.cpp index 59ada96a..f13f6370 100644 --- a/engine/src/utils/math/physics/narrowphase/hull.cpp +++ b/engine/src/utils/math/physics/narrowphase/hull.cpp @@ -12,8 +12,7 @@ namespace impl { // transform to local space for BVH query auto bounds = impl::transformAabbToLocal( body.bounds, impl::getTransform( hull ) ); - static thread_local uf::stl::vector candidates; - candidates.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, candidates); impl::queryBVH( bvh, bounds, candidates ); bool hit = false; @@ -60,8 +59,7 @@ bool impl::hullHull( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod:: auto tB = impl::getTransform( b ); auto relTransform = uf::transform::relative( tA, tB ); - static thread_local pod::BVH::pairs_t pairs; - pairs.clear(); + STATIC_THREAD_LOCAL(pod::BVH::pairs_t, pairs); impl::queryOverlaps( bvhA, bvhB, relTransform, pairs ); bool hit = false; diff --git a/engine/src/utils/math/physics/narrowphase/mesh.cpp b/engine/src/utils/math/physics/narrowphase/mesh.cpp index da0660ee..84c63b8d 100644 --- a/engine/src/utils/math/physics/narrowphase/mesh.cpp +++ b/engine/src/utils/math/physics/narrowphase/mesh.cpp @@ -13,8 +13,7 @@ bool impl::meshAabb( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod:: // transform to local space for BVH query auto bounds = impl::transformAabbToLocal( aabb.bounds, impl::getTransform( mesh ) ); - static thread_local uf::stl::vector candidates; - candidates.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, candidates); impl::queryBVH( bvh, bounds, candidates ); bool hit = false; @@ -37,8 +36,7 @@ bool impl::meshSphere( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod // transform to local space for BVH query auto bounds = impl::transformAabbToLocal( sphere.bounds, impl::getTransform( mesh ) ); - static thread_local uf::stl::vector candidates; - candidates.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, candidates); impl::queryBVH( bvh, bounds, candidates ); bool hit = false; @@ -63,8 +61,7 @@ bool impl::meshPlane( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod: // transform to local space for BVH query auto bounds = impl::transformAabbToLocal( plane.bounds, impl::getTransform( mesh ) ); - static thread_local uf::stl::vector candidates; - candidates.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, candidates); impl::queryBVH( bvh, bounds, candidates ); bool hit = false; @@ -88,8 +85,7 @@ bool impl::meshCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, po // transform to local space for BVH query auto bounds = impl::transformAabbToLocal( capsule.bounds, impl::getTransform( mesh ) ); - static thread_local uf::stl::vector candidates; - candidates.clear(); + STATIC_THREAD_LOCAL(uf::stl::vector, candidates); impl::queryBVH( bvh, bounds, candidates ); bool hit = false; @@ -117,8 +113,7 @@ bool impl::meshMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod:: auto relTransform = uf::transform::relative( tA, tB ); // compute overlaps between one BVH and another BVH - static thread_local pod::BVH::pairs_t pairs; - pairs.clear(); + STATIC_THREAD_LOCAL(pod::BVH::pairs_t, pairs); impl::queryOverlaps( bvhA, bvhB, relTransform, pairs ); @@ -152,8 +147,7 @@ bool impl::meshHull( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod:: auto relTransform = uf::transform::relative( tA, tB ); // compute overlaps between one BVH and another BVH - static thread_local pod::BVH::pairs_t pairs; - pairs.clear(); + STATIC_THREAD_LOCAL(pod::BVH::pairs_t, pairs); impl::queryOverlaps( bvhA, bvhB, relTransform, pairs ); bool hit = false; diff --git a/engine/src/utils/memory/allocator.cpp b/engine/src/utils/memory/allocator.cpp index 218a3a83..12a126ea 100644 --- a/engine/src/utils/memory/allocator.cpp +++ b/engine/src/utils/memory/allocator.cpp @@ -1,31 +1,54 @@ #include #include -#define UF_MEMORYPOOL_OVERRIDE_NEW_DELETE 0 bool uf::allocator::override = false; + void* uf::allocator::allocate( size_t n ) { - return uf::memoryPool::global.size() > 0 && uf::allocator::override ? uf::memoryPool::global.alloc( n ) : malloc( n ); + if ( override && uf::memoryPool::global.size() > 0 ) return uf::memoryPool::global.alloc( n ); + return std::malloc( n ); } void uf::allocator::deallocate( void* p, size_t n ) { - if ( uf::memoryPool::global.size() > 0 && uf::allocator::override ) uf::memoryPool::global.free( p ); - else free( p ); -} + if ( !p ) return; + if ( override && uf::memoryPool::global.size() > 0 ) uf::memoryPool::global.free( p, n ); + else std::free( p ); +} void* uf::allocator::malloc_m( size_t n ) { return std::malloc( n ); } -void uf::allocator::free_m( void* p, size_t n ) { + +void uf::allocator::free_m( void* p, size_t /*n*/ ) { std::free( p ); } -// #if UF_MEMORYPOOL_OVERRIDE_NEW_DELETE void* operator new( size_t n ) { - return uf::allocator::allocate( n ); + void* p = uf::allocator::allocate( n ); + if ( !p ) throw std::bad_alloc(); + return p; } -void operator delete( void* p ) { - uf::allocator::deallocate( p ); + +void operator delete( void* p ) noexcept { + uf::allocator::deallocate( p, 0 ); +} + +void* operator new[]( size_t n ) { + void* p = uf::allocator::allocate( n ); + if ( !p ) throw std::bad_alloc(); + return p; +} + +void operator delete[]( void* p ) noexcept { + uf::allocator::deallocate( p, 0 ); +} + +void operator delete( void* p, size_t n ) noexcept { + uf::allocator::deallocate( p, n ); +} + +void operator delete[]( void* p, size_t n ) noexcept { + uf::allocator::deallocate( p, n ); } #endif \ No newline at end of file diff --git a/engine/src/utils/memory/pool.cpp b/engine/src/utils/memory/pool.cpp index f9d5867e..56fb1977 100644 --- a/engine/src/utils/memory/pool.cpp +++ b/engine/src/utils/memory/pool.cpp @@ -63,7 +63,11 @@ void uf::memoryPool::initialize( pod::MemoryPool& pool, size_t size, pod::Memory pool.size = size; pool.strategy = strategy; - pool.memory = uf::allocator::malloc_m(size); + if ( uf::memoryPool::subPool && &pool != &uf::memoryPool::global.data() ) { + pool.memory = uf::memoryPool::global.alloc( size ); + } else { + pool.memory = uf::allocator::malloc_m( size ); + } UF_ASSERT( pool.memory ); switch ( pool.strategy ) { @@ -130,11 +134,11 @@ void uf::memoryPool::initialize( pod::MemoryPool& pool, size_t size, pod::Memory } } void uf::memoryPool::destroy( pod::MemoryPool& pool ) { - if ( uf::memoryPool::size( pool ) <= 0 ) goto CLEAR; + if ( uf::memoryPool::size( pool ) <= 0 || !pool.memory ) goto CLEAR; if ( uf::memoryPool::subPool && &pool != &uf::memoryPool::global.data() ) { - uf::memoryPool::global.free( pool.memory ); + uf::memoryPool::global.free( pool.memory, pool.size ); } else { - uf::allocator::free_m(pool.memory); + uf::allocator::free_m( pool.memory, pool.size ); } // per-pool destruction @@ -293,14 +297,14 @@ RETURN: } bool uf::memoryPool::free( pod::MemoryPool& pool, void* pointer, size_t size ) { - if (!pointer) return false; + if ( !pointer ) return false; #if UF_MEMORYPOOL_MUTEX std::lock_guard lock(pool.mutex); #endif bool oob = !exists( pool, pointer, size ); if ( oob ) goto MANUAL_FREE; - switch (pool.strategy) { + switch ( pool.strategy ) { case pod::MemoryPool::Strategy::LINEAR: { UF_EXCEPTION("cannot free individual allocation"); return false; @@ -322,13 +326,13 @@ bool uf::memoryPool::free( pod::MemoryPool& pool, void* pointer, size_t size ) { goto RETURN; } case pod::MemoryPool::Strategy::BUDDY: { - UF_ASSERT(size > 0); + UF_ASSERT( size > 0 ); void* block = pointer; // attempt to merge with buddies size_t currentSize = pool.state.buddy.minBlockSize; size_t level = getTargetLevel(size, currentSize, pool.state.buddy.maxLevel); - while (level < pool.state.buddy.maxLevel) { + while ( level < pool.state.buddy.maxLevel ) { void* buddy = getBuddy(block, currentSize, pool.memory); // search for buddy in the current level's free list @@ -382,11 +386,11 @@ MANUAL_FREE: #endif #if UF_MEMORYPOOL_INVALID_FREE - UF_MSG_DEBUG("manually freeing {}", pointer); + UF_MSG_DEBUG("memory pool {}: manually freeing {}", (void*) &pool, pointer ); uf::allocator::free_m(pointer); return true; #else - UF_EXCEPTION("cannot free: {}", pointer); + UF_ASSERT("cannot free: {}", pointer); return false; #endif @@ -420,12 +424,14 @@ uf::stl::string uf::memoryPool::stats( const pod::MemoryPool& pool ) { metadata["pool"] = ss.str(); return metadata; } +/* pod::Allocation& uf::memoryPool::fetch( pod::MemoryPool& pool, void* pointer, size_t size ) { UF_EXCEPTION("unimplemented"); } const pod::MemoryPool::allocations_t& uf::memoryPool::allocations( const pod::MemoryPool& pool ) { UF_EXCEPTION("unimplemented") } +*/ // uf::MemoryPool::MemoryPool( size_t size ) { if ( size > 0 ) this->initialize( size ); diff --git a/engine/src/utils/thread/thread.cpp b/engine/src/utils/thread/thread.cpp index 6de5e4bc..3c13178a 100644 --- a/engine/src/utils/thread/thread.cpp +++ b/engine/src/utils/thread/thread.cpp @@ -71,10 +71,18 @@ uf::stl::vector uf::thread::execute( pod::Thread::Tasks& tasks ) { if ( tasks.container.empty() ) return workers; if ( tasks.name == uf::thread::mainThreadName ) { + #if UF_THREAD_METRICS + auto& thread = uf::thread::get( uf::thread::mainThreadName ); + uint32_t tasksThisFrame = 0; for ( auto& task : tasks.container ) { task(); + ++tasksThisFrame; } tasks.container.clear(); + thread.metrics.tasksProcessed.store(tasksThisFrame, std::memory_order_relaxed); + #else + for ( auto& task : tasks.container ) task(); + #endif } else { for ( auto& task : tasks.container ) { auto& worker = uf::thread::fetchWorker( tasks.name ); @@ -114,10 +122,8 @@ void uf::thread::queue( pod::Thread& thread, const pod::Thread::function_t& func thread.conditions.queued.notify_one(); } void uf::thread::process( pod::Thread& thread ) { if ( !uf::thread::has(thread.name) ) return; // ops - static thread_local pod::Thread::container_t local_queue; - static thread_local pod::Thread::container_t local_container; - local_queue.clear(); - local_container.clear(); + STATIC_THREAD_LOCAL(pod::Thread::container_t, local_queue); + STATIC_THREAD_LOCAL(pod::Thread::container_t, local_container); #if UF_THREAD_METRICS uint32_t tasksThisFrame = 0;