From 1e548265adddb0e172edaa5e8f6a03eb6badf232 Mon Sep 17 00:00:00 2001 From: ecker Date: Mon, 15 Sep 2025 16:01:05 -0500 Subject: [PATCH] more optimizations by making node bounds an SoA instead and some other tweaks and fixes (even though it doesn't seem to amount to much tangible results because the scene explodes when I make props in mds_mcdonalds dynamic) --- engine/inc/uf/utils/math/physics/impl.h | 36 +- engine/src/utils/math/physics/aabb.inl | 6 +- engine/src/utils/math/physics/bvh.inl | 390 ++++++++++++---------- engine/src/utils/math/physics/helpers.inl | 8 +- engine/src/utils/math/physics/impl.cpp | 22 +- engine/src/utils/math/physics/mesh.inl | 15 +- engine/src/utils/math/physics/ray.inl | 3 +- 7 files changed, 260 insertions(+), 220 deletions(-) diff --git a/engine/inc/uf/utils/math/physics/impl.h b/engine/inc/uf/utils/math/physics/impl.h index 2bee82ca..bd6b305a 100644 --- a/engine/inc/uf/utils/math/physics/impl.h +++ b/engine/inc/uf/utils/math/physics/impl.h @@ -44,7 +44,8 @@ namespace pod { }; struct BVH { - typedef std::pair pair_t; + typedef uint32_t index_t; + typedef std::pair pair_t; struct PairHash { size_t operator()( const pair_t& p ) const noexcept { @@ -62,21 +63,25 @@ namespace pod { typedef uf::stl::unordered_set pairs_t; struct Node { - /*alignas(16)*/ pod::AABB bounds = {}; - int32_t left = -1; - int32_t right = -1; - int32_t start = 0; - int32_t count = 0; + BVH::index_t left = 0; + BVH::index_t right = 0; + BVH::index_t start = 0; + BVH::index_t flags = 0; - bool asleep = false; + BVH::index_t getCount() const { return flags & 0x7FFFFFFF; } + bool isAsleep() const { return (flags & 0x80000000u) != 0; } + void setCount(BVH::index_t c) { flags = (flags & 0x80000000u) | (c & 0x7FFFFFFF); } + void setAsleep(bool a) { flags = (flags & 0x7FFFFFFF) | (a ? 
0x80000000u : 0); } }; struct FlatNode { - /*alignas(16)*/ pod::AABB bounds = {}; - int32_t start = -1; - int32_t count = -1; - int32_t skipIndex = -1; + BVH::index_t start = 0; + BVH::index_t skipIndex = 0; + BVH::index_t flags = 0; - bool asleep = false; + BVH::index_t getCount() const { return flags & 0x7FFFFFFF; } + bool isAsleep() const { return (flags & 0x80000000u) != 0; } + void setCount(BVH::index_t c) { flags = (flags & 0x80000000u) | (c & 0x7FFFFFFF); } + void setAsleep(bool a) { flags = (flags & 0x7FFFFFFF) | (a ? 0x80000000u : 0); } }; struct UpdatePolicy { enum class Decision { @@ -87,13 +92,16 @@ namespace pod { float displacementThreshold = 0.25f; // 25% of AABB size float overlapThreshold = 2.0f; // 2x growth in root surface area float dirtyRatioThreshold = 0.3f; // 30% dirty bodies - int maxFramesBeforeRebuild = 60; // force rebuild every 60 frames + uint16_t maxFramesBeforeRebuild = 600; // force rebuild every 600 frames }; bool dirty = false; - uf::stl::vector indices; + uf::stl::vector indices; uf::stl::vector nodes; uf::stl::vector flattened; + + uf::stl::vector bounds; + uf::stl::vector flatBounds; }; struct MeshBVH { diff --git a/engine/src/utils/math/physics/aabb.inl b/engine/src/utils/math/physics/aabb.inl index 2058b413..70866edd 100644 --- a/engine/src/utils/math/physics/aabb.inl +++ b/engine/src/utils/math/physics/aabb.inl @@ -103,10 +103,10 @@ namespace { return ::computeSegmentAABB( p1, p2, body.collider.capsule.radius ); } break; case pod::ShapeType::MESH: { - if ( body.collider.mesh.bvh && !body.collider.mesh.bvh->nodes.empty() ) + if ( body.collider.mesh.bvh && !body.collider.mesh.bvh->bounds.empty() ) return { - transform.position + body.collider.mesh.bvh->nodes[0].bounds.min, - transform.position + body.collider.mesh.bvh->nodes[0].bounds.max, + transform.position + body.collider.mesh.bvh->bounds[0].min, + transform.position + body.collider.mesh.bvh->bounds[0].max, }; } break; default: { diff --git 
a/engine/src/utils/math/physics/bvh.inl b/engine/src/utils/math/physics/bvh.inl index 4ab32737..98140a53 100644 --- a/engine/src/utils/math/physics/bvh.inl +++ b/engine/src/utils/math/physics/bvh.inl @@ -1,8 +1,8 @@ namespace { - int32_t flattenBVH( pod::BVH& bvh, int32_t nodeID ); + pod::BVH::index_t flattenBVH( pod::BVH& bvh, pod::BVH::index_t nodeID ); - void queryFlatBVH( const pod::BVH&, const pod::AABB& bounds, uf::stl::vector& out ); - void queryFlatBVH( const pod::BVH&, const pod::Ray& ray, uf::stl::vector& out, float maxDist = FLT_MAX ); + void queryFlatBVH( const pod::BVH&, const pod::AABB& bounds, uf::stl::vector& out ); + void queryFlatBVH( const pod::BVH&, const pod::Ray& ray, uf::stl::vector& out, float maxDist = FLT_MAX ); void queryFlatOverlaps( const pod::BVH& bvh, pod::BVH::pairs_t& outPairs ); void queryFlatOverlaps( const pod::BVH& bvhA, const pod::BVH& bvhB, pod::BVH::pairs_t& outPairs ); @@ -10,41 +10,40 @@ namespace { // BVH namespace { - int32_t buildBVHNode( pod::BVH& bvh, const uf::stl::vector& bounds, int32_t start, int32_t end, int32_t capacity = 2 ) { + pod::BVH::index_t buildBVHNode( pod::BVH& bvh, const uf::stl::vector& bounds, pod::BVH::index_t start, pod::BVH::index_t end, pod::BVH::index_t capacity = 2 ) { pod::BVH::Node node{}; - node.left = -1; - node.right = -1; + node.left = 0; + node.right = 0; node.start = start; - node.count = 0; - node.bounds = bounds[bvh.indices[start]]; + node.setCount(0); - // compute bounds of this node - for ( auto i = start + 1; i < end; ++i) node.bounds = ::mergeAabb( node.bounds, bounds[bvh.indices[i]] ); + pod::AABB bound = bounds[bvh.indices[start]]; + for ( auto i = start + 1; i < end; ++i) bound = ::mergeAabb( bound, bounds[bvh.indices[i]] ); - int32_t count = end - start; + pod::BVH::index_t count = end - start; if ( count <= capacity ) { // leaf node.start = start; - node.count = count; - int32_t index = (int32_t) bvh.nodes.size(); + node.setCount(count); + pod::BVH::index_t index = 
(pod::BVH::index_t) bvh.nodes.size(); bvh.nodes.emplace_back(node); + bvh.bounds.emplace_back(bound); return index; } // choose split axis by largest extent - auto extent = node.bounds.max - node.bounds.min; + auto extent = bound.max - bound.min; auto axis = (extent.x > extent.y && extent.x > extent.z) ? 0 : (extent.y > extent.z ? 1 : 2); // sort indices by centroid along axis - std::sort( bvh.indices.begin() + start, bvh.indices.begin() + end, [&](uint32_t a, uint32_t b) { - float ca = ::aabbCenter( bounds[a] )[axis]; - float cb = ::aabbCenter( bounds[b] )[axis]; - return ca < cb; + auto mid = ( start + end ) / 2; + std::nth_element(bvh.indices.begin() + start, bvh.indices.begin() + mid, bvh.indices.begin() + end, [&](uint32_t a, uint32_t b) { + return ::aabbCenter(bounds[a])[axis] < ::aabbCenter(bounds[b])[axis]; }); - int32_t mid = ( start + end ) / 2; - int32_t index = (int32_t) bvh.nodes.size(); + pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size(); bvh.nodes.emplace_back( node ); // insert now, gets filled later + bvh.bounds.emplace_back( bound ); node.left = ::buildBVHNode( bvh, bounds, start, mid, capacity ); node.right = ::buildBVHNode( bvh, bounds, mid, end, capacity ); @@ -52,57 +51,58 @@ namespace { return index; } - int32_t buildBVHNode_SAH( pod::BVH& bvh, const uf::stl::vector& bounds, int32_t start, int32_t end, int32_t capacity = 4 ) { + pod::BVH::index_t buildBVHNode_SAH( pod::BVH& bvh, const uf::stl::vector& bounds, pod::BVH::index_t start, pod::BVH::index_t end, pod::BVH::index_t capacity = 4 ) { struct Bin { pod::AABB bounds; - int32_t count = 0; + pod::BVH::index_t count = 0; }; pod::BVH::Node node{}; - node.left = -1; - node.right = -1; + node.left = 0; + node.right = 0; node.start = start; - node.count = 0; - node.bounds = bounds[bvh.indices[start]]; + node.setCount(0); - for ( auto i = start + 1; i < end; ++i ) node.bounds = ::mergeAabb( node.bounds, bounds[bvh.indices[i]] ); + pod::AABB bound = bounds[bvh.indices[start]]; + for ( 
auto i = start + 1; i < end; ++i) bound = ::mergeAabb( bound, bounds[bvh.indices[i]] ); - int32_t count = end - start; + pod::BVH::index_t count = end - start; if ( count <= capacity ) { - node.count = count; - int32_t index = (int32_t) bvh.nodes.size(); + node.setCount(count); + pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size(); bvh.nodes.emplace_back(node); + bvh.bounds.emplace_back(bound); return index; } constexpr auto numBins = 16; static thread_local Bin bins[numBins]; - for ( auto i = 0; i < numBins; i++ ) bins[i] = {}; + for ( auto i = 0; i < numBins; i++ ) bins[i].count = 0; - auto extent = node.bounds.max - node.bounds.min; + auto extent = bound.max - bound.min; auto bestAxis = -1, bestSplit = -1; float bestCost = std::numeric_limits::infinity(); for ( auto axis = 0; axis < 3; ++axis ) { if ( extent[axis] < EPS(1e-6f) ) continue; - float minC = node.bounds.min[axis]; - float maxC = node.bounds.max[axis]; + float minC = bound.min[axis]; + float maxC = bound.max[axis]; float scale = (float) numBins / (maxC - minC); for ( auto i = start; i < end; ++i ) { - int32_t idx = bvh.indices[i]; + pod::BVH::index_t idx = bvh.indices[i]; float c = ::aabbCenter( bounds[idx] )[axis]; - int32_t binID = std::min(numBins - 1, (int32_t)((c - minC) * scale)); - bins[binID].count++; - bins[binID].bounds = ::mergeAabb( bins[binID].bounds, bounds[idx] ); + pod::BVH::index_t binID = std::min((pod::BVH::index_t)(numBins - 1), (pod::BVH::index_t)((c - minC) * scale)); + bins[binID].bounds = bins[binID].count == 0 ? bounds[idx] : ::mergeAabb( bins[binID].bounds, bounds[idx] ); + ++bins[binID].count; } pod::AABB leftBounds[numBins], rightBounds[numBins]; - int32_t leftCount[numBins] = {}, rightCount[numBins] = {}; + pod::BVH::index_t leftCount[numBins] = {}, rightCount[numBins] = {}; pod::AABB acc; - int32_t cnt = 0; + pod::BVH::index_t cnt = 0; for ( auto i = 0; i < numBins; i++ ) { if ( bins[i].count > 0 ) acc = (cnt == 0) ? 
bins[i].bounds : ::mergeAabb( acc, bins[i].bounds ); cnt += bins[i].count; @@ -110,6 +110,7 @@ namespace { leftCount[i] = cnt; } + acc = {}; cnt = 0; for ( auto i = numBins - 1; i >= 0; i-- ) { @@ -119,12 +120,18 @@ namespace { rightCount[i] = cnt; } - float parentArea = ::aabbSurfaceArea(node.bounds); + // precompute area + float leftArea[numBins], rightArea[numBins]; + for ( auto i = 0; i < numBins; i++ ) leftArea[i] = ::aabbSurfaceArea( leftBounds[i] ); + for ( auto i = 0; i < numBins; i++ ) rightArea[i] = ::aabbSurfaceArea( rightBounds[i] ); + + float parentArea = ::aabbSurfaceArea(bound); + for ( auto i = 0; i < numBins - 1; i++ ) { if ( leftCount[i] == 0 || rightCount[i + 1] == 0 ) continue; float cost = 1.0f + ( - ( ::aabbSurfaceArea(leftBounds[i]) / parentArea ) * leftCount[i] + - ( ::aabbSurfaceArea(rightBounds[i + 1]) / parentArea ) * rightCount[i + 1] + ( leftArea[i] / parentArea ) * leftCount[i] + + ( rightArea[i + 1] / parentArea ) * rightCount[i + 1] ); if ( cost < bestCost ) { bestCost = cost; @@ -136,34 +143,37 @@ namespace { // fallback: no valid split → make leaf if ( bestAxis == -1 ) { - node.count = count; - int32_t index = (int32_t) bvh.nodes.size(); + node.setCount(count); // node.count = count; + pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size(); bvh.nodes.emplace_back(node); + bvh.bounds.emplace_back(bound); return index; } - float minC = node.bounds.min[bestAxis]; - float maxC = node.bounds.max[bestAxis]; + float minC = bound.min[bestAxis]; + float maxC = bound.max[bestAxis]; float scale = (float) numBins / (maxC - minC); - auto midIt = std::partition( bvh.indices.begin() + start, bvh.indices.begin() + end, [&](int32_t idx) { - float c = ::aabbCenter( bounds[idx])[bestAxis ]; - int32_t binID = std::min(numBins - 1, (int32_t)((c - minC) * scale)); + auto midIt = std::partition( bvh.indices.begin() + start, bvh.indices.begin() + end, [&](pod::BVH::index_t idx) { + float c = ::aabbCenter( bounds[idx] )[bestAxis ]; + 
pod::BVH::index_t binID = std::min((pod::BVH::index_t)(numBins - 1), (pod::BVH::index_t)((c - minC) * scale)); return binID <= bestSplit; }); - int32_t mid = (int32_t) ( midIt - bvh.indices.begin() ); + pod::BVH::index_t mid = (pod::BVH::index_t) ( midIt - bvh.indices.begin() ); // if partition failed (all left or all right), force leaf if ( mid == start || mid == end ) { - node.count = count; - int32_t index = (int32_t) bvh.nodes.size(); + node.setCount(count); // node.count = count; + pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size(); bvh.nodes.emplace_back(node); + bvh.bounds.emplace_back(bound); return index; } - int32_t index = (int32_t) bvh.nodes.size(); + pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size(); bvh.nodes.emplace_back(node); + bvh.bounds.emplace_back(bound); node.left = ::buildBVHNode_SAH( bvh, bounds, start, mid, capacity ); node.right = ::buildBVHNode_SAH( bvh, bounds, mid, end, capacity ); @@ -171,11 +181,12 @@ namespace { return index; } - void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector& bodies, int32_t capacity = 2, bool filters = false, bool filterType = false ) { + void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector& bodies, pod::BVH::index_t capacity = 2, bool filters = false, bool filterType = false ) { if ( bodies.empty() ) return; bvh.indices.clear(); bvh.nodes.clear(); + bvh.bounds.clear(); bvh.indices.reserve(bodies.size()); // stores bounds @@ -201,7 +212,7 @@ namespace { bvh.dirty = false; } - void buildMeshBVH( pod::BVH& bvh, const uf::Mesh& mesh, int32_t capacity = 4 ) { + void buildMeshBVH( pod::BVH& bvh, const uf::Mesh& mesh, pod::BVH::index_t capacity = 4 ) { uint32_t triangles = mesh.index.count / 3; bvh.indices.clear(); @@ -245,7 +256,7 @@ namespace { } namespace { - pod::BVH::UpdatePolicy::Decision decideBVHUpdate( const pod::BVH& bvh, uf::stl::vector& bodies, const pod::BVH::UpdatePolicy& policy, size_t frameCounter ) { + pod::BVH::UpdatePolicy::Decision decideBVHUpdate( 
pod::BVH& bvh, uf::stl::vector& bodies, const pod::BVH::UpdatePolicy& policy, size_t frameCounter ) { // BVH is not built if ( bvh.indices.empty() || bvh.nodes.empty() ) { return pod::BVH::UpdatePolicy::Decision::REBUILD; @@ -253,7 +264,7 @@ namespace { if ( bodies.empty() ) return pod::BVH::UpdatePolicy::Decision::NONE; uint32_t dirtyCount = 0; - float oldRootArea = ::aabbSurfaceArea( bvh.nodes[0].bounds ); + float oldRootArea = ::aabbSurfaceArea( bvh.bounds[0] ); // update/check each body for ( auto idx : bvh.indices ) { @@ -273,14 +284,20 @@ namespace { if ( displacement > policy.displacementThreshold * size ) ++dirtyCount; } + // update nodes + for ( auto i = 0; i < bvh.nodes.size(); ++i ) { + auto& node = bvh.nodes[i]; + if ( /*node.count*/ node.getCount() == 0 ) continue; + auto& bound = bvh.bounds[i]; + bound = bodies[bvh.indices[node.start]]->bounds; + for ( auto i = 1; i < node.getCount() /*node.count*/; ++i ) bound = ::mergeAabb( bound, bodies[bvh.indices[node.start + i]]->bounds ); + } float dirtyRatio = (float) dirtyCount / (float) bodies.size(); // compute new root bounds pod::AABB newRoot = bodies[bvh.indices[0]]->bounds; - for ( auto i = 1; i < bvh.indices.size(); ++i ) { - newRoot = ::mergeAabb(newRoot, bodies[bvh.indices[i]]->bounds); - } + for ( auto i = 1; i < bvh.indices.size(); ++i ) newRoot = ::mergeAabb(newRoot, bodies[bvh.indices[i]]->bounds); float newRootArea = ::aabbSurfaceArea( newRoot ); // BVH is too out of date, rebuild it @@ -298,25 +315,30 @@ namespace { if ( bvh.nodes.empty() ) return; // update leaf bounds - #pragma omp parallel for + uf::stl::vector leaves; + leaves.reserve(::reserveCount); for ( auto i = 0; i < bvh.nodes.size(); i++ ) { - auto& node = bvh.nodes[i]; - if ( node.count > 0 ) { - // leaf node: recompute bounds from bodies - node.bounds = bounds[bvh.indices[node.start]]; + if ( bvh.nodes[i].getCount() == 0 ) continue; + leaves.emplace_back(i); + } - for ( auto j = 1; j < node.count; j++ ) { - node.bounds = 
::mergeAabb(node.bounds, bounds[bvh.indices[node.start + j]] ); - } - } + // recompute bounds from bodies + for ( auto i = 0; i < leaves.size(); i++ ) { + auto nodeID = leaves[i]; + auto& node = bvh.nodes[nodeID]; + auto& bound = bvh.bounds[nodeID]; + bound = bounds[bvh.indices[node.start]]; + for ( auto j = 1; j < node.getCount(); j++ ) + bound = ::mergeAabb(bound, bounds[bvh.indices[node.start + j]]); } // update internal nodes bottom-up - for ( int32_t i = (int32_t) bvh.nodes.size() - 1; i >= 0; i-- ) { + for ( pod::BVH::index_t i = (pod::BVH::index_t) bvh.nodes.size() - 1; i >= 0; i-- ) { auto& node = bvh.nodes[i]; + auto& bound = bvh.bounds[i]; // internal node - if ( node.count == 0 ) { - node.bounds = ::mergeAabb(bvh.nodes[node.left].bounds, bvh.nodes[node.right].bounds); + if ( node.getCount() == 0 ) { + bound = ::mergeAabb(bvh.bounds[node.left], bvh.bounds[node.right]); } } } @@ -329,31 +351,28 @@ namespace { #pragma omp parallel for for ( auto i = 0; i < bvh.nodes.size(); i++ ) { auto& node = bvh.nodes[i]; - if ( node.count > 0 ) { - // leaf node: recompute bounds from bodies - auto nodeID = bvh.indices[node.start]; + if ( node.getCount() == 0 ) continue; + auto& bound = bvh.bounds[i]; + // leaf node: recompute bounds from bodies + auto nodeID = bvh.indices[node.start]; - node.bounds = bodies[nodeID]->bounds; - node.asleep = !bodies[nodeID]->activity.awake; + bound = bodies[nodeID]->bounds; + node.setAsleep(!bodies[nodeID]->activity.awake); - for ( auto j = 1; j < node.count; j++ ) { - auto bodyID = bvh.indices[node.start + j]; - node.bounds = ::mergeAabb(node.bounds, bodies[bodyID]->bounds ); - node.asleep = node.asleep && !bodies[bodyID]->activity.awake; - } + for ( auto j = 1; j < node.getCount(); j++ ) { + auto bodyID = bvh.indices[node.start + j]; + bound = ::mergeAabb( bound, bodies[bodyID]->bounds ); + node.setAsleep(node.isAsleep() && !bodies[bodyID]->activity.awake); } } // update internal nodes bottom-up - for ( int32_t i = (int32_t) 
bvh.nodes.size() - 1; i >= 0; i-- ) { + for ( int64_t i = (int64_t) bvh.nodes.size() - 1; i >= 0; i-- ) { auto& node = bvh.nodes[i]; + if ( node.getCount() > 0 ) continue; // internal node - if ( node.count == 0 ) { - const auto& leftNode = bvh.nodes[node.left]; - const auto& rightNode = bvh.nodes[node.right]; - node.bounds = ::mergeAabb(leftNode.bounds, rightNode.bounds); - node.asleep = leftNode.asleep && rightNode.asleep; - } + bvh.bounds[i] = ::mergeAabb( bvh.bounds[node.left], bvh.bounds[node.right] ); + node.setAsleep( bvh.nodes[node.left].isAsleep() && bvh.nodes[node.right].isAsleep()); } } @@ -385,36 +404,41 @@ namespace { } namespace { - int32_t flattenBVH( pod::BVH& bvh, int32_t nodeID ) { - if ( nodeID == 0 ) bvh.flattened.reserve(bvh.nodes.size()); + pod::BVH::index_t flattenBVH( pod::BVH& bvh, pod::BVH::index_t nodeID ) { + if ( nodeID == 0 ) { + bvh.flattened.clear(); + bvh.flatBounds.clear(); + bvh.flattened.reserve(bvh.nodes.size()); + bvh.flatBounds.reserve(bvh.bounds.size()); + } const auto& node = bvh.nodes[nodeID]; - int32_t flatID = (int32_t) bvh.flattened.size(); + pod::BVH::index_t flatID = (pod::BVH::index_t) bvh.flattened.size(); bvh.flattened.emplace_back(); // placeholder + bvh.flatBounds.emplace_back( bvh.bounds[nodeID] ); pod::BVH::FlatNode flat{}; - flat.bounds = node.bounds; - flat.start = -1; - flat.count = -1; - flat.skipIndex = -1; - flat.asleep = node.asleep; + flat.start = 0; + flat.setCount(0); + flat.skipIndex = 0; + flat.setAsleep(node.isAsleep()); // leaf - if ( node.count > 0 ) { + if ( node.getCount() > 0 ) { flat.start = node.start; - flat.count = node.count; + flat.setCount(node.getCount()); flat.skipIndex = flatID + 1; // next node after this leaf bvh.flattened[flatID] = flat; return flatID + 1; } // internal else { - flat.start = -1; - flat.count = 0; + flat.start = 0; + flat.setCount(0); - int32_t leftID = ::flattenBVH( bvh, node.left ); - int32_t rightID = ::flattenBVH( bvh, node.right ); + pod::BVH::index_t leftID = 
::flattenBVH( bvh, node.left ); + pod::BVH::index_t rightID = ::flattenBVH( bvh, node.right ); flat.skipIndex = rightID; // skip entire subtree bvh.flattened[flatID] = flat; @@ -425,17 +449,17 @@ namespace { namespace { // collects a list of nodes that are overlapping with each other - void traverseNodePair(const pod::BVH& bvh, int32_t nodeAID, int32_t nodeBID, pod::BVH::pairs_t& pairs) { + void traverseNodePair(const pod::BVH& bvh, pod::BVH::index_t nodeAID, pod::BVH::index_t nodeBID, pod::BVH::pairs_t& pairs) { const auto& nodeA = bvh.nodes[nodeAID]; const auto& nodeB = bvh.nodes[nodeBID]; - if ( nodeA.asleep || nodeB.asleep || !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) return; + if ( nodeA.isAsleep() || nodeB.isAsleep() || !::aabbOverlap( bvh.bounds[nodeAID], bvh.bounds[nodeBID] ) ) return; - if ( nodeA.count > 0 && nodeB.count > 0 ) { - for ( auto i = 0; i < nodeA.count; ++i ) { - for ( auto j = 0; j < nodeB.count; ++j ) { - int32_t bodyA = bvh.indices[nodeA.start + i]; - int32_t bodyB = bvh.indices[nodeB.start + j]; + if ( nodeA.getCount() > 0 && nodeB.getCount() > 0 ) { + for ( auto i = 0; i < nodeA.getCount(); ++i ) { + for ( auto j = 0; j < nodeB.getCount(); ++j ) { + pod::BVH::index_t bodyA = bvh.indices[nodeA.start + i]; + pod::BVH::index_t bodyB = bvh.indices[nodeB.start + j]; if ( bodyA == bodyB ) continue; if ( bodyA > bodyB ) std::swap( bodyA, bodyB ); @@ -445,27 +469,27 @@ namespace { return; } - if ( nodeA.count == 0 ) { + if ( nodeA.getCount() == 0 ) { ::traverseNodePair( bvh, nodeA.left, nodeBID, pairs ); ::traverseNodePair( bvh, nodeA.right, nodeBID, pairs ); } - if ( nodeB.count == 0 ) { + if ( nodeB.getCount() == 0 ) { ::traverseNodePair( bvh, nodeAID, nodeB.left, pairs ); ::traverseNodePair( bvh, nodeAID, nodeB.right, pairs ); } } // collects a list of nodes from each BVH that are overlapping with each other (for mesh v mesh) - void traverseNodePair( const pod::BVH& bvhA, int32_t nodeAID, const pod::BVH& bvhB, int32_t nodeBID, 
pod::BVH::pairs_t& pairs ) { + void traverseNodePair( const pod::BVH& bvhA, pod::BVH::index_t nodeAID, const pod::BVH& bvhB, pod::BVH::index_t nodeBID, pod::BVH::pairs_t& pairs ) { const auto& nodeA = bvhA.nodes[nodeAID]; const auto& nodeB = bvhB.nodes[nodeBID]; - if ( nodeA.asleep || nodeB.asleep || !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) return; + if ( nodeA.isAsleep() || nodeB.isAsleep() || !::aabbOverlap( bvhA.bounds[nodeAID], bvhB.bounds[nodeBID] ) ) return; - if ( nodeA.count > 0 && nodeB.count > 0 ) { - for ( auto i = 0; i < nodeA.count; ++i ) { - for ( auto j = 0; j < nodeB.count; ++j ) { - int32_t bodyA = bvhA.indices[nodeA.start + i]; - int32_t bodyB = bvhB.indices[nodeB.start + j]; + if ( nodeA.getCount() > 0 && nodeB.getCount() > 0 ) { + for ( auto i = 0; i < nodeA.getCount(); ++i ) { + for ( auto j = 0; j < nodeB.getCount(); ++j ) { + pod::BVH::index_t bodyA = bvhA.indices[nodeA.start + i]; + pod::BVH::index_t bodyB = bvhB.indices[nodeB.start + j]; if ( bodyA == bodyB ) continue; if ( bodyA > bodyB ) std::swap( bodyA, bodyB ); @@ -475,24 +499,24 @@ namespace { return; } - if ( nodeA.count == 0 ) { + if ( nodeA.getCount() == 0 ) { ::traverseNodePair( bvhA, nodeA.left, bvhB, nodeBID, pairs ); ::traverseNodePair( bvhA, nodeA.right, bvhB, nodeBID, pairs ); } - if ( nodeB.count == 0 ) { + if ( nodeB.getCount() == 0 ) { ::traverseNodePair( bvhA, nodeAID, bvhB, nodeB.left, pairs ); ::traverseNodePair( bvhA, nodeAID, bvhB, nodeB.right, pairs ); } } - void traverseBVH( const pod::BVH& bvh, int32_t nodeID, pod::BVH::pairs_t& pairs ) { + void traverseBVH( const pod::BVH& bvh, pod::BVH::index_t nodeID, pod::BVH::pairs_t& pairs ) { const auto& node = bvh.nodes[nodeID]; - if ( node.count > 0 ) { - for ( auto i = 0; i < node.count; ++i ) { - for ( auto j = i + 1; j < node.count; ++j ) { - int32_t bodyA = bvh.indices[node.start + i]; - int32_t bodyB = bvh.indices[node.start + j]; + if ( node.getCount() > 0 ) { + for ( auto i = 0; i < node.getCount(); ++i ) { + 
for ( auto j = i + 1; j < node.getCount(); ++j ) { + pod::BVH::index_t bodyA = bvh.indices[node.start + i]; + pod::BVH::index_t bodyB = bvh.indices[node.start + j]; if ( bodyA == bodyB ) continue; if ( bodyA > bodyB ) std::swap( bodyA, bodyB ); @@ -525,44 +549,45 @@ namespace { namespace { // query a BVH with an AABB via a stack - void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& outIndices ) { + void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& outIndices ) { if ( bvh.nodes.empty() ) return; if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, bounds, outIndices ); outIndices.reserve(::reserveCount); - uf::stl::stack stack; + thread_local uf::stl::stack stack; + //stack.clear(); // there is no stack.clear(), and the stack should already be cleared by the end of this function stack.push(0); while ( !stack.empty() ) { - int32_t idx = stack.top(); stack.pop(); + pod::BVH::index_t idx = stack.top(); stack.pop(); auto& node = bvh.nodes[idx]; - if ( node.asleep || !::aabbOverlap( bounds, node.bounds ) ) continue; + if ( node.isAsleep() || !::aabbOverlap( bounds, bvh.bounds[idx] ) ) continue; - if ( node.count > 0 ) { - for ( auto i = 0; i < node.count; ++i) outIndices.emplace_back(bvh.indices[node.start + i]); + if ( node.getCount() > 0 ) { + for ( auto i = 0; i < node.getCount(); ++i) outIndices.emplace_back(bvh.indices[node.start + i]); } else { stack.push(node.left); stack.push(node.right); } } } - void queryBVH( const pod::BVH& bvh, const pod::PhysicsBody& body, uf::stl::vector& outIndices ) { + void queryBVH( const pod::BVH& bvh, const pod::PhysicsBody& body, uf::stl::vector& outIndices ) { return ::queryBVH( bvh, body.bounds, outIndices ); } // query a BVH with an AABB via recursion - void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& outIndices, int32_t nodeID ) { + void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& outIndices, pod::BVH::index_t 
nodeID ) { if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, bounds, outIndices ); if ( nodeID == 0 ) outIndices.reserve(::reserveCount); const auto& node = bvh.nodes[nodeID]; - if ( node.asleep || !::aabbOverlap( node.bounds, bounds ) ) return; + if ( node.isAsleep() || !::aabbOverlap( bounds, bvh.bounds[nodeID] ) ) return; - if ( node.count > 0 ) { - for ( auto i = 0; i < node.count; ++i ) outIndices.emplace_back(bvh.indices[node.start + i]); + if ( node.getCount() > 0 ) { + for ( auto i = 0; i < node.getCount(); ++i ) outIndices.emplace_back(bvh.indices[node.start + i]); return; } @@ -572,25 +597,26 @@ namespace { } // query a BVH with a ray via a stack - void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& outIndices, float maxDist ) { + void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& outIndices, float maxDist ) { if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, ray, outIndices, maxDist ); if ( bvh.nodes.empty() ) return; outIndices.reserve(::reserveCount); - uf::stl::stack stack; + thread_local uf::stl::stack stack; + //stack.clear(); // there is no stack.clear(), and the stack should already be cleared by the end of this function stack.push(0); while ( !stack.empty() ) { - int32_t idx = stack.top(); stack.pop(); + pod::BVH::index_t idx = stack.top(); stack.pop(); const auto& node = bvh.nodes[idx]; float tMin, tMax; - if ( node.asleep || !::rayAabbIntersect( ray, node.bounds, tMin, tMax ) ) continue; + if ( node.isAsleep() || !::rayAabbIntersect( ray, bvh.bounds[idx], tMin, tMax ) ) continue; if ( tMin > maxDist ) continue; - if ( node.count > 0 ) { - for ( auto i = 0; i < node.count; ++i) outIndices.emplace_back(bvh.indices[node.start + i]); + if ( node.getCount() > 0 ) { + for ( auto i = 0; i < node.getCount(); ++i) outIndices.emplace_back(bvh.indices[node.start + i]); } else { stack.push(node.left); stack.push(node.right); @@ -598,18 +624,18 @@ namespace { } } // query a BVH with a ray via 
recursion - void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& outIndices, int32_t nodeID, float maxDist ) { + void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& outIndices, pod::BVH::index_t nodeID, float maxDist ) { if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, ray, outIndices, maxDist ); if ( nodeID == 0 ) outIndices.reserve(::reserveCount); const auto& node = bvh.nodes[nodeID]; float tMin, tMax; - if ( node.asleep || !::rayAabbIntersect( ray, node.bounds, tMin, tMax ) ) return; + if ( node.isAsleep() || !::rayAabbIntersect( ray, bvh.bounds[nodeID], tMin, tMax ) ) return; if ( tMin > maxDist ) return; - if ( node.count > 0 ) { - for ( auto i = 0; i < node.count; ++i ) outIndices.emplace_back(bvh.indices[node.start + i]); + if ( node.getCount() > 0 ) { + for ( auto i = 0; i < node.getCount(); ++i ) outIndices.emplace_back(bvh.indices[node.start + i]); return; } @@ -629,16 +655,16 @@ namespace { for ( auto i = 0; i < nodes.size(); ++i ) { const auto& nodeA = nodes[i]; - if ( nodeA.count <= 0 || nodeA.asleep ) continue; + if ( nodeA.getCount() <= 0 || nodeA.isAsleep() ) continue; for ( auto j = i + 1; j < nodes.size(); ++j ) { const auto& nodeB = nodes[j]; - if ( nodeB.count <= 0 || nodeB.asleep ) continue; + if ( nodeB.getCount() <= 0 || nodeB.isAsleep() ) continue; - if ( !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) continue; + if ( !::aabbOverlap( bvh.flatBounds[i], bvh.flatBounds[j] ) ) continue; - for ( auto ia = 0; ia < nodeA.count; ++ia ) { - for ( auto ib = 0; ib < nodeB.count; ++ib ) { + for ( auto ia = 0; ia < nodeA.getCount(); ++ia ) { + for ( auto ib = 0; ib < nodeB.getCount(); ++ib ) { auto indexA = indices[nodeA.start + ia]; auto indexB = indices[nodeB.start + ib]; @@ -664,16 +690,16 @@ namespace { for ( auto i = 0; i < nodesA.size(); ++i ) { const auto& nodeA = nodesA[i]; - if ( nodeA.count <= 0 || nodeA.asleep ) continue; + if ( nodeA.getCount() <= 0 || nodeA.isAsleep() ) continue; for ( 
auto j = 0; j < nodesB.size(); ++j ) { const auto& nodeB = nodesB[j]; - if ( nodeB.count <= 0 || nodeB.asleep ) continue; + if ( nodeB.getCount() <= 0 || nodeB.isAsleep() ) continue; - if ( !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) continue; + if ( !::aabbOverlap( bvhA.flatBounds[i], bvhB.flatBounds[j] ) ) continue; - for ( auto ia = 0; ia < nodeA.count; ++ia ) { - for (auto ib = 0; ib < nodeB.count; ++ib ) { + for ( auto ia = 0; ia < nodeA.getCount(); ++ia ) { + for (auto ib = 0; ib < nodeB.getCount(); ++ib ) { auto indexA = indicesA[nodeA.start + ia]; auto indexB = indicesB[nodeB.start + ib]; @@ -684,20 +710,20 @@ namespace { } } - void queryFlatBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& outIndices ) { + void queryFlatBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& outIndices ) { auto& nodes = bvh.flattened; auto& indices = bvh.indices; outIndices.reserve(::reserveCount); - int32_t idx = 0; + pod::BVH::index_t idx = 0; while ( idx < nodes.size() ) { const auto& node = nodes[idx]; - if ( !node.asleep && ::aabbOverlap( bounds, node.bounds ) ) { + if ( !node.isAsleep() && ::aabbOverlap( bounds, bvh.flatBounds[idx] ) ) { // leaf - if ( node.count > 0 ) { - for ( auto i = 0; i < node.count; ++i ) { + if ( node.getCount() > 0 ) { + for ( auto i = 0; i < node.getCount(); ++i ) { outIndices.emplace_back( indices[node.start + i] ); } } @@ -708,20 +734,20 @@ namespace { } } } - void queryFlatBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& outIndices, float maxDist ) { + void queryFlatBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& outIndices, float maxDist ) { auto& nodes = bvh.flattened; auto& indices = bvh.indices; outIndices.reserve(::reserveCount); - int32_t idx = 0; + pod::BVH::index_t idx = 0; while ( idx < nodes.size() ) { const auto& node = nodes[idx]; float tMin, tMax; - if ( !node.asleep && ::rayAabbIntersect( ray, node.bounds, tMin, tMax ) && tMin <= maxDist ) { + if ( 
!node.isAsleep() && ::rayAabbIntersect( ray, bvh.flatBounds[idx], tMin, tMax ) && tMin <= maxDist ) { // leaf - if ( node.count > 0 ) { - for ( auto i = 0; i < node.count; ++i ) { + if ( node.getCount() > 0 ) { + for ( auto i = 0; i < node.getCount(); ++i ) { outIndices.emplace_back( indices[node.start + i] ); } } @@ -736,10 +762,10 @@ namespace { namespace { struct UnionFind { - uf::stl::vector parent; - uf::stl::vector rank; + uf::stl::vector parent; + uf::stl::vector rank; - UnionFind( int32_t n ) { + UnionFind( pod::BVH::index_t n ) { parent.resize(n); rank.resize(n, 0); @@ -747,14 +773,14 @@ namespace { parent[i] = i; } - int32_t find( int32_t x ) { + pod::BVH::index_t find( pod::BVH::index_t x ) { if ( parent[x] != x ) parent[x] = find(parent[x]); return parent[x]; } - void unite( int32_t a, int32_t b ) { - int32_t rootA = find(a); - int32_t rootB = find(b); + void unite( pod::BVH::index_t a, pod::BVH::index_t b ) { + pod::BVH::index_t rootA = find(a); + pod::BVH::index_t rootB = find(b); if ( rootA == rootB ) return; @@ -776,20 +802,20 @@ namespace { } // map root to island index - uf::stl::unordered_map rootToIsland; + uf::stl::unordered_map rootToIsland; islands.clear(); islands.reserve(bodies.size()); for ( auto i = 0; i < bodies.size(); i++ ) { - int32_t root = unionizer.find(i); + pod::BVH::index_t root = unionizer.find(i); if (rootToIsland.find(root) == rootToIsland.end()) { - rootToIsland[root] = (int32_t) islands.size(); + rootToIsland[root] = (pod::BVH::index_t) islands.size(); islands.emplace_back(); } - int32_t islandID = rootToIsland[root]; + pod::BVH::index_t islandID = rootToIsland[root]; islands[islandID].indices.emplace_back( i ); } @@ -798,8 +824,8 @@ namespace { // do not insert these pairs if they're non-colliding if ( !::shouldCollide( *bodies[a], *bodies[b] ) ) continue; - int32_t root = unionizer.find(a); - int32_t islandID = rootToIsland[root]; + pod::BVH::index_t root = unionizer.find(a); + pod::BVH::index_t islandID = 
rootToIsland[root]; islands[islandID].pairs.emplace(a, b); } } diff --git a/engine/src/utils/math/physics/helpers.inl b/engine/src/utils/math/physics/helpers.inl index bf9b4a8b..c10cf14e 100644 --- a/engine/src/utils/math/physics/helpers.inl +++ b/engine/src/utils/math/physics/helpers.inl @@ -47,10 +47,10 @@ namespace { pod::Vector3f aabbCenter( const pod::AABB& aabb ); - void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& indices ); - void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& indices, int32_t nodeID ); - void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& indices, float maxDist = FLT_MAX ); - void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& indices, int32_t nodeID, float maxDist = FLT_MAX ); + void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& indices ); + void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector& indices, pod::BVH::index_t nodeID ); + void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& indices, float maxDist = FLT_MAX ); + void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector& indices, pod::BVH::index_t nodeID, float maxDist = FLT_MAX ); void queryOverlaps( const pod::BVH& bvh, pod::BVH::pairs_t& outPairs ); void queryOverlaps( const pod::BVH& bvhA, const pod::BVH& bvhB, pod::BVH::pairs_t& outPairs ); } diff --git a/engine/src/utils/math/physics/impl.cpp b/engine/src/utils/math/physics/impl.cpp index 925df8c4..550ba950 100644 --- a/engine/src/utils/math/physics/impl.cpp +++ b/engine/src/utils/math/physics/impl.cpp @@ -10,40 +10,39 @@ namespace { bool psgContactSolver = true; // use PSG contact solver bool useGjk = false; // currently don't have a way to broadphase mesh => narrowphase tri via GJK bool fixedStep = true; // run physics simulation with a fixed delta time (with accumulation), rather than rely on actual engine deltatime - int32_t substeps = 0; 
// number of substeps per frame tick - int32_t reserveCount = 32; // amount of elements to reserve for vectors used in this system, to-do: have it tie to a memory pool allocator + uint32_t substeps = 0; // number of substeps per frame tick + uint32_t reserveCount = 32; // amount of elements to reserve for vectors used in this system, to-do: have it tie to a memory pool allocator - // increasing these make things lag for reasons I can imagine why - int32_t broadphaseBvhCapacity = 1; // number of bodies per leaf node - int32_t meshBvhCapacity = 1; // number of triangles per leaf node + // increasing these make things lag for reasons I can imagine why (having to test more triangles over just more boxes) + uint32_t broadphaseBvhCapacity = 4; // number of bodies per leaf node + uint32_t meshBvhCapacity = 1; // number of triangles per leaf node // additionally flattens a BVH for linear iteration, rather than a recursive / stack-based traversal bool flattenBvhBodies = true; bool flattenBvhMeshes = true; // use surface area heuristics for building the BVH, rather than naive splits - bool useBvhSahBodies = false; // it actually seems slower to use these...... + bool useBvhSahBodies = true; // it actually seems slower to use these...... 
bool useBvhSahMeshes = true; bool useSplitBvhs = true; // creates separate BVHs for static / dynamic objects // to-do: find possibly better values for this - int32_t solverIterations = 10; + uint32_t solverIterations = 10; float baumgarteCorrectionPercent = 0.2f; float baumgarteCorrectionSlop = 0.01f; uf::stl::unordered_map manifoldsCache; - int32_t manifoldCacheLifetime = 6; // to-do: find a good value for this + uint32_t manifoldCacheLifetime = 6; // to-do: find a good value for this uint32_t frameCounter = 0; // to-do: tweak this to not be annoying - // currently seems only reliable when it hits its TTL, but too long of a wait is gross, and too frequent of an update causes lag pod::BVH::UpdatePolicy bvhUpdatePolicy = { .displacementThreshold = 0.25f, .overlapThreshold = 2.0f, .dirtyRatioThreshold = 0.3f, - .maxFramesBeforeRebuild = 120, + .maxFramesBeforeRebuild = 60 * 10, // 10 seconds }; } @@ -476,7 +475,8 @@ pod::RayQuery uf::physics::impl::rayCast( const pod::Ray& ray, const pod::World& auto& staticBvh = world.staticBvh; auto& bodies = world.bodies; - uf::stl::vector candidates; + thread_local uf::stl::vector candidates; + candidates.clear(); ::queryBVH( dynamicBvh, ray, candidates ); if ( ::useSplitBvhs ) ::queryBVH( staticBvh, ray, candidates ); diff --git a/engine/src/utils/math/physics/mesh.inl b/engine/src/utils/math/physics/mesh.inl index 5ca8b36e..83362502 100644 --- a/engine/src/utils/math/physics/mesh.inl +++ b/engine/src/utils/math/physics/mesh.inl @@ -24,7 +24,8 @@ namespace { // transform to local space for BVH query auto bounds = ::transformAabbToLocal( aabb.bounds, ::getTransform( mesh ) ); - uf::stl::vector candidates; + thread_local uf::stl::vector candidates; + candidates.clear(); ::queryBVH( bvh, bounds, candidates ); bool hit = false; @@ -47,7 +48,8 @@ namespace { // transform to local space for BVH query auto bounds = ::transformAabbToLocal( sphere.bounds, ::getTransform( mesh ) ); - uf::stl::vector candidates; + thread_local 
uf::stl::vector candidates; + candidates.clear(); ::queryBVH( bvh, bounds, candidates ); bool hit = false; @@ -72,7 +74,8 @@ namespace { // transform to local space for BVH query auto bounds = ::transformAabbToLocal( plane.bounds, ::getTransform( mesh ) ); - uf::stl::vector candidates; + thread_local uf::stl::vector candidates; + candidates.clear(); ::queryBVH( bvh, bounds, candidates ); bool hit = false; @@ -96,7 +99,8 @@ namespace { // transform to local space for BVH query auto bounds = ::transformAabbToLocal( capsule.bounds, ::getTransform( mesh ) ); - uf::stl::vector candidates; + thread_local uf::stl::vector candidates; + candidates.clear(); ::queryBVH( bvh, bounds, candidates ); bool hit = false; @@ -120,7 +124,8 @@ namespace { const auto& bvhB = *b.collider.mesh.bvh; // compute overlaps between one BVH and another BVH - pod::BVH::pairs_t pairs; + thread_local pod::BVH::pairs_t pairs; + pairs.clear(); ::queryOverlaps( bvhA, bvhB, pairs ); bool hit = false; diff --git a/engine/src/utils/math/physics/ray.inl b/engine/src/utils/math/physics/ray.inl index c87ac616..0b198313 100644 --- a/engine/src/utils/math/physics/ray.inl +++ b/engine/src/utils/math/physics/ray.inl @@ -202,7 +202,8 @@ namespace { ray.origin = uf::transform::applyInverse( transform, r.origin ); ray.direction = uf::quaternion::rotate( uf::quaternion::inverse( transform.orientation ), r.direction ); - uf::stl::vector candidates; + thread_local uf::stl::vector candidates; + candidates.clear(); ::queryBVH( bvh, ray, candidates ); for ( auto triID : candidates ) {