more optimizations by moving node bounds out of the nodes into their own parallel arrays (SoA) instead, plus some other tweaks and fixes (even though it doesn't seem to amount to many tangible results, because the scene explodes when I make props in mds_mcdonalds dynamic)

This commit is contained in:
ecker 2025-09-15 16:01:05 -05:00
parent 7dee5ccd53
commit 1e548265ad
7 changed files with 260 additions and 220 deletions

View File

@ -44,7 +44,8 @@ namespace pod {
};
struct BVH {
typedef std::pair<int32_t,int32_t> pair_t;
typedef uint32_t index_t;
typedef std::pair<index_t,index_t> pair_t;
struct PairHash {
size_t operator()( const pair_t& p ) const noexcept {
@ -62,21 +63,25 @@ namespace pod {
typedef uf::stl::unordered_set<pair_t, PairHash, PairEq> pairs_t;
// Hierarchical BVH node.
// `flags` packs two values into one 32-bit word: the low 31 bits hold the
// entry count (non-zero only for leaves) and bit 31 holds the asleep flag.
struct Node {
/*alignas(16)*/ pod::AABB bounds = {};
int32_t left = -1;
int32_t right = -1;
int32_t start = 0;
int32_t count = 0;
// child node indices; only followed when getCount() == 0 (internal node)
BVH::index_t left = 0;
BVH::index_t right = 0;
// offset into BVH::indices of the first entry referenced by a leaf
BVH::index_t start = 0;
// bits 0-30: entry count, bit 31: asleep
BVH::index_t flags = 0;
bool asleep = false;
// entry count stored in the low 31 bits of `flags`
BVH::index_t getCount() const { return flags & 0x7FFFFFFF; }
// true when bit 31 of `flags` is set
bool isAsleep() const { return (flags & 0x80000000u) != 0; }
// stores `c` (truncated to 31 bits) while preserving the asleep bit
void setCount(BVH::index_t c) { flags = (flags & 0x80000000u) | (c & 0x7FFFFFFF); }
// sets or clears bit 31 while preserving the count
void setAsleep(bool a) { flags = (flags & 0x7FFFFFFF) | (a ? 0x80000000u : 0); }
};
// Node of the flattened (linear) BVH layout.
// `flags` uses the same packing as Node: low 31 bits are the entry count,
// bit 31 is the asleep flag.
struct FlatNode {
/*alignas(16)*/ pod::AABB bounds = {};
int32_t start = -1;
int32_t count = -1;
int32_t skipIndex = -1;
// offset into BVH::indices of the first entry referenced by a leaf
BVH::index_t start = 0;
// index of the flat node to continue at once this node (and its subtree) is done
BVH::index_t skipIndex = 0;
// bits 0-30: entry count, bit 31: asleep
BVH::index_t flags = 0;
bool asleep = false;
// entry count stored in the low 31 bits of `flags`
BVH::index_t getCount() const { return flags & 0x7FFFFFFF; }
// true when bit 31 of `flags` is set
bool isAsleep() const { return (flags & 0x80000000u) != 0; }
// stores `c` (truncated to 31 bits) while preserving the asleep bit
void setCount(BVH::index_t c) { flags = (flags & 0x80000000u) | (c & 0x7FFFFFFF); }
// sets or clears bit 31 while preserving the count
void setAsleep(bool a) { flags = (flags & 0x7FFFFFFF) | (a ? 0x80000000u : 0); }
};
struct UpdatePolicy {
enum class Decision {
@ -87,13 +92,16 @@ namespace pod {
float displacementThreshold = 0.25f; // 25% of AABB size
float overlapThreshold = 2.0f; // 2x growth in root surface area
float dirtyRatioThreshold = 0.3f; // 30% dirty bodies
int maxFramesBeforeRebuild = 60; // force rebuild every 60 frames
uint16_t maxFramesBeforeRebuild = 600; // force rebuild every 600 frames
};
bool dirty = false;
uf::stl::vector<uint32_t> indices;
uf::stl::vector<pod::BVH::index_t> indices;
uf::stl::vector<pod::BVH::Node> nodes;
uf::stl::vector<pod::BVH::FlatNode> flattened;
uf::stl::vector<pod::AABB> bounds;
uf::stl::vector<pod::AABB> flatBounds;
};
struct MeshBVH {

View File

@ -103,10 +103,10 @@ namespace {
return ::computeSegmentAABB( p1, p2, body.collider.capsule.radius );
} break;
case pod::ShapeType::MESH: {
if ( body.collider.mesh.bvh && !body.collider.mesh.bvh->nodes.empty() )
if ( body.collider.mesh.bvh && !body.collider.mesh.bvh->bounds.empty() )
return {
transform.position + body.collider.mesh.bvh->nodes[0].bounds.min,
transform.position + body.collider.mesh.bvh->nodes[0].bounds.max,
transform.position + body.collider.mesh.bvh->bounds[0].min,
transform.position + body.collider.mesh.bvh->bounds[0].max,
};
} break;
default: {

View File

@ -1,8 +1,8 @@
namespace {
int32_t flattenBVH( pod::BVH& bvh, int32_t nodeID );
pod::BVH::index_t flattenBVH( pod::BVH& bvh, pod::BVH::index_t nodeID );
void queryFlatBVH( const pod::BVH&, const pod::AABB& bounds, uf::stl::vector<int32_t>& out );
void queryFlatBVH( const pod::BVH&, const pod::Ray& ray, uf::stl::vector<int32_t>& out, float maxDist = FLT_MAX );
void queryFlatBVH( const pod::BVH&, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& out );
void queryFlatBVH( const pod::BVH&, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& out, float maxDist = FLT_MAX );
void queryFlatOverlaps( const pod::BVH& bvh, pod::BVH::pairs_t& outPairs );
void queryFlatOverlaps( const pod::BVH& bvhA, const pod::BVH& bvhB, pod::BVH::pairs_t& outPairs );
@ -10,41 +10,40 @@ namespace {
// BVH
namespace {
int32_t buildBVHNode( pod::BVH& bvh, const uf::stl::vector<pod::AABB>& bounds, int32_t start, int32_t end, int32_t capacity = 2 ) {
pod::BVH::index_t buildBVHNode( pod::BVH& bvh, const uf::stl::vector<pod::AABB>& bounds, pod::BVH::index_t start, pod::BVH::index_t end, pod::BVH::index_t capacity = 2 ) {
pod::BVH::Node node{};
node.left = -1;
node.right = -1;
node.left = 0;
node.right = 0;
node.start = start;
node.count = 0;
node.bounds = bounds[bvh.indices[start]];
node.setCount(0);
// compute bounds of this node
for ( auto i = start + 1; i < end; ++i) node.bounds = ::mergeAabb( node.bounds, bounds[bvh.indices[i]] );
pod::AABB bound = bounds[bvh.indices[start]];
for ( auto i = start + 1; i < end; ++i) bound = ::mergeAabb( bound, bounds[bvh.indices[i]] );
int32_t count = end - start;
pod::BVH::index_t count = end - start;
if ( count <= capacity ) {
// leaf
node.start = start;
node.count = count;
int32_t index = (int32_t) bvh.nodes.size();
node.setCount(count);
pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
bvh.nodes.emplace_back(node);
bvh.bounds.emplace_back(bound);
return index;
}
// choose split axis by largest extent
auto extent = node.bounds.max - node.bounds.min;
auto extent = bound.max - bound.min;
auto axis = (extent.x > extent.y && extent.x > extent.z) ? 0 : (extent.y > extent.z ? 1 : 2);
// sort indices by centroid along axis
std::sort( bvh.indices.begin() + start, bvh.indices.begin() + end, [&](uint32_t a, uint32_t b) {
float ca = ::aabbCenter( bounds[a] )[axis];
float cb = ::aabbCenter( bounds[b] )[axis];
return ca < cb;
auto mid = ( start + end ) / 2;
std::nth_element(bvh.indices.begin() + start, bvh.indices.begin() + mid, bvh.indices.begin() + end, [&](uint32_t a, uint32_t b) {
return ::aabbCenter(bounds[a])[axis] < ::aabbCenter(bounds[b])[axis];
});
int32_t mid = ( start + end ) / 2;
int32_t index = (int32_t) bvh.nodes.size();
pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
bvh.nodes.emplace_back( node ); // insert now, gets filled later
bvh.bounds.emplace_back( bound );
node.left = ::buildBVHNode( bvh, bounds, start, mid, capacity );
node.right = ::buildBVHNode( bvh, bounds, mid, end, capacity );
@ -52,57 +51,58 @@ namespace {
return index;
}
int32_t buildBVHNode_SAH( pod::BVH& bvh, const uf::stl::vector<pod::AABB>& bounds, int32_t start, int32_t end, int32_t capacity = 4 ) {
pod::BVH::index_t buildBVHNode_SAH( pod::BVH& bvh, const uf::stl::vector<pod::AABB>& bounds, pod::BVH::index_t start, pod::BVH::index_t end, pod::BVH::index_t capacity = 4 ) {
struct Bin {
pod::AABB bounds;
int32_t count = 0;
pod::BVH::index_t count = 0;
};
pod::BVH::Node node{};
node.left = -1;
node.right = -1;
node.left = 0;
node.right = 0;
node.start = start;
node.count = 0;
node.bounds = bounds[bvh.indices[start]];
node.setCount(0);
for ( auto i = start + 1; i < end; ++i ) node.bounds = ::mergeAabb( node.bounds, bounds[bvh.indices[i]] );
pod::AABB bound = bounds[bvh.indices[start]];
for ( auto i = start + 1; i < end; ++i) bound = ::mergeAabb( bound, bounds[bvh.indices[i]] );
int32_t count = end - start;
pod::BVH::index_t count = end - start;
if ( count <= capacity ) {
node.count = count;
int32_t index = (int32_t) bvh.nodes.size();
node.setCount(count);
pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
bvh.nodes.emplace_back(node);
bvh.bounds.emplace_back(bound);
return index;
}
constexpr auto numBins = 16;
static thread_local Bin bins[numBins];
for ( auto i = 0; i < numBins; i++ ) bins[i] = {};
for ( auto i = 0; i < numBins; i++ ) bins[i].count = 0;
auto extent = node.bounds.max - node.bounds.min;
auto extent = bound.max - bound.min;
auto bestAxis = -1, bestSplit = -1;
float bestCost = std::numeric_limits<float>::infinity();
for ( auto axis = 0; axis < 3; ++axis ) {
if ( extent[axis] < EPS(1e-6f) ) continue;
float minC = node.bounds.min[axis];
float maxC = node.bounds.max[axis];
float minC = bound.min[axis];
float maxC = bound.max[axis];
float scale = (float) numBins / (maxC - minC);
for ( auto i = start; i < end; ++i ) {
int32_t idx = bvh.indices[i];
pod::BVH::index_t idx = bvh.indices[i];
float c = ::aabbCenter( bounds[idx] )[axis];
int32_t binID = std::min(numBins - 1, (int32_t)((c - minC) * scale));
bins[binID].count++;
bins[binID].bounds = ::mergeAabb( bins[binID].bounds, bounds[idx] );
pod::BVH::index_t binID = std::min((pod::BVH::index_t)(numBins - 1), (pod::BVH::index_t)((c - minC) * scale));
bins[binID].bounds = bins[binID].count == 0 ? bounds[idx] : ::mergeAabb( bins[binID].bounds, bounds[idx] );
++bins[binID].count;
}
pod::AABB leftBounds[numBins], rightBounds[numBins];
int32_t leftCount[numBins] = {}, rightCount[numBins] = {};
pod::BVH::index_t leftCount[numBins] = {}, rightCount[numBins] = {};
pod::AABB acc;
int32_t cnt = 0;
pod::BVH::index_t cnt = 0;
for ( auto i = 0; i < numBins; i++ ) {
if ( bins[i].count > 0 ) acc = (cnt == 0) ? bins[i].bounds : ::mergeAabb( acc, bins[i].bounds );
cnt += bins[i].count;
@ -110,6 +110,7 @@ namespace {
leftCount[i] = cnt;
}
acc = {};
cnt = 0;
for ( auto i = numBins - 1; i >= 0; i-- ) {
@ -119,12 +120,18 @@ namespace {
rightCount[i] = cnt;
}
float parentArea = ::aabbSurfaceArea(node.bounds);
// precompute area
float leftArea[numBins], rightArea[numBins];
for ( auto i = 0; i < numBins; i++ ) leftArea[i] = ::aabbSurfaceArea( leftBounds[i] );
for ( auto i = 0; i < numBins; i++ ) rightArea[i] = ::aabbSurfaceArea( rightBounds[i] );
float parentArea = ::aabbSurfaceArea(bound);
for ( auto i = 0; i < numBins - 1; i++ ) {
if ( leftCount[i] == 0 || rightCount[i + 1] == 0 ) continue;
float cost = 1.0f + (
( ::aabbSurfaceArea(leftBounds[i]) / parentArea ) * leftCount[i] +
( ::aabbSurfaceArea(rightBounds[i + 1]) / parentArea ) * rightCount[i + 1]
( leftArea[i] / parentArea ) * leftCount[i] +
( rightArea[i + 1] / parentArea ) * rightCount[i + 1]
);
if ( cost < bestCost ) {
bestCost = cost;
@ -136,34 +143,37 @@ namespace {
// fallback: no valid split → make leaf
if ( bestAxis == -1 ) {
node.count = count;
int32_t index = (int32_t) bvh.nodes.size();
node.setCount(count); // node.count = count;
pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
bvh.nodes.emplace_back(node);
bvh.bounds.emplace_back(bound);
return index;
}
float minC = node.bounds.min[bestAxis];
float maxC = node.bounds.max[bestAxis];
float minC = bound.min[bestAxis];
float maxC = bound.max[bestAxis];
float scale = (float) numBins / (maxC - minC);
auto midIt = std::partition( bvh.indices.begin() + start, bvh.indices.begin() + end, [&](int32_t idx) {
float c = ::aabbCenter( bounds[idx])[bestAxis ];
int32_t binID = std::min(numBins - 1, (int32_t)((c - minC) * scale));
auto midIt = std::partition( bvh.indices.begin() + start, bvh.indices.begin() + end, [&](pod::BVH::index_t idx) {
float c = ::aabbCenter( bounds[idx] )[bestAxis ];
pod::BVH::index_t binID = std::min((pod::BVH::index_t)(numBins - 1), (pod::BVH::index_t)((c - minC) * scale));
return binID <= bestSplit;
});
int32_t mid = (int32_t) ( midIt - bvh.indices.begin() );
pod::BVH::index_t mid = (pod::BVH::index_t) ( midIt - bvh.indices.begin() );
// if partition failed (all left or all right), force leaf
if ( mid == start || mid == end ) {
node.count = count;
int32_t index = (int32_t) bvh.nodes.size();
node.setCount(count); // node.count = count;
pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
bvh.nodes.emplace_back(node);
bvh.bounds.emplace_back(bound);
return index;
}
int32_t index = (int32_t) bvh.nodes.size();
pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
bvh.nodes.emplace_back(node);
bvh.bounds.emplace_back(bound);
node.left = ::buildBVHNode_SAH( bvh, bounds, start, mid, capacity );
node.right = ::buildBVHNode_SAH( bvh, bounds, mid, end, capacity );
@ -171,11 +181,12 @@ namespace {
return index;
}
void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector<pod::PhysicsBody*>& bodies, int32_t capacity = 2, bool filters = false, bool filterType = false ) {
void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector<pod::PhysicsBody*>& bodies, pod::BVH::index_t capacity = 2, bool filters = false, bool filterType = false ) {
if ( bodies.empty() ) return;
bvh.indices.clear();
bvh.nodes.clear();
bvh.bounds.clear();
bvh.indices.reserve(bodies.size());
// stores bounds
@ -201,7 +212,7 @@ namespace {
bvh.dirty = false;
}
void buildMeshBVH( pod::BVH& bvh, const uf::Mesh& mesh, int32_t capacity = 4 ) {
void buildMeshBVH( pod::BVH& bvh, const uf::Mesh& mesh, pod::BVH::index_t capacity = 4 ) {
uint32_t triangles = mesh.index.count / 3;
bvh.indices.clear();
@ -245,7 +256,7 @@ namespace {
}
namespace {
pod::BVH::UpdatePolicy::Decision decideBVHUpdate( const pod::BVH& bvh, uf::stl::vector<pod::PhysicsBody*>& bodies, const pod::BVH::UpdatePolicy& policy, size_t frameCounter ) {
pod::BVH::UpdatePolicy::Decision decideBVHUpdate( pod::BVH& bvh, uf::stl::vector<pod::PhysicsBody*>& bodies, const pod::BVH::UpdatePolicy& policy, size_t frameCounter ) {
// BVH is not built
if ( bvh.indices.empty() || bvh.nodes.empty() ) {
return pod::BVH::UpdatePolicy::Decision::REBUILD;
@ -253,7 +264,7 @@ namespace {
if ( bodies.empty() ) return pod::BVH::UpdatePolicy::Decision::NONE;
uint32_t dirtyCount = 0;
float oldRootArea = ::aabbSurfaceArea( bvh.nodes[0].bounds );
float oldRootArea = ::aabbSurfaceArea( bvh.bounds[0] );
// update/check each body
for ( auto idx : bvh.indices ) {
@ -273,14 +284,20 @@ namespace {
if ( displacement > policy.displacementThreshold * size ) ++dirtyCount;
}
// update nodes
for ( auto i = 0; i < bvh.nodes.size(); ++i ) {
auto& node = bvh.nodes[i];
if ( /*node.count*/ node.getCount() == 0 ) continue;
auto& bound = bvh.bounds[i];
bound = bodies[bvh.indices[node.start]]->bounds;
for ( auto i = 1; i < node.getCount() /*node.count*/; ++i ) bound = ::mergeAabb( bound, bodies[bvh.indices[node.start + i]]->bounds );
}
float dirtyRatio = (float) dirtyCount / (float) bodies.size();
// compute new root bounds
pod::AABB newRoot = bodies[bvh.indices[0]]->bounds;
for ( auto i = 1; i < bvh.indices.size(); ++i ) {
newRoot = ::mergeAabb(newRoot, bodies[bvh.indices[i]]->bounds);
}
for ( auto i = 1; i < bvh.indices.size(); ++i ) newRoot = ::mergeAabb(newRoot, bodies[bvh.indices[i]]->bounds);
float newRootArea = ::aabbSurfaceArea( newRoot );
// BVH is too out of date, rebuild it
@ -298,25 +315,30 @@ namespace {
if ( bvh.nodes.empty() ) return;
// update leaf bounds
#pragma omp parallel for
uf::stl::vector<pod::BVH::index_t> leaves;
leaves.reserve(::reserveCount);
for ( auto i = 0; i < bvh.nodes.size(); i++ ) {
auto& node = bvh.nodes[i];
if ( node.count > 0 ) {
// leaf node: recompute bounds from bodies
node.bounds = bounds[bvh.indices[node.start]];
if ( bvh.nodes[i].getCount() == 0 ) continue;
leaves.emplace_back(i);
}
for ( auto j = 1; j < node.count; j++ ) {
node.bounds = ::mergeAabb(node.bounds, bounds[bvh.indices[node.start + j]] );
}
}
// recompute bounds from bodies
for ( auto i = 0; i < leaves.size(); i++ ) {
auto nodeID = leaves[i];
auto& node = bvh.nodes[nodeID];
auto& bound = bvh.bounds[nodeID];
bound = bounds[bvh.indices[node.start]];
for ( auto j = 1; j < node.getCount(); j++ )
bound = ::mergeAabb(bound, bounds[bvh.indices[node.start + j]]);
}
// update internal nodes bottom-up
for ( int32_t i = (int32_t) bvh.nodes.size() - 1; i >= 0; i-- ) {
// the counter must be signed: with the unsigned index_t, `i >= 0` is always true and
// `i--` at zero wraps to 0xFFFFFFFF, indexing nodes/bounds out of range
for ( int64_t i = (int64_t) bvh.nodes.size() - 1; i >= 0; i-- ) {
auto& node = bvh.nodes[i];
auto& bound = bvh.bounds[i];
// internal node
if ( node.count == 0 ) {
node.bounds = ::mergeAabb(bvh.nodes[node.left].bounds, bvh.nodes[node.right].bounds);
if ( node.getCount() == 0 ) {
bound = ::mergeAabb(bvh.bounds[node.left], bvh.bounds[node.right]);
}
}
}
@ -329,31 +351,28 @@ namespace {
#pragma omp parallel for
for ( auto i = 0; i < bvh.nodes.size(); i++ ) {
auto& node = bvh.nodes[i];
if ( node.count > 0 ) {
// leaf node: recompute bounds from bodies
auto nodeID = bvh.indices[node.start];
if ( node.getCount() == 0 ) continue;
auto& bound = bvh.bounds[i];
// leaf node: recompute bounds from bodies
auto nodeID = bvh.indices[node.start];
node.bounds = bodies[nodeID]->bounds;
node.asleep = !bodies[nodeID]->activity.awake;
bound = bodies[nodeID]->bounds;
node.setAsleep(!bodies[nodeID]->activity.awake);
for ( auto j = 1; j < node.count; j++ ) {
auto bodyID = bvh.indices[node.start + j];
node.bounds = ::mergeAabb(node.bounds, bodies[bodyID]->bounds );
node.asleep = node.asleep && !bodies[bodyID]->activity.awake;
}
for ( auto j = 1; j < node.getCount(); j++ ) {
auto bodyID = bvh.indices[node.start + j];
bound = ::mergeAabb( bound, bodies[bodyID]->bounds );
node.setAsleep(node.isAsleep() && !bodies[bodyID]->activity.awake);
}
}
// update internal nodes bottom-up
for ( int32_t i = (int32_t) bvh.nodes.size() - 1; i >= 0; i-- ) {
for ( int64_t i = (int64_t) bvh.nodes.size() - 1; i >= 0; i-- ) {
auto& node = bvh.nodes[i];
if ( node.getCount() > 0 ) continue;
// internal node
if ( node.count == 0 ) {
const auto& leftNode = bvh.nodes[node.left];
const auto& rightNode = bvh.nodes[node.right];
node.bounds = ::mergeAabb(leftNode.bounds, rightNode.bounds);
node.asleep = leftNode.asleep && rightNode.asleep;
}
bvh.bounds[i] = ::mergeAabb( bvh.bounds[node.left], bvh.bounds[node.right] );
node.setAsleep( bvh.nodes[node.left].isAsleep() && bvh.nodes[node.right].isAsleep());
}
}
@ -385,36 +404,41 @@ namespace {
}
namespace {
int32_t flattenBVH( pod::BVH& bvh, int32_t nodeID ) {
if ( nodeID == 0 ) bvh.flattened.reserve(bvh.nodes.size());
pod::BVH::index_t flattenBVH( pod::BVH& bvh, pod::BVH::index_t nodeID ) {
if ( nodeID == 0 ) {
bvh.flattened.clear();
bvh.flatBounds.clear();
bvh.flattened.reserve(bvh.nodes.size());
bvh.flatBounds.reserve(bvh.bounds.size());
}
const auto& node = bvh.nodes[nodeID];
int32_t flatID = (int32_t) bvh.flattened.size();
pod::BVH::index_t flatID = (pod::BVH::index_t) bvh.flattened.size();
bvh.flattened.emplace_back(); // placeholder
bvh.flatBounds.emplace_back( bvh.bounds[nodeID] );
pod::BVH::FlatNode flat{};
flat.bounds = node.bounds;
flat.start = -1;
flat.count = -1;
flat.skipIndex = -1;
flat.asleep = node.asleep;
flat.start = 0;
flat.setCount(0);
flat.skipIndex = 0;
flat.setAsleep(node.isAsleep());
// leaf
if ( node.count > 0 ) {
if ( node.getCount() > 0 ) {
flat.start = node.start;
flat.count = node.count;
flat.setCount(node.getCount());
flat.skipIndex = flatID + 1; // next node after this leaf
bvh.flattened[flatID] = flat;
return flatID + 1;
}
// internal
else {
flat.start = -1;
flat.count = 0;
flat.start = 0;
flat.setCount(0);
int32_t leftID = ::flattenBVH( bvh, node.left );
int32_t rightID = ::flattenBVH( bvh, node.right );
pod::BVH::index_t leftID = ::flattenBVH( bvh, node.left );
pod::BVH::index_t rightID = ::flattenBVH( bvh, node.right );
flat.skipIndex = rightID; // skip entire subtree
bvh.flattened[flatID] = flat;
@ -425,17 +449,17 @@ namespace {
namespace {
// collects a list of nodes that are overlapping with each other
void traverseNodePair(const pod::BVH& bvh, int32_t nodeAID, int32_t nodeBID, pod::BVH::pairs_t& pairs) {
void traverseNodePair(const pod::BVH& bvh, pod::BVH::index_t nodeAID, pod::BVH::index_t nodeBID, pod::BVH::pairs_t& pairs) {
const auto& nodeA = bvh.nodes[nodeAID];
const auto& nodeB = bvh.nodes[nodeBID];
if ( nodeA.asleep || nodeB.asleep || !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) return;
if ( nodeA.isAsleep() || nodeB.isAsleep() || !::aabbOverlap( bvh.bounds[nodeAID], bvh.bounds[nodeBID] ) ) return;
if ( nodeA.count > 0 && nodeB.count > 0 ) {
for ( auto i = 0; i < nodeA.count; ++i ) {
for ( auto j = 0; j < nodeB.count; ++j ) {
int32_t bodyA = bvh.indices[nodeA.start + i];
int32_t bodyB = bvh.indices[nodeB.start + j];
if ( nodeA.getCount() > 0 && nodeB.getCount() > 0 ) {
for ( auto i = 0; i < nodeA.getCount(); ++i ) {
for ( auto j = 0; j < nodeB.getCount(); ++j ) {
pod::BVH::index_t bodyA = bvh.indices[nodeA.start + i];
pod::BVH::index_t bodyB = bvh.indices[nodeB.start + j];
if ( bodyA == bodyB ) continue;
if ( bodyA > bodyB ) std::swap( bodyA, bodyB );
@ -445,27 +469,27 @@ namespace {
return;
}
if ( nodeA.count == 0 ) {
if ( nodeA.getCount() == 0 ) {
::traverseNodePair( bvh, nodeA.left, nodeBID, pairs );
::traverseNodePair( bvh, nodeA.right, nodeBID, pairs );
}
if ( nodeB.count == 0 ) {
if ( nodeB.getCount() == 0 ) {
::traverseNodePair( bvh, nodeAID, nodeB.left, pairs );
::traverseNodePair( bvh, nodeAID, nodeB.right, pairs );
}
}
// collects a list of nodes from each BVH that are overlapping with each other (for mesh v mesh)
void traverseNodePair( const pod::BVH& bvhA, int32_t nodeAID, const pod::BVH& bvhB, int32_t nodeBID, pod::BVH::pairs_t& pairs ) {
void traverseNodePair( const pod::BVH& bvhA, pod::BVH::index_t nodeAID, const pod::BVH& bvhB, pod::BVH::index_t nodeBID, pod::BVH::pairs_t& pairs ) {
const auto& nodeA = bvhA.nodes[nodeAID];
const auto& nodeB = bvhB.nodes[nodeBID];
if ( nodeA.asleep || nodeB.asleep || !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) return;
if ( nodeA.isAsleep() || nodeB.isAsleep() || !::aabbOverlap( bvhA.bounds[nodeAID], bvhB.bounds[nodeBID] ) ) return;
if ( nodeA.count > 0 && nodeB.count > 0 ) {
for ( auto i = 0; i < nodeA.count; ++i ) {
for ( auto j = 0; j < nodeB.count; ++j ) {
int32_t bodyA = bvhA.indices[nodeA.start + i];
int32_t bodyB = bvhB.indices[nodeB.start + j];
if ( nodeA.getCount() > 0 && nodeB.getCount() > 0 ) {
for ( auto i = 0; i < nodeA.getCount(); ++i ) {
for ( auto j = 0; j < nodeB.getCount(); ++j ) {
pod::BVH::index_t bodyA = bvhA.indices[nodeA.start + i];
pod::BVH::index_t bodyB = bvhB.indices[nodeB.start + j];
if ( bodyA == bodyB ) continue;
if ( bodyA > bodyB ) std::swap( bodyA, bodyB );
@ -475,24 +499,24 @@ namespace {
return;
}
if ( nodeA.count == 0 ) {
if ( nodeA.getCount() == 0 ) {
::traverseNodePair( bvhA, nodeA.left, bvhB, nodeBID, pairs );
::traverseNodePair( bvhA, nodeA.right, bvhB, nodeBID, pairs );
}
if ( nodeB.count == 0 ) {
if ( nodeB.getCount() == 0 ) {
::traverseNodePair( bvhA, nodeAID, bvhB, nodeB.left, pairs );
::traverseNodePair( bvhA, nodeAID, bvhB, nodeB.right, pairs );
}
}
void traverseBVH( const pod::BVH& bvh, int32_t nodeID, pod::BVH::pairs_t& pairs ) {
void traverseBVH( const pod::BVH& bvh, pod::BVH::index_t nodeID, pod::BVH::pairs_t& pairs ) {
const auto& node = bvh.nodes[nodeID];
if ( node.count > 0 ) {
for ( auto i = 0; i < node.count; ++i ) {
for ( auto j = i + 1; j < node.count; ++j ) {
int32_t bodyA = bvh.indices[node.start + i];
int32_t bodyB = bvh.indices[node.start + j];
if ( node.getCount() > 0 ) {
for ( auto i = 0; i < node.getCount(); ++i ) {
for ( auto j = i + 1; j < node.getCount(); ++j ) {
pod::BVH::index_t bodyA = bvh.indices[node.start + i];
pod::BVH::index_t bodyB = bvh.indices[node.start + j];
if ( bodyA == bodyB ) continue;
if ( bodyA > bodyB ) std::swap( bodyA, bodyB );
@ -525,44 +549,45 @@ namespace {
namespace {
// query a BVH with an AABB via a stack
void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& outIndices ) {
void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& outIndices ) {
if ( bvh.nodes.empty() ) return;
if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, bounds, outIndices );
outIndices.reserve(::reserveCount);
uf::stl::stack<int32_t> stack;
thread_local uf::stl::stack<pod::BVH::index_t> stack;
//stack.clear(); // there is no stack.clear(), and the stack should already be cleared by the end of this function
stack.push(0);
while ( !stack.empty() ) {
int32_t idx = stack.top(); stack.pop();
pod::BVH::index_t idx = stack.top(); stack.pop();
auto& node = bvh.nodes[idx];
if ( node.asleep || !::aabbOverlap( bounds, node.bounds ) ) continue;
if ( node.isAsleep() || !::aabbOverlap( bounds, bvh.bounds[idx] ) ) continue;
if ( node.count > 0 ) {
for ( auto i = 0; i < node.count; ++i) outIndices.emplace_back(bvh.indices[node.start + i]);
if ( node.getCount() > 0 ) {
for ( auto i = 0; i < node.getCount(); ++i) outIndices.emplace_back(bvh.indices[node.start + i]);
} else {
stack.push(node.left);
stack.push(node.right);
}
}
}
void queryBVH( const pod::BVH& bvh, const pod::PhysicsBody& body, uf::stl::vector<int32_t>& outIndices ) {
void queryBVH( const pod::BVH& bvh, const pod::PhysicsBody& body, uf::stl::vector<pod::BVH::index_t>& outIndices ) {
return ::queryBVH( bvh, body.bounds, outIndices );
}
// query a BVH with an AABB via recursion
void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& outIndices, int32_t nodeID ) {
void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& outIndices, pod::BVH::index_t nodeID ) {
if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, bounds, outIndices );
if ( nodeID == 0 ) outIndices.reserve(::reserveCount);
const auto& node = bvh.nodes[nodeID];
if ( node.asleep || !::aabbOverlap( node.bounds, bounds ) ) return;
if ( node.isAsleep() || !::aabbOverlap( bounds, bvh.bounds[nodeID] ) ) return;
if ( node.count > 0 ) {
for ( auto i = 0; i < node.count; ++i ) outIndices.emplace_back(bvh.indices[node.start + i]);
if ( node.getCount() > 0 ) {
for ( auto i = 0; i < node.getCount(); ++i ) outIndices.emplace_back(bvh.indices[node.start + i]);
return;
}
@ -572,25 +597,26 @@ namespace {
}
// query a BVH with a ray via a stack
void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& outIndices, float maxDist ) {
void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& outIndices, float maxDist ) {
if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, ray, outIndices, maxDist );
if ( bvh.nodes.empty() ) return;
outIndices.reserve(::reserveCount);
uf::stl::stack<int32_t> stack;
thread_local uf::stl::stack<pod::BVH::index_t> stack;
//stack.clear(); // there is no stack.clear(), and the stack should already be cleared by the end of this function
stack.push(0);
while ( !stack.empty() ) {
int32_t idx = stack.top(); stack.pop();
pod::BVH::index_t idx = stack.top(); stack.pop();
const auto& node = bvh.nodes[idx];
float tMin, tMax;
if ( node.asleep || !::rayAabbIntersect( ray, node.bounds, tMin, tMax ) ) continue;
if ( node.isAsleep() || !::rayAabbIntersect( ray, bvh.bounds[idx], tMin, tMax ) ) continue;
if ( tMin > maxDist ) continue;
if ( node.count > 0 ) {
for ( auto i = 0; i < node.count; ++i) outIndices.emplace_back(bvh.indices[node.start + i]);
if ( node.getCount() > 0 ) {
for ( auto i = 0; i < node.getCount(); ++i) outIndices.emplace_back(bvh.indices[node.start + i]);
} else {
stack.push(node.left);
stack.push(node.right);
@ -598,18 +624,18 @@ namespace {
}
}
// query a BVH with a ray via recursion
void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& outIndices, int32_t nodeID, float maxDist ) {
void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& outIndices, pod::BVH::index_t nodeID, float maxDist ) {
if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, ray, outIndices, maxDist );
if ( nodeID == 0 ) outIndices.reserve(::reserveCount);
const auto& node = bvh.nodes[nodeID];
float tMin, tMax;
if ( node.asleep || !::rayAabbIntersect( ray, node.bounds, tMin, tMax ) ) return;
if ( node.isAsleep() || !::rayAabbIntersect( ray, bvh.bounds[nodeID], tMin, tMax ) ) return;
if ( tMin > maxDist ) return;
if ( node.count > 0 ) {
for ( auto i = 0; i < node.count; ++i ) outIndices.emplace_back(bvh.indices[node.start + i]);
if ( node.getCount() > 0 ) {
for ( auto i = 0; i < node.getCount(); ++i ) outIndices.emplace_back(bvh.indices[node.start + i]);
return;
}
@ -629,16 +655,16 @@ namespace {
for ( auto i = 0; i < nodes.size(); ++i ) {
const auto& nodeA = nodes[i];
if ( nodeA.count <= 0 || nodeA.asleep ) continue;
if ( nodeA.getCount() <= 0 || nodeA.isAsleep() ) continue;
for ( auto j = i + 1; j < nodes.size(); ++j ) {
const auto& nodeB = nodes[j];
if ( nodeB.count <= 0 || nodeB.asleep ) continue;
if ( nodeB.getCount() <= 0 || nodeB.isAsleep() ) continue;
if ( !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) continue;
if ( !::aabbOverlap( bvh.flatBounds[i], bvh.flatBounds[j] ) ) continue;
for ( auto ia = 0; ia < nodeA.count; ++ia ) {
for ( auto ib = 0; ib < nodeB.count; ++ib ) {
for ( auto ia = 0; ia < nodeA.getCount(); ++ia ) {
for ( auto ib = 0; ib < nodeB.getCount(); ++ib ) {
auto indexA = indices[nodeA.start + ia];
auto indexB = indices[nodeB.start + ib];
@ -664,16 +690,16 @@ namespace {
for ( auto i = 0; i < nodesA.size(); ++i ) {
const auto& nodeA = nodesA[i];
if ( nodeA.count <= 0 || nodeA.asleep ) continue;
if ( nodeA.getCount() <= 0 || nodeA.isAsleep() ) continue;
for ( auto j = 0; j < nodesB.size(); ++j ) {
const auto& nodeB = nodesB[j];
if ( nodeB.count <= 0 || nodeB.asleep ) continue;
if ( nodeB.getCount() <= 0 || nodeB.isAsleep() ) continue;
if ( !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) continue;
if ( !::aabbOverlap( bvhA.flatBounds[i], bvhB.flatBounds[j] ) ) continue;
for ( auto ia = 0; ia < nodeA.count; ++ia ) {
for (auto ib = 0; ib < nodeB.count; ++ib ) {
for ( auto ia = 0; ia < nodeA.getCount(); ++ia ) {
for (auto ib = 0; ib < nodeB.getCount(); ++ib ) {
auto indexA = indicesA[nodeA.start + ia];
auto indexB = indicesB[nodeB.start + ib];
@ -684,20 +710,20 @@ namespace {
}
}
void queryFlatBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& outIndices ) {
void queryFlatBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& outIndices ) {
auto& nodes = bvh.flattened;
auto& indices = bvh.indices;
outIndices.reserve(::reserveCount);
int32_t idx = 0;
pod::BVH::index_t idx = 0;
while ( idx < nodes.size() ) {
const auto& node = nodes[idx];
if ( !node.asleep && ::aabbOverlap( bounds, node.bounds ) ) {
if ( !node.isAsleep() && ::aabbOverlap( bounds, bvh.flatBounds[idx] ) ) {
// leaf
if ( node.count > 0 ) {
for ( auto i = 0; i < node.count; ++i ) {
if ( node.getCount() > 0 ) {
for ( auto i = 0; i < node.getCount(); ++i ) {
outIndices.emplace_back( indices[node.start + i] );
}
}
@ -708,20 +734,20 @@ namespace {
}
}
}
void queryFlatBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& outIndices, float maxDist ) {
void queryFlatBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& outIndices, float maxDist ) {
auto& nodes = bvh.flattened;
auto& indices = bvh.indices;
outIndices.reserve(::reserveCount);
int32_t idx = 0;
pod::BVH::index_t idx = 0;
while ( idx < nodes.size() ) {
const auto& node = nodes[idx];
float tMin, tMax;
if ( !node.asleep && ::rayAabbIntersect( ray, node.bounds, tMin, tMax ) && tMin <= maxDist ) {
if ( !node.isAsleep() && ::rayAabbIntersect( ray, bvh.flatBounds[idx], tMin, tMax ) && tMin <= maxDist ) {
// leaf
if ( node.count > 0 ) {
for ( auto i = 0; i < node.count; ++i ) {
if ( node.getCount() > 0 ) {
for ( auto i = 0; i < node.getCount(); ++i ) {
outIndices.emplace_back( indices[node.start + i] );
}
}
@ -736,10 +762,10 @@ namespace {
namespace {
struct UnionFind {
uf::stl::vector<int32_t> parent;
uf::stl::vector<int32_t> rank;
uf::stl::vector<pod::BVH::index_t> parent;
uf::stl::vector<pod::BVH::index_t> rank;
UnionFind( int32_t n ) {
UnionFind( pod::BVH::index_t n ) {
parent.resize(n);
rank.resize(n, 0);
@ -747,14 +773,14 @@ namespace {
parent[i] = i;
}
int32_t find( int32_t x ) {
pod::BVH::index_t find( pod::BVH::index_t x ) {
if ( parent[x] != x ) parent[x] = find(parent[x]);
return parent[x];
}
void unite( int32_t a, int32_t b ) {
int32_t rootA = find(a);
int32_t rootB = find(b);
void unite( pod::BVH::index_t a, pod::BVH::index_t b ) {
pod::BVH::index_t rootA = find(a);
pod::BVH::index_t rootB = find(b);
if ( rootA == rootB ) return;
@ -776,20 +802,20 @@ namespace {
}
// map root to island index
uf::stl::unordered_map<int32_t, int32_t> rootToIsland;
uf::stl::unordered_map<pod::BVH::index_t, pod::BVH::index_t> rootToIsland;
islands.clear();
islands.reserve(bodies.size());
for ( auto i = 0; i < bodies.size(); i++ ) {
int32_t root = unionizer.find(i);
pod::BVH::index_t root = unionizer.find(i);
if (rootToIsland.find(root) == rootToIsland.end()) {
rootToIsland[root] = (int32_t) islands.size();
rootToIsland[root] = (pod::BVH::index_t) islands.size();
islands.emplace_back();
}
int32_t islandID = rootToIsland[root];
pod::BVH::index_t islandID = rootToIsland[root];
islands[islandID].indices.emplace_back( i );
}
@ -798,8 +824,8 @@ namespace {
// do not insert these pairs if they're non-colliding
if ( !::shouldCollide( *bodies[a], *bodies[b] ) ) continue;
int32_t root = unionizer.find(a);
int32_t islandID = rootToIsland[root];
pod::BVH::index_t root = unionizer.find(a);
pod::BVH::index_t islandID = rootToIsland[root];
islands[islandID].pairs.emplace(a, b);
}
}

View File

@ -47,10 +47,10 @@ namespace {
pod::Vector3f aabbCenter( const pod::AABB& aabb );
void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& indices );
void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& indices, int32_t nodeID );
void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& indices, float maxDist = FLT_MAX );
void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& indices, int32_t nodeID, float maxDist = FLT_MAX );
void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& indices );
void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& indices, pod::BVH::index_t nodeID );
void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& indices, float maxDist = FLT_MAX );
void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& indices, pod::BVH::index_t nodeID, float maxDist = FLT_MAX );
void queryOverlaps( const pod::BVH& bvh, pod::BVH::pairs_t& outPairs );
void queryOverlaps( const pod::BVH& bvhA, const pod::BVH& bvhB, pod::BVH::pairs_t& outPairs );
}

View File

@ -10,40 +10,39 @@ namespace {
bool psgContactSolver = true; // use PSG contact solver
bool useGjk = false; // currently don't have a way to broadphase mesh => narrowphase tri via GJK
bool fixedStep = true; // run physics simulation with a fixed delta time (with accumulation), rather than rely on actual engine deltatime
int32_t substeps = 0; // number of substeps per frame tick
int32_t reserveCount = 32; // amount of elements to reserve for vectors used in this system, to-do: have it tie to a memory pool allocator
uint32_t substeps = 0; // number of substeps per frame tick
uint32_t reserveCount = 32; // amount of elements to reserve for vectors used in this system, to-do: have it tie to a memory pool allocator
// increasing these makes things lag, for reasons I can imagine
int32_t broadphaseBvhCapacity = 1; // number of bodies per leaf node
int32_t meshBvhCapacity = 1; // number of triangles per leaf node
// increasing these makes things lag, for reasons I can imagine (having to test more triangles rather than just more boxes)
uint32_t broadphaseBvhCapacity = 4; // number of bodies per leaf node
uint32_t meshBvhCapacity = 1; // number of triangles per leaf node
// additionally flattens a BVH for linear iteration, rather than a recursive / stack-based traversal
bool flattenBvhBodies = true;
bool flattenBvhMeshes = true;
// use surface area heuristics for building the BVH, rather than naive splits
bool useBvhSahBodies = false; // it actually seems slower to use these......
bool useBvhSahBodies = true; // it actually seems slower to use these......
bool useBvhSahMeshes = true;
bool useSplitBvhs = true; // creates separate BVHs for static / dynamic objects
// to-do: find possibly better values for this
int32_t solverIterations = 10;
uint32_t solverIterations = 10;
float baumgarteCorrectionPercent = 0.2f;
float baumgarteCorrectionSlop = 0.01f;
uf::stl::unordered_map<size_t, pod::Manifold> manifoldsCache;
int32_t manifoldCacheLifetime = 6; // to-do: find a good value for this
uint32_t manifoldCacheLifetime = 6; // to-do: find a good value for this
uint32_t frameCounter = 0;
// to-do: tweak this to not be annoying
// currently seems only reliable when it hits its TTL, but too long of a wait is gross, and too frequent of an update causes lag
pod::BVH::UpdatePolicy bvhUpdatePolicy = {
.displacementThreshold = 0.25f,
.overlapThreshold = 2.0f,
.dirtyRatioThreshold = 0.3f,
.maxFramesBeforeRebuild = 120,
.maxFramesBeforeRebuild = 60 * 10, // 10 seconds
};
}
@ -476,7 +475,8 @@ pod::RayQuery uf::physics::impl::rayCast( const pod::Ray& ray, const pod::World&
auto& staticBvh = world.staticBvh;
auto& bodies = world.bodies;
uf::stl::vector<int32_t> candidates;
thread_local uf::stl::vector<pod::BVH::index_t> candidates;
candidates.clear();
::queryBVH( dynamicBvh, ray, candidates );
if ( ::useSplitBvhs ) ::queryBVH( staticBvh, ray, candidates );

View File

@ -24,7 +24,8 @@ namespace {
// transform to local space for BVH query
auto bounds = ::transformAabbToLocal( aabb.bounds, ::getTransform( mesh ) );
uf::stl::vector<int32_t> candidates;
thread_local uf::stl::vector<pod::BVH::index_t> candidates;
candidates.clear();
::queryBVH( bvh, bounds, candidates );
bool hit = false;
@ -47,7 +48,8 @@ namespace {
// transform to local space for BVH query
auto bounds = ::transformAabbToLocal( sphere.bounds, ::getTransform( mesh ) );
uf::stl::vector<int32_t> candidates;
thread_local uf::stl::vector<pod::BVH::index_t> candidates;
candidates.clear();
::queryBVH( bvh, bounds, candidates );
bool hit = false;
@ -72,7 +74,8 @@ namespace {
// transform to local space for BVH query
auto bounds = ::transformAabbToLocal( plane.bounds, ::getTransform( mesh ) );
uf::stl::vector<int32_t> candidates;
thread_local uf::stl::vector<pod::BVH::index_t> candidates;
candidates.clear();
::queryBVH( bvh, bounds, candidates );
bool hit = false;
@ -96,7 +99,8 @@ namespace {
// transform to local space for BVH query
auto bounds = ::transformAabbToLocal( capsule.bounds, ::getTransform( mesh ) );
uf::stl::vector<int32_t> candidates;
thread_local uf::stl::vector<pod::BVH::index_t> candidates;
candidates.clear();
::queryBVH( bvh, bounds, candidates );
bool hit = false;
@ -120,7 +124,8 @@ namespace {
const auto& bvhB = *b.collider.mesh.bvh;
// compute overlaps between one BVH and another BVH
pod::BVH::pairs_t pairs;
thread_local pod::BVH::pairs_t pairs;
pairs.clear();
::queryOverlaps( bvhA, bvhB, pairs );
bool hit = false;

View File

@ -202,7 +202,8 @@ namespace {
ray.origin = uf::transform::applyInverse( transform, r.origin );
ray.direction = uf::quaternion::rotate( uf::quaternion::inverse( transform.orientation ), r.direction );
uf::stl::vector<int32_t> candidates;
thread_local uf::stl::vector<pod::BVH::index_t> candidates;
candidates.clear();
::queryBVH( bvh, ray, candidates );
for ( auto triID : candidates ) {