diff --git a/engine/inc/uf/utils/math/physics/impl.h b/engine/inc/uf/utils/math/physics/impl.h
index 2bee82ca..bd6b305a 100644
--- a/engine/inc/uf/utils/math/physics/impl.h
+++ b/engine/inc/uf/utils/math/physics/impl.h
@@ -44,7 +44,8 @@ namespace pod {
 	};
 
 	struct BVH {
-		typedef std::pair<int32_t,int32_t> pair_t;
+		typedef uint32_t index_t;
+		typedef std::pair<index_t,index_t> pair_t;
 		
 		struct PairHash {
 			size_t operator()( const pair_t& p ) const noexcept {
@@ -62,21 +63,25 @@ namespace pod {
 		typedef uf::stl::unordered_set<pair_t, PairHash, PairEq> pairs_t;
 		
 		struct Node {
-			/*alignas(16)*/ pod::AABB bounds = {};
-			int32_t left = -1;
-			int32_t right = -1;
-			int32_t start = 0;
-			int32_t count = 0;
+			BVH::index_t left = 0; // index of the left child in BVH::nodes / BVH::bounds; read only for internal nodes (getCount() == 0)
+			BVH::index_t right = 0; // index of the right child in BVH::nodes / BVH::bounds; read only for internal nodes
+			BVH::index_t start = 0; // offset into BVH::indices of this leaf's first entry
+			BVH::index_t flags = 0; // packed: bit 31 = asleep, bits 0-30 = leaf entry count (0 marks an internal node)
 
-			bool asleep = false;
+			BVH::index_t getCount() const { return flags & 0x7FFFFFFF; } // low 31 bits of flags
+			bool isAsleep() const { return (flags & 0x80000000u) != 0; } // top bit of flags
+			void setCount(BVH::index_t c) { flags = (flags & 0x80000000u) | (c & 0x7FFFFFFF); } // keeps the asleep bit; c is truncated to 31 bits
+			void setAsleep(bool a) { flags = (flags & 0x7FFFFFFF) | (a ? 0x80000000u : 0); } // keeps the count bits
 		};
 		struct FlatNode {
-			/*alignas(16)*/ pod::AABB bounds = {};
-			int32_t start = -1;
-			int32_t count = -1;
-			int32_t skipIndex = -1;
+			BVH::index_t start = 0; // offset into BVH::indices of this leaf's first entry
+			BVH::index_t skipIndex = 0; // flat index of the first node after this node's subtree (for a leaf: its own index + 1)
+			BVH::index_t flags = 0; // packed: bit 31 = asleep, bits 0-30 = leaf entry count (0 marks an internal node)
 
-			bool asleep = false;
+			BVH::index_t getCount() const { return flags & 0x7FFFFFFF; } // low 31 bits of flags
+			bool isAsleep() const { return (flags & 0x80000000u) != 0; } // top bit of flags
+			void setCount(BVH::index_t c) { flags = (flags & 0x80000000u) | (c & 0x7FFFFFFF); } // keeps the asleep bit; c is truncated to 31 bits
+			void setAsleep(bool a) { flags = (flags & 0x7FFFFFFF) | (a ? 0x80000000u : 0); } // keeps the count bits
 		};
 		struct UpdatePolicy {
 			enum class Decision {
@@ -87,13 +92,16 @@ namespace pod {
 			float displacementThreshold = 0.25f; // 25% of AABB size
 			float overlapThreshold = 2.0f;	   // 2x growth in root surface area
 			float dirtyRatioThreshold = 0.3f;	// 30% dirty bodies
-			int   maxFramesBeforeRebuild = 60;   // force rebuild every 60 frames
+			uint16_t maxFramesBeforeRebuild = 600;   // force rebuild every 600 frames
 		};
 
 		bool dirty = false;
-		uf::stl::vector<uint32_t> indices;
+		uf::stl::vector<pod::BVH::index_t> indices;
 		uf::stl::vector<pod::BVH::Node> nodes;
 		uf::stl::vector<pod::BVH::FlatNode> flattened;
+		
+		uf::stl::vector<pod::AABB> bounds;
+		uf::stl::vector<pod::AABB> flatBounds;
 	};
 
 	struct MeshBVH {
diff --git a/engine/src/utils/math/physics/aabb.inl b/engine/src/utils/math/physics/aabb.inl
index 2058b413..70866edd 100644
--- a/engine/src/utils/math/physics/aabb.inl
+++ b/engine/src/utils/math/physics/aabb.inl
@@ -103,10 +103,10 @@ namespace {
 				return ::computeSegmentAABB( p1, p2, body.collider.capsule.radius );
 			} break;
 			case pod::ShapeType::MESH: {
-				if ( body.collider.mesh.bvh && !body.collider.mesh.bvh->nodes.empty() )
+				if ( body.collider.mesh.bvh && !body.collider.mesh.bvh->bounds.empty() )
 					return {
-						transform.position + body.collider.mesh.bvh->nodes[0].bounds.min,
-						transform.position + body.collider.mesh.bvh->nodes[0].bounds.max,
+						transform.position + body.collider.mesh.bvh->bounds[0].min,
+						transform.position + body.collider.mesh.bvh->bounds[0].max,
 					};
 			} break;
 			default: {
diff --git a/engine/src/utils/math/physics/bvh.inl b/engine/src/utils/math/physics/bvh.inl
index 4ab32737..98140a53 100644
--- a/engine/src/utils/math/physics/bvh.inl
+++ b/engine/src/utils/math/physics/bvh.inl
@@ -1,8 +1,8 @@
 namespace {
-	int32_t flattenBVH( pod::BVH& bvh, int32_t nodeID );
+	pod::BVH::index_t flattenBVH( pod::BVH& bvh, pod::BVH::index_t nodeID );
 
-	void queryFlatBVH( const pod::BVH&, const pod::AABB& bounds, uf::stl::vector<int32_t>& out );
-	void queryFlatBVH( const pod::BVH&, const pod::Ray& ray, uf::stl::vector<int32_t>& out, float maxDist = FLT_MAX );
+	void queryFlatBVH( const pod::BVH&, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& out );
+	void queryFlatBVH( const pod::BVH&, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& out, float maxDist = FLT_MAX );
 	
 	void queryFlatOverlaps( const pod::BVH& bvh, pod::BVH::pairs_t& outPairs );
 	void queryFlatOverlaps( const pod::BVH& bvhA, const pod::BVH& bvhB, pod::BVH::pairs_t& outPairs );
@@ -10,41 +10,40 @@ namespace {
 
 // BVH
 namespace {
-	int32_t buildBVHNode( pod::BVH& bvh, const uf::stl::vector<pod::AABB>& bounds, int32_t start, int32_t end, int32_t capacity = 2 ) {
+	pod::BVH::index_t buildBVHNode( pod::BVH& bvh, const uf::stl::vector<pod::AABB>& bounds, pod::BVH::index_t start, pod::BVH::index_t end, pod::BVH::index_t capacity = 2 ) {
 		pod::BVH::Node node{};
-		node.left  = -1;
-		node.right = -1;
+		node.left  = 0;
+		node.right = 0;
 		node.start = start;
-		node.count = 0;
-		node.bounds = bounds[bvh.indices[start]];
+		node.setCount(0);
 
-		// compute bounds of this node
-		for ( auto i = start + 1; i < end; ++i) node.bounds = ::mergeAabb( node.bounds, bounds[bvh.indices[i]] );
+		pod::AABB bound = bounds[bvh.indices[start]];
+		for ( auto i = start + 1; i < end; ++i) bound = ::mergeAabb( bound, bounds[bvh.indices[i]] );
 
-		int32_t count = end - start;
+		pod::BVH::index_t count = end - start;
 		if ( count <= capacity ) {
 			// leaf
 			node.start = start;
-			node.count = count;
-			int32_t index = (int32_t) bvh.nodes.size();
+			node.setCount(count);
+			pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
 			bvh.nodes.emplace_back(node);
+			bvh.bounds.emplace_back(bound);
 			return index;
 		}
 
 		// choose split axis by largest extent
-		auto extent = node.bounds.max - node.bounds.min;
+		auto extent = bound.max - bound.min;
 		auto axis = (extent.x > extent.y && extent.x > extent.z) ? 0 : (extent.y > extent.z ? 1 : 2);
 
 		// sort indices by centroid along axis
-		std::sort( bvh.indices.begin() + start, bvh.indices.begin() + end, [&](uint32_t a, uint32_t b) {
-			float ca = ::aabbCenter( bounds[a] )[axis];
-			float cb = ::aabbCenter( bounds[b] )[axis];
-			return ca < cb;
+		auto mid = ( start + end ) / 2;
+		std::nth_element(bvh.indices.begin() + start, bvh.indices.begin() + mid, bvh.indices.begin() + end, [&](uint32_t a, uint32_t b) {
+			return ::aabbCenter(bounds[a])[axis] < ::aabbCenter(bounds[b])[axis];
 		});
 
-		int32_t mid = ( start + end ) / 2;
-		int32_t index = (int32_t) bvh.nodes.size();
+		pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
 		bvh.nodes.emplace_back( node ); // insert now, gets filled later
+		bvh.bounds.emplace_back( bound );
 
 		node.left = ::buildBVHNode( bvh, bounds, start, mid, capacity );
 		node.right = ::buildBVHNode( bvh, bounds, mid, end, capacity );
@@ -52,57 +51,58 @@ namespace {
 		return index;
 	}
 
-	int32_t buildBVHNode_SAH( pod::BVH& bvh, const uf::stl::vector<pod::AABB>& bounds, int32_t start, int32_t end, int32_t capacity = 4 ) {
+	pod::BVH::index_t buildBVHNode_SAH( pod::BVH& bvh, const uf::stl::vector<pod::AABB>& bounds, pod::BVH::index_t start, pod::BVH::index_t end, pod::BVH::index_t capacity = 4 ) {
 		struct Bin {
 			pod::AABB bounds;
-			int32_t count = 0;
+			pod::BVH::index_t count = 0;
 		};
 
 		pod::BVH::Node node{};
-		node.left  = -1;
-		node.right = -1;
+		node.left  = 0;
+		node.right = 0;
 		node.start = start;
-		node.count = 0;
-		node.bounds = bounds[bvh.indices[start]];
+		node.setCount(0);
 
-		for ( auto i = start + 1; i < end; ++i ) node.bounds = ::mergeAabb( node.bounds, bounds[bvh.indices[i]] );
+		pod::AABB bound = bounds[bvh.indices[start]];
+		for ( auto i = start + 1; i < end; ++i) bound = ::mergeAabb( bound, bounds[bvh.indices[i]] );
 
-		int32_t count = end - start;
+		pod::BVH::index_t count = end - start;
 		if ( count <= capacity ) {
-			node.count = count;
-			int32_t index = (int32_t) bvh.nodes.size();
+			node.setCount(count);
+			pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
 			bvh.nodes.emplace_back(node);
+			bvh.bounds.emplace_back(bound);
 			return index;
 		}
 
 		constexpr auto numBins = 16;
 		static thread_local Bin bins[numBins];
-		for ( auto i = 0; i < numBins; i++ ) bins[i] = {};
+		for ( auto i = 0; i < numBins; i++ ) bins[i].count = 0;
 
-		auto extent = node.bounds.max - node.bounds.min;
+		auto extent = bound.max - bound.min;
 		auto bestAxis = -1, bestSplit = -1;
 		float bestCost = std::numeric_limits<float>::infinity();
 
 		for ( auto axis = 0; axis < 3; ++axis ) {
 			if ( extent[axis] < EPS(1e-6f) ) continue;
 
-			float minC = node.bounds.min[axis];
-			float maxC = node.bounds.max[axis];
+			for ( auto i = 0; i < numBins; i++ ) bins[i].count = 0; // bins are shared by all three axes: drop the previous axis' tallies before binning this one
+			float minC = bound.min[axis], maxC = bound.max[axis]; // extent of the node's box along the candidate axis
 			float scale = (float) numBins / (maxC - minC);
 
 			for ( auto i = start; i < end; ++i ) {
-				int32_t idx = bvh.indices[i];
+				pod::BVH::index_t idx = bvh.indices[i];
 				float c = ::aabbCenter( bounds[idx] )[axis];
-				int32_t binID = std::min(numBins - 1, (int32_t)((c - minC) * scale));
-				bins[binID].count++;
-				bins[binID].bounds = ::mergeAabb( bins[binID].bounds, bounds[idx] );
+				pod::BVH::index_t binID = std::min((pod::BVH::index_t)(numBins - 1), (pod::BVH::index_t)((c - minC) * scale)); // clamp so a centroid at maxC lands in the last bin
+				bins[binID].bounds = bins[binID].count == 0 ? bounds[idx] : ::mergeAabb( bins[binID].bounds, bounds[idx] ); // first entry seeds the bin, later ones grow it
+				++bins[binID].count;
 			}
 
 			pod::AABB leftBounds[numBins], rightBounds[numBins];
-			int32_t leftCount[numBins] = {}, rightCount[numBins] = {};
+			pod::BVH::index_t leftCount[numBins] = {}, rightCount[numBins] = {};
 
 			pod::AABB acc;
-			int32_t cnt = 0;
+			pod::BVH::index_t cnt = 0;
 			for ( auto i = 0; i < numBins; i++ ) {
 				if ( bins[i].count > 0 ) acc = (cnt == 0) ? bins[i].bounds : ::mergeAabb( acc, bins[i].bounds );
 				cnt += bins[i].count;
@@ -110,6 +110,7 @@ namespace {
 				leftCount[i] = cnt;
 			}
 
+
 			acc = {};
 			cnt = 0;
 			for ( auto i = numBins - 1; i >= 0; i-- ) {
@@ -119,12 +120,18 @@ namespace {
 				rightCount[i] = cnt;
 			}
 
-			float parentArea = ::aabbSurfaceArea(node.bounds);
+			// precompute area
+			float leftArea[numBins], rightArea[numBins];
+			for ( auto i = 0; i < numBins; i++ ) leftArea[i] = ::aabbSurfaceArea( leftBounds[i] );
+			for ( auto i = 0; i < numBins; i++ ) rightArea[i] = ::aabbSurfaceArea( rightBounds[i] );
+			
+			float parentArea = ::aabbSurfaceArea(bound);
+
 			for ( auto i = 0; i < numBins - 1; i++ ) {
 				if ( leftCount[i] == 0 || rightCount[i + 1] == 0 ) continue;
 				float cost = 1.0f + (
-					( ::aabbSurfaceArea(leftBounds[i]) / parentArea ) * leftCount[i] +
-					( ::aabbSurfaceArea(rightBounds[i + 1]) / parentArea ) * rightCount[i + 1]
+					( leftArea[i] / parentArea ) * leftCount[i] +
+					( rightArea[i + 1] / parentArea ) * rightCount[i + 1]
 				);
 				if ( cost < bestCost ) {
 					bestCost = cost;
@@ -136,34 +143,37 @@ namespace {
 
 		// fallback: no valid split → make leaf
 		if ( bestAxis == -1 ) {
-			node.count = count;
-			int32_t index = (int32_t) bvh.nodes.size();
+			node.setCount(count); // node.count = count;
+			pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
 			bvh.nodes.emplace_back(node);
+			bvh.bounds.emplace_back(bound);
 			return index;
 		}
 
-		float minC = node.bounds.min[bestAxis];
-		float maxC = node.bounds.max[bestAxis];
+		float minC = bound.min[bestAxis];
+		float maxC = bound.max[bestAxis];
 		float scale = (float) numBins / (maxC - minC);
 
-		auto midIt = std::partition( bvh.indices.begin() + start, bvh.indices.begin() + end, [&](int32_t idx) {
-			float c = ::aabbCenter( bounds[idx])[bestAxis ];
-			int32_t binID = std::min(numBins - 1, (int32_t)((c - minC) * scale));
+		auto midIt = std::partition( bvh.indices.begin() + start, bvh.indices.begin() + end, [&](pod::BVH::index_t idx) {
+			float c = ::aabbCenter( bounds[idx] )[bestAxis ];
+			pod::BVH::index_t binID = std::min((pod::BVH::index_t)(numBins - 1), (pod::BVH::index_t)((c - minC) * scale));
 			return binID <= bestSplit;
 		});
 
-		int32_t mid = (int32_t) ( midIt - bvh.indices.begin() );
+		pod::BVH::index_t mid = (pod::BVH::index_t) ( midIt - bvh.indices.begin() );
 
 		// if partition failed (all left or all right), force leaf
 		if ( mid == start || mid == end ) {
-			node.count = count;
-			int32_t index = (int32_t) bvh.nodes.size();
+			node.setCount(count); // node.count = count;
+			pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
 			bvh.nodes.emplace_back(node);
+			bvh.bounds.emplace_back(bound);
 			return index;
 		}
 
-		int32_t index = (int32_t) bvh.nodes.size();
+		pod::BVH::index_t index = (pod::BVH::index_t) bvh.nodes.size();
 		bvh.nodes.emplace_back(node);
+		bvh.bounds.emplace_back(bound);
 
 		node.left  = ::buildBVHNode_SAH( bvh, bounds, start, mid, capacity );
 		node.right = ::buildBVHNode_SAH( bvh, bounds, mid, end, capacity );
@@ -171,11 +181,12 @@ namespace {
 		return index;
 	}
 
-	void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector<pod::PhysicsBody*>& bodies, int32_t capacity = 2, bool filters = false, bool filterType = false ) {
+	void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector<pod::PhysicsBody*>& bodies, pod::BVH::index_t capacity = 2, bool filters = false, bool filterType = false ) {
 		if ( bodies.empty() ) return;
 
 		bvh.indices.clear();
 		bvh.nodes.clear();
+		bvh.bounds.clear();
 		bvh.indices.reserve(bodies.size());
 
 		// stores bounds
@@ -201,7 +212,7 @@ namespace {
 		bvh.dirty = false;
 	}
 
-	void buildMeshBVH( pod::BVH& bvh, const uf::Mesh& mesh, int32_t capacity = 4 ) {
+	void buildMeshBVH( pod::BVH& bvh, const uf::Mesh& mesh, pod::BVH::index_t capacity = 4 ) {
 		uint32_t triangles = mesh.index.count / 3;
 
 		bvh.indices.clear();
@@ -245,7 +256,7 @@ namespace {
 }
 
 namespace {
-	pod::BVH::UpdatePolicy::Decision decideBVHUpdate( const pod::BVH& bvh, uf::stl::vector<pod::PhysicsBody*>& bodies, const pod::BVH::UpdatePolicy& policy, size_t frameCounter ) {
+	pod::BVH::UpdatePolicy::Decision decideBVHUpdate( pod::BVH& bvh, uf::stl::vector<pod::PhysicsBody*>& bodies, const pod::BVH::UpdatePolicy& policy, size_t frameCounter ) {
 		// BVH is not built
 		if ( bvh.indices.empty() || bvh.nodes.empty() ) {
 			return pod::BVH::UpdatePolicy::Decision::REBUILD;
@@ -253,7 +264,7 @@ namespace {
 		if ( bodies.empty() ) return pod::BVH::UpdatePolicy::Decision::NONE;
 
 		uint32_t dirtyCount = 0;
-		float oldRootArea = ::aabbSurfaceArea( bvh.nodes[0].bounds );
+		float oldRootArea = ::aabbSurfaceArea( bvh.bounds[0] );
 
 		// update/check each body
 		for ( auto idx : bvh.indices ) {
@@ -273,14 +284,20 @@ namespace {
 
 			if ( displacement > policy.displacementThreshold * size ) ++dirtyCount;
 		}
+		// refresh the stored box of every leaf from its bodies' current bounds (internal nodes are not touched here)
+		for ( auto i = 0; i < bvh.nodes.size(); ++i ) {
+			auto& node = bvh.nodes[i];
+			if ( node.getCount() == 0 ) continue; // internal node: nothing to gather
+			auto& bound = bvh.bounds[i];
+			bound = bodies[bvh.indices[node.start]]->bounds; // seed with the leaf's first body
+			for ( auto j = 1; j < node.getCount(); ++j ) bound = ::mergeAabb( bound, bodies[bvh.indices[node.start + j]]->bounds ); // j, not i: do not shadow the node index
+		}
 
 		float dirtyRatio = (float) dirtyCount / (float) bodies.size();
 
 		// compute new root bounds
 		pod::AABB newRoot = bodies[bvh.indices[0]]->bounds;
-		for ( auto i = 1; i < bvh.indices.size(); ++i ) {
-			newRoot = ::mergeAabb(newRoot, bodies[bvh.indices[i]]->bounds);
-		}
+		for ( auto i = 1; i < bvh.indices.size(); ++i ) newRoot = ::mergeAabb(newRoot, bodies[bvh.indices[i]]->bounds);
 
 		float newRootArea = ::aabbSurfaceArea( newRoot );
 		// BVH is too out of date, rebuild it
@@ -298,25 +315,30 @@ namespace {
 		if ( bvh.nodes.empty() ) return;
 
 		// update leaf bounds
-		#pragma omp parallel for
+		uf::stl::vector<pod::BVH::index_t> leaves;
+		leaves.reserve(::reserveCount);
 		for ( auto i = 0; i < bvh.nodes.size(); i++ ) {
-			auto& node = bvh.nodes[i];
-			if ( node.count > 0 ) {
-				// leaf node: recompute bounds from bodies
-				node.bounds = bounds[bvh.indices[node.start]];
+			if ( bvh.nodes[i].getCount() == 0 ) continue;
+			leaves.emplace_back(i);
+		}
 
-				for ( auto j = 1; j < node.count; j++ ) {
-					node.bounds = ::mergeAabb(node.bounds, bounds[bvh.indices[node.start + j]] );
-				}
-			}
+		// recompute bounds from bodies
+		for ( auto i = 0; i < leaves.size(); i++ ) {
+			auto nodeID = leaves[i];
+			auto& node = bvh.nodes[nodeID];
+			auto& bound = bvh.bounds[nodeID];
+			bound = bounds[bvh.indices[node.start]];
+			for ( auto j = 1; j < node.getCount(); j++ )
+				bound = ::mergeAabb(bound, bounds[bvh.indices[node.start + j]]);
 		}
 
 		// update internal nodes bottom-up
-		for ( int32_t i = (int32_t) bvh.nodes.size() - 1; i >= 0; i-- ) {
+		for ( int64_t i = (int64_t) bvh.nodes.size() - 1; i >= 0; i-- ) { // signed on purpose: with the unsigned index_t `i >= 0` is always true and `i--` wraps past zero
 			auto& node = bvh.nodes[i];
+			auto& bound = bvh.bounds[i]; // node boxes live in bvh.bounds at the node's own index
 			// internal node
-			if ( node.count == 0 ) {
-				node.bounds = ::mergeAabb(bvh.nodes[node.left].bounds, bvh.nodes[node.right].bounds);
+			if ( node.getCount() == 0 ) {
+				bound = ::mergeAabb(bvh.bounds[node.left], bvh.bounds[node.right]); // walking from the back relies on children being stored after their parent
 			}
 		}
 	}
@@ -329,31 +351,28 @@ namespace {
 		#pragma omp parallel for
 		for ( auto i = 0; i < bvh.nodes.size(); i++ ) {
 			auto& node = bvh.nodes[i];
-			if ( node.count > 0 ) {
-				// leaf node: recompute bounds from bodies
-				auto nodeID = bvh.indices[node.start];
+			if ( node.getCount() == 0 ) continue;
+			auto& bound = bvh.bounds[i];
+			// leaf node: recompute bounds from bodies
+			auto nodeID = bvh.indices[node.start];
 
-				node.bounds = bodies[nodeID]->bounds;
-				node.asleep = !bodies[nodeID]->activity.awake;
+			bound = bodies[nodeID]->bounds;
+			node.setAsleep(!bodies[nodeID]->activity.awake);
 
-				for ( auto j = 1; j < node.count; j++ ) {
-					auto bodyID = bvh.indices[node.start + j];
-					node.bounds = ::mergeAabb(node.bounds, bodies[bodyID]->bounds );
-					node.asleep = node.asleep && !bodies[bodyID]->activity.awake;
-				}
+			for ( auto j = 1; j < node.getCount(); j++ ) {
+				auto bodyID = bvh.indices[node.start + j];
+				bound = ::mergeAabb( bound, bodies[bodyID]->bounds );
+				node.setAsleep(node.isAsleep() && !bodies[bodyID]->activity.awake);
 			}
 		}
 
 		// update internal nodes bottom-up
-		for ( int32_t i = (int32_t) bvh.nodes.size() - 1; i >= 0; i-- ) {
+		for ( int64_t i = (int64_t) bvh.nodes.size() - 1; i >= 0; i-- ) {
 			auto& node = bvh.nodes[i];
+			if ( node.getCount() > 0 ) continue;
 			// internal node
-			if ( node.count == 0 ) {
-				const auto& leftNode = bvh.nodes[node.left];
-				const auto& rightNode = bvh.nodes[node.right];
-				node.bounds = ::mergeAabb(leftNode.bounds, rightNode.bounds);
-				node.asleep = leftNode.asleep && rightNode.asleep;
-			}
+			bvh.bounds[i] = ::mergeAabb( bvh.bounds[node.left], bvh.bounds[node.right] );
+			node.setAsleep( bvh.nodes[node.left].isAsleep() && bvh.nodes[node.right].isAsleep());
 		}
 	}
 
@@ -385,36 +404,41 @@ namespace {
 }
 
 namespace {
-	int32_t flattenBVH( pod::BVH& bvh, int32_t nodeID ) {
-		if ( nodeID == 0 ) bvh.flattened.reserve(bvh.nodes.size());
+	pod::BVH::index_t flattenBVH( pod::BVH& bvh, pod::BVH::index_t nodeID ) {
+		if ( nodeID == 0 ) {
+			bvh.flattened.clear();
+			bvh.flatBounds.clear();
+			bvh.flattened.reserve(bvh.nodes.size());
+			bvh.flatBounds.reserve(bvh.bounds.size());
+		}
 
 		const auto& node = bvh.nodes[nodeID];
 
-		int32_t flatID = (int32_t) bvh.flattened.size();
+		pod::BVH::index_t flatID = (pod::BVH::index_t) bvh.flattened.size();
 		bvh.flattened.emplace_back(); // placeholder
+		bvh.flatBounds.emplace_back( bvh.bounds[nodeID] );
 
 		pod::BVH::FlatNode flat{};
-		flat.bounds = node.bounds;
-		flat.start = -1;
-		flat.count = -1;
-		flat.skipIndex = -1;
-		flat.asleep = node.asleep;
+		flat.start = 0;
+		flat.setCount(0);
+		flat.skipIndex = 0;
+		flat.setAsleep(node.isAsleep());
 
 		// leaf
-		if ( node.count > 0 ) {
+		if ( node.getCount() > 0 ) {
 			flat.start = node.start;
-			flat.count = node.count;
+			flat.setCount(node.getCount());
 			flat.skipIndex = flatID + 1; // next node after this leaf
 			bvh.flattened[flatID] = flat;
 			return flatID + 1;
 		}
 		// internal
 		else {
-			flat.start = -1;
-			flat.count = 0;
+			flat.start = 0;
+			flat.setCount(0);
 
-			int32_t leftID  = ::flattenBVH( bvh, node.left );
-			int32_t rightID = ::flattenBVH( bvh, node.right );
+			pod::BVH::index_t leftID  = ::flattenBVH( bvh, node.left );
+			pod::BVH::index_t rightID = ::flattenBVH( bvh, node.right );
 
 			flat.skipIndex = rightID; // skip entire subtree
 			bvh.flattened[flatID] = flat;
@@ -425,17 +449,17 @@ namespace {
 
 namespace {
 	// collects a list of nodes that are overlapping with each other
-	void traverseNodePair(const pod::BVH& bvh, int32_t nodeAID, int32_t nodeBID, pod::BVH::pairs_t& pairs) {
+	void traverseNodePair(const pod::BVH& bvh, pod::BVH::index_t nodeAID, pod::BVH::index_t nodeBID, pod::BVH::pairs_t& pairs) {
 		const auto& nodeA = bvh.nodes[nodeAID];
 		const auto& nodeB = bvh.nodes[nodeBID];
 
-		if ( nodeA.asleep || nodeB.asleep || !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) return;
+		if ( nodeA.isAsleep() || nodeB.isAsleep() || !::aabbOverlap( bvh.bounds[nodeAID], bvh.bounds[nodeBID] ) ) return;
 
-		if ( nodeA.count > 0 && nodeB.count > 0 ) {
-			for ( auto i = 0; i < nodeA.count; ++i ) {
-				for ( auto j = 0; j < nodeB.count; ++j ) {
-					int32_t bodyA = bvh.indices[nodeA.start + i];
-					int32_t bodyB = bvh.indices[nodeB.start + j];
+		if ( nodeA.getCount() > 0 && nodeB.getCount() > 0 ) {
+			for ( auto i = 0; i < nodeA.getCount(); ++i ) {
+				for ( auto j = 0; j < nodeB.getCount(); ++j ) {
+					pod::BVH::index_t bodyA = bvh.indices[nodeA.start + i];
+					pod::BVH::index_t bodyB = bvh.indices[nodeB.start + j];
 					if ( bodyA == bodyB ) continue;
 					if ( bodyA > bodyB ) std::swap( bodyA, bodyB );
 
@@ -445,27 +469,27 @@ namespace {
 			return;
 		}
 
-		if ( nodeA.count == 0 ) {
+		if ( nodeA.getCount() == 0 ) {
 			::traverseNodePair( bvh, nodeA.left, nodeBID, pairs );
 			::traverseNodePair( bvh, nodeA.right, nodeBID, pairs );
 		}
-		if ( nodeB.count == 0 ) {
+		if ( nodeB.getCount() == 0 ) {
 			::traverseNodePair( bvh, nodeAID, nodeB.left, pairs );
 			::traverseNodePair( bvh, nodeAID, nodeB.right, pairs );
 		}
 	}
 	// collects a list of nodes from each BVH that are overlapping with each other (for mesh v mesh)
-	void traverseNodePair( const pod::BVH& bvhA, int32_t nodeAID, const pod::BVH& bvhB, int32_t nodeBID, pod::BVH::pairs_t& pairs ) {
+	void traverseNodePair( const pod::BVH& bvhA, pod::BVH::index_t nodeAID, const pod::BVH& bvhB, pod::BVH::index_t nodeBID, pod::BVH::pairs_t& pairs ) {
 		const auto& nodeA = bvhA.nodes[nodeAID];
 		const auto& nodeB = bvhB.nodes[nodeBID];
 
-		if ( nodeA.asleep || nodeB.asleep || !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) return;
+		if ( nodeA.isAsleep() || nodeB.isAsleep() || !::aabbOverlap( bvhA.bounds[nodeAID], bvhB.bounds[nodeBID] ) ) return;
 
-		if ( nodeA.count > 0 && nodeB.count > 0 ) {
-			for ( auto i = 0; i < nodeA.count; ++i ) {
-				for ( auto j = 0; j < nodeB.count; ++j ) {
-					int32_t bodyA = bvhA.indices[nodeA.start + i];
-					int32_t bodyB = bvhB.indices[nodeB.start + j];
+		if ( nodeA.getCount() > 0 && nodeB.getCount() > 0 ) {
+			for ( auto i = 0; i < nodeA.getCount(); ++i ) {
+				for ( auto j = 0; j < nodeB.getCount(); ++j ) {
+					pod::BVH::index_t bodyA = bvhA.indices[nodeA.start + i];
+					pod::BVH::index_t bodyB = bvhB.indices[nodeB.start + j];
 					if ( bodyA == bodyB ) continue;
 					if ( bodyA > bodyB ) std::swap( bodyA, bodyB );
 
@@ -475,24 +499,24 @@ namespace {
 			return;
 		}
 
-		if ( nodeA.count == 0 ) {
+		if ( nodeA.getCount() == 0 ) {
 			::traverseNodePair( bvhA, nodeA.left, bvhB, nodeBID, pairs );
 			::traverseNodePair( bvhA, nodeA.right, bvhB, nodeBID, pairs );
 		}
-		if ( nodeB.count == 0 ) {
+		if ( nodeB.getCount() == 0 ) {
 			::traverseNodePair( bvhA, nodeAID, bvhB, nodeB.left, pairs );
 			::traverseNodePair( bvhA, nodeAID, bvhB, nodeB.right, pairs );
 		}
 	}
 
-	void traverseBVH( const pod::BVH& bvh, int32_t nodeID, pod::BVH::pairs_t& pairs ) {
+	void traverseBVH( const pod::BVH& bvh, pod::BVH::index_t nodeID, pod::BVH::pairs_t& pairs ) {
 		const auto& node = bvh.nodes[nodeID];
 
-		if ( node.count > 0 ) {
-			for ( auto i = 0; i < node.count; ++i ) {
-				for ( auto j = i + 1; j < node.count; ++j ) {
-					int32_t bodyA = bvh.indices[node.start + i];
-					int32_t bodyB = bvh.indices[node.start + j];
+		if ( node.getCount() > 0 ) {
+			for ( auto i = 0; i < node.getCount(); ++i ) {
+				for ( auto j = i + 1; j < node.getCount(); ++j ) {
+					pod::BVH::index_t bodyA = bvh.indices[node.start + i];
+					pod::BVH::index_t bodyB = bvh.indices[node.start + j];
 
 					if ( bodyA == bodyB ) continue;
 					if ( bodyA > bodyB ) std::swap( bodyA, bodyB );
@@ -525,44 +549,45 @@ namespace {
 
 namespace {
 	// query a BVH with an AABB via a stack
-	void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& outIndices ) {
+	void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& outIndices ) { // appends the entries of every awake leaf whose box overlaps `bounds`
 		if ( bvh.nodes.empty() ) return;
 		
 		if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, bounds, outIndices );
 
 		outIndices.reserve(::reserveCount);
 
-		uf::stl::stack<int32_t> stack;
+		thread_local uf::stl::stack<pod::BVH::index_t> stack; // one stack per thread, kept between calls
+		// no explicit clear: the loop below only exits once the stack is empty, so it is empty again on the next call
 		stack.push(0);
 
 		while ( !stack.empty() ) {
-			int32_t idx = stack.top(); stack.pop();
+			pod::BVH::index_t idx = stack.top(); stack.pop();
 			auto& node = bvh.nodes[idx];
-			if ( node.asleep || !::aabbOverlap( bounds, node.bounds ) ) continue;
+			if ( node.isAsleep() || !::aabbOverlap( bounds, bvh.bounds[idx] ) ) continue; // asleep or disjoint: skip the whole subtree
 
-			if ( node.count > 0 ) {
-				for ( auto i = 0; i < node.count; ++i) outIndices.emplace_back(bvh.indices[node.start + i]);
+			if ( node.getCount() > 0 ) {
+				for ( auto i = 0; i < node.getCount(); ++i) outIndices.emplace_back(bvh.indices[node.start + i]); // leaf: emit all of its entries, no per-entry overlap test
 			} else {
 				stack.push(node.left);
 				stack.push(node.right);
 			}
 		}
 	}
-	void queryBVH( const pod::BVH& bvh, const pod::PhysicsBody& body, uf::stl::vector<int32_t>& outIndices ) {
+	void queryBVH( const pod::BVH& bvh, const pod::PhysicsBody& body, uf::stl::vector<pod::BVH::index_t>& outIndices ) {
 		return ::queryBVH( bvh, body.bounds, outIndices );
 	}
 	
 	// query a BVH with an AABB via recursion
-	void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& outIndices, int32_t nodeID ) {
+	void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& outIndices, pod::BVH::index_t nodeID ) {
 		if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, bounds, outIndices );
 
 		if ( nodeID == 0 ) outIndices.reserve(::reserveCount);
 
 		const auto& node = bvh.nodes[nodeID];
-		if ( node.asleep || !::aabbOverlap( node.bounds, bounds ) ) return;
+		if ( node.isAsleep() || !::aabbOverlap( bounds, bvh.bounds[nodeID] ) ) return;
 
-		if ( node.count > 0 ) {
-			for ( auto i = 0; i < node.count; ++i ) outIndices.emplace_back(bvh.indices[node.start + i]);
+		if ( node.getCount() > 0 ) {
+			for ( auto i = 0; i < node.getCount(); ++i ) outIndices.emplace_back(bvh.indices[node.start + i]);
 			return;
 		}
 
@@ -572,25 +597,26 @@ namespace {
 	}
 
 	// query a BVH with a ray via a stack
-	void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& outIndices, float maxDist ) {
+	void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& outIndices, float maxDist ) {
 		if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, ray, outIndices, maxDist );
 
 		if ( bvh.nodes.empty() ) return;
 		outIndices.reserve(::reserveCount);
 
-		uf::stl::stack<int32_t> stack;
+		thread_local uf::stl::stack<pod::BVH::index_t> stack;
+		//stack.clear(); // there is no stack.clear(), and the stack should already be cleared by the end of this function
 		stack.push(0);
 
 		while ( !stack.empty() ) {
-			int32_t idx = stack.top(); stack.pop();
+			pod::BVH::index_t idx = stack.top(); stack.pop();
 			const auto& node = bvh.nodes[idx];
 
 			float tMin, tMax;
-			if ( node.asleep || !::rayAabbIntersect( ray, node.bounds, tMin, tMax ) ) continue;
+			if ( node.isAsleep() || !::rayAabbIntersect( ray, bvh.bounds[idx], tMin, tMax ) ) continue;
 			if ( tMin > maxDist ) continue;
 
-			if ( node.count > 0 ) {
-				for ( auto i = 0; i < node.count; ++i) outIndices.emplace_back(bvh.indices[node.start + i]);
+			if ( node.getCount() > 0 ) {
+				for ( auto i = 0; i < node.getCount(); ++i) outIndices.emplace_back(bvh.indices[node.start + i]);
 			} else {
 				stack.push(node.left);
 				stack.push(node.right);
@@ -598,18 +624,18 @@ namespace {
 		}
 	}
 	// query a BVH with a ray via recursion
-	void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& outIndices, int32_t nodeID, float maxDist ) {
+	void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& outIndices, pod::BVH::index_t nodeID, float maxDist ) {
 		if ( !bvh.flattened.empty() ) return ::queryFlatBVH( bvh, ray, outIndices, maxDist );
 
 		if ( nodeID == 0 ) outIndices.reserve(::reserveCount);
 
 		const auto& node = bvh.nodes[nodeID];
 		float tMin, tMax;
-		if ( node.asleep || !::rayAabbIntersect( ray, node.bounds, tMin, tMax ) ) return;
+		if ( node.isAsleep() || !::rayAabbIntersect( ray, bvh.bounds[nodeID], tMin, tMax ) ) return;
 		if ( tMin > maxDist ) return;
 
-		if ( node.count > 0 ) {
-			for ( auto i = 0; i < node.count; ++i ) outIndices.emplace_back(bvh.indices[node.start + i]);
+		if ( node.getCount() > 0 ) {
+			for ( auto i = 0; i < node.getCount(); ++i ) outIndices.emplace_back(bvh.indices[node.start + i]);
 			return;
 		}
 
@@ -629,16 +655,16 @@ namespace {
 
 		for ( auto i = 0; i < nodes.size(); ++i ) {
 			const auto& nodeA = nodes[i];
-			if ( nodeA.count <= 0 || nodeA.asleep ) continue;
+			if ( nodeA.getCount() <= 0 || nodeA.isAsleep() ) continue;
 
 			for ( auto j = i + 1; j < nodes.size(); ++j ) {
 				const auto& nodeB = nodes[j];
-				if ( nodeB.count <= 0 || nodeB.asleep ) continue;
+				if ( nodeB.getCount() <= 0 || nodeB.isAsleep() ) continue;
 
-				if ( !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) continue;
+				if ( !::aabbOverlap( bvh.flatBounds[i], bvh.flatBounds[j] ) ) continue;
 
-				for ( auto ia = 0; ia < nodeA.count; ++ia ) {
-					for ( auto ib = 0; ib < nodeB.count; ++ib ) {
+				for ( auto ia = 0; ia < nodeA.getCount(); ++ia ) {
+					for ( auto ib = 0; ib < nodeB.getCount(); ++ib ) {
 						auto indexA = indices[nodeA.start + ia];
 						auto indexB = indices[nodeB.start + ib];
 
@@ -664,16 +690,16 @@ namespace {
 
 		for ( auto i = 0; i < nodesA.size(); ++i ) {
 			const auto& nodeA = nodesA[i];
-			if ( nodeA.count <= 0 || nodeA.asleep ) continue;
+			if ( nodeA.getCount() <= 0 || nodeA.isAsleep() ) continue;
 
 			for ( auto j = 0; j < nodesB.size(); ++j ) {
 				const auto& nodeB = nodesB[j];
-				if ( nodeB.count <= 0 || nodeB.asleep ) continue;
+				if ( nodeB.getCount() <= 0 || nodeB.isAsleep() ) continue;
 
-				if ( !::aabbOverlap( nodeA.bounds, nodeB.bounds ) ) continue;
+				if ( !::aabbOverlap( bvhA.flatBounds[i], bvhB.flatBounds[j] ) ) continue;
 
-				for ( auto ia = 0; ia < nodeA.count; ++ia ) {
-					for (auto ib = 0; ib < nodeB.count; ++ib ) {
+				for ( auto ia = 0; ia < nodeA.getCount(); ++ia ) {
+					for (auto ib = 0; ib < nodeB.getCount(); ++ib ) {
 						auto indexA = indicesA[nodeA.start + ia];
 						auto indexB = indicesB[nodeB.start + ib];
 
@@ -684,20 +710,20 @@ namespace {
 		}
 	}
 
-	void queryFlatBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& outIndices ) {
+	void queryFlatBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& outIndices ) {
 		auto& nodes = bvh.flattened;
 		auto& indices = bvh.indices;
 
 		outIndices.reserve(::reserveCount);
 
-		int32_t idx = 0;
+		pod::BVH::index_t idx = 0;
 		while ( idx < nodes.size() ) {
 			const auto& node = nodes[idx];
 
-			if ( !node.asleep && ::aabbOverlap( bounds, node.bounds ) ) {
+			if ( !node.isAsleep() && ::aabbOverlap( bounds, bvh.flatBounds[idx] ) ) {
 				// leaf
-				if ( node.count > 0 ) {
-					for ( auto i = 0; i < node.count; ++i ) {
+				if ( node.getCount() > 0 ) {
+					for ( auto i = 0; i < node.getCount(); ++i ) {
 						outIndices.emplace_back( indices[node.start + i] );
 					}
 				}
@@ -708,20 +734,20 @@ namespace {
 			}
 		}
 	}
-	void queryFlatBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& outIndices, float maxDist ) {
+	void queryFlatBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& outIndices, float maxDist ) {
 		auto& nodes = bvh.flattened;
 		auto& indices = bvh.indices;
 
 		outIndices.reserve(::reserveCount);
 
-		int32_t idx = 0;
+		pod::BVH::index_t idx = 0;
 		while ( idx < nodes.size() ) {
 			const auto& node = nodes[idx];
 			float tMin, tMax;
-			if ( !node.asleep && ::rayAabbIntersect( ray, node.bounds, tMin, tMax ) && tMin <= maxDist ) {
+			if ( !node.isAsleep() && ::rayAabbIntersect( ray, bvh.flatBounds[idx], tMin, tMax ) && tMin <= maxDist ) {
 				// leaf
-				if ( node.count > 0 ) {
-					for ( auto i = 0; i < node.count; ++i ) {
+				if ( node.getCount() > 0 ) {
+					for ( auto i = 0; i < node.getCount(); ++i ) {
 						outIndices.emplace_back( indices[node.start + i] );
 					}
 				}
@@ -736,10 +762,10 @@ namespace {
 
 namespace {
 	struct UnionFind {
-		uf::stl::vector<int32_t> parent;
-		uf::stl::vector<int32_t> rank;
+		uf::stl::vector<pod::BVH::index_t> parent;
+		uf::stl::vector<pod::BVH::index_t> rank;
 
-		UnionFind( int32_t n ) {
+		UnionFind( pod::BVH::index_t n ) {
 			parent.resize(n);
 			rank.resize(n, 0);
 			
@@ -747,14 +773,14 @@ namespace {
 				parent[i] = i;
 		}
 
-		int32_t find( int32_t x ) {
+		pod::BVH::index_t find( pod::BVH::index_t x ) {
 			if ( parent[x] != x ) parent[x] = find(parent[x]);
 			return parent[x];
 		}
 
-		void unite( int32_t a, int32_t b ) {
-			int32_t rootA = find(a);
-			int32_t rootB = find(b);
+		void unite( pod::BVH::index_t a, pod::BVH::index_t b ) {
+			pod::BVH::index_t rootA = find(a);
+			pod::BVH::index_t rootB = find(b);
 
 			if ( rootA == rootB ) return;
 
@@ -776,20 +802,20 @@ namespace {
 		}
 
 		// map root to island index
-		uf::stl::unordered_map<int32_t, int32_t> rootToIsland;
+		uf::stl::unordered_map<pod::BVH::index_t, pod::BVH::index_t> rootToIsland;
 
 		islands.clear();
 		islands.reserve(bodies.size());
 
 		for ( auto i = 0; i < bodies.size(); i++ ) {
-			int32_t root = unionizer.find(i);
+			pod::BVH::index_t root = unionizer.find(i);
 
 			if (rootToIsland.find(root) == rootToIsland.end()) {
-				rootToIsland[root] = (int32_t) islands.size();
+				rootToIsland[root] = (pod::BVH::index_t) islands.size();
 				islands.emplace_back();
 			}
 
-			int32_t islandID = rootToIsland[root];
+			pod::BVH::index_t islandID = rootToIsland[root];
 			islands[islandID].indices.emplace_back( i );
 		}
 
@@ -798,8 +824,8 @@ namespace {
 			// do not insert these pairs if they're non-colliding
 			if ( !::shouldCollide( *bodies[a], *bodies[b] ) ) continue;
 
-			int32_t root = unionizer.find(a);
-			int32_t islandID = rootToIsland[root];
+			pod::BVH::index_t root = unionizer.find(a);
+			pod::BVH::index_t islandID = rootToIsland[root];
 			islands[islandID].pairs.emplace(a, b);
 		}
 	}
diff --git a/engine/src/utils/math/physics/helpers.inl b/engine/src/utils/math/physics/helpers.inl
index bf9b4a8b..c10cf14e 100644
--- a/engine/src/utils/math/physics/helpers.inl
+++ b/engine/src/utils/math/physics/helpers.inl
@@ -47,10 +47,10 @@ namespace {
 	
 	pod::Vector3f aabbCenter( const pod::AABB& aabb );
 
-	void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& indices );
-	void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<int32_t>& indices, int32_t nodeID );
-	void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& indices, float maxDist = FLT_MAX );
-	void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<int32_t>& indices, int32_t nodeID, float maxDist = FLT_MAX );
+	void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& indices );
+	void queryBVH( const pod::BVH& bvh, const pod::AABB& bounds, uf::stl::vector<pod::BVH::index_t>& indices, pod::BVH::index_t nodeID );
+	void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& indices, float maxDist = FLT_MAX );
+	void queryBVH( const pod::BVH& bvh, const pod::Ray& ray, uf::stl::vector<pod::BVH::index_t>& indices, pod::BVH::index_t nodeID, float maxDist = FLT_MAX );
 	void queryOverlaps( const pod::BVH& bvh, pod::BVH::pairs_t& outPairs );
 	void queryOverlaps( const pod::BVH& bvhA, const pod::BVH& bvhB, pod::BVH::pairs_t& outPairs );
 }
diff --git a/engine/src/utils/math/physics/impl.cpp b/engine/src/utils/math/physics/impl.cpp
index 925df8c4..550ba950 100644
--- a/engine/src/utils/math/physics/impl.cpp
+++ b/engine/src/utils/math/physics/impl.cpp
@@ -10,40 +10,39 @@ namespace {
 	bool psgContactSolver = true; // use PSG contact solver
 	bool useGjk = false; // currently don't have a way to broadphase mesh => narrowphase tri via GJK
 	bool fixedStep = true; // run physics simulation with a fixed delta time (with accumulation), rather than rely on actual engine deltatime
-	int32_t substeps = 0; // number of substeps per frame tick
-	int32_t reserveCount = 32; // amount of elements to reserve for vectors used in this system, to-do: have it tie to a memory pool allocator
+	uint32_t substeps = 0; // number of substeps per frame tick
+	uint32_t reserveCount = 32; // amount of elements to reserve for vectors used in this system, to-do: have it tie to a memory pool allocator
 
-	// increasing these make things lag for reasons I can imagine why
-	int32_t broadphaseBvhCapacity = 1; // number of bodies per leaf node
-	int32_t meshBvhCapacity = 1; // number of triangles per leaf node
+	// increasing these makes things lag, presumably from having to test more triangles rather than just more boxes
+	uint32_t broadphaseBvhCapacity = 4; // number of bodies per leaf node
+	uint32_t meshBvhCapacity = 1; // number of triangles per leaf node
 
 	// additionally flattens a BVH for linear iteration, rather than a recursive / stack-based traversal
 	bool flattenBvhBodies = true;
 	bool flattenBvhMeshes = true;
 	
 	// use surface area heuristics for building the BVH, rather than naive splits
-	bool useBvhSahBodies = false; // it actually seems slower to use these......
+	bool useBvhSahBodies = true; // it actually seems slower to use these......
 	bool useBvhSahMeshes = true;
 
 	bool useSplitBvhs = true; // creates separate BVHs for static / dynamic objects
 
 	// to-do: find possibly better values for this
-	int32_t solverIterations = 10;
+	uint32_t solverIterations = 10;
 	float baumgarteCorrectionPercent = 0.2f;
 	float baumgarteCorrectionSlop = 0.01f;
 	
 	uf::stl::unordered_map<size_t, pod::Manifold> manifoldsCache;
-	int32_t manifoldCacheLifetime = 6; // to-do: find a good value for this
+	uint32_t manifoldCacheLifetime = 6; // to-do: find a good value for this
 
 	uint32_t frameCounter = 0;
 
 	// to-do: tweak this to not be annoying
-	// currently seems only reliable when it hits its TTL, but too long of a wait is gross, and too frequent of an update causes lag
 	pod::BVH::UpdatePolicy bvhUpdatePolicy = {
 		.displacementThreshold = 0.25f,
 		.overlapThreshold = 2.0f,
 		.dirtyRatioThreshold = 0.3f,
-		.maxFramesBeforeRebuild = 120,
+		.maxFramesBeforeRebuild = 60 * 10, // 10 seconds
 	};
 }
 
@@ -476,7 +475,8 @@ pod::RayQuery uf::physics::impl::rayCast( const pod::Ray& ray, const pod::World&
 	auto& staticBvh = world.staticBvh;
 	auto& bodies = world.bodies;
 
-	uf::stl::vector<int32_t> candidates;
+	thread_local uf::stl::vector<pod::BVH::index_t> candidates;
+	candidates.clear();
 	::queryBVH( dynamicBvh, ray, candidates );
 	if ( ::useSplitBvhs ) ::queryBVH( staticBvh, ray, candidates );
 
diff --git a/engine/src/utils/math/physics/mesh.inl b/engine/src/utils/math/physics/mesh.inl
index 5ca8b36e..83362502 100644
--- a/engine/src/utils/math/physics/mesh.inl
+++ b/engine/src/utils/math/physics/mesh.inl
@@ -24,7 +24,8 @@ namespace {
 
 		// transform to local space for BVH query
 		auto bounds = ::transformAabbToLocal( aabb.bounds, ::getTransform( mesh ) );
-		uf::stl::vector<int32_t> candidates;
+		thread_local uf::stl::vector<pod::BVH::index_t> candidates;
+		candidates.clear();
 		::queryBVH( bvh, bounds, candidates );
 
 		bool hit = false;
@@ -47,7 +48,8 @@ namespace {
 
 		// transform to local space for BVH query
 		auto bounds = ::transformAabbToLocal( sphere.bounds, ::getTransform( mesh ) );		
-		uf::stl::vector<int32_t> candidates;
+		thread_local uf::stl::vector<pod::BVH::index_t> candidates;
+		candidates.clear();
 		::queryBVH( bvh, bounds, candidates );
 
 		bool hit = false;
@@ -72,7 +74,8 @@ namespace {
 
 		// transform to local space for BVH query
 		auto bounds = ::transformAabbToLocal( plane.bounds, ::getTransform( mesh ) );		
-		uf::stl::vector<int32_t> candidates;
+		thread_local uf::stl::vector<pod::BVH::index_t> candidates;
+		candidates.clear();
 		::queryBVH( bvh, bounds, candidates );
 
 		bool hit = false;
@@ -96,7 +99,8 @@ namespace {
 
 		// transform to local space for BVH query
 		auto bounds = ::transformAabbToLocal( capsule.bounds, ::getTransform( mesh ) );		
-		uf::stl::vector<int32_t> candidates;
+		thread_local uf::stl::vector<pod::BVH::index_t> candidates;
+		candidates.clear();
 		::queryBVH( bvh, bounds, candidates );
 
 		bool hit = false;
@@ -120,7 +124,8 @@ namespace {
 		const auto& bvhB = *b.collider.mesh.bvh;
 
 		// compute overlaps between one BVH and another BVH
-		pod::BVH::pairs_t pairs;
+		thread_local pod::BVH::pairs_t pairs;
+		pairs.clear();
 		::queryOverlaps( bvhA, bvhB, pairs );
 
 		bool hit = false;
diff --git a/engine/src/utils/math/physics/ray.inl b/engine/src/utils/math/physics/ray.inl
index c87ac616..0b198313 100644
--- a/engine/src/utils/math/physics/ray.inl
+++ b/engine/src/utils/math/physics/ray.inl
@@ -202,7 +202,8 @@ namespace {
 		ray.origin	= uf::transform::applyInverse( transform, r.origin );
 		ray.direction = uf::quaternion::rotate( uf::quaternion::inverse( transform.orientation ), r.direction );
 
-		uf::stl::vector<int32_t> candidates;
+		thread_local uf::stl::vector<pod::BVH::index_t> candidates;
+		candidates.clear();
 		::queryBVH( bvh, ray, candidates );
 
 		for ( auto triID : candidates ) {