fixed dynamic/static split BVHs because of some dumb assumptions, fixed contacts being order dependent from not flipping the normals for '*Mesh' contacts generated, some other things (scenes don't lag to high hell but my props and doors aren't collidable...)

This commit is contained in:
ecker 2025-09-04 22:22:08 -05:00
parent e1d824a5ac
commit fff258917e
14 changed files with 309 additions and 262 deletions

View File

@ -7,6 +7,10 @@
#define S_1(x) #x #define S_1(x) #x
#define S_2(x) S_1(x) #define S_2(x) S_1(x)
// negligible increase in the lib size, mainly to ensure SIMD intrinsics do inline
#define FORCE_INLINE __attribute__((always_inline)) inline
// #define FORCE_INLINE inline
#define TIMER(x, ...)\ #define TIMER(x, ...)\
static bool first = true;\ static bool first = true;\
static uf::Timer<long long> timer(false);\ static uf::Timer<long long> timer(false);\

View File

@ -16,11 +16,11 @@ namespace pod {
static const size_t columns = C; static const size_t columns = C;
// Overload access // Overload access
// Accessing via subscripts // Accessing via subscripts
inline T& operator[](size_t i); FORCE_INLINE T& operator[](size_t i);
inline const T& operator[](size_t i) const; FORCE_INLINE const T& operator[](size_t i) const;
inline T& operator()(size_t r, size_t c); FORCE_INLINE T& operator()(size_t r, size_t c);
inline const T& operator()(size_t r, size_t c) const; FORCE_INLINE const T& operator()(size_t r, size_t c) const;
// Arithmetic // Arithmetic
Matrix<T,R,C> operator*( const Matrix<T,R,C>& matrix ) const; Matrix<T,R,C> operator*( const Matrix<T,R,C>& matrix ) const;
@ -86,18 +86,18 @@ namespace uf {
template<typename T=NUM> pod::Matrix4t<T> /*UF_API*/ orthographic( T, T, T, T, T, T ); template<typename T=NUM> pod::Matrix4t<T> /*UF_API*/ orthographic( T, T, T, T, T, T );
template<typename T=NUM> pod::Matrix4t<T> /*UF_API*/ orthographic( T, T, T, T ); template<typename T=NUM> pod::Matrix4t<T> /*UF_API*/ orthographic( T, T, T, T );
template<typename T=NUM> pod::Matrix4t<T> inline /*UF_API*/ orthographic( const pod::Vector2t<T>& lr, const pod::Vector2t<T>& bt, const pod::Vector2t<T>& nf ) { template<typename T=NUM> pod::Matrix4t<T> FORCE_INLINE /*UF_API*/ orthographic( const pod::Vector2t<T>& lr, const pod::Vector2t<T>& bt, const pod::Vector2t<T>& nf ) {
return orthographic<T>( lr.x, lr.y, bt.x, bt.y, nf.x, nf.y ); return orthographic<T>( lr.x, lr.y, bt.x, bt.y, nf.x, nf.y );
} }
template<typename T=NUM> pod::Matrix4t<T> inline /*UF_API*/ orthographic( const pod::Vector2t<T>& lr, const pod::Vector2t<T>& bt ) { template<typename T=NUM> pod::Matrix4t<T> FORCE_INLINE /*UF_API*/ orthographic( const pod::Vector2t<T>& lr, const pod::Vector2t<T>& bt ) {
return orthographic<T>( lr.x, lr.y, bt.x, bt.y ); return orthographic<T>( lr.x, lr.y, bt.x, bt.y );
} }
template<typename T=NUM> pod::Matrix4t<T> /*UF_API*/ perspective( T, T, T, T ); template<typename T=NUM> pod::Matrix4t<T> /*UF_API*/ perspective( T, T, T, T );
template<typename T=NUM> pod::Matrix4t<T> inline /*UF_API*/ perspective( T fov, T raidou, const pod::Vector2f& range ) { template<typename T=NUM> pod::Matrix4t<T> FORCE_INLINE /*UF_API*/ perspective( T fov, T raidou, const pod::Vector2f& range ) {
return perspective( fov, raidou, range.x, range.y ); return perspective( fov, raidou, range.x, range.y );
} }
template<typename T=NUM> pod::Matrix4t<T> inline /*UF_API*/ perspective( T fov, const pod::Vector2ui& size, const pod::Vector2f& range ) { template<typename T=NUM> pod::Matrix4t<T> FORCE_INLINE /*UF_API*/ perspective( T fov, const pod::Vector2ui& size, const pod::Vector2f& range ) {
return perspective( fov, (T) size.x / (T) size.y, range.x, range.y ); return perspective( fov, (T) size.x / (T) size.y, range.x, range.y );
} }
// Setting // Setting

View File

@ -5,19 +5,19 @@
#define INDEX( R, C, r, c ) COL_MAJOR_INDEX( R, C, r, c ) #define INDEX( R, C, r, c ) COL_MAJOR_INDEX( R, C, r, c )
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline T& pod::Matrix<T,R,C>::operator[](size_t i) { FORCE_INLINE T& pod::Matrix<T,R,C>::operator[](size_t i) {
return this->components[i]; return this->components[i];
} }
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline const T& pod::Matrix<T,R,C>::operator[](size_t i) const { FORCE_INLINE const T& pod::Matrix<T,R,C>::operator[](size_t i) const {
return this->components[i]; return this->components[i];
} }
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline T& pod::Matrix<T,R,C>::operator()(size_t r, size_t c) { FORCE_INLINE T& pod::Matrix<T,R,C>::operator()(size_t r, size_t c) {
return this->components[INDEX( R, C, r, c )]; return this->components[INDEX( R, C, r, c )];
} }
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline const T& pod::Matrix<T,R,C>::operator()(size_t r, size_t c) const { FORCE_INLINE const T& pod::Matrix<T,R,C>::operator()(size_t r, size_t c) const {
return this->components[INDEX( R, C, r, c )]; return this->components[INDEX( R, C, r, c )];
} }
@ -57,27 +57,27 @@ template<typename T> pod::Matrix<typename T::type_t, T::columns, T::columns> uf:
return matrix; return matrix;
} }
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline pod::Matrix<T,R,C> pod::Matrix<T,R,C>::operator*( const Matrix<T,R,C>& matrix ) const { FORCE_INLINE pod::Matrix<T,R,C> pod::Matrix<T,R,C>::operator*( const Matrix<T,R,C>& matrix ) const {
return uf::matrix::multiply(*this, matrix); return uf::matrix::multiply(*this, matrix);
} }
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline pod::Matrix<T,R,C> pod::Matrix<T,R,C>::operator*( T scalar ) const { FORCE_INLINE pod::Matrix<T,R,C> pod::Matrix<T,R,C>::operator*( T scalar ) const {
return uf::matrix::multiplyAll(*this, scalar); return uf::matrix::multiplyAll(*this, scalar);
} }
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline pod::Matrix<T,R,C> pod::Matrix<T,R,C>::operator+( const Matrix<T,R,C>& matrix ) const { FORCE_INLINE pod::Matrix<T,R,C> pod::Matrix<T,R,C>::operator+( const Matrix<T,R,C>& matrix ) const {
return uf::matrix::add(*this, matrix); return uf::matrix::add(*this, matrix);
} }
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline pod::Matrix<T,R,C>& pod::Matrix<T,R,C>::operator*=( const Matrix<T,R,C>& matrix ) { FORCE_INLINE pod::Matrix<T,R,C>& pod::Matrix<T,R,C>::operator*=( const Matrix<T,R,C>& matrix ) {
return uf::matrix::multiply_(*this, matrix); return uf::matrix::multiply_(*this, matrix);
} }
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline bool pod::Matrix<T,R,C>::operator==( const Matrix<T,R,C>& matrix ) const { FORCE_INLINE bool pod::Matrix<T,R,C>::operator==( const Matrix<T,R,C>& matrix ) const {
return uf::matrix::equals( *this, matrix ); return uf::matrix::equals( *this, matrix );
} }
template<typename T, size_t R, size_t C> template<typename T, size_t R, size_t C>
inline bool pod::Matrix<T,R,C>::operator!=( const Matrix<T,R,C>& matrix ) const { FORCE_INLINE bool pod::Matrix<T,R,C>::operator!=( const Matrix<T,R,C>& matrix ) const {
return !uf::matrix::equals( *this, matrix ); return !uf::matrix::equals( *this, matrix );
} }
template<typename T> bool uf::matrix::equals( const T& left, const T& right, float eps ) { template<typename T> bool uf::matrix::equals( const T& left, const T& right, float eps ) {

View File

@ -6,19 +6,19 @@ namespace uf {
typedef typename traits<T>::value value_type; typedef typename traits<T>::value value_type;
value_type m[4]; // 4 x 4 value_type m[4]; // 4 x 4
inline matrix_value(); FORCE_INLINE matrix_value();
inline matrix_value(const pod::Matrix<T,4>& rhs); FORCE_INLINE matrix_value(const pod::Matrix<T,4>& rhs);
inline bool operator==(const matrix_value&) const; FORCE_INLINE bool operator==(const matrix_value&) const;
inline operator pod::Matrix<T,4>() const; FORCE_INLINE operator pod::Matrix<T,4>() const;
}; };
} }
namespace simd { namespace simd {
inline uf::simd::matrix_value<float> matMult( const uf::simd::matrix_value<float>& A, const uf::simd::matrix_value<float>& B ); FORCE_INLINE uf::simd::matrix_value<float> matMult( const uf::simd::matrix_value<float>& A, const uf::simd::matrix_value<float>& B );
inline uf::simd::vector<float> matMult( const uf::simd::matrix_value<float>& A, uf::simd::vector<float> B ); FORCE_INLINE uf::simd::vector<float> matMult( const uf::simd::matrix_value<float>& A, uf::simd::vector<float> B );
inline uf::simd::matrix_value<float> matTranspose( const uf::simd::matrix_value<float>& M ); FORCE_INLINE uf::simd::matrix_value<float> matTranspose( const uf::simd::matrix_value<float>& M );
inline bool matEquals( const uf::simd::matrix_value<float>& A, const uf::simd::matrix_value<float>& B, float eps ); FORCE_INLINE bool matEquals( const uf::simd::matrix_value<float>& A, const uf::simd::matrix_value<float>& B, float eps );
} }
} }
@ -181,20 +181,20 @@ namespace {
} }
template<typename T> template<typename T>
inline uf::simd::matrix_value<T>::matrix_value() {} FORCE_INLINE uf::simd::matrix_value<T>::matrix_value() {}
template<typename T> template<typename T>
inline uf::simd::matrix_value<T>::matrix_value( const pod::Matrix<T,4>& mat ) { FORCE_INLINE uf::simd::matrix_value<T>::matrix_value( const pod::Matrix<T,4>& mat ) {
m[0] = _mm_loadu_ps(&mat[0]); m[0] = _mm_loadu_ps(&mat[0]);
m[1] = _mm_loadu_ps(&mat[4]); m[1] = _mm_loadu_ps(&mat[4]);
m[2] = _mm_loadu_ps(&mat[8]); m[2] = _mm_loadu_ps(&mat[8]);
m[3] = _mm_loadu_ps(&mat[12]); m[3] = _mm_loadu_ps(&mat[12]);
} }
template<typename T> template<typename T>
inline bool uf::simd::matrix_value<T>::operator==(const matrix_value& rhs) const { FORCE_INLINE bool uf::simd::matrix_value<T>::operator==(const matrix_value& rhs) const {
return uf::simd::matEquals( *this, rhs ); return uf::simd::matEquals( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::matrix_value<T>::operator pod::Matrix<T,4>() const { FORCE_INLINE uf::simd::matrix_value<T>::operator pod::Matrix<T,4>() const {
pod::Matrix4f mat; pod::Matrix4f mat;
_mm_storeu_ps(&mat[0], m[0]); _mm_storeu_ps(&mat[0], m[0]);
_mm_storeu_ps(&mat[4], m[1]); _mm_storeu_ps(&mat[4], m[1]);
@ -203,18 +203,18 @@ inline uf::simd::matrix_value<T>::operator pod::Matrix<T,4>() const {
return mat; return mat;
} }
inline uf::simd::matrix_value<float> uf::simd::matMult( const uf::simd::matrix_value<float>& A, const uf::simd::matrix_value<float>& B ) { FORCE_INLINE uf::simd::matrix_value<float> uf::simd::matMult( const uf::simd::matrix_value<float>& A, const uf::simd::matrix_value<float>& B ) {
return ::matMult_impl( A, B ); return ::matMult_impl( A, B );
} }
inline uf::simd::vector<float> uf::simd::matMult( const uf::simd::matrix_value<float>& M, uf::simd::vector<float> vec ) { FORCE_INLINE uf::simd::vector<float> uf::simd::matMult( const uf::simd::matrix_value<float>& M, uf::simd::vector<float> vec ) {
return ::matMult_impl( M, vec ); return ::matMult_impl( M, vec );
} }
inline uf::simd::matrix_value<float> uf::simd::matTranspose( const uf::simd::matrix_value<float>& M ) { FORCE_INLINE uf::simd::matrix_value<float> uf::simd::matTranspose( const uf::simd::matrix_value<float>& M ) {
uf::simd::matrix_value<float> R = M; uf::simd::matrix_value<float> R = M;
_MM_TRANSPOSE4_PS(R.m[0], R.m[1], R.m[2], R.m[3]); _MM_TRANSPOSE4_PS(R.m[0], R.m[1], R.m[2], R.m[3]);
return R; return R;
} }
inline bool uf::simd::matEquals( const uf::simd::matrix_value<float>& A, const uf::simd::matrix_value<float>& B, float eps ) { FORCE_INLINE bool uf::simd::matEquals( const uf::simd::matrix_value<float>& A, const uf::simd::matrix_value<float>& B, float eps ) {
bool result = true; bool result = true;
__m128 e = _mm_set1_ps(eps); __m128 e = _mm_set1_ps(eps);
FOR_EACH(4, { FOR_EACH(4, {

View File

@ -1,12 +1,12 @@
namespace uf { namespace uf {
namespace simd { namespace simd {
inline vector<float> /*UF_API*/ quatMul( vector<float>, vector<float> ); FORCE_INLINE vector<float> /*UF_API*/ quatMul( vector<float>, vector<float> );
inline vector<float> /*UF_API*/ quatRot_3f( vector<float>, vector<float> ); FORCE_INLINE vector<float> /*UF_API*/ quatRot_3f( vector<float>, vector<float> );
inline pod::Matrix4f /*UF_API*/ quatMat( vector<float> ); FORCE_INLINE pod::Matrix4f /*UF_API*/ quatMat( vector<float> );
} }
} }
inline uf::simd::vector<float> uf::simd::quatMul( uf::simd::vector<float> Q1, uf::simd::vector<float> Q2 ) { FORCE_INLINE uf::simd::vector<float> uf::simd::quatMul( uf::simd::vector<float> Q1, uf::simd::vector<float> Q2 ) {
// broadcast q1 components // broadcast q1 components
__m128 x1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(0,0,0,0)); __m128 x1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(0,0,0,0));
__m128 y1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(1,1,1,1)); __m128 y1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(1,1,1,1));
@ -47,7 +47,7 @@ inline uf::simd::vector<float> uf::simd::quatMul( uf::simd::vector<float> Q1, uf
__m128 result = _mm_movelh_ps(_mm_unpacklo_ps(X, Y), _mm_unpacklo_ps(Z, W)); __m128 result = _mm_movelh_ps(_mm_unpacklo_ps(X, Y), _mm_unpacklo_ps(Z, W));
return result; return result;
} }
inline uf::simd::vector<float> uf::simd::quatRot_3f( uf::simd::vector<float> Q, uf::simd::vector<float> V ) { FORCE_INLINE uf::simd::vector<float> uf::simd::quatRot_3f( uf::simd::vector<float> Q, uf::simd::vector<float> V ) {
// extract q.xyz and q.w // extract q.xyz and q.w
__m128 qxyz = _mm_and_ps(Q, _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))); // mask out w __m128 qxyz = _mm_and_ps(Q, _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))); // mask out w
__m128 qw = _mm_shuffle_ps(Q, Q, _MM_SHUFFLE(3,3,3,3)); __m128 qw = _mm_shuffle_ps(Q, Q, _MM_SHUFFLE(3,3,3,3));
@ -75,7 +75,7 @@ inline uf::simd::vector<float> uf::simd::quatRot_3f( uf::simd::vector<float> Q,
return result; return result;
} }
inline pod::Matrix4f uf::simd::quatMat( uf::simd::vector<float> Q ) { FORCE_INLINE pod::Matrix4f uf::simd::quatMat( uf::simd::vector<float> Q ) {
// Shuffle out components // Shuffle out components
__m128 qx = _mm_shuffle_ps(Q, Q, _MM_SHUFFLE(0,0,0,0)); __m128 qx = _mm_shuffle_ps(Q, Q, _MM_SHUFFLE(0,0,0,0));
__m128 qy = _mm_shuffle_ps(Q, Q, _MM_SHUFFLE(1,1,1,1)); __m128 qy = _mm_shuffle_ps(Q, Q, _MM_SHUFFLE(1,1,1,1));

View File

@ -169,33 +169,33 @@ namespace uf {
using type_t = T;\ using type_t = T;\
using container_t = T*;\ using container_t = T*;\
static constexpr size_t size = N;\ static constexpr size_t size = N;\
inline T& operator[](size_t i) { return components[i]; }\ FORCE_INLINE T& operator[](size_t i) { return components[i]; }\
inline const T& operator[](size_t i) const { return components[i]; }\ FORCE_INLINE const T& operator[](size_t i) const { return components[i]; }\
inline Vector<T,N> operator-() const { return uf::vector::negate(*this); } \ FORCE_INLINE Vector<T,N> operator-() const { return uf::vector::negate(*this); } \
inline Vector<T,N> operator+(const Vector<T,N>& rhs) const { return uf::vector::add(*this, rhs); } \ FORCE_INLINE Vector<T,N> operator+(const Vector<T,N>& rhs) const { return uf::vector::add(*this, rhs); } \
inline Vector<T,N> operator-(const Vector<T,N>& rhs) const { return uf::vector::subtract(*this, rhs); } \ FORCE_INLINE Vector<T,N> operator-(const Vector<T,N>& rhs) const { return uf::vector::subtract(*this, rhs); } \
inline Vector<T,N> operator*(const Vector<T,N>& rhs) const { return uf::vector::multiply(*this, rhs); } \ FORCE_INLINE Vector<T,N> operator*(const Vector<T,N>& rhs) const { return uf::vector::multiply(*this, rhs); } \
inline Vector<T,N> operator/(const Vector<T,N>& rhs) const { return uf::vector::divide(*this, rhs); } \ FORCE_INLINE Vector<T,N> operator/(const Vector<T,N>& rhs) const { return uf::vector::divide(*this, rhs); } \
inline Vector<T,N> operator+(type_t scalar) const { return uf::vector::add(*this, scalar); } \ FORCE_INLINE Vector<T,N> operator+(type_t scalar) const { return uf::vector::add(*this, scalar); } \
inline Vector<T,N> operator-(type_t scalar) const { return uf::vector::subtract(*this, scalar); } \ FORCE_INLINE Vector<T,N> operator-(type_t scalar) const { return uf::vector::subtract(*this, scalar); } \
inline Vector<T,N> operator*(type_t scalar) const { return uf::vector::multiply(*this, scalar); } \ FORCE_INLINE Vector<T,N> operator*(type_t scalar) const { return uf::vector::multiply(*this, scalar); } \
inline Vector<T,N> operator/(type_t scalar) const { return uf::vector::divide(*this, scalar); } \ FORCE_INLINE Vector<T,N> operator/(type_t scalar) const { return uf::vector::divide(*this, scalar); } \
inline Vector<T,N>& operator+=(const Vector<T,N>& rhs) { return uf::vector::add_(*this, rhs); } \ FORCE_INLINE Vector<T,N>& operator+=(const Vector<T,N>& rhs) { return uf::vector::add_(*this, rhs); } \
inline Vector<T,N>& operator-=(const Vector<T,N>& rhs) { return uf::vector::subtract_(*this, rhs); } \ FORCE_INLINE Vector<T,N>& operator-=(const Vector<T,N>& rhs) { return uf::vector::subtract_(*this, rhs); } \
inline Vector<T,N>& operator*=(const Vector<T,N>& rhs) { return uf::vector::multiply_(*this, rhs); } \ FORCE_INLINE Vector<T,N>& operator*=(const Vector<T,N>& rhs) { return uf::vector::multiply_(*this, rhs); } \
inline Vector<T,N>& operator/=(const Vector<T,N>& rhs) { return uf::vector::divide_(*this, rhs); } \ FORCE_INLINE Vector<T,N>& operator/=(const Vector<T,N>& rhs) { return uf::vector::divide_(*this, rhs); } \
inline Vector<T,N>& operator+=(type_t scalar) { return uf::vector::add_(*this, scalar); } \ FORCE_INLINE Vector<T,N>& operator+=(type_t scalar) { return uf::vector::add_(*this, scalar); } \
inline Vector<T,N>& operator-=(type_t scalar) { return uf::vector::subtract_(*this, scalar); } \ FORCE_INLINE Vector<T,N>& operator-=(type_t scalar) { return uf::vector::subtract_(*this, scalar); } \
inline Vector<T,N>& operator*=(type_t scalar) { return uf::vector::multiply_(*this, scalar); } \ FORCE_INLINE Vector<T,N>& operator*=(type_t scalar) { return uf::vector::multiply_(*this, scalar); } \
inline Vector<T,N>& operator/=(type_t scalar) { return uf::vector::divide_(*this, scalar); } \ FORCE_INLINE Vector<T,N>& operator/=(type_t scalar) { return uf::vector::divide_(*this, scalar); } \
inline bool operator==(const Vector<T,N>& rhs) const { return uf::vector::equals(*this, rhs); } \ FORCE_INLINE bool operator==(const Vector<T,N>& rhs) const { return uf::vector::equals(*this, rhs); } \
inline bool operator!=(const Vector<T,N>& rhs) const { return uf::vector::notEquals(*this, rhs); } \ FORCE_INLINE bool operator!=(const Vector<T,N>& rhs) const { return uf::vector::notEquals(*this, rhs); } \
inline bool operator<(const Vector<T,N>& rhs) const { return uf::vector::less(*this, rhs); } \ FORCE_INLINE bool operator<(const Vector<T,N>& rhs) const { return uf::vector::less(*this, rhs); } \
inline bool operator<=(const Vector<T,N>& rhs) const { return uf::vector::lessEquals(*this, rhs); } \ FORCE_INLINE bool operator<=(const Vector<T,N>& rhs) const { return uf::vector::lessEquals(*this, rhs); } \
inline bool operator>(const Vector<T,N>& rhs) const { return uf::vector::greater(*this, rhs); } \ FORCE_INLINE bool operator>(const Vector<T,N>& rhs) const { return uf::vector::greater(*this, rhs); } \
inline bool operator>=(const Vector<T,N>& rhs) const { return uf::vector::greaterEquals(*this, rhs); } \ FORCE_INLINE bool operator>=(const Vector<T,N>& rhs) const { return uf::vector::greaterEquals(*this, rhs); } \
inline explicit operator bool() const { return uf::vector::notEquals(*this, Vector<T,N>{}); } \ FORCE_INLINE explicit operator bool() const { return uf::vector::notEquals(*this, Vector<T,N>{}); } \
template<typename U, size_t M> inline operator Vector<U,M>() const { return uf::vector::cast<U,M>(*this); } template<typename U, size_t M> FORCE_INLINE operator Vector<U,M>() const { return uf::vector::cast<U,M>(*this); }
namespace pod { namespace pod {
template<typename T, size_t N> template<typename T, size_t N>

View File

@ -7,28 +7,28 @@
#endif #endif
#define DEFINE_SIMD(T)\ #define DEFINE_SIMD(T)\
inline vector<T> /*UF_API*/ load( const T* );\ FORCE_INLINE vector<T> /*UF_API*/ load( const T* );\
inline void /*UF_API*/ store( vector<T>, T* );\ FORCE_INLINE void /*UF_API*/ store( vector<T>, T* );\
inline vector<T> /*UF_API*/ set( T );\ FORCE_INLINE vector<T> /*UF_API*/ set( T );\
inline vector<T> /*UF_API*/ set( T, T, T, T );\ FORCE_INLINE vector<T> /*UF_API*/ set( T, T, T, T );\
inline vector<T> /*UF_API*/ add( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ add( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ sub( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ sub( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ mul( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ mul( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ div( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ div( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ min( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ min( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ max( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ max( vector<T>, vector<T> );\
inline bool /*UF_API*/ all( vector<T> );\ FORCE_INLINE bool /*UF_API*/ all( vector<T> );\
inline bool /*UF_API*/ any( vector<T> );\ FORCE_INLINE bool /*UF_API*/ any( vector<T> );\
inline vector<T> /*UF_API*/ less( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ less( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ lessEquals( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ lessEquals( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ greater( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ greater( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ greaterEquals( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ greaterEquals( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ equals( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ equals( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ notEquals( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ notEquals( vector<T>, vector<T> );\
inline vector<T> /*UF_API*/ sqrt( vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ sqrt( vector<T> );\
inline vector<T> /*UF_API*/ hadd( vector<T>, vector<T> );\ FORCE_INLINE vector<T> /*UF_API*/ hadd( vector<T>, vector<T> );\
inline T /*UF_API*/ dot( vector<T>, vector<T> );\ FORCE_INLINE T /*UF_API*/ dot( vector<T>, vector<T> );\
template<size_t N = 4> inline pod::Vector<T,N> cast( const vector<T> );\ template<size_t N = 4> FORCE_INLINE pod::Vector<T,N> cast( const vector<T> );\
namespace uf { namespace uf {
namespace simd { namespace simd {
@ -68,38 +68,38 @@ namespace uf {
// __m128 m; // __m128 m;
typedef typename traits<T>::value value_type; typedef typename traits<T>::value value_type;
value_type m; value_type m;
inline vector(); FORCE_INLINE vector();
inline vector(const T* f); FORCE_INLINE vector(const T* f);
inline vector(T f); FORCE_INLINE vector(T f);
inline vector(T f0, T f1, T f2, T f3); FORCE_INLINE vector(T f0, T f1, T f2, T f3);
inline vector(bool f0, bool f1, bool f2, bool f3); FORCE_INLINE vector(bool f0, bool f1, bool f2, bool f3);
inline vector(const value_type& rhs); FORCE_INLINE vector(const value_type& rhs);
inline vector(const vector& rhs); FORCE_INLINE vector(const vector& rhs);
inline vector(const pod::Vector<T,1>& rhs); FORCE_INLINE vector(const pod::Vector<T,1>& rhs);
inline vector(const pod::Vector<T,2>& rhs); FORCE_INLINE vector(const pod::Vector<T,2>& rhs);
inline vector(const pod::Vector<T,3>& rhs); FORCE_INLINE vector(const pod::Vector<T,3>& rhs);
inline vector(const pod::Vector<T,4>& rhs); FORCE_INLINE vector(const pod::Vector<T,4>& rhs);
inline vector operator+( const vector& rhs ); FORCE_INLINE vector operator+( const vector& rhs );
inline vector operator-( const vector& rhs ); FORCE_INLINE vector operator-( const vector& rhs );
inline vector operator*( const vector& rhs ); FORCE_INLINE vector operator*( const vector& rhs );
inline vector operator/( const vector& rhs ); FORCE_INLINE vector operator/( const vector& rhs );
inline vector operator<( const vector& rhs ); FORCE_INLINE vector operator<( const vector& rhs );
inline vector operator<=( const vector& rhs ); FORCE_INLINE vector operator<=( const vector& rhs );
inline vector operator>( const vector& rhs ); FORCE_INLINE vector operator>( const vector& rhs );
inline vector operator>=( const vector& rhs ); FORCE_INLINE vector operator>=( const vector& rhs );
inline vector operator==( const vector& rhs ); FORCE_INLINE vector operator==( const vector& rhs );
inline vector operator!=( const vector& rhs ); FORCE_INLINE vector operator!=( const vector& rhs );
inline vector& operator=(const value_type& rhs); FORCE_INLINE vector& operator=(const value_type& rhs);
inline vector& operator=(const vector& rhs); FORCE_INLINE vector& operator=(const vector& rhs);
inline vector& operator=(const pod::Vector<T,4>& rhs); FORCE_INLINE vector& operator=(const pod::Vector<T,4>& rhs);
inline operator value_type() const; FORCE_INLINE operator value_type() const;
template<size_t N> inline operator pod::Vector<T,N>() const; template<size_t N> FORCE_INLINE operator pod::Vector<T,N>() const;
}; };
DEFINE_SIMD(float); DEFINE_SIMD(float);
@ -117,13 +117,13 @@ namespace uf {
*/ */
// specializations // specializations
inline vector<float> /*UF_API*/ set_f( bool, bool, bool, bool ); FORCE_INLINE vector<float> /*UF_API*/ set_f( bool, bool, bool, bool );
inline vector<int32_t> /*UF_API*/ set_i( bool, bool, bool, bool ); FORCE_INLINE vector<int32_t> /*UF_API*/ set_i( bool, bool, bool, bool );
inline vector<uint32_t> /*UF_API*/ set_ui( bool, bool, bool, bool ); FORCE_INLINE vector<uint32_t> /*UF_API*/ set_ui( bool, bool, bool, bool );
inline vector<float> /*UF_API*/ cross( vector<float> x, vector<float> y ); FORCE_INLINE vector<float> /*UF_API*/ cross( vector<float> x, vector<float> y );
inline vector<float> /*UF_API*/ normalize( vector<float> x ); FORCE_INLINE vector<float> /*UF_API*/ normalize( vector<float> x );
inline vector<float> /*UF_API*/ normalize_fast( vector<float> x ); FORCE_INLINE vector<float> /*UF_API*/ normalize_fast( vector<float> x );
} }
} }

View File

@ -1,12 +1,12 @@
#include <uf/utils/memory/alignment.h> #include <uf/utils/memory/alignment.h>
namespace { namespace {
inline __m128i bias_unsigned(__m128i v) { FORCE_INLINE __m128i bias_unsigned(__m128i v) {
const __m128i signbit = _mm_set1_epi32(0x80000000); const __m128i signbit = _mm_set1_epi32(0x80000000);
return _mm_xor_si128(v, signbit); return _mm_xor_si128(v, signbit);
} }
inline int32_t boolMask(bool b) { FORCE_INLINE int32_t boolMask(bool b) {
return b ? -1 : 0; // 0xFFFFFFFF for true, 0x00000000 for false return b ? -1 : 0; // 0xFFFFFFFF for true, 0x00000000 for false
} }
} }
@ -20,142 +20,142 @@ namespace {
#define MV_INSTR_SET_7 __attribute__((target("avx"))) #define MV_INSTR_SET_7 __attribute__((target("avx")))
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector() {} FORCE_INLINE uf::simd::vector<T>::vector() {}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(const T* f) : m(uf::simd::load(f)) {} FORCE_INLINE uf::simd::vector<T>::vector(const T* f) : m(uf::simd::load(f)) {}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(T f) : m(uf::simd::set(f)) {} FORCE_INLINE uf::simd::vector<T>::vector(T f) : m(uf::simd::set(f)) {}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(T f0, T f1, T f2, T f3) : m(uf::simd::set(f0,f1,f2,f3)) {} FORCE_INLINE uf::simd::vector<T>::vector(T f0, T f1, T f2, T f3) : m(uf::simd::set(f0,f1,f2,f3)) {}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(bool f0, bool f1, bool f2, bool f3) : m(uf::simd::set(f0,f1,f2,f3)) {} FORCE_INLINE uf::simd::vector<T>::vector(bool f0, bool f1, bool f2, bool f3) : m(uf::simd::set(f0,f1,f2,f3)) {}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(const value_type& rhs) : m(rhs) {} FORCE_INLINE uf::simd::vector<T>::vector(const value_type& rhs) : m(rhs) {}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(const vector& rhs) : m(rhs.m) {} FORCE_INLINE uf::simd::vector<T>::vector(const vector& rhs) : m(rhs.m) {}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(const pod::Vector<T,1>& rhs) : vector((T) rhs[0]){} FORCE_INLINE uf::simd::vector<T>::vector(const pod::Vector<T,1>& rhs) : vector((T) rhs[0]){}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(const pod::Vector<T,2>& rhs) : vector((T) rhs[0], (T) rhs[1], 0, 0){} FORCE_INLINE uf::simd::vector<T>::vector(const pod::Vector<T,2>& rhs) : vector((T) rhs[0], (T) rhs[1], 0, 0){}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(const pod::Vector<T,3>& rhs) : vector((T) rhs[0], (T) rhs[1], (T) rhs[2], 0){} FORCE_INLINE uf::simd::vector<T>::vector(const pod::Vector<T,3>& rhs) : vector((T) rhs[0], (T) rhs[1], (T) rhs[2], 0){}
template<typename T> template<typename T>
inline uf::simd::vector<T>::vector(const pod::Vector<T,4>& rhs) : vector((T) rhs[0], (T) rhs[1], (T) rhs[2], (T) rhs[3]){} FORCE_INLINE uf::simd::vector<T>::vector(const pod::Vector<T,4>& rhs) : vector((T) rhs[0], (T) rhs[1], (T) rhs[2], (T) rhs[3]){}
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator+( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator+( const vector& rhs ) {
return uf::simd::add( *this, rhs ); return uf::simd::add( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator-( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator-( const vector& rhs ) {
return uf::simd::sub( *this, rhs ); return uf::simd::sub( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator*( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator*( const vector& rhs ) {
return uf::simd::mul( *this, rhs ); return uf::simd::mul( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator/( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator/( const vector& rhs ) {
return uf::simd::div( *this, rhs ); return uf::simd::div( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator<( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator<( const vector& rhs ) {
return uf::simd::less( *this, rhs ); return uf::simd::less( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator<=( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator<=( const vector& rhs ) {
return uf::simd::lessEquals( *this, rhs ); return uf::simd::lessEquals( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator>( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator>( const vector& rhs ) {
return uf::simd::greater( *this, rhs ); return uf::simd::greater( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator>=( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator>=( const vector& rhs ) {
return uf::simd::greaterEquals( *this, rhs ); return uf::simd::greaterEquals( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator==( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator==( const vector& rhs ) {
return uf::simd::equals( *this, rhs ); return uf::simd::equals( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T> uf::simd::vector<T>::operator!=( const vector& rhs ) { FORCE_INLINE uf::simd::vector<T> uf::simd::vector<T>::operator!=( const vector& rhs ) {
return uf::simd::notEquals( *this, rhs ); return uf::simd::notEquals( *this, rhs );
} }
template<typename T> template<typename T>
inline uf::simd::vector<T>& uf::simd::vector<T>::operator=(const uf::simd::vector<T>::value_type& rhs) { FORCE_INLINE uf::simd::vector<T>& uf::simd::vector<T>::operator=(const uf::simd::vector<T>::value_type& rhs) {
m = rhs; m = rhs;
return *this; return *this;
} }
template<typename T> template<typename T>
inline uf::simd::vector<T>& uf::simd::vector<T>::operator=(const vector& rhs) { FORCE_INLINE uf::simd::vector<T>& uf::simd::vector<T>::operator=(const vector& rhs) {
m = rhs.m; m = rhs.m;
return *this; return *this;
} }
template<typename T> template<typename T>
inline uf::simd::vector<T>& uf::simd::vector<T>::operator=(const pod::Vector<T,4>& rhs) { FORCE_INLINE uf::simd::vector<T>& uf::simd::vector<T>::operator=(const pod::Vector<T,4>& rhs) {
m = uf::simd::load(&rhs[0]); m = uf::simd::load(&rhs[0]);
return *this; return *this;
} }
template<typename T> template<typename T>
inline uf::simd::vector<T>::operator uf::simd::vector<T>::value_type() const { FORCE_INLINE uf::simd::vector<T>::operator uf::simd::vector<T>::value_type() const {
return m; return m;
} }
template<typename T> template<typename T>
template<size_t N> template<size_t N>
inline uf::simd::vector<T>::operator pod::Vector<T,N>() const { FORCE_INLINE uf::simd::vector<T>::operator pod::Vector<T,N>() const {
return uf::simd::cast<N>(*this); return uf::simd::cast<N>(*this);
} }
template<size_t N> template<size_t N>
inline pod::Vector<float,N> uf::simd::cast( const uf::simd::vector<float> v ){ FORCE_INLINE pod::Vector<float,N> uf::simd::cast( const uf::simd::vector<float> v ){
pod::Vector4f r; pod::Vector4f r;
uf::simd::store( v, &r[0] ); uf::simd::store( v, &r[0] );
return uf::vector::cast<float,N>(r); return uf::vector::cast<float,N>(r);
} }
template<size_t N> template<size_t N>
inline pod::Vector<int32_t,N> uf::simd::cast( const uf::simd::vector<int32_t> v ){ FORCE_INLINE pod::Vector<int32_t,N> uf::simd::cast( const uf::simd::vector<int32_t> v ){
pod::Vector4i r; pod::Vector4i r;
uf::simd::store( v, &r[0] ); uf::simd::store( v, &r[0] );
return uf::vector::cast<int32_t,N>(r); return uf::vector::cast<int32_t,N>(r);
} }
template<size_t N> template<size_t N>
inline pod::Vector<uint32_t,N> uf::simd::cast( const uf::simd::vector<uint32_t> v ){ FORCE_INLINE pod::Vector<uint32_t,N> uf::simd::cast( const uf::simd::vector<uint32_t> v ){
pod::Vector4ui r; pod::Vector4ui r;
uf::simd::store( v, &r[0] ); uf::simd::store( v, &r[0] );
return uf::vector::cast<uint32_t,N>(r); return uf::vector::cast<uint32_t,N>(r);
} }
inline uf::simd::vector<float> uf::simd::load( const float* f ) { FORCE_INLINE uf::simd::vector<float> uf::simd::load( const float* f ) {
// if ( uf::aligned(f, 16) ) return _mm_load_ps(f); // if ( uf::aligned(f, 16) ) return _mm_load_ps(f);
return _mm_loadu_ps( f ); return _mm_loadu_ps( f );
} }
inline void uf::simd::store( uf::simd::vector<float> v, float* f ) { FORCE_INLINE void uf::simd::store( uf::simd::vector<float> v, float* f ) {
/* if ( uf::aligned(f, 16) ) _mm_store_ps(f, v); /* if ( uf::aligned(f, 16) ) _mm_store_ps(f, v);
else */ _mm_storeu_ps( f, v ); else */ _mm_storeu_ps( f, v );
} }
inline uf::simd::vector<float> uf::simd::set( float f ) { FORCE_INLINE uf::simd::vector<float> uf::simd::set( float f ) {
return _mm_set1_ps( f ); return _mm_set1_ps( f );
} }
inline uf::simd::vector<float> uf::simd::set( float x, float y, float z, float w ) { FORCE_INLINE uf::simd::vector<float> uf::simd::set( float x, float y, float z, float w ) {
return _mm_setr_ps( x, y, z, w ); return _mm_setr_ps( x, y, z, w );
} }
inline uf::simd::vector<float> uf::simd::add( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::add( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_add_ps( x, y ); return _mm_add_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::sub( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::sub( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_sub_ps( x, y ); return _mm_sub_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::mul( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::mul( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_mul_ps( x, y ); return _mm_mul_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::div( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::div( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_div_ps( x, y ); return _mm_div_ps( x, y );
} }
/* /*
inline uf::simd::vector<float> uf::simd::hadd( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::hadd( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
#if 0 #if 0
return _mm_hadd_ps( x, y ); return _mm_hadd_ps( x, y );
#else #else
@ -168,37 +168,37 @@ inline uf::simd::vector<float> uf::simd::hadd( uf::simd::vector<float> x, uf::si
} }
*/ */
inline uf::simd::vector<float> uf::simd::min( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::min( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_min_ps( x, y ); return _mm_min_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::max( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::max( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_max_ps( x, y ); return _mm_max_ps( x, y );
} }
inline bool uf::simd::all( uf::simd::vector<float> mask) { FORCE_INLINE bool uf::simd::all( uf::simd::vector<float> mask) {
return _mm_movemask_ps( mask ) == 0xF; // all 4 bits set return _mm_movemask_ps( mask ) == 0xF; // all 4 bits set
} }
inline bool uf::simd::any( uf::simd::vector<float> mask) { FORCE_INLINE bool uf::simd::any( uf::simd::vector<float> mask) {
return _mm_movemask_ps( mask ) != 0x0; // any bit set return _mm_movemask_ps( mask ) != 0x0; // any bit set
} }
inline uf::simd::vector<float> uf::simd::less( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::less( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_cmplt_ps( x, y ); return _mm_cmplt_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::lessEquals( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::lessEquals( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_cmple_ps( x, y ); return _mm_cmple_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::greater( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::greater( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_cmpgt_ps( x, y ); return _mm_cmpgt_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::greaterEquals( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::greaterEquals( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_cmpge_ps( x, y ); return _mm_cmpge_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::equals( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::equals( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_cmpeq_ps( x, y ); return _mm_cmpeq_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::notEquals( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::notEquals( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_cmpneq_ps( x, y ); return _mm_cmpneq_ps( x, y );
} }
inline uf::simd::vector<float> uf::simd::sqrt( uf::simd::vector<float> v ) { FORCE_INLINE uf::simd::vector<float> uf::simd::sqrt( uf::simd::vector<float> v ) {
return _mm_sqrt_ps( v ); return _mm_sqrt_ps( v );
} }
@ -220,7 +220,7 @@ namespace {
return _mm_dp_ps(x, y, 0xF1); return _mm_dp_ps(x, y, 0xF1);
} }
} }
inline float uf::simd::dot( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE float uf::simd::dot( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return _mm_cvtss_f32( ::dot_impl( x, y ) ); return _mm_cvtss_f32( ::dot_impl( x, y ) );
} }
@ -235,7 +235,7 @@ namespace {
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(f)); return _mm_loadu_si128(reinterpret_cast<const __m128i*>(f));
} }
} }
inline uf::simd::vector<int32_t> uf::simd::load( const int32_t* f ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::load( const int32_t* f ) {
return ::load_impl( f ); return ::load_impl( f );
} }
@ -255,21 +255,21 @@ namespace {
else*/ _mm_storeu_si128(reinterpret_cast<__m128i*>(f), v); else*/ _mm_storeu_si128(reinterpret_cast<__m128i*>(f), v);
} }
} }
inline void uf::simd::store( uf::simd::vector<int32_t> v, int32_t* f ) { FORCE_INLINE void uf::simd::store( uf::simd::vector<int32_t> v, int32_t* f ) {
return ::store_impl( v, f ); return ::store_impl( v, f );
} }
inline uf::simd::vector<int32_t> uf::simd::set( int32_t f ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::set( int32_t f ) {
return _mm_set1_epi32(f); return _mm_set1_epi32(f);
} }
inline uf::simd::vector<int32_t> uf::simd::set( int32_t x, int32_t y, int32_t z, int32_t w ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::set( int32_t x, int32_t y, int32_t z, int32_t w ) {
return _mm_setr_epi32(x, y, z, w); return _mm_setr_epi32(x, y, z, w);
} }
inline uf::simd::vector<int32_t> uf::simd::add( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::add( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return _mm_add_epi32(x, y); return _mm_add_epi32(x, y);
} }
inline uf::simd::vector<int32_t> uf::simd::sub( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::sub( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return _mm_sub_epi32(x, y); return _mm_sub_epi32(x, y);
} }
@ -285,18 +285,18 @@ namespace {
return _mm_mullo_epi32(x, y); return _mm_mullo_epi32(x, y);
} }
} }
inline uf::simd::vector<int32_t> uf::simd::mul( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::mul( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return ::mul_impl( x, y ); return ::mul_impl( x, y );
} }
inline uf::simd::vector<int32_t> uf::simd::div( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::div( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
auto X = uf::simd::cast( x ); auto X = uf::simd::cast( x );
auto Y = uf::simd::cast( y ); auto Y = uf::simd::cast( y );
return uf::simd::set( X[0] / Y[0], X[1] / Y[1], X[2] / Y[2], X[3] / Y[3] ); return uf::simd::set( X[0] / Y[0], X[1] / Y[1], X[2] / Y[2], X[3] / Y[3] );
} }
/* /*
inline uf::simd::vector<int32_t> uf::simd::hadd( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::hadd( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
auto X = uf::simd::cast( x ); auto X = uf::simd::cast( x );
auto Y = uf::simd::cast( y ); auto Y = uf::simd::cast( y );
return uf::simd::set( X[0] + Y[0], X[1] + Y[1], X[2] + Y[2], X[3] + Y[3] ); return uf::simd::set( X[0] + Y[0], X[1] + Y[1], X[2] + Y[2], X[3] + Y[3] );
@ -325,18 +325,18 @@ namespace {
return _mm_max_epi32(x, y); return _mm_max_epi32(x, y);
} }
} }
inline uf::simd::vector<int32_t> uf::simd::min( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::min( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return ::min_impl(x, y); return ::min_impl(x, y);
} }
inline uf::simd::vector<int32_t> uf::simd::max( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::max( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return ::max_impl(x, y); return ::max_impl(x, y);
} }
inline bool uf::simd::all( uf::simd::vector<int32_t> mask) { FORCE_INLINE bool uf::simd::all( uf::simd::vector<int32_t> mask) {
return _mm_movemask_epi8( mask ) == 0xFFFF; // all 4 bits set return _mm_movemask_epi8( mask ) == 0xFFFF; // all 4 bits set
} }
inline bool uf::simd::any( uf::simd::vector<int32_t> mask) { FORCE_INLINE bool uf::simd::any( uf::simd::vector<int32_t> mask) {
return _mm_movemask_epi8( mask ) != 0x0; // any bit set return _mm_movemask_epi8( mask ) != 0x0; // any bit set
} }
@ -383,30 +383,30 @@ namespace {
return _mm_xor_si128(gt, _mm_set1_epi32(-1)); return _mm_xor_si128(gt, _mm_set1_epi32(-1));
} }
} }
inline uf::simd::vector<int32_t> uf::simd::less( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::less( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return ::less_impl( x, y ); return ::less_impl( x, y );
} }
inline uf::simd::vector<int32_t> uf::simd::lessEquals( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::lessEquals( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return ::lessEquals_impl( x, y ); return ::lessEquals_impl( x, y );
} }
inline uf::simd::vector<int32_t> uf::simd::greater( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::greater( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return ::greater_impl( x, y ); return ::greater_impl( x, y );
} }
inline uf::simd::vector<int32_t> uf::simd::greaterEquals( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::greaterEquals( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return ::greaterEquals_impl( x, y ); return ::greaterEquals_impl( x, y );
} }
inline uf::simd::vector<int32_t> uf::simd::equals( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::equals( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return _mm_cmpeq_epi32(x, y); return _mm_cmpeq_epi32(x, y);
} }
inline uf::simd::vector<int32_t> uf::simd::notEquals( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::notEquals( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
return _mm_xor_si128(_mm_cmpeq_epi32(x, y), _mm_set1_epi32(-1)); return _mm_xor_si128(_mm_cmpeq_epi32(x, y), _mm_set1_epi32(-1));
} }
inline uf::simd::vector<int32_t> uf::simd::sqrt( uf::simd::vector<int32_t> v ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::sqrt( uf::simd::vector<int32_t> v ) {
auto V = uf::simd::cast( v ); auto V = uf::simd::cast( v );
return uf::simd::set( (int32_t) std::sqrt(V[0]), (int32_t) std::sqrt(V[1]), (int32_t) std::sqrt(V[2]), (int32_t) std::sqrt(V[3]) ); return uf::simd::set( (int32_t) std::sqrt(V[0]), (int32_t) std::sqrt(V[1]), (int32_t) std::sqrt(V[2]), (int32_t) std::sqrt(V[3]) );
} }
inline int32_t uf::simd::dot( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) { FORCE_INLINE int32_t uf::simd::dot( uf::simd::vector<int32_t> x, uf::simd::vector<int32_t> y ) {
auto X = uf::simd::cast( x ); auto X = uf::simd::cast( x );
auto Y = uf::simd::cast( y ); auto Y = uf::simd::cast( y );
return X[0] * Y[0] + X[1] * Y[1] + X[2] * Y[2] + X[3] * Y[3]; return X[0] * Y[0] + X[1] * Y[1] + X[2] * Y[2] + X[3] * Y[3];
@ -438,23 +438,23 @@ namespace {
else*/ _mm_storeu_si128(reinterpret_cast<__m128i*>(f), v); else*/ _mm_storeu_si128(reinterpret_cast<__m128i*>(f), v);
} }
} }
inline uf::simd::vector<uint32_t> uf::simd::load( const uint32_t* f ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::load( const uint32_t* f ) {
return ::load_impl( f ); return ::load_impl( f );
} }
inline void uf::simd::store( uf::simd::vector<uint32_t> v, uint32_t* f ) { FORCE_INLINE void uf::simd::store( uf::simd::vector<uint32_t> v, uint32_t* f ) {
return ::store_impl( v, f ); return ::store_impl( v, f );
} }
inline uf::simd::vector<uint32_t> uf::simd::set( uint32_t f ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::set( uint32_t f ) {
return _mm_set1_epi32(f); return _mm_set1_epi32(f);
} }
inline uf::simd::vector<uint32_t> uf::simd::set( uint32_t x, uint32_t y, uint32_t z, uint32_t w ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::set( uint32_t x, uint32_t y, uint32_t z, uint32_t w ) {
return _mm_setr_epi32(x, y, z, w); return _mm_setr_epi32(x, y, z, w);
} }
inline uf::simd::vector<uint32_t> uf::simd::add( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::add( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return _mm_add_epi32(x, y); return _mm_add_epi32(x, y);
} }
inline uf::simd::vector<uint32_t> uf::simd::sub( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::sub( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return _mm_sub_epi32(x, y); return _mm_sub_epi32(x, y);
} }
@ -470,17 +470,17 @@ namespace {
return _mm_mullo_epi32(x, y); return _mm_mullo_epi32(x, y);
} }
} }
inline uf::simd::vector<uint32_t> uf::simd::mul( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::mul( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return ::mul_impl( x, y ); return ::mul_impl( x, y );
} }
inline uf::simd::vector<uint32_t> uf::simd::div( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::div( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
auto X = uf::simd::cast( x ); auto X = uf::simd::cast( x );
auto Y = uf::simd::cast( y ); auto Y = uf::simd::cast( y );
return uf::simd::set( X[0] / Y[0], X[1] / Y[1], X[2] / Y[2], X[3] / Y[3] ); return uf::simd::set( X[0] / Y[0], X[1] / Y[1], X[2] / Y[2], X[3] / Y[3] );
} }
/* /*
inline uf::simd::vector<uint32_t> uf::simd::hadd( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::hadd( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
auto X = uf::simd::cast( x ); auto X = uf::simd::cast( x );
auto Y = uf::simd::cast( y ); auto Y = uf::simd::cast( y );
return uf::simd::set( X[0] + Y[0], X[1] + Y[1], X[2] + Y[2], X[3] + Y[3] ); return uf::simd::set( X[0] + Y[0], X[1] + Y[1], X[2] + Y[2], X[3] + Y[3] );
@ -510,17 +510,17 @@ namespace {
return _mm_max_epu32(x, y); // unsigned max return _mm_max_epu32(x, y); // unsigned max
} }
} }
inline uf::simd::vector<uint32_t> uf::simd::min( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::min( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return ::min_impl(x, y); return ::min_impl(x, y);
} }
inline uf::simd::vector<uint32_t> uf::simd::max( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::max( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return ::max_impl(x, y); return ::max_impl(x, y);
} }
inline bool uf::simd::all( uf::simd::vector<uint32_t> mask) { FORCE_INLINE bool uf::simd::all( uf::simd::vector<uint32_t> mask) {
return _mm_movemask_epi8( mask ) == 0xFFFF; // all 4 bits set return _mm_movemask_epi8( mask ) == 0xFFFF; // all 4 bits set
} }
inline bool uf::simd::any( uf::simd::vector<uint32_t> mask) { FORCE_INLINE bool uf::simd::any( uf::simd::vector<uint32_t> mask) {
return _mm_movemask_epi8( mask ) != 0x0; // any bit set return _mm_movemask_epi8( mask ) != 0x0; // any bit set
} }
@ -573,43 +573,43 @@ namespace {
return _mm_xor_si128(lt, _mm_set1_epi32(-1)); // invert mask return _mm_xor_si128(lt, _mm_set1_epi32(-1)); // invert mask
} }
} }
inline uf::simd::vector<uint32_t> uf::simd::less( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::less( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return ::less_impl( x, y ); return ::less_impl( x, y );
} }
inline uf::simd::vector<uint32_t> uf::simd::lessEquals( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::lessEquals( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return ::lessEquals_impl( x, y ); return ::lessEquals_impl( x, y );
} }
inline uf::simd::vector<uint32_t> uf::simd::greater( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::greater( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return ::greater_impl( x, y ); return ::greater_impl( x, y );
} }
inline uf::simd::vector<uint32_t> uf::simd::greaterEquals( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::greaterEquals( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return ::greaterEquals_impl( x, y ); return ::greaterEquals_impl( x, y );
} }
inline uf::simd::vector<uint32_t> uf::simd::equals( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::equals( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return _mm_cmpeq_epi32(x, y); return _mm_cmpeq_epi32(x, y);
} }
inline uf::simd::vector<uint32_t> uf::simd::notEquals( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::notEquals( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
return _mm_xor_si128(_mm_cmpeq_epi32(x, y), _mm_set1_epi32(-1)); return _mm_xor_si128(_mm_cmpeq_epi32(x, y), _mm_set1_epi32(-1));
} }
inline uf::simd::vector<uint32_t> uf::simd::sqrt( uf::simd::vector<uint32_t> v ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::sqrt( uf::simd::vector<uint32_t> v ) {
auto V = uf::simd::cast( v ); auto V = uf::simd::cast( v );
return uf::simd::set( (uint32_t) std::sqrt(V[0]), (uint32_t) std::sqrt(V[1]), (uint32_t) std::sqrt(V[2]), (uint32_t) std::sqrt(V[3]) ); return uf::simd::set( (uint32_t) std::sqrt(V[0]), (uint32_t) std::sqrt(V[1]), (uint32_t) std::sqrt(V[2]), (uint32_t) std::sqrt(V[3]) );
} }
inline uint32_t uf::simd::dot( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) { FORCE_INLINE uint32_t uf::simd::dot( uf::simd::vector<uint32_t> x, uf::simd::vector<uint32_t> y ) {
auto X = uf::simd::cast( x ); auto X = uf::simd::cast( x );
auto Y = uf::simd::cast( y ); auto Y = uf::simd::cast( y );
return X[0] * Y[0] + X[1] * Y[1] + X[2] * Y[2] + X[3] * Y[3]; return X[0] * Y[0] + X[1] * Y[1] + X[2] * Y[2] + X[3] * Y[3];
} }
inline uf::simd::vector<float> uf::simd::set_f( bool x, bool y, bool z, bool w ) { FORCE_INLINE uf::simd::vector<float> uf::simd::set_f( bool x, bool y, bool z, bool w ) {
return _mm_castsi128_ps(_mm_setr_epi32(::boolMask(x), ::boolMask(y), ::boolMask(z), ::boolMask(w))); return _mm_castsi128_ps(_mm_setr_epi32(::boolMask(x), ::boolMask(y), ::boolMask(z), ::boolMask(w)));
} }
inline uf::simd::vector<int32_t> uf::simd::set_i( bool x, bool y, bool z, bool w ) { FORCE_INLINE uf::simd::vector<int32_t> uf::simd::set_i( bool x, bool y, bool z, bool w ) {
return _mm_setr_epi32(::boolMask(x), ::boolMask(y), ::boolMask(z), ::boolMask(w)); return _mm_setr_epi32(::boolMask(x), ::boolMask(y), ::boolMask(z), ::boolMask(w));
} }
inline uf::simd::vector<uint32_t> uf::simd::set_ui( bool x, bool y, bool z, bool w ) { FORCE_INLINE uf::simd::vector<uint32_t> uf::simd::set_ui( bool x, bool y, bool z, bool w ) {
return _mm_setr_epi32(::boolMask(x), ::boolMask(y), ::boolMask(z), ::boolMask(w)); return _mm_setr_epi32(::boolMask(x), ::boolMask(y), ::boolMask(z), ::boolMask(w));
} }
@ -635,15 +635,15 @@ namespace {
} }
} }
inline uf::simd::vector<float> uf::simd::cross( uf::simd::vector<float> x, uf::simd::vector<float> y ) { FORCE_INLINE uf::simd::vector<float> uf::simd::cross( uf::simd::vector<float> x, uf::simd::vector<float> y ) {
return ::cross_impl( x, y ); return ::cross_impl( x, y );
} }
inline uf::simd::vector<float> uf::simd::normalize( uf::simd::vector<float> v ) { FORCE_INLINE uf::simd::vector<float> uf::simd::normalize( uf::simd::vector<float> v ) {
__m128 len = _mm_sqrt_ss( ::dot_impl( v,v ) ); __m128 len = _mm_sqrt_ss( ::dot_impl( v,v ) );
len = _mm_shuffle_ps(len, len, 0x00); len = _mm_shuffle_ps(len, len, 0x00);
return _mm_div_ps(v, len); return _mm_div_ps(v, len);
} }
inline uf::simd::vector<float> uf::simd::normalize_fast( uf::simd::vector<float> v ) { FORCE_INLINE uf::simd::vector<float> uf::simd::normalize_fast( uf::simd::vector<float> v ) {
__m128 invLen = _mm_rsqrt_ps(::dot_impl(v, v)); __m128 invLen = _mm_rsqrt_ps(::dot_impl(v, v));
return _mm_mul_ps(v, invLen); return _mm_mul_ps(v, invLen);
} }

View File

@ -235,18 +235,30 @@ namespace {
bool aabbSphere( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool aabbSphere( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( AABB, SPHERE ); ASSERT_COLLIDER_TYPES( AABB, SPHERE );
return ::sphereAabb( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::sphereAabb( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
bool aabbPlane( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool aabbPlane( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( AABB, PLANE ); ASSERT_COLLIDER_TYPES( AABB, PLANE );
return ::planeAabb( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::planeAabb( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
bool aabbCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool aabbCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( AABB, CAPSULE ); ASSERT_COLLIDER_TYPES( AABB, CAPSULE );
return ::capsuleAabb( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::capsuleAabb( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
bool aabbMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool aabbMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( AABB, MESH ); ASSERT_COLLIDER_TYPES( AABB, MESH );
return ::meshAabb( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::meshAabb( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
} }

View File

@ -161,7 +161,7 @@ namespace {
return index; return index;
} }
void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector<pod::PhysicsBody*>& bodies, int capacity = 2 ) { void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector<pod::PhysicsBody*>& bodies, int capacity = 2, bool filters = false, bool filterType = false ) {
if ( bodies.empty() ) return; if ( bodies.empty() ) return;
bvh.indices.clear(); bvh.indices.clear();
@ -169,18 +169,21 @@ namespace {
bvh.indices.reserve(bodies.size()); bvh.indices.reserve(bodies.size());
// stores bounds // stores bounds
uf::stl::vector<pod::AABB> bounds; uf::stl::vector<pod::AABB> bounds(bodies.size(), { {FLT_MAX, FLT_MAX, FLT_MAX}, {-FLT_MAX, -FLT_MAX, -FLT_MAX} });
bounds.reserve(bodies.size());
// populate initial indices and bounds // populate initial indices and bounds
for ( auto i = 0; i < bodies.size(); ++i ) { for ( auto i = 0; i < bodies.size(); ++i ) {
bounds.emplace_back(::computeAABB( *bodies[i] )); if ( filters && bodies[i]->isStatic != filterType ) continue;
bvh.indices.emplace_back(i); // i => bodies[i]
bounds[i] = ::computeAABB(*bodies[i]);
bvh.indices.emplace_back(i);
} }
if ( bvh.indices.empty() ) return; // inserted nothing
// recursively build BVH from indices // recursively build BVH from indices
if ( ::useBvhSahBodies ) ::buildBVHNode_SAH( bvh, bounds, 0, bodies.size(), capacity ); if ( ::useBvhSahBodies ) ::buildBVHNode_SAH( bvh, bounds, 0, bvh.indices.size(), capacity );
else ::buildBVHNode( bvh, bounds, 0, bodies.size(), capacity ); else ::buildBVHNode( bvh, bounds, 0, bvh.indices.size(), capacity );
// flatten if requested // flatten if requested
if ( ::flattenBvhBodies ) ::flattenBVH( bvh, 0 ); if ( ::flattenBvhBodies ) ::flattenBVH( bvh, 0 );
@ -221,8 +224,8 @@ namespace {
} }
// recursively build BVH from indices // recursively build BVH from indices
if ( ::useBvhSahMeshes ) ::buildBVHNode_SAH( bvh, bounds, 0, triangles, capacity ); if ( ::useBvhSahMeshes ) ::buildBVHNode_SAH( bvh, bounds, 0, bvh.indices.size(), capacity );
else ::buildBVHNode( bvh, bounds, 0, triangles, capacity ); else ::buildBVHNode( bvh, bounds, 0, bvh.indices.size(), capacity );
// flatten if requested // flatten if requested
if ( ::flattenBvhMeshes ) ::flattenBVH( bvh, 0 ); if ( ::flattenBvhMeshes ) ::flattenBVH( bvh, 0 );
@ -233,14 +236,19 @@ namespace {
namespace { namespace {
pod::BVH::UpdatePolicy::Decision decideBVHUpdate( const pod::BVH& bvh, const uf::stl::vector<pod::PhysicsBody*>& bodies, const pod::BVH::UpdatePolicy& policy, int frameCounter ) { pod::BVH::UpdatePolicy::Decision decideBVHUpdate( const pod::BVH& bvh, const uf::stl::vector<pod::PhysicsBody*>& bodies, const pod::BVH::UpdatePolicy& policy, int frameCounter ) {
if ( bvh.indices.empty() || bvh.nodes.empty() || bvh.dirty ) return pod::BVH::UpdatePolicy::Decision::REBUILD; if ( bvh.indices.empty() || bvh.nodes.empty() || bvh.dirty ) {
UF_MSG_DEBUG("Force rebuild, bvh.indices.empty={}, bvh.nodes.empty={}, bvh.dirty={}", bvh.indices.empty(), bvh.nodes.empty(), bvh.dirty );
return pod::BVH::UpdatePolicy::Decision::REBUILD;
}
if ( bodies.empty() ) return pod::BVH::UpdatePolicy::Decision::NONE; if ( bodies.empty() ) return pod::BVH::UpdatePolicy::Decision::NONE;
int dirtyCount = 0; int dirtyCount = 0;
float oldRootArea = ::aabbSurfaceArea( bvh.nodes[0].bounds ); float oldRootArea = ::aabbSurfaceArea( bvh.nodes[0].bounds );
// check each body // check each body
for ( const auto* body : bodies ) { for ( auto idx : bvh.indices ) {
const auto* body = bodies[idx];
pod::AABB newBounds = ::computeAABB(*body); pod::AABB newBounds = ::computeAABB(*body);
pod::AABB oldBounds = body->bounds; pod::AABB oldBounds = body->bounds;
@ -258,13 +266,14 @@ namespace {
float dirtyRatio = (float) dirtyCount / (float) bodies.size(); float dirtyRatio = (float) dirtyCount / (float) bodies.size();
// compute new root bounds // compute new root bounds
pod::AABB newRoot = bodies[0]->bounds; pod::AABB newRoot = bodies[bvh.indices[0]]->bounds;
for ( auto i = 1; i < bodies.size(); ++i ) { for ( auto i = 1; i < bvh.indices.size(); ++i ) {
newRoot = ::mergeAabb(newRoot, bodies[i]->bounds); newRoot = ::mergeAabb(newRoot, bodies[bvh.indices[i]]->bounds);
} }
float newRootArea = ::aabbSurfaceArea( newRoot ); float newRootArea = ::aabbSurfaceArea( newRoot );
if ( dirtyRatio > policy.dirtyRatioThreshold || newRootArea > oldRootArea * policy.overlapThreshold || frameCounter % policy.maxFramesBeforeRebuild == 0 ) { if ( dirtyRatio > policy.dirtyRatioThreshold || newRootArea > oldRootArea * policy.overlapThreshold || frameCounter % policy.maxFramesBeforeRebuild == 0 ) {
UF_MSG_DEBUG( "Rebuild, dirtyRatio={}, oldRootArea={}, newRootArea={}, frameCounter={}", dirtyRatio, oldRootArea, newRootArea, frameCounter );
return pod::BVH::UpdatePolicy::Decision::REBUILD; return pod::BVH::UpdatePolicy::Decision::REBUILD;
} }
if ( dirtyCount > 0 ) return pod::BVH::UpdatePolicy::Decision::REFIT; if ( dirtyCount > 0 ) return pod::BVH::UpdatePolicy::Decision::REFIT;
@ -445,6 +454,8 @@ namespace {
for ( auto j = 0; j < nodeB.count; ++j ) { for ( auto j = 0; j < nodeB.count; ++j ) {
int bodyA = bvhA.indices[nodeA.start + i]; int bodyA = bvhA.indices[nodeA.start + i];
int bodyB = bvhB.indices[nodeB.start + j]; int bodyB = bvhB.indices[nodeB.start + j];
if ( bodyA == bodyB ) continue;
if ( bodyA > bodyB ) std::swap( bodyA, bodyB );
pairs.emplace(bodyA, bodyB); pairs.emplace(bodyA, bodyB);
} }

View File

@ -104,6 +104,9 @@ namespace {
} }
bool capsuleMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool capsuleMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( CAPSULE, MESH ); ASSERT_COLLIDER_TYPES( CAPSULE, MESH );
return meshCapsule( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::meshCapsule( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
} }

View File

@ -18,17 +18,17 @@ namespace {
int broadphaseBvhCapacity = 1; int broadphaseBvhCapacity = 1;
int meshBvhCapacity = 1; int meshBvhCapacity = 1;
bool flattenBvhBodies = true; bool flattenBvhBodies = false; // bugged
bool flattenBvhMeshes = true; bool flattenBvhMeshes = true;
// it actually seems slower to use these...... // it actually seems slower to use these......
bool useBvhSahBodies = false; bool useBvhSahBodies = false;
bool useBvhSahMeshes = false; bool useBvhSahMeshes = false;
bool useSplitBvhs = false; // currently bugged if enabled bool useSplitBvhs = true; // currently bugged if enabled
int solverIterations = 10; int solverIterations = 10;
float baumgarteCorrectionPercent = 0.02f; float baumgarteCorrectionPercent = 0.2f;
float baumgarteCorrectionSlop = 0.01f; float baumgarteCorrectionSlop = 0.01f;
uf::stl::unordered_map<size_t, pod::Manifold> manifoldsCache; uf::stl::unordered_map<size_t, pod::Manifold> manifoldsCache;
@ -39,7 +39,7 @@ namespace {
.displacementThreshold = 0.25f, .displacementThreshold = 0.25f,
.overlapThreshold = 2.0f, .overlapThreshold = 2.0f,
.dirtyRatioThreshold = 0.3f, .dirtyRatioThreshold = 0.3f,
.maxFramesBeforeRebuild = 3600, .maxFramesBeforeRebuild = 600,
}; };
} }
@ -122,31 +122,26 @@ void uf::physics::impl::step( pod::World& world, float dt ) {
auto& dynamicBvh = world.dynamicBvh; auto& dynamicBvh = world.dynamicBvh;
auto& staticBvh = world.staticBvh; auto& staticBvh = world.staticBvh;
uf::stl::vector<pod::PhysicsBody*> staticBodies;
uf::stl::vector<pod::PhysicsBody*> dynamicBodies;
if ( bodies.empty() ) return; if ( bodies.empty() ) return;
++::frameCounter; ++::frameCounter;
for ( auto* body : bodies ) { for ( auto* body : bodies ) {
( body->isStatic ? staticBodies : dynamicBodies ).emplace_back(body);
if ( !body->activity.awake ) continue; if ( !body->activity.awake ) continue;
::integrate( *body, dt ); ::integrate( *body, dt );
} }
// rebuild static bvh if diry // rebuild static bvh if diry
if ( staticBvh.dirty && ::useSplitBvhs ) { if ( staticBvh.dirty && ::useSplitBvhs ) {
::buildBroadphaseBVH( staticBvh, staticBodies, ::broadphaseBvhCapacity ); // (re)build ::buildBroadphaseBVH( staticBvh, bodies, ::broadphaseBvhCapacity, ::useSplitBvhs, true ); // (re)build
} }
switch ( ::decideBVHUpdate( dynamicBvh, ::useSplitBvhs ? dynamicBodies : bodies, ::bvhUpdatePolicy, ::frameCounter ) ) { switch ( ::decideBVHUpdate( dynamicBvh, bodies, ::bvhUpdatePolicy, ::frameCounter ) ) {
case pod::BVH::UpdatePolicy::Decision::REBUILD: { case pod::BVH::UpdatePolicy::Decision::REBUILD: {
::buildBroadphaseBVH( dynamicBvh, ::useSplitBvhs ? dynamicBodies : bodies, ::broadphaseBvhCapacity ); // (re)build ::buildBroadphaseBVH( dynamicBvh, bodies, ::broadphaseBvhCapacity, ::useSplitBvhs, false ); // (re)build
} break; } break;
case pod::BVH::UpdatePolicy::Decision::REFIT: { case pod::BVH::UpdatePolicy::Decision::REFIT: {
::refitBVH( dynamicBvh, ::useSplitBvhs ? dynamicBodies : bodies ); // refit ::refitBVH( dynamicBvh, bodies ); // refit
} break; } break;
case pod::BVH::UpdatePolicy::Decision::NONE: case pod::BVH::UpdatePolicy::Decision::NONE:
default: default:
@ -157,7 +152,9 @@ void uf::physics::impl::step( pod::World& world, float dt ) {
// query for overlaps // query for overlaps
pod::BVH::pairs_t pairs; pod::BVH::pairs_t pairs;
::queryOverlaps( dynamicBvh, pairs ); ::queryOverlaps( dynamicBvh, pairs );
if ( ::useSplitBvhs ) ::queryOverlaps( dynamicBvh, staticBvh, pairs ); if ( ::useSplitBvhs ) {
::queryOverlaps( dynamicBvh, staticBvh, pairs );
}
// build islands // build islands
uf::stl::vector<pod::Island> islands; uf::stl::vector<pod::Island> islands;
@ -169,7 +166,8 @@ void uf::physics::impl::step( pod::World& world, float dt ) {
// iterate overlaps // iterate overlaps
uf::stl::vector<pod::Manifold> manifolds; uf::stl::vector<pod::Manifold> manifolds;
manifolds.reserve(::reserveCount); manifolds.reserve(::reserveCount);
for ( auto& [ia, ib] : pairs ) {
for ( auto [ia, ib] : pairs ) {
auto& a = *bodies[ia]; auto& a = *bodies[ia];
auto& b = *bodies[ib]; auto& b = *bodies[ib];
@ -196,6 +194,7 @@ void uf::physics::impl::step( pod::World& world, float dt ) {
// wake up bodies // wake up bodies
if ( a.activity.awake && !b.activity.awake ) ::wakeBody( b ); if ( a.activity.awake && !b.activity.awake ) ::wakeBody( b );
if ( b.activity.awake && !a.activity.awake ) ::wakeBody( a ); if ( b.activity.awake && !a.activity.awake ) ::wakeBody( a );
// store manifold // store manifold
manifolds.emplace_back(manifold); manifolds.emplace_back(manifold);
} }
@ -209,7 +208,8 @@ void uf::physics::impl::step( pod::World& world, float dt ) {
if ( ::warmupSolver ) ::storeManifolds( manifolds, ::manifoldsCache ); if ( ::warmupSolver ) ::storeManifolds( manifolds, ::manifoldsCache );
// recompute bounds for further queries // recompute bounds for further queries
for ( auto* body : dynamicBodies ) { for ( auto* body : bodies ) {
if ( body->isStatic ) continue;
body->bounds = ::computeAABB( *body ); body->bounds = ::computeAABB( *body );
} }
} }
@ -476,7 +476,7 @@ pod::RayQuery uf::physics::impl::rayCast( const pod::Ray& ray, const pod::World&
rayHit.contact.penetration = maxDistance; rayHit.contact.penetration = maxDistance;
auto& dynamicBvh = world.dynamicBvh; auto& dynamicBvh = world.dynamicBvh;
auto& staticBvh = world.dynamicBvh; auto& staticBvh = world.staticBvh;
auto& bodies = world.bodies; auto& bodies = world.bodies;
uf::stl::vector<int32_t> candidates; uf::stl::vector<int32_t> candidates;
@ -485,7 +485,9 @@ pod::RayQuery uf::physics::impl::rayCast( const pod::Ray& ray, const pod::World&
for ( auto i : candidates ) { for ( auto i : candidates ) {
auto* b = bodies[i]; auto* b = bodies[i];
if ( body == b ) continue; if ( body == b ) continue;
switch ( b->collider.type ) { switch ( b->collider.type ) {
case pod::ShapeType::AABB: rayAabb( ray, *b, rayHit ); break; case pod::ShapeType::AABB: rayAabb( ray, *b, rayHit ); break;
case pod::ShapeType::SPHERE: raySphere( ray, *b, rayHit ); break; case pod::ShapeType::SPHERE: raySphere( ray, *b, rayHit ); break;

View File

@ -48,10 +48,16 @@ namespace {
} }
bool planeCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool planeCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( PLANE, CAPSULE ); ASSERT_COLLIDER_TYPES( PLANE, CAPSULE );
return capsulePlane( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::capsulePlane( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
bool planeMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool planeMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( PLANE, MESH ); ASSERT_COLLIDER_TYPES( PLANE, MESH );
return meshPlane( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::meshPlane( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
} }

View File

@ -53,14 +53,23 @@ namespace {
} }
bool spherePlane( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool spherePlane( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( SPHERE, PLANE ); ASSERT_COLLIDER_TYPES( SPHERE, PLANE );
return planeSphere( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::planeSphere( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
bool sphereCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool sphereCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( SPHERE, CAPSULE ); ASSERT_COLLIDER_TYPES( SPHERE, CAPSULE );
return capsuleSphere( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::capsuleSphere( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
bool sphereMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { bool sphereMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) {
ASSERT_COLLIDER_TYPES( SPHERE, MESH ); ASSERT_COLLIDER_TYPES( SPHERE, MESH );
return meshSphere( b, a, manifold, eps ); auto start = manifold.points.size();
if ( !::meshSphere( b, a, manifold, eps ) ) return false;
for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal;
return true;
} }
} }