diff --git a/engine/inc/uf/macros.h b/engine/inc/uf/macros.h index 72bd3e51..84e3898f 100644 --- a/engine/inc/uf/macros.h +++ b/engine/inc/uf/macros.h @@ -7,6 +7,10 @@ #define S_1(x) #x #define S_2(x) S_1(x) +// negligible increase in the lib size, mainly to ensure SIMD intrinsics do inline +#define FORCE_INLINE __attribute__((always_inline)) inline +// #define FORCE_INLINE inline + #define TIMER(x, ...)\ static bool first = true;\ static uf::Timer timer(false);\ diff --git a/engine/inc/uf/utils/math/matrix.h b/engine/inc/uf/utils/math/matrix.h index 24508792..f1692d7b 100644 --- a/engine/inc/uf/utils/math/matrix.h +++ b/engine/inc/uf/utils/math/matrix.h @@ -16,11 +16,11 @@ namespace pod { static const size_t columns = C; // Overload access // Accessing via subscripts - inline T& operator[](size_t i); - inline const T& operator[](size_t i) const; + FORCE_INLINE T& operator[](size_t i); + FORCE_INLINE const T& operator[](size_t i) const; - inline T& operator()(size_t r, size_t c); - inline const T& operator()(size_t r, size_t c) const; + FORCE_INLINE T& operator()(size_t r, size_t c); + FORCE_INLINE const T& operator()(size_t r, size_t c) const; // Arithmetic Matrix operator*( const Matrix& matrix ) const; @@ -86,18 +86,18 @@ namespace uf { template pod::Matrix4t /*UF_API*/ orthographic( T, T, T, T, T, T ); template pod::Matrix4t /*UF_API*/ orthographic( T, T, T, T ); - template pod::Matrix4t inline /*UF_API*/ orthographic( const pod::Vector2t& lr, const pod::Vector2t& bt, const pod::Vector2t& nf ) { + template pod::Matrix4t FORCE_INLINE /*UF_API*/ orthographic( const pod::Vector2t& lr, const pod::Vector2t& bt, const pod::Vector2t& nf ) { return orthographic( lr.x, lr.y, bt.x, bt.y, nf.x, nf.y ); } - template pod::Matrix4t inline /*UF_API*/ orthographic( const pod::Vector2t& lr, const pod::Vector2t& bt ) { + template pod::Matrix4t FORCE_INLINE /*UF_API*/ orthographic( const pod::Vector2t& lr, const pod::Vector2t& bt ) { return orthographic( lr.x, lr.y, bt.x, bt.y ); } template pod::Matrix4t /*UF_API*/ perspective( T, T, T, T ); - template pod::Matrix4t inline /*UF_API*/ perspective( T fov, T raidou, const pod::Vector2f& range ) { + template pod::Matrix4t FORCE_INLINE /*UF_API*/ perspective( T fov, T raidou, const pod::Vector2f& range ) { return perspective( fov, raidou, range.x, range.y ); } - template pod::Matrix4t inline /*UF_API*/ perspective( T fov, const pod::Vector2ui& size, const pod::Vector2f& range ) { + template pod::Matrix4t FORCE_INLINE /*UF_API*/ perspective( T fov, const pod::Vector2ui& size, const pod::Vector2f& range ) { return perspective( fov, (T) size.x / (T) size.y, range.x, range.y ); } // Setting diff --git a/engine/inc/uf/utils/math/matrix/pod.inl b/engine/inc/uf/utils/math/matrix/pod.inl index 9ae45090..36998bce 100644 --- a/engine/inc/uf/utils/math/matrix/pod.inl +++ b/engine/inc/uf/utils/math/matrix/pod.inl @@ -5,19 +5,19 @@ #define INDEX( R, C, r, c ) COL_MAJOR_INDEX( R, C, r, c ) template -inline T& pod::Matrix::operator[](size_t i) { +FORCE_INLINE T& pod::Matrix::operator[](size_t i) { return this->components[i]; } template -inline const T& pod::Matrix::operator[](size_t i) const { +FORCE_INLINE const T& pod::Matrix::operator[](size_t i) const { return this->components[i]; } template -inline T& pod::Matrix::operator()(size_t r, size_t c) { +FORCE_INLINE T& pod::Matrix::operator()(size_t r, size_t c) { return this->components[INDEX( R, C, r, c )]; } template -inline const T& pod::Matrix::operator()(size_t r, size_t c) const { +FORCE_INLINE const T& pod::Matrix::operator()(size_t r, size_t c) const { return this->components[INDEX( R, C, r, c )]; } @@ -57,27 +57,27 @@ template pod::Matrix uf: return matrix; } template -inline pod::Matrix pod::Matrix::operator*( const Matrix& matrix ) const { +FORCE_INLINE pod::Matrix pod::Matrix::operator*( const Matrix& matrix ) const { return uf::matrix::multiply(*this, matrix); } template -inline pod::Matrix pod::Matrix::operator*( T scalar ) const { +FORCE_INLINE pod::Matrix pod::Matrix::operator*( T scalar ) const { return uf::matrix::multiplyAll(*this, scalar); } template -inline pod::Matrix pod::Matrix::operator+( const Matrix& matrix ) const { +FORCE_INLINE pod::Matrix pod::Matrix::operator+( const Matrix& matrix ) const { return uf::matrix::add(*this, matrix); } template -inline pod::Matrix& pod::Matrix::operator*=( const Matrix& matrix ) { +FORCE_INLINE pod::Matrix& pod::Matrix::operator*=( const Matrix& matrix ) { return uf::matrix::multiply_(*this, matrix); } template -inline bool pod::Matrix::operator==( const Matrix& matrix ) const { +FORCE_INLINE bool pod::Matrix::operator==( const Matrix& matrix ) const { return uf::matrix::equals( *this, matrix ); } template -inline bool pod::Matrix::operator!=( const Matrix& matrix ) const { +FORCE_INLINE bool pod::Matrix::operator!=( const Matrix& matrix ) const { return !uf::matrix::equals( *this, matrix ); } template bool uf::matrix::equals( const T& left, const T& right, float eps ) { diff --git a/engine/inc/uf/utils/math/matrix/simd.inl b/engine/inc/uf/utils/math/matrix/simd.inl index 65bf0ae9..de498703 100644 --- a/engine/inc/uf/utils/math/matrix/simd.inl +++ b/engine/inc/uf/utils/math/matrix/simd.inl @@ -6,19 +6,19 @@ namespace uf { typedef typename traits::value value_type; value_type m[4]; // 4 x 4 - inline matrix_value(); - inline matrix_value(const pod::Matrix& rhs); + FORCE_INLINE matrix_value(); + FORCE_INLINE matrix_value(const pod::Matrix& rhs); - inline bool operator==(const matrix_value&) const; - inline operator pod::Matrix() const; + FORCE_INLINE bool operator==(const matrix_value&) const; + FORCE_INLINE operator pod::Matrix() const; }; } namespace simd { - inline uf::simd::matrix_value matMult( const uf::simd::matrix_value& A, const uf::simd::matrix_value& B ); - inline uf::simd::vector matMult( const uf::simd::matrix_value& A, uf::simd::vector B ); - inline uf::simd::matrix_value matTranspose( const uf::simd::matrix_value& M ); - inline bool matEquals( const uf::simd::matrix_value& A, const uf::simd::matrix_value& B, float eps ); + FORCE_INLINE uf::simd::matrix_value matMult( const uf::simd::matrix_value& A, const uf::simd::matrix_value& B ); + FORCE_INLINE uf::simd::vector matMult( const uf::simd::matrix_value& A, uf::simd::vector B ); + FORCE_INLINE uf::simd::matrix_value matTranspose( const uf::simd::matrix_value& M ); + FORCE_INLINE bool matEquals( const uf::simd::matrix_value& A, const uf::simd::matrix_value& B, float eps ); } } @@ -181,20 +181,20 @@ namespace { } template -inline uf::simd::matrix_value::matrix_value() {} +FORCE_INLINE uf::simd::matrix_value::matrix_value() {} template -inline uf::simd::matrix_value::matrix_value( const pod::Matrix& mat ) { +FORCE_INLINE uf::simd::matrix_value::matrix_value( const pod::Matrix& mat ) { m[0] = _mm_loadu_ps(&mat[0]); m[1] = _mm_loadu_ps(&mat[4]); m[2] = _mm_loadu_ps(&mat[8]); m[3] = _mm_loadu_ps(&mat[12]); } template -inline bool uf::simd::matrix_value::operator==(const matrix_value& rhs) const { +FORCE_INLINE bool uf::simd::matrix_value::operator==(const matrix_value& rhs) const { return uf::simd::matEquals( *this, rhs ); } template -inline uf::simd::matrix_value::operator pod::Matrix() const { +FORCE_INLINE uf::simd::matrix_value::operator pod::Matrix() const { pod::Matrix4f mat; _mm_storeu_ps(&mat[0], m[0]); _mm_storeu_ps(&mat[4], m[1]); @@ -203,18 +203,18 @@ inline uf::simd::matrix_value::operator pod::Matrix() const { return mat; } -inline uf::simd::matrix_value uf::simd::matMult( const uf::simd::matrix_value& A, const uf::simd::matrix_value& B ) { +FORCE_INLINE uf::simd::matrix_value uf::simd::matMult( const uf::simd::matrix_value& A, const uf::simd::matrix_value& B ) { return ::matMult_impl( A, B ); } -inline uf::simd::vector uf::simd::matMult( const uf::simd::matrix_value& M, uf::simd::vector vec ) { +FORCE_INLINE uf::simd::vector uf::simd::matMult( const uf::simd::matrix_value& M, uf::simd::vector vec ) { return ::matMult_impl( M, vec ); } -inline uf::simd::matrix_value uf::simd::matTranspose( const uf::simd::matrix_value& M ) { +FORCE_INLINE uf::simd::matrix_value uf::simd::matTranspose( const uf::simd::matrix_value& M ) { uf::simd::matrix_value R = M; _MM_TRANSPOSE4_PS(R.m[0], R.m[1], R.m[2], R.m[3]); return R; } -inline bool uf::simd::matEquals( const uf::simd::matrix_value& A, const uf::simd::matrix_value& B, float eps ) { +FORCE_INLINE bool uf::simd::matEquals( const uf::simd::matrix_value& A, const uf::simd::matrix_value& B, float eps ) { bool result = true; __m128 e = _mm_set1_ps(eps); FOR_EACH(4, { diff --git a/engine/inc/uf/utils/math/quaternion/simd.inl b/engine/inc/uf/utils/math/quaternion/simd.inl index a3f71f48..c2b7a964 100644 --- a/engine/inc/uf/utils/math/quaternion/simd.inl +++ b/engine/inc/uf/utils/math/quaternion/simd.inl @@ -1,12 +1,12 @@ namespace uf { namespace simd { - inline vector /*UF_API*/ quatMul( vector, vector ); - inline vector /*UF_API*/ quatRot_3f( vector, vector ); - inline pod::Matrix4f /*UF_API*/ quatMat( vector ); + FORCE_INLINE vector /*UF_API*/ quatMul( vector, vector ); + FORCE_INLINE vector /*UF_API*/ quatRot_3f( vector, vector ); + FORCE_INLINE pod::Matrix4f /*UF_API*/ quatMat( vector ); } } -inline uf::simd::vector uf::simd::quatMul( uf::simd::vector Q1, uf::simd::vector Q2 ) { +FORCE_INLINE uf::simd::vector uf::simd::quatMul( uf::simd::vector Q1, uf::simd::vector Q2 ) { // broadcast q1 components __m128 x1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(0,0,0,0)); __m128 y1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(1,1,1,1)); @@ -47,7 +47,7 @@ inline uf::simd::vector uf::simd::quatMul( uf::simd::vector Q1, uf __m128 result = _mm_movelh_ps(_mm_unpacklo_ps(X, Y), _mm_unpacklo_ps(Z, W)); return result; } -inline uf::simd::vector uf::simd::quatRot_3f( uf::simd::vector Q, uf::simd::vector V ) { +FORCE_INLINE uf::simd::vector uf::simd::quatRot_3f( uf::simd::vector Q, uf::simd::vector V ) { // extract q.xyz and q.w __m128 qxyz = _mm_and_ps(Q, _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))); // mask out w __m128 qw = _mm_shuffle_ps(Q, Q, _MM_SHUFFLE(3,3,3,3)); @@ -75,7 +75,7 @@ inline uf::simd::vector uf::simd::quatRot_3f( uf::simd::vector Q, return result; } -inline pod::Matrix4f uf::simd::quatMat( uf::simd::vector Q ) { +FORCE_INLINE pod::Matrix4f uf::simd::quatMat( uf::simd::vector Q ) { // Shuffle out components __m128 qx = _mm_shuffle_ps(Q, Q, _MM_SHUFFLE(0,0,0,0)); __m128 qy = _mm_shuffle_ps(Q, Q, _MM_SHUFFLE(1,1,1,1)); diff --git a/engine/inc/uf/utils/math/vector.h b/engine/inc/uf/utils/math/vector.h index 5c166523..cc71dcf9 100644 --- a/engine/inc/uf/utils/math/vector.h +++ b/engine/inc/uf/utils/math/vector.h @@ -169,33 +169,33 @@ namespace uf { using type_t = T;\ using container_t = T*;\ static constexpr size_t size = N;\ - inline T& operator[](size_t i) { return components[i]; }\ - inline const T& operator[](size_t i) const { return components[i]; }\ - inline Vector operator-() const { return uf::vector::negate(*this); } \ - inline Vector operator+(const Vector& rhs) const { return uf::vector::add(*this, rhs); } \ - inline Vector operator-(const Vector& rhs) const { return uf::vector::subtract(*this, rhs); } \ - inline Vector operator*(const Vector& rhs) const { return uf::vector::multiply(*this, rhs); } \ - inline Vector operator/(const Vector& rhs) const { return uf::vector::divide(*this, rhs); } \ - inline Vector operator+(type_t scalar) const { return uf::vector::add(*this, scalar); } \ - inline Vector operator-(type_t scalar) const { return uf::vector::subtract(*this, scalar); } \ - inline Vector operator*(type_t scalar) const { return uf::vector::multiply(*this, scalar); } \ - inline Vector operator/(type_t scalar) const { return uf::vector::divide(*this, scalar); } \ - inline Vector& operator+=(const Vector& rhs) { return uf::vector::add_(*this, rhs); } \ - inline Vector& operator-=(const Vector& rhs) { return uf::vector::subtract_(*this, rhs); } \ - inline Vector& operator*=(const Vector& rhs) { return uf::vector::multiply_(*this, rhs); } \ - inline Vector& operator/=(const Vector& rhs) { return uf::vector::divide_(*this, rhs); } \ - inline Vector& operator+=(type_t scalar) { return uf::vector::add_(*this, scalar); } \ - inline Vector& operator-=(type_t scalar) { return uf::vector::subtract_(*this, scalar); } \ - inline Vector& operator*=(type_t scalar) { return uf::vector::multiply_(*this, scalar); } \ - inline Vector& operator/=(type_t scalar) { return uf::vector::divide_(*this, scalar); } \ - inline bool operator==(const Vector& rhs) const { return uf::vector::equals(*this, rhs); } \ - inline bool operator!=(const Vector& rhs) const { return uf::vector::notEquals(*this, rhs); } \ - inline bool operator<(const Vector& rhs) const { return uf::vector::less(*this, rhs); } \ - inline bool operator<=(const Vector& rhs) const { return uf::vector::lessEquals(*this, rhs); } \ - inline bool operator>(const Vector& rhs) const { return uf::vector::greater(*this, rhs); } \ - inline bool operator>=(const Vector& rhs) const { return uf::vector::greaterEquals(*this, rhs); } \ - inline explicit operator bool() const { return uf::vector::notEquals(*this, Vector{}); } \ - template inline operator Vector() const { return uf::vector::cast(*this); } + FORCE_INLINE T& operator[](size_t i) { return components[i]; }\ + FORCE_INLINE const T& operator[](size_t i) const { return components[i]; }\ + FORCE_INLINE Vector operator-() const { return uf::vector::negate(*this); } \ + FORCE_INLINE Vector operator+(const Vector& rhs) const { return uf::vector::add(*this, rhs); } \ + FORCE_INLINE Vector operator-(const Vector& rhs) const { return uf::vector::subtract(*this, rhs); } \ + FORCE_INLINE Vector operator*(const Vector& rhs) const { return uf::vector::multiply(*this, rhs); } \ + FORCE_INLINE Vector operator/(const Vector& rhs) const { return uf::vector::divide(*this, rhs); } \ + FORCE_INLINE Vector operator+(type_t scalar) const { return uf::vector::add(*this, scalar); } \ + FORCE_INLINE Vector operator-(type_t scalar) const { return uf::vector::subtract(*this, scalar); } \ + FORCE_INLINE Vector operator*(type_t scalar) const { return uf::vector::multiply(*this, scalar); } \ + FORCE_INLINE Vector operator/(type_t scalar) const { return uf::vector::divide(*this, scalar); } \ + FORCE_INLINE Vector& operator+=(const Vector& rhs) { return uf::vector::add_(*this, rhs); } \ + FORCE_INLINE Vector& operator-=(const Vector& rhs) { return uf::vector::subtract_(*this, rhs); } \ + FORCE_INLINE Vector& operator*=(const Vector& rhs) { return uf::vector::multiply_(*this, rhs); } \ + FORCE_INLINE Vector& operator/=(const Vector& rhs) { return uf::vector::divide_(*this, rhs); } \ + FORCE_INLINE Vector& operator+=(type_t scalar) { return uf::vector::add_(*this, scalar); } \ + FORCE_INLINE Vector& operator-=(type_t scalar) { return uf::vector::subtract_(*this, scalar); } \ + FORCE_INLINE Vector& operator*=(type_t scalar) { return uf::vector::multiply_(*this, scalar); } \ + FORCE_INLINE Vector& operator/=(type_t scalar) { return uf::vector::divide_(*this, scalar); } \ + FORCE_INLINE bool operator==(const Vector& rhs) const { return uf::vector::equals(*this, rhs); } \ + FORCE_INLINE bool operator!=(const Vector& rhs) const { return uf::vector::notEquals(*this, rhs); } \ + FORCE_INLINE bool operator<(const Vector& rhs) const { return uf::vector::less(*this, rhs); } \ + FORCE_INLINE bool operator<=(const Vector& rhs) const { return uf::vector::lessEquals(*this, rhs); } \ + FORCE_INLINE bool operator>(const Vector& rhs) const { return uf::vector::greater(*this, rhs); } \ + FORCE_INLINE bool operator>=(const Vector& rhs) const { return uf::vector::greaterEquals(*this, rhs); } \ + FORCE_INLINE explicit operator bool() const { return uf::vector::notEquals(*this, Vector{}); } \ + template FORCE_INLINE operator Vector() const { return uf::vector::cast(*this); } namespace pod { template diff --git a/engine/inc/uf/utils/math/vector/simd.h b/engine/inc/uf/utils/math/vector/simd.h index 0e8248e7..363a0521 100644 --- a/engine/inc/uf/utils/math/vector/simd.h +++ b/engine/inc/uf/utils/math/vector/simd.h @@ -7,28 +7,28 @@ #endif #define DEFINE_SIMD(T)\ - inline vector /*UF_API*/ load( const T* );\ - inline void /*UF_API*/ store( vector, T* );\ - inline vector /*UF_API*/ set( T );\ - inline vector /*UF_API*/ set( T, T, T, T );\ - inline vector /*UF_API*/ add( vector, vector );\ - inline vector /*UF_API*/ sub( vector, vector );\ - inline vector /*UF_API*/ mul( vector, vector );\ - inline vector /*UF_API*/ div( vector, vector );\ - inline vector /*UF_API*/ min( vector, vector );\ - inline vector /*UF_API*/ max( vector, vector );\ - inline bool /*UF_API*/ all( vector );\ - inline bool /*UF_API*/ any( vector );\ - inline vector /*UF_API*/ less( vector, vector );\ - inline vector /*UF_API*/ lessEquals( vector, vector );\ - inline vector /*UF_API*/ greater( vector, vector );\ - inline vector /*UF_API*/ greaterEquals( vector, vector );\ - inline vector /*UF_API*/ equals( vector, vector );\ - inline vector /*UF_API*/ notEquals( vector, vector );\ - inline vector /*UF_API*/ sqrt( vector );\ - inline vector /*UF_API*/ hadd( vector, vector );\ - inline T /*UF_API*/ dot( vector, vector );\ - template inline pod::Vector cast( const vector );\ + FORCE_INLINE vector /*UF_API*/ load( const T* );\ + FORCE_INLINE void /*UF_API*/ store( vector, T* );\ + FORCE_INLINE vector /*UF_API*/ set( T );\ + FORCE_INLINE vector /*UF_API*/ set( T, T, T, T );\ + FORCE_INLINE vector /*UF_API*/ add( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ sub( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ mul( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ div( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ min( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ max( vector, vector );\ + FORCE_INLINE bool /*UF_API*/ all( vector );\ + FORCE_INLINE bool /*UF_API*/ any( vector );\ + FORCE_INLINE vector /*UF_API*/ less( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ lessEquals( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ greater( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ greaterEquals( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ equals( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ notEquals( vector, vector );\ + FORCE_INLINE vector /*UF_API*/ sqrt( vector );\ + FORCE_INLINE vector /*UF_API*/ hadd( vector, vector );\ + FORCE_INLINE T /*UF_API*/ dot( vector, vector );\ + template FORCE_INLINE pod::Vector cast( const vector );\ namespace uf { namespace simd { @@ -68,38 +68,38 @@ namespace uf { // __m128 m; typedef typename traits::value value_type; value_type m; - inline vector(); - inline vector(const T* f); - inline vector(T f); - inline vector(T f0, T f1, T f2, T f3); - inline vector(bool f0, bool f1, bool f2, bool f3); - inline vector(const value_type& rhs); - inline vector(const vector& rhs); + FORCE_INLINE vector(); + FORCE_INLINE vector(const T* f); + FORCE_INLINE vector(T f); + FORCE_INLINE vector(T f0, T f1, T f2, T f3); + FORCE_INLINE vector(bool f0, bool f1, bool f2, bool f3); + FORCE_INLINE vector(const value_type& rhs); + FORCE_INLINE vector(const vector& rhs); - inline vector(const pod::Vector& rhs); - inline vector(const pod::Vector& rhs); - inline vector(const pod::Vector& rhs); - inline vector(const pod::Vector& rhs); + FORCE_INLINE vector(const pod::Vector& rhs); + FORCE_INLINE vector(const pod::Vector& rhs); + FORCE_INLINE vector(const pod::Vector& rhs); + FORCE_INLINE vector(const pod::Vector& rhs); - inline vector operator+( const vector& rhs ); - inline vector operator-( const vector& rhs ); - inline vector operator*( const vector& rhs ); - inline vector operator/( const vector& rhs ); + FORCE_INLINE vector operator+( const vector& rhs ); + FORCE_INLINE vector operator-( const vector& rhs ); + FORCE_INLINE vector operator*( const vector& rhs ); + FORCE_INLINE vector operator/( const vector& rhs ); - inline vector operator<( const vector& rhs ); - inline vector operator<=( const vector& rhs ); - inline vector operator>( const vector& rhs ); - inline vector operator>=( const vector& rhs ); - inline vector operator==( const vector& rhs ); - inline vector operator!=( const vector& rhs ); + FORCE_INLINE vector operator<( const vector& rhs ); + FORCE_INLINE vector operator<=( const vector& rhs ); + FORCE_INLINE vector operator>( const vector& rhs ); + FORCE_INLINE vector operator>=( const vector& rhs ); + FORCE_INLINE vector operator==( const vector& rhs ); + FORCE_INLINE vector operator!=( const vector& rhs ); - inline vector& operator=(const value_type& rhs); - inline vector& operator=(const vector& rhs); - inline vector& operator=(const pod::Vector& rhs); + FORCE_INLINE vector& operator=(const value_type& rhs); + FORCE_INLINE vector& operator=(const vector& rhs); + FORCE_INLINE vector& operator=(const pod::Vector& rhs); - inline operator value_type() const; + FORCE_INLINE operator value_type() const; - template inline operator pod::Vector() const; + template FORCE_INLINE operator pod::Vector() const; }; DEFINE_SIMD(float); @@ -117,13 +117,13 @@ namespace uf { */ // specializations - inline vector /*UF_API*/ set_f( bool, bool, bool, bool ); - inline vector /*UF_API*/ set_i( bool, bool, bool, bool ); - inline vector /*UF_API*/ set_ui( bool, bool, bool, bool ); + FORCE_INLINE vector /*UF_API*/ set_f( bool, bool, bool, bool ); + FORCE_INLINE vector /*UF_API*/ set_i( bool, bool, bool, bool ); + FORCE_INLINE vector /*UF_API*/ set_ui( bool, bool, bool, bool ); - inline vector /*UF_API*/ cross( vector x, vector y ); - inline vector /*UF_API*/ normalize( vector x ); - inline vector /*UF_API*/ normalize_fast( vector x ); + FORCE_INLINE vector /*UF_API*/ cross( vector x, vector y ); + FORCE_INLINE vector /*UF_API*/ normalize( vector x ); + FORCE_INLINE vector /*UF_API*/ normalize_fast( vector x ); } } diff --git a/engine/inc/uf/utils/math/vector/simd.inl b/engine/inc/uf/utils/math/vector/simd.inl index c71cc347..a86b2147 100644 --- a/engine/inc/uf/utils/math/vector/simd.inl +++ b/engine/inc/uf/utils/math/vector/simd.inl @@ -1,12 +1,12 @@ #include namespace { - inline __m128i bias_unsigned(__m128i v) { + FORCE_INLINE __m128i bias_unsigned(__m128i v) { const __m128i signbit = _mm_set1_epi32(0x80000000); return _mm_xor_si128(v, signbit); } - inline int32_t boolMask(bool b) { + FORCE_INLINE int32_t boolMask(bool b) { return b ? -1 : 0; // 0xFFFFFFFF for true, 0x00000000 for false } } @@ -20,142 +20,142 @@ namespace { #define MV_INSTR_SET_7 __attribute__((target("avx"))) template -inline uf::simd::vector::vector() {} +FORCE_INLINE uf::simd::vector::vector() {} template -inline uf::simd::vector::vector(const T* f) : m(uf::simd::load(f)) {} +FORCE_INLINE uf::simd::vector::vector(const T* f) : m(uf::simd::load(f)) {} template -inline uf::simd::vector::vector(T f) : m(uf::simd::set(f)) {} +FORCE_INLINE uf::simd::vector::vector(T f) : m(uf::simd::set(f)) {} template -inline uf::simd::vector::vector(T f0, T f1, T f2, T f3) : m(uf::simd::set(f0,f1,f2,f3)) {} +FORCE_INLINE uf::simd::vector::vector(T f0, T f1, T f2, T f3) : m(uf::simd::set(f0,f1,f2,f3)) {} template -inline uf::simd::vector::vector(bool f0, bool f1, bool f2, bool f3) : m(uf::simd::set(f0,f1,f2,f3)) {} +FORCE_INLINE uf::simd::vector::vector(bool f0, bool f1, bool f2, bool f3) : m(uf::simd::set(f0,f1,f2,f3)) {} template -inline uf::simd::vector::vector(const value_type& rhs) : m(rhs) {} +FORCE_INLINE uf::simd::vector::vector(const value_type& rhs) : m(rhs) {} template -inline uf::simd::vector::vector(const vector& rhs) : m(rhs.m) {} +FORCE_INLINE uf::simd::vector::vector(const vector& rhs) : m(rhs.m) {} template -inline uf::simd::vector::vector(const pod::Vector& rhs) : vector((T) rhs[0]){} +FORCE_INLINE uf::simd::vector::vector(const pod::Vector& rhs) : vector((T) rhs[0]){} template -inline uf::simd::vector::vector(const pod::Vector& rhs) : vector((T) rhs[0], (T) rhs[1], 0, 0){} +FORCE_INLINE uf::simd::vector::vector(const pod::Vector& rhs) : vector((T) rhs[0], (T) rhs[1], 0, 0){} template -inline uf::simd::vector::vector(const pod::Vector& rhs) : vector((T) rhs[0], (T) rhs[1], (T) rhs[2], 0){} +FORCE_INLINE uf::simd::vector::vector(const pod::Vector& rhs) : vector((T) rhs[0], (T) rhs[1], (T) rhs[2], 0){} template -inline uf::simd::vector::vector(const pod::Vector& rhs) : vector((T) rhs[0], (T) rhs[1], (T) rhs[2], (T) rhs[3]){} +FORCE_INLINE uf::simd::vector::vector(const pod::Vector& rhs) : vector((T) rhs[0], (T) rhs[1], (T) rhs[2], (T) rhs[3]){} template -inline uf::simd::vector uf::simd::vector::operator+( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator+( const vector& rhs ) { return uf::simd::add( *this, rhs ); } template -inline uf::simd::vector uf::simd::vector::operator-( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator-( const vector& rhs ) { return uf::simd::sub( *this, rhs ); } template -inline uf::simd::vector uf::simd::vector::operator*( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator*( const vector& rhs ) { return uf::simd::mul( *this, rhs ); } template -inline uf::simd::vector uf::simd::vector::operator/( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator/( const vector& rhs ) { return uf::simd::div( *this, rhs ); } template -inline uf::simd::vector uf::simd::vector::operator<( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator<( const vector& rhs ) { return uf::simd::less( *this, rhs ); } template -inline uf::simd::vector uf::simd::vector::operator<=( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator<=( const vector& rhs ) { return uf::simd::lessEquals( *this, rhs ); } template -inline uf::simd::vector uf::simd::vector::operator>( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator>( const vector& rhs ) { return uf::simd::greater( *this, rhs ); } template -inline uf::simd::vector uf::simd::vector::operator>=( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator>=( const vector& rhs ) { return uf::simd::greaterEquals( *this, rhs ); } template -inline uf::simd::vector uf::simd::vector::operator==( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator==( const vector& rhs ) { return uf::simd::equals( *this, rhs ); } template -inline uf::simd::vector uf::simd::vector::operator!=( const vector& rhs ) { +FORCE_INLINE uf::simd::vector uf::simd::vector::operator!=( const vector& rhs ) { return uf::simd::notEquals( *this, rhs ); } template -inline uf::simd::vector& uf::simd::vector::operator=(const uf::simd::vector::value_type& rhs) { +FORCE_INLINE uf::simd::vector& uf::simd::vector::operator=(const uf::simd::vector::value_type& rhs) { m = rhs; return *this; } template -inline uf::simd::vector& uf::simd::vector::operator=(const vector& rhs) { +FORCE_INLINE uf::simd::vector& uf::simd::vector::operator=(const vector& rhs) { m = rhs.m; return *this; } template -inline uf::simd::vector& uf::simd::vector::operator=(const pod::Vector& rhs) { +FORCE_INLINE uf::simd::vector& uf::simd::vector::operator=(const pod::Vector& rhs) { m = uf::simd::load(&rhs[0]); return *this; } template -inline uf::simd::vector::operator uf::simd::vector::value_type() const { +FORCE_INLINE uf::simd::vector::operator uf::simd::vector::value_type() const { return m; } template template -inline uf::simd::vector::operator pod::Vector() const { +FORCE_INLINE uf::simd::vector::operator pod::Vector() const { return uf::simd::cast(*this); } template -inline pod::Vector uf::simd::cast( const uf::simd::vector v ){ +FORCE_INLINE pod::Vector uf::simd::cast( const uf::simd::vector v ){ pod::Vector4f r; uf::simd::store( v, &r[0] ); return uf::vector::cast(r); } template -inline pod::Vector uf::simd::cast( const uf::simd::vector v ){ +FORCE_INLINE pod::Vector uf::simd::cast( const uf::simd::vector v ){ pod::Vector4i r; uf::simd::store( v, &r[0] ); return uf::vector::cast(r); } template -inline pod::Vector uf::simd::cast( const uf::simd::vector v ){ +FORCE_INLINE pod::Vector uf::simd::cast( const uf::simd::vector v ){ pod::Vector4ui r; uf::simd::store( v, &r[0] ); return uf::vector::cast(r); } -inline uf::simd::vector uf::simd::load( const float* f ) { +FORCE_INLINE uf::simd::vector uf::simd::load( const float* f ) { // if ( uf::aligned(f, 16) ) return _mm_load_ps(f); return _mm_loadu_ps( f ); } -inline void uf::simd::store( uf::simd::vector v, float* f ) { +FORCE_INLINE void uf::simd::store( uf::simd::vector v, float* f ) { /* if ( uf::aligned(f, 16) ) _mm_store_ps(f, v); else */ _mm_storeu_ps( f, v ); } -inline uf::simd::vector uf::simd::set( float f ) { +FORCE_INLINE uf::simd::vector uf::simd::set( float f ) { return _mm_set1_ps( f ); } -inline uf::simd::vector uf::simd::set( float x, float y, float z, float w ) { +FORCE_INLINE uf::simd::vector uf::simd::set( float x, float y, float z, float w ) { return _mm_setr_ps( x, y, z, w ); } -inline uf::simd::vector uf::simd::add( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::add( uf::simd::vector x, uf::simd::vector y ) { return _mm_add_ps( x, y ); } -inline uf::simd::vector uf::simd::sub( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::sub( uf::simd::vector x, uf::simd::vector y ) { return _mm_sub_ps( x, y ); } -inline uf::simd::vector uf::simd::mul( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::mul( uf::simd::vector x, uf::simd::vector y ) { return _mm_mul_ps( x, y ); } -inline uf::simd::vector uf::simd::div( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::div( uf::simd::vector x, uf::simd::vector y ) { return _mm_div_ps( x, y ); } /* -inline uf::simd::vector uf::simd::hadd( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::hadd( uf::simd::vector x, uf::simd::vector y ) { #if 0 return _mm_hadd_ps( x, y ); #else @@ -168,37 +168,37 @@ inline uf::simd::vector uf::simd::hadd( uf::simd::vector x, uf::si } */ -inline uf::simd::vector uf::simd::min( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::min( uf::simd::vector x, uf::simd::vector y ) { return _mm_min_ps( x, y ); } -inline uf::simd::vector uf::simd::max( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::max( uf::simd::vector x, uf::simd::vector y ) { return _mm_max_ps( x, y ); } -inline bool uf::simd::all( uf::simd::vector mask) { +FORCE_INLINE bool uf::simd::all( uf::simd::vector mask) { return _mm_movemask_ps( mask ) == 0xF; // all 4 bits set } -inline bool uf::simd::any( uf::simd::vector mask) { +FORCE_INLINE bool uf::simd::any( uf::simd::vector mask) { return _mm_movemask_ps( mask ) != 0x0; // any bit set } -inline uf::simd::vector uf::simd::less( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::less( uf::simd::vector x, uf::simd::vector y ) { return _mm_cmplt_ps( x, y ); } -inline uf::simd::vector uf::simd::lessEquals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::lessEquals( uf::simd::vector x, uf::simd::vector y ) { return _mm_cmple_ps( x, y ); } -inline uf::simd::vector uf::simd::greater( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::greater( uf::simd::vector x, uf::simd::vector y ) { return _mm_cmpgt_ps( x, y ); } -inline uf::simd::vector uf::simd::greaterEquals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::greaterEquals( uf::simd::vector x, uf::simd::vector y ) { return _mm_cmpge_ps( x, y ); } -inline uf::simd::vector uf::simd::equals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::equals( uf::simd::vector x, uf::simd::vector y ) { return _mm_cmpeq_ps( x, y ); } -inline uf::simd::vector uf::simd::notEquals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::notEquals( uf::simd::vector x, uf::simd::vector y ) { return _mm_cmpneq_ps( x, y ); } -inline uf::simd::vector uf::simd::sqrt( uf::simd::vector v ) { +FORCE_INLINE uf::simd::vector uf::simd::sqrt( uf::simd::vector v ) { return _mm_sqrt_ps( v ); } @@ -220,7 +220,7 @@ namespace { return _mm_dp_ps(x, y, 0xF1); } } -inline float uf::simd::dot( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE float uf::simd::dot( uf::simd::vector x, uf::simd::vector y ) { return _mm_cvtss_f32( ::dot_impl( x, y ) ); } @@ -235,7 +235,7 @@ namespace { return _mm_loadu_si128(reinterpret_cast(f)); } } -inline uf::simd::vector uf::simd::load( const int32_t* f ) { +FORCE_INLINE uf::simd::vector uf::simd::load( const int32_t* f ) { return ::load_impl( f ); } @@ -255,21 +255,21 @@ namespace { else*/ _mm_storeu_si128(reinterpret_cast<__m128i*>(f), v); } } -inline void uf::simd::store( uf::simd::vector v, int32_t* f ) { +FORCE_INLINE void uf::simd::store( uf::simd::vector v, int32_t* f ) { return ::store_impl( v, f ); } -inline uf::simd::vector uf::simd::set( int32_t f ) { +FORCE_INLINE uf::simd::vector uf::simd::set( int32_t f ) { return _mm_set1_epi32(f); } -inline uf::simd::vector uf::simd::set( int32_t x, int32_t y, int32_t z, int32_t w ) { +FORCE_INLINE uf::simd::vector uf::simd::set( int32_t x, int32_t y, int32_t z, int32_t w ) { return _mm_setr_epi32(x, y, z, w); } -inline uf::simd::vector uf::simd::add( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::add( uf::simd::vector x, uf::simd::vector y ) { return _mm_add_epi32(x, y); } -inline uf::simd::vector uf::simd::sub( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::sub( uf::simd::vector x, uf::simd::vector y ) { return _mm_sub_epi32(x, y); } @@ -285,18 +285,18 @@ namespace { return _mm_mullo_epi32(x, y); } } -inline uf::simd::vector uf::simd::mul( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::mul( uf::simd::vector x, uf::simd::vector y ) { return ::mul_impl( x, y ); } -inline uf::simd::vector uf::simd::div( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::div( uf::simd::vector x, uf::simd::vector y ) { auto X = uf::simd::cast( x ); auto Y = uf::simd::cast( y ); return uf::simd::set( X[0] / Y[0], X[1] / Y[1], X[2] / Y[2], X[3] / Y[3] ); } /* -inline uf::simd::vector uf::simd::hadd( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::hadd( uf::simd::vector x, uf::simd::vector y ) { auto X = uf::simd::cast( x ); auto Y = uf::simd::cast( y ); return uf::simd::set( X[0] + Y[0], X[1] + Y[1], X[2] + Y[2], X[3] + Y[3] ); @@ -325,18 +325,18 @@ namespace { return _mm_max_epi32(x, y); } } -inline uf::simd::vector uf::simd::min( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::min( uf::simd::vector x, uf::simd::vector y ) { return ::min_impl(x, y); } -inline uf::simd::vector uf::simd::max( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::max( uf::simd::vector x, uf::simd::vector y ) { return ::max_impl(x, y); } -inline bool uf::simd::all( uf::simd::vector mask) { +FORCE_INLINE bool uf::simd::all( uf::simd::vector mask) { return _mm_movemask_epi8( mask ) == 0xFFFF; // all 4 bits set } -inline bool uf::simd::any( uf::simd::vector mask) { +FORCE_INLINE bool uf::simd::any( uf::simd::vector mask) { return _mm_movemask_epi8( mask ) != 0x0; // any bit set } @@ -383,30 +383,30 @@ namespace { return _mm_xor_si128(gt, _mm_set1_epi32(-1)); } } -inline uf::simd::vector uf::simd::less( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::less( uf::simd::vector x, uf::simd::vector y ) { return ::less_impl( x, y ); } -inline uf::simd::vector uf::simd::lessEquals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::lessEquals( uf::simd::vector x, uf::simd::vector y ) { return ::lessEquals_impl( x, y ); } -inline uf::simd::vector uf::simd::greater( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::greater( uf::simd::vector x, uf::simd::vector y ) { return ::greater_impl( x, y ); } -inline uf::simd::vector uf::simd::greaterEquals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::greaterEquals( uf::simd::vector x, uf::simd::vector y ) { return ::greaterEquals_impl( x, y ); } -inline uf::simd::vector uf::simd::equals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::equals( uf::simd::vector x, uf::simd::vector y ) { return _mm_cmpeq_epi32(x, y); } -inline uf::simd::vector uf::simd::notEquals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::notEquals( uf::simd::vector x, uf::simd::vector y ) { return _mm_xor_si128(_mm_cmpeq_epi32(x, y), _mm_set1_epi32(-1)); } -inline uf::simd::vector uf::simd::sqrt( uf::simd::vector v ) { +FORCE_INLINE uf::simd::vector uf::simd::sqrt( uf::simd::vector v ) { auto V = uf::simd::cast( v ); return uf::simd::set( (int32_t) std::sqrt(V[0]), (int32_t) std::sqrt(V[1]), (int32_t) std::sqrt(V[2]), (int32_t) std::sqrt(V[3]) ); } -inline int32_t uf::simd::dot( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE int32_t uf::simd::dot( uf::simd::vector x, uf::simd::vector y ) { auto X = uf::simd::cast( x ); auto Y = uf::simd::cast( y ); return X[0] * Y[0] + X[1] * Y[1] + X[2] * Y[2] + X[3] * Y[3]; @@ -438,23 +438,23 @@ namespace { else*/ _mm_storeu_si128(reinterpret_cast<__m128i*>(f), v); } } -inline uf::simd::vector uf::simd::load( const uint32_t* f ) { +FORCE_INLINE uf::simd::vector uf::simd::load( const uint32_t* f ) { return ::load_impl( f ); } -inline void uf::simd::store( uf::simd::vector v, uint32_t* f ) { +FORCE_INLINE void uf::simd::store( uf::simd::vector v, uint32_t* f ) { return ::store_impl( v, f ); } -inline uf::simd::vector uf::simd::set( uint32_t f ) { +FORCE_INLINE uf::simd::vector uf::simd::set( uint32_t f ) { return _mm_set1_epi32(f); } -inline uf::simd::vector uf::simd::set( uint32_t x, uint32_t y, uint32_t z, uint32_t w ) { +FORCE_INLINE uf::simd::vector uf::simd::set( uint32_t x, uint32_t y, uint32_t z, uint32_t w ) { return _mm_setr_epi32(x, y, z, w); } -inline uf::simd::vector uf::simd::add( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::add( uf::simd::vector x, uf::simd::vector y ) { return _mm_add_epi32(x, y); } -inline uf::simd::vector uf::simd::sub( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::sub( uf::simd::vector x, uf::simd::vector y ) { return _mm_sub_epi32(x, y); } @@ -470,17 +470,17 @@ namespace { return _mm_mullo_epi32(x, y); } } -inline uf::simd::vector uf::simd::mul( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::mul( uf::simd::vector x, uf::simd::vector y ) { return ::mul_impl( x, y ); } -inline uf::simd::vector uf::simd::div( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::div( uf::simd::vector x, uf::simd::vector y ) { auto X = uf::simd::cast( x ); auto Y = uf::simd::cast( y ); return uf::simd::set( X[0] / Y[0], X[1] / Y[1], X[2] / Y[2], X[3] / Y[3] ); } /* -inline uf::simd::vector uf::simd::hadd( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::hadd( uf::simd::vector x, uf::simd::vector y ) { auto X = uf::simd::cast( x ); auto Y = uf::simd::cast( y ); return uf::simd::set( X[0] + Y[0], X[1] + Y[1], X[2] + Y[2], X[3] + Y[3] ); @@ -510,17 +510,17 @@ namespace { return _mm_max_epu32(x, y); // unsigned max } } -inline uf::simd::vector uf::simd::min( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::min( uf::simd::vector x, uf::simd::vector y ) { return ::min_impl(x, y); } -inline uf::simd::vector uf::simd::max( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::max( uf::simd::vector x, uf::simd::vector y ) { return ::max_impl(x, y); } -inline bool uf::simd::all( uf::simd::vector mask) { +FORCE_INLINE bool uf::simd::all( uf::simd::vector mask) { return _mm_movemask_epi8( mask ) == 0xFFFF; // all 4 bits set } -inline bool uf::simd::any( uf::simd::vector mask) { +FORCE_INLINE bool uf::simd::any( uf::simd::vector mask) { return _mm_movemask_epi8( mask ) != 0x0; // any bit set } @@ -573,43 +573,43 @@ namespace { return _mm_xor_si128(lt, _mm_set1_epi32(-1)); // invert mask } } -inline uf::simd::vector uf::simd::less( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::less( uf::simd::vector x, uf::simd::vector y ) { return ::less_impl( x, y ); } -inline uf::simd::vector uf::simd::lessEquals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::lessEquals( uf::simd::vector x, uf::simd::vector y ) { return ::lessEquals_impl( x, y ); } -inline uf::simd::vector uf::simd::greater( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::greater( uf::simd::vector x, uf::simd::vector y ) { return ::greater_impl( x, y ); } -inline uf::simd::vector uf::simd::greaterEquals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::greaterEquals( uf::simd::vector x, uf::simd::vector y ) { return ::greaterEquals_impl( x, y ); } -inline uf::simd::vector uf::simd::equals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::equals( uf::simd::vector x, uf::simd::vector y ) { return _mm_cmpeq_epi32(x, y); } -inline uf::simd::vector uf::simd::notEquals( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::notEquals( uf::simd::vector x, uf::simd::vector y ) { return _mm_xor_si128(_mm_cmpeq_epi32(x, y), _mm_set1_epi32(-1)); } -inline uf::simd::vector uf::simd::sqrt( uf::simd::vector v ) { +FORCE_INLINE uf::simd::vector uf::simd::sqrt( uf::simd::vector v ) { auto V = uf::simd::cast( v ); return uf::simd::set( (uint32_t) std::sqrt(V[0]), (uint32_t) std::sqrt(V[1]), (uint32_t) std::sqrt(V[2]), (uint32_t) std::sqrt(V[3]) ); } -inline uint32_t uf::simd::dot( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uint32_t uf::simd::dot( uf::simd::vector x, uf::simd::vector y ) { auto X = uf::simd::cast( x ); auto Y = uf::simd::cast( y ); return X[0] * Y[0] + X[1] * Y[1] + X[2] * Y[2] + X[3] * Y[3]; } -inline uf::simd::vector uf::simd::set_f( bool x, bool y, bool z, bool w ) { +FORCE_INLINE uf::simd::vector uf::simd::set_f( bool x, bool y, bool z, bool w ) { return _mm_castsi128_ps(_mm_setr_epi32(::boolMask(x), ::boolMask(y), ::boolMask(z), ::boolMask(w))); } -inline uf::simd::vector uf::simd::set_i( bool x, bool y, bool z, bool w ) { +FORCE_INLINE uf::simd::vector uf::simd::set_i( bool x, bool y, bool z, bool w ) { return _mm_setr_epi32(::boolMask(x), ::boolMask(y), ::boolMask(z), ::boolMask(w)); } -inline uf::simd::vector uf::simd::set_ui( bool x, bool y, bool z, bool w ) { +FORCE_INLINE uf::simd::vector uf::simd::set_ui( bool x, bool y, bool z, bool w ) { return _mm_setr_epi32(::boolMask(x), ::boolMask(y), ::boolMask(z), ::boolMask(w)); } @@ -635,15 +635,15 @@ namespace { } } -inline uf::simd::vector uf::simd::cross( uf::simd::vector x, uf::simd::vector y ) { +FORCE_INLINE uf::simd::vector uf::simd::cross( uf::simd::vector x, uf::simd::vector y ) { return ::cross_impl( x, y ); } -inline uf::simd::vector uf::simd::normalize( uf::simd::vector v ) { +FORCE_INLINE uf::simd::vector uf::simd::normalize( uf::simd::vector v ) { __m128 len = _mm_sqrt_ss( ::dot_impl( v,v ) ); len = _mm_shuffle_ps(len, len, 0x00); return _mm_div_ps(v, len); } -inline uf::simd::vector uf::simd::normalize_fast( uf::simd::vector v ) { +FORCE_INLINE uf::simd::vector uf::simd::normalize_fast( uf::simd::vector v ) { __m128 invLen = _mm_rsqrt_ps(::dot_impl(v, v)); return _mm_mul_ps(v, invLen); } \ No newline at end of file diff --git a/engine/src/utils/math/physics/aabb.inl b/engine/src/utils/math/physics/aabb.inl index bee07c8d..dfa470a0 100644 --- a/engine/src/utils/math/physics/aabb.inl +++ b/engine/src/utils/math/physics/aabb.inl @@ -235,18 +235,30 @@ namespace { bool aabbSphere( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( AABB, SPHERE ); - return ::sphereAabb( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::sphereAabb( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } bool aabbPlane( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( AABB, PLANE ); - return ::planeAabb( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::planeAabb( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } bool aabbCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( AABB, CAPSULE ); - return ::capsuleAabb( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::capsuleAabb( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } bool aabbMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( AABB, MESH ); - return ::meshAabb( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::meshAabb( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } } \ No newline at end of file diff --git a/engine/src/utils/math/physics/bvh.inl b/engine/src/utils/math/physics/bvh.inl index 82b21d6c..28873012 100644 --- a/engine/src/utils/math/physics/bvh.inl +++ b/engine/src/utils/math/physics/bvh.inl @@ -161,7 +161,7 @@ namespace { return index; } - void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector& bodies, int capacity = 2 ) { + void buildBroadphaseBVH( pod::BVH& bvh, const uf::stl::vector& bodies, int capacity = 2, bool filters = false, bool filterType = false ) { if ( bodies.empty() ) return; bvh.indices.clear(); @@ -169,18 +169,21 @@ namespace { bvh.indices.reserve(bodies.size()); // stores bounds - uf::stl::vector bounds; - bounds.reserve(bodies.size()); + uf::stl::vector bounds(bodies.size(), { {FLT_MAX, FLT_MAX, FLT_MAX}, {-FLT_MAX, -FLT_MAX, -FLT_MAX} }); // populate initial indices and bounds for ( auto i = 0; i < bodies.size(); ++i ) { - bounds.emplace_back(::computeAABB( *bodies[i] )); - bvh.indices.emplace_back(i); // i => bodies[i] + if ( filters && bodies[i]->isStatic != filterType ) continue; + + bounds[i] = ::computeAABB(*bodies[i]); + bvh.indices.emplace_back(i); } + if ( bvh.indices.empty() ) return; // inserted nothing + // recursively build BVH from indices - if ( ::useBvhSahBodies ) ::buildBVHNode_SAH( bvh, bounds, 0, bodies.size(), capacity ); - else ::buildBVHNode( bvh, bounds, 0, bodies.size(), capacity ); + if ( ::useBvhSahBodies ) ::buildBVHNode_SAH( bvh, bounds, 0, bvh.indices.size(), capacity ); + else ::buildBVHNode( bvh, bounds, 0, bvh.indices.size(), capacity ); // flatten if requested if ( ::flattenBvhBodies ) ::flattenBVH( bvh, 0 ); @@ -221,8 +224,8 @@ namespace { } // recursively build BVH from indices - if ( ::useBvhSahMeshes ) ::buildBVHNode_SAH( bvh, bounds, 0, triangles, capacity ); - else ::buildBVHNode( bvh, bounds, 0, triangles, capacity ); + if ( ::useBvhSahMeshes ) ::buildBVHNode_SAH( bvh, bounds, 0, bvh.indices.size(), capacity ); + else ::buildBVHNode( bvh, bounds, 0, bvh.indices.size(), capacity ); // flatten if requested if ( ::flattenBvhMeshes ) ::flattenBVH( bvh, 0 ); @@ -233,14 +236,19 @@ namespace { namespace { pod::BVH::UpdatePolicy::Decision decideBVHUpdate( const pod::BVH& bvh, const uf::stl::vector& bodies, const pod::BVH::UpdatePolicy& policy, int frameCounter ) { - if ( bvh.indices.empty() || bvh.nodes.empty() || bvh.dirty ) return pod::BVH::UpdatePolicy::Decision::REBUILD; + if ( bvh.indices.empty() || bvh.nodes.empty() || bvh.dirty ) { + UF_MSG_DEBUG("Force rebuild, bvh.indices.empty={}, bvh.nodes.empty={}, bvh.dirty={}", bvh.indices.empty(), bvh.nodes.empty(), bvh.dirty ); + return pod::BVH::UpdatePolicy::Decision::REBUILD; + } if ( bodies.empty() ) return pod::BVH::UpdatePolicy::Decision::NONE; int dirtyCount = 0; float oldRootArea = ::aabbSurfaceArea( bvh.nodes[0].bounds ); // check each body - for ( const auto* body : bodies ) { + for ( auto idx : bvh.indices ) { + const auto* body = bodies[idx]; + pod::AABB newBounds = ::computeAABB(*body); pod::AABB oldBounds = body->bounds; @@ -258,13 +266,14 @@ namespace { float dirtyRatio = (float) dirtyCount / (float) bodies.size(); // compute new root bounds - pod::AABB newRoot = bodies[0]->bounds; - for ( auto i = 1; i < bodies.size(); ++i ) { - newRoot = ::mergeAabb(newRoot, bodies[i]->bounds); + pod::AABB newRoot = bodies[bvh.indices[0]]->bounds; + for ( auto i = 1; i < bvh.indices.size(); ++i ) { + newRoot = ::mergeAabb(newRoot, bodies[bvh.indices[i]]->bounds); } float newRootArea = ::aabbSurfaceArea( newRoot ); if ( dirtyRatio > policy.dirtyRatioThreshold || newRootArea > oldRootArea * policy.overlapThreshold || frameCounter % policy.maxFramesBeforeRebuild == 0 ) { + UF_MSG_DEBUG( "Rebuild, dirtyRatio={}, oldRootArea={}, newRootArea={}, frameCounter={}", dirtyRatio, oldRootArea, newRootArea, frameCounter ); return pod::BVH::UpdatePolicy::Decision::REBUILD; } if ( dirtyCount > 0 ) return pod::BVH::UpdatePolicy::Decision::REFIT; @@ -445,6 +454,8 @@ namespace { for ( auto j = 0; j < nodeB.count; ++j ) { int bodyA = bvhA.indices[nodeA.start + i]; int bodyB = bvhB.indices[nodeB.start + j]; + if ( bodyA == bodyB ) continue; + if ( bodyA > bodyB ) std::swap( bodyA, bodyB ); pairs.emplace(bodyA, bodyB); } diff --git a/engine/src/utils/math/physics/capsule.inl b/engine/src/utils/math/physics/capsule.inl index 861bfa19..68866bc5 100644 --- a/engine/src/utils/math/physics/capsule.inl +++ b/engine/src/utils/math/physics/capsule.inl @@ -104,6 +104,9 @@ namespace { } bool capsuleMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( CAPSULE, MESH ); - return meshCapsule( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::meshCapsule( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } } \ No newline at end of file diff --git a/engine/src/utils/math/physics/impl.cpp b/engine/src/utils/math/physics/impl.cpp index bec58ea8..0841041a 100644 --- a/engine/src/utils/math/physics/impl.cpp +++ b/engine/src/utils/math/physics/impl.cpp @@ -18,17 +18,17 @@ namespace { int broadphaseBvhCapacity = 1; int meshBvhCapacity = 1; - bool flattenBvhBodies = true; + bool flattenBvhBodies = false; // bugged bool flattenBvhMeshes = true; // it actually seems slower to use these...... bool useBvhSahBodies = false; bool useBvhSahMeshes = false; - bool useSplitBvhs = false; // currently bugged if enabled + bool useSplitBvhs = true; // currently bugged if enabled int solverIterations = 10; - float baumgarteCorrectionPercent = 0.02f; + float baumgarteCorrectionPercent = 0.2f; float baumgarteCorrectionSlop = 0.01f; uf::stl::unordered_map manifoldsCache; @@ -39,7 +39,7 @@ namespace { .displacementThreshold = 0.25f, .overlapThreshold = 2.0f, .dirtyRatioThreshold = 0.3f, - .maxFramesBeforeRebuild = 3600, + .maxFramesBeforeRebuild = 600, }; } @@ -122,31 +122,26 @@ void uf::physics::impl::step( pod::World& world, float dt ) { auto& dynamicBvh = world.dynamicBvh; auto& staticBvh = world.staticBvh; - uf::stl::vector staticBodies; - uf::stl::vector dynamicBodies; - if ( bodies.empty() ) return; ++::frameCounter; for ( auto* body : bodies ) { - ( body->isStatic ? staticBodies : dynamicBodies ).emplace_back(body); - if ( !body->activity.awake ) continue; ::integrate( *body, dt ); } // rebuild static bvh if diry if ( staticBvh.dirty && ::useSplitBvhs ) { - ::buildBroadphaseBVH( staticBvh, staticBodies, ::broadphaseBvhCapacity ); // (re)build + ::buildBroadphaseBVH( staticBvh, bodies, ::broadphaseBvhCapacity, ::useSplitBvhs, true ); // (re)build } - switch ( ::decideBVHUpdate( dynamicBvh, ::useSplitBvhs ? dynamicBodies : bodies, ::bvhUpdatePolicy, ::frameCounter ) ) { + switch ( ::decideBVHUpdate( dynamicBvh, bodies, ::bvhUpdatePolicy, ::frameCounter ) ) { case pod::BVH::UpdatePolicy::Decision::REBUILD: { - ::buildBroadphaseBVH( dynamicBvh, ::useSplitBvhs ? dynamicBodies : bodies, ::broadphaseBvhCapacity ); // (re)build + ::buildBroadphaseBVH( dynamicBvh, bodies, ::broadphaseBvhCapacity, ::useSplitBvhs, false ); // (re)build } break; case pod::BVH::UpdatePolicy::Decision::REFIT: { - ::refitBVH( dynamicBvh, ::useSplitBvhs ? dynamicBodies : bodies ); // refit + ::refitBVH( dynamicBvh, bodies ); // refit } break; case pod::BVH::UpdatePolicy::Decision::NONE: default: @@ -157,7 +152,9 @@ void uf::physics::impl::step( pod::World& world, float dt ) { // query for overlaps pod::BVH::pairs_t pairs; ::queryOverlaps( dynamicBvh, pairs ); - if ( ::useSplitBvhs ) ::queryOverlaps( dynamicBvh, staticBvh, pairs ); + if ( ::useSplitBvhs ) { + ::queryOverlaps( dynamicBvh, staticBvh, pairs ); + } // build islands uf::stl::vector islands; @@ -169,7 +166,8 @@ void uf::physics::impl::step( pod::World& world, float dt ) { // iterate overlaps uf::stl::vector manifolds; manifolds.reserve(::reserveCount); - for ( auto& [ia, ib] : pairs ) { + + for ( auto [ia, ib] : pairs ) { auto& a = *bodies[ia]; auto& b = *bodies[ib]; @@ -196,6 +194,7 @@ void uf::physics::impl::step( pod::World& world, float dt ) { // wake up bodies if ( a.activity.awake && !b.activity.awake ) ::wakeBody( b ); if ( b.activity.awake && !a.activity.awake ) ::wakeBody( a ); + // store manifold manifolds.emplace_back(manifold); } @@ -209,7 +208,8 @@ void uf::physics::impl::step( pod::World& world, float dt ) { if ( ::warmupSolver ) ::storeManifolds( manifolds, ::manifoldsCache ); // recompute bounds for further queries - for ( auto* body : dynamicBodies ) { + for ( auto* body : bodies ) { + if ( body->isStatic ) continue; body->bounds = ::computeAABB( *body ); } } @@ -476,7 +476,7 @@ pod::RayQuery uf::physics::impl::rayCast( const pod::Ray& ray, const pod::World& rayHit.contact.penetration = maxDistance; auto& dynamicBvh = world.dynamicBvh; - auto& staticBvh = world.dynamicBvh; + auto& staticBvh = world.staticBvh; auto& bodies = world.bodies; uf::stl::vector candidates; @@ -485,7 +485,9 @@ pod::RayQuery uf::physics::impl::rayCast( const pod::Ray& ray, const pod::World& for ( auto i : candidates ) { auto* b = bodies[i]; + if ( body == b ) continue; + switch ( b->collider.type ) { case pod::ShapeType::AABB: rayAabb( ray, *b, rayHit ); break; case pod::ShapeType::SPHERE: raySphere( ray, *b, rayHit ); break; diff --git a/engine/src/utils/math/physics/plane.inl b/engine/src/utils/math/physics/plane.inl index beace668..a5eb96dc 100644 --- a/engine/src/utils/math/physics/plane.inl +++ b/engine/src/utils/math/physics/plane.inl @@ -48,10 +48,16 @@ namespace { } bool planeCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( PLANE, CAPSULE ); - return capsulePlane( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::capsulePlane( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } bool planeMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( PLANE, MESH ); - return meshPlane( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::meshPlane( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } } \ No newline at end of file diff --git a/engine/src/utils/math/physics/sphere.inl b/engine/src/utils/math/physics/sphere.inl index 6fdb12fb..77c0a807 100644 --- a/engine/src/utils/math/physics/sphere.inl +++ b/engine/src/utils/math/physics/sphere.inl @@ -53,14 +53,23 @@ namespace { } bool spherePlane( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( SPHERE, PLANE ); - return planeSphere( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::planeSphere( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } bool sphereCapsule( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( SPHERE, CAPSULE ); - return capsuleSphere( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::capsuleSphere( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } bool sphereMesh( const pod::PhysicsBody& a, const pod::PhysicsBody& b, pod::Manifold& manifold, float eps ) { ASSERT_COLLIDER_TYPES( SPHERE, MESH ); - return meshSphere( b, a, manifold, eps ); + auto start = manifold.points.size(); + if ( !::meshSphere( b, a, manifold, eps ) ) return false; + for ( auto i = start; i < manifold.points.size(); ++i ) manifold.points[i].normal = -manifold.points[i].normal; + return true; } } \ No newline at end of file