Algo-Direct2.h: fix hipcc issue by calling operator< explicitly

Fix taken from https://github.com/agrocylo/bitsandbytes-rocm, thanks.
master
arlo-phoenix 2023-08-05 02:12:14 +07:00
parent d10197bc93
commit 3682106eb0
1 changed file with 8 additions and 8 deletions

@@ -93,8 +93,8 @@ private:
__m128 vxp = _mm_shuffle_ps(xp01, xp23, (1) + (3 << 2) + (1 << 4) + (3 << 6));
#endif
IVec<SSE, float> i(u.vec);
IVec<SSE, float> vlem = vz < vxm;
IVec<SSE, float> vlep = vz < vxp;
IVec<SSE, float> vlem = operator< (vz,vxm);
IVec<SSE, float> vlep = operator< (vz,vxp);
i = i + vlem + vlep;
i.store(pr);
}
@@ -123,8 +123,8 @@ private:
__m128d vxp = _mm_shuffle_pd(vx0, vx1, 3);
IVec<SSE, double> i(b1, b0);
IVec<SSE, double> vlem = (vz < vxm);
IVec<SSE, double> vlep = (vz < vxp);
IVec<SSE, double> vlem = operator< (vz, vxm);
IVec<SSE, double> vlep = operator< (vz, vxp);
i = i + vlem + vlep;
union {
@@ -227,8 +227,8 @@ private:
#endif
IVec<AVX, float> vlem = vz < vxm;
IVec<AVX, float> vlep = vz < vxp;
IVec<AVX, float> vlem = operator< (vz, vxm);
IVec<AVX, float> vlep = operator< (vz, vxp);
ip = ip + vlem + vlep;
ip.store(pr);
@@ -277,8 +277,8 @@ private:
// FVec<AVX, double> vxp = _mm256_insertf128_pd(_mm256_castpd128_pd256(h01p), h23p, 1);
IVec<AVX, double> i(u.vec);
IVec<AVX, double> vlem = vz < vxm;
IVec<AVX, double> vlep = vz < vxp;
IVec<AVX, double> vlem = operator< (vz,vxm);
IVec<AVX, double> vlep = operator< (vz,vxp);
i = i + vlem + vlep;
i.extractLo32s().store(pr);
}