@@ -1166,7 +1166,7 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
11661166#define sseSwizzle ( a, mask ) _mm_shuffle_ps( (a), (a), SWZ_##mask )
11671167
11681168 inline __m128 unitQuat () {
1169- return _mm_set_ps ( 1 .0f , 0 .0f , 0 .0f , 0 .0f ); // order is reversed
1169+ return _mm_setr_ps ( 0 .0f , 0 .0f , 0 .0f , 1 .0f );
11701170 }
11711171 inline __m128 sseLoadInts ( const int vec[4 ] ) {
11721172 return *(__m128 *)vec;
@@ -1175,13 +1175,14 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
11751175 alignas (16 ) static const std::array<int , 4 > vec = { 0 , 0 , 0 , 0 };
11761176 return sseLoadInts ( vec.data () );
11771177 }
1178- inline __m128 mask_000W () {
1179- alignas (16 ) static const std::array<int , 4 > vec = { 0 , 0 , 0 , -1 };
1180- return sseLoadInts ( vec.data () );
1181- }
1182- inline __m128 mask_XYZ0 () {
1183- alignas (16 ) static const std::array<int , 4 > vec = { -1 , -1 , -1 , 0 };
1184- return sseLoadInts ( vec.data () );
1178+
1179+ // Returns {first.x, first.y, first.z, second.w}: lanes 0-2 are taken from `first`, lane 3 from `second`.
1180+ inline __m128 first_XYZ_second_W ( __m128 first, __m128 second)
1181+ {
1182+ // tmp = {second.w, don't care, first.z, don't care}; _mm_shuffle_ps picks its two low lanes from its first operand and its two high lanes from its second operand
1183+ __m128 tmp = _mm_shuffle_ps (second, first, 3 << 0 | 2 << 4 );
1184+ // result = {first.x, first.y, tmp.z (= first.z), tmp.x (= second.w)}
1185+ return _mm_shuffle_ps (first, tmp, 0 << 0 | 1 << 2 | 2 << 4 | 0 << 6 );
11851186 }
11861187
11871188 inline __m128 sign_000W () {
@@ -1295,10 +1296,8 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
12951296 t->sseTransScale = _mm_or_ps ( v, unitQuat () );
12961297 }
12971298 inline void TransInitScale ( float factor, transform_t *t ) {
1298- __m128 f = _mm_set1_ps ( factor );
1299- f = _mm_and_ps ( f, mask_000W () );
13001299 t->sseRot = unitQuat ();
1301- t->sseTransScale = f ;
1300+ t->sseTransScale = _mm_setr_ps ( 0 . 0f , 0 . 0f , 0 . 0f , factor ) ;
13021301 }
13031302 inline void TransInsRotationQuat ( const quat_t quat, transform_t *t ) {
13041303 __m128 q = _mm_loadu_ps ( quat );
@@ -1318,11 +1317,10 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
13181317 }
13191318 inline void TransInsTranslation (
13201319 const vec3_t vec, transform_t *t ) {
1321- __m128 v = sseLoadVec3Unsafe ( vec );
1320+ __m128 v = sseLoadVec3 ( vec );
13221321 __m128 ts = t->sseTransScale ;
13231322 v = sseQuatTransform ( t->sseRot , v );
13241323 v = _mm_mul_ps ( v, sseSwizzle ( ts, WWWW ) );
1325- v = _mm_and_ps ( v, mask_XYZ0 () );
13261324 t->sseTransScale = _mm_add_ps ( ts, v );
13271325 }
13281326 inline void TransAddTranslation (
@@ -1339,7 +1337,8 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
13391337 __m128 bTS = b->sseTransScale ;
13401338 __m128 tmp = sseQuatTransform ( bRot, aTS );
13411339 tmp = _mm_mul_ps ( tmp, sseSwizzle ( bTS, WWWW ) );
1342- out->sseTransScale = _mm_add_ps ( tmp, _mm_and_ps ( bTS, mask_XYZ0 () ) );
1340+ __m128 bT = first_XYZ_second_W ( bTS, mask_0000 () );
1341+ out->sseTransScale = _mm_add_ps ( tmp, bT );
13431342 out->sseRot = sseQuatMul ( bRot, aRot );
13441343 }
13451344 inline void TransInverse ( const transform_t *in,
@@ -1352,8 +1351,7 @@ inline vec_t VectorNormalize2( const vec3_t v, vec3_t out )
13521351 __m128 tmp = sseQuatTransform ( invRot, invT );
13531352 tmp = _mm_mul_ps ( tmp, invS );
13541353 out->sseRot = invRot;
1355- out->sseTransScale = _mm_or_ps ( _mm_and_ps ( tmp, mask_XYZ0 () ),
1356- _mm_and_ps ( invS, mask_000W () ) );
1354+ out->sseTransScale = first_XYZ_second_W ( tmp, invS );
13571355 }
13581356 inline void TransStartLerp ( transform_t *t ) {
13591357 t->sseRot = mask_0000 ();
0 commit comments