@@ -754,36 +754,6 @@ HWY_INLINE Vec<RebindToSigned<D>> ILogB(const D df, Vec<D> d) {
754754 return q;
755755}
756756
757- // Computes ln(x) in double-float precision (version 2); both the argument and the result are Vec2<D> pairs
758- // Translated from libm/sleefsimdsp.c:2526 logk2f
759- template <class D >
760- HWY_INLINE Vec2<D> LogFastDF (const D df, Vec2<D> d) {
761- Vec2<D> x, x2, m, s;
762- Vec<D> t;
763- Vec<RebindToSigned<D>> e;
764-
765- #if !(HWY_ARCH_X86 && HWY_TARGET <= HWY_AVX3)
766- e = ILogB (df, Mul (Get2<0 >(d), Set (df, 1 .0f /0 .75f ))); // integer exponent taken from the first component of d scaled by 1/0.75
767- #else
768- e = NearestInt (_mm512_getexp_ps (f.raw )); // NOTE(review): no `f` is declared in this function - confirm the intended operand
769- #endif
770- m = ScaleDF (df, d, Pow2I (df, Neg (e))); // d scaled by Pow2I of the negated exponent (presumably 2^-e - confirm against Pow2I)
771-
772- x = DivDF (df, AddDF (df, m, Set (df, -1 )), AddDF (df, m, Set (df, 1 ))); // x = (m + -1) / (m + 1) through the DF helpers
773- x2 = SquareDF (df, x);
774-
775- t = Set (df, 0 .2392828464508056640625f ); // polynomial in the first component of x2, built by chained MulAdd
776- t = MulAdd (t, Get2<0 >(x2), Set (df, 0 .28518211841583251953125f ));
777- t = MulAdd (t, Get2<0 >(x2), Set (df, 0 .400005877017974853515625f ));
778- t = MulAdd (t, Get2<0 >(x2), Set (df, 0 .666666686534881591796875f ));
779-
780- s = MulDF (df, Create2 (df, Set (df, 0 .69314718246459960938f ), Set (df, -1 .904654323148236017e-09f )), ConvertTo (df, e)); // e times a two-part constant (presumably ln(2) split into high and low parts)
781- s = AddFastDF (df, s, ScaleDF (df, x, Set (df, 2 ))); // adds x scaled by 2
782- s = AddFastDF (df, s, MulDF (df, MulDF (df, x2, x), t)); // adds (x2 * x) * t
783-
784- return s;
785- }
786-
788758// Create a mask that is true if x's sign bit is set
788758// Translated from libm/sleefsimdsp.c:472 vsignbit_vo_vf
789759template <class D >
@@ -900,193 +870,6 @@ HWY_INLINE Vec<D> Expm1(const D df, Vec<D> a) {
900870 return x;
901871}
902872
903- // Computes ln(x) with 1.0 ULP accuracy; takes and returns a single Vec<D>
904- // Translated from libm/sleefsimdsp.c:2268 xlogf_u1
905- template <class D >
906- HWY_INLINE Vec<D> Log (const D df, Vec<D> d) {
907- RebindToSigned<D> di;
908-
909- Vec2<D> x;
910- Vec<D> t, m, x2;
911-
912- #if !(HWY_ARCH_X86 && HWY_TARGET <= HWY_AVX3)
913- Mask<D> o = Lt (d, Set (df, FloatMin)); // o marks lanes where d < FloatMin
914- d = IfThenElse (RebindMask (df, o), Mul (d, Set (df, (float )(INT64_C (1 ) << 32 ) * (float )(INT64_C (1 ) << 32 ))), d); // those lanes are multiplied by (float)2^32 * (float)2^32
915- Vec<RebindToSigned<D>> e = ILogB2 (df, Mul (d, Set (df, 1 .0f /0 .75f )));
916- m = LoadExp3 (df, d, Neg (e));
917- e = IfThenElse (RebindMask (di, o), Sub (e, Set (di, 64 )), e); // 64 is subtracted from e on the lanes scaled above (presumably undoing that scaling)
918- Vec2<D> s = MulDF (df, Create2 (df, Set (df, 0 .69314718246459960938f ), Set (df, -1 .904654323148236017e-09f )), ConvertTo (df, e)); // e times a two-part constant (presumably ln(2) as high/low parts)
919- #else
920- Vec<D> e = _mm512_getexp_ps (f.raw ); // NOTE(review): no `f` is declared in this function - confirm the intended operand
921- e = IfThenElse (RebindMask (df, Eq (e, Inf (df))), Set (df, 128 .0f ), e); // lanes where e equals Inf(df) are replaced by 128.0f
922- m = _mm512_getmant_ps (f.raw , _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan);
923- Vec2<D> s = MulDF (df, Create2 (df, Set (df, 0 .69314718246459960938f ), Set (df, -1 .904654323148236017e-09f )), e);
924- #endif
925-
926- x = DivDF (df, AddDF (df, Set (df, -1 ), m), AddDF (df, Set (df, 1 ), m)); // x = (-1 + m) / (1 + m) through the DF helpers
927- x2 = Mul (Get2<0 >(x), Get2<0 >(x)); // square of the first component of x only
928-
929- t = Set (df, +0 .3027294874e+0f ); // polynomial in x2, built by chained MulAdd
930- t = MulAdd (t, x2, Set (df, +0 .3996108174e+0f ));
931- t = MulAdd (t, x2, Set (df, +0 .6666694880e+0f ));
932-
933- s = AddFastDF (df, s, ScaleDF (df, x, Set (df, 2 ))); // adds x scaled by 2
934- s = AddFastDF (df, s, Mul (Mul (x2, Get2<0 >(x)), t)); // adds (x2 * first component of x) * t
935-
936- Vec<D> r = Add (Get2<0 >(s), Get2<1 >(s)); // the two components of s are summed into the single-vector result
937-
938- #if !(HWY_ARCH_X86 && HWY_TARGET <= HWY_AVX3)
939- r = IfThenElse (RebindMask (df, Eq (d, Inf (df))), Set (df, InfFloat), r); // d == Inf(df) -> InfFloat
940- r = IfThenElse (RebindMask (df, Or (Lt (d, Set (df, 0 )), IsNaN (d))), Set (df, NanFloat), r); // d < 0 or d is NaN -> NanFloat
941- r = IfThenElse (RebindMask (df, Eq (d, Set (df, 0 ))), Set (df, -InfFloat), r); // d == 0 -> -InfFloat
942- #else
943- r = vfixup_vf_vf_vf_vi2_i (r, d, Set (di, (4 << (2 *4 )) | (3 << (4 *4 )) | (5 << (5 *4 )) | (2 << (6 *4 ))), 0 ); // presumably applies the same special-case fix-ups via a packed table - TODO confirm
944- #endif
945-
946- return r;
947- }
948-
949- // Computes ln(x) with 3.5 ULP accuracy; works on plain Vec<D> values only (no Vec2<D> intermediates)
950- // Translated from libm/sleefsimdsp.c:1984 xlogf
951- template <class D >
952- HWY_INLINE Vec<D> LogFast (const D df, Vec<D> d) {
953- RebindToSigned<D> di;
954-
955- Vec<D> x, x2, t, m;
956-
957- #if !(HWY_ARCH_X86 && HWY_TARGET <= HWY_AVX3)
958- Mask<D> o = Lt (d, Set (df, FloatMin)); // o marks lanes where d < FloatMin
959- d = IfThenElse (RebindMask (df, o), Mul (d, Set (df, (float )(INT64_C (1 ) << 32 ) * (float )(INT64_C (1 ) << 32 ))), d); // those lanes are multiplied by (float)2^32 * (float)2^32
960- Vec<RebindToSigned<D>> e = ILogB2 (df, Mul (d, Set (df, 1 .0f /0 .75f )));
961- m = LoadExp3 (df, d, Neg (e));
962- e = IfThenElse (RebindMask (di, o), Sub (e, Set (di, 64 )), e); // 64 is subtracted from e on the lanes scaled above (presumably undoing that scaling)
963- #else
964- Vec<D> e = _mm512_getexp_ps (f.raw ); // NOTE(review): no `f` is declared in this function - confirm the intended operand
965- e = IfThenElse (RebindMask (df, Eq (e, Inf (df))), Set (df, 128 .0f ), e); // lanes where e equals Inf(df) are replaced by 128.0f
966- m = _mm512_getmant_ps (f.raw , _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan);
967- #endif
968-
969- x = Div (Sub (m, Set (df, 1 .0f )), Add (Set (df, 1 .0f ), m)); // x = (m - 1) / (1 + m)
970- x2 = Mul (x, x);
971-
972- t = Set (df, 0 .2392828464508056640625f ); // polynomial in x2, built by chained MulAdd; the last term added is 2.0f
973- t = MulAdd (t, x2, Set (df, 0 .28518211841583251953125f ));
974- t = MulAdd (t, x2, Set (df, 0 .400005877017974853515625f ));
975- t = MulAdd (t, x2, Set (df, 0 .666666686534881591796875f ));
976- t = MulAdd (t, x2, Set (df, 2 .0f ));
977-
978- #if !(HWY_ARCH_X86 && HWY_TARGET <= HWY_AVX3)
979- x = MulAdd (x, t, Mul (Set (df, 0 .693147180559945286226764f ), ConvertTo (df, e))); // combines x, t and 0.693147... * e (x * t + that product, assuming MulAdd(a, b, c) is a * b + c)
980- x = IfThenElse (RebindMask (df, Eq (d, Inf (df))), Set (df, InfFloat), x); // d == Inf(df) -> InfFloat
981- x = IfThenElse (RebindMask (df, Or (Lt (d, Set (df, 0 )), IsNaN (d))), Set (df, NanFloat), x); // d < 0 or d is NaN -> NanFloat
982- x = IfThenElse (RebindMask (df, Eq (d, Set (df, 0 ))), Set (df, -InfFloat), x); // d == 0 -> -InfFloat
983- #else
984- x = MulAdd (x, t, Mul (Set (df, 0 .693147180559945286226764f ), e));
985- x = vfixup_vf_vf_vf_vi2_i (x, d, Set (di, (5 << (5 *4 ))), 0 ); // presumably a special-case fix-up driven by a packed table - TODO confirm
986- #endif
987-
988- return x;
989- }
990-
991- // Computes log1p(x) with 1.0 ULP accuracy; takes and returns a single Vec<D>
992- // Translated from libm/sleefsimdsp.c:2842 xlog1pf
993- template <class D >
994- HWY_INLINE Vec<D> Log1p (const D df, Vec<D> d) {
995- RebindToUnsigned<D> du;
996- RebindToSigned<D> di;
997-
998- Vec2<D> x;
999- Vec<D> t, m, x2;
1000-
1001- Vec<D> dp1 = Add (d, Set (df, 1 )); // dp1 = d + 1
1002-
1003- #if !(HWY_ARCH_X86 && HWY_TARGET <= HWY_AVX3)
1004- Mask<D> o = Lt (dp1, Set (df, FloatMin)); // o marks lanes where dp1 < FloatMin
1005- dp1 = IfThenElse (RebindMask (df, o), Mul (dp1, Set (df, (float )(INT64_C (1 ) << 32 ) * (float )(INT64_C (1 ) << 32 ))), dp1); // those lanes are multiplied by (float)2^32 * (float)2^32
1006- Vec<RebindToSigned<D>> e = ILogB2 (df, Mul (dp1, Set (df, 1 .0f /0 .75f )));
1007- t = LoadExp3 (df, Set (df, 1 ), Neg (e)); // t is built from the constant 1 and the negated exponent
1008- m = MulAdd (d, t, Sub (t, Set (df, 1 ))); // combines d, t and (t - 1) (d * t + (t - 1), assuming MulAdd(a, b, c) is a * b + c)
1009- e = IfThenElse (RebindMask (di, o), Sub (e, Set (di, 64 )), e); // 64 is subtracted from e on the lanes scaled above (presumably undoing that scaling)
1010- Vec2<D> s = MulDF (df, Create2 (df, Set (df, 0 .69314718246459960938f ), Set (df, -1 .904654323148236017e-09f )), ConvertTo (df, e)); // e times a two-part constant (presumably ln(2) as high/low parts)
1011- #else
1012- Vec<D> e = _mm512_getexp_ps (f.raw ); // NOTE(review): no `f` is declared in this function - confirm the intended operand
1013- e = IfThenElse (RebindMask (df, Eq (e, Inf (df))), Set (df, 128 .0f ), e); // lanes where e equals Inf(df) are replaced by 128.0f
1014- t = LoadExp3 (df, Set (df, 1 ), Neg (NearestInt (e)));
1015- m = MulAdd (d, t, Sub (t, Set (df, 1 )));
1016- Vec2<D> s = MulDF (df, Create2 (df, Set (df, 0 .69314718246459960938f ), Set (df, -1 .904654323148236017e-09f )), e);
1017- #endif
1018-
1019- x = DivDF (df, Create2 (df, m, Set (df, 0 )), AddFastDF (df, Set (df, 2 ), m)); // x = (m, 0) / (2 + m) through the DF helpers
1020- x2 = Mul (Get2<0 >(x), Get2<0 >(x)); // square of the first component of x only
1021-
1022- t = Set (df, +0 .3027294874e+0f ); // polynomial in x2, built by chained MulAdd
1023- t = MulAdd (t, x2, Set (df, +0 .3996108174e+0f ));
1024- t = MulAdd (t, x2, Set (df, +0 .6666694880e+0f ));
1025-
1026- s = AddFastDF (df, s, ScaleDF (df, x, Set (df, 2 ))); // adds x scaled by 2
1027- s = AddFastDF (df, s, Mul (Mul (x2, Get2<0 >(x)), t)); // adds (x2 * first component of x) * t
1028-
1029- Vec<D> r = Add (Get2<0 >(s), Get2<1 >(s)); // the two components of s are summed into the single-vector result
1030-
1031- r = IfThenElse (RebindMask (df, Gt (d, Set (df, 1e+38 ))), Set (df, InfFloat), r); // d > 1e+38 -> InfFloat (note: 1e+38 is written as a double literal)
1032- r = BitCast (df, IfThenElse (RebindMask (du, Gt (Set (df, -1 ), d)), Set (du, -1 ), BitCast (du, r))); // d < -1 -> lane set to the bit pattern of Set(du, -1) (presumably all ones, i.e. a NaN - confirm)
1033- r = IfThenElse (RebindMask (df, Eq (d, Set (df, -1 ))), Set (df, -InfFloat), r); // d == -1 -> -InfFloat
1034- r = IfThenElse (RebindMask (df, Eq (BitCast (du, d), Set (du, 0x80000000 ))), Set (df, -0 .0f ), r); // bits of d equal 0x80000000 -> -0.0f (presumably keeps the sign of a negative-zero input; assumes 32-bit lanes)
1035-
1036- return r;
1037- }
1038-
1039- // Computes log2(x) with 1.0 ULP accuracy; takes and returns a single Vec<D>
1040- // Translated from libm/sleefsimdsp.c:2757 xlog2f
1041- template <class D >
1042- HWY_INLINE Vec<D> Log2 (const D df, Vec<D> d) {
1043- RebindToSigned<D> di;
1044-
1045- Vec2<D> x;
1046- Vec<D> t, m, x2;
1047-
1048- #if !(HWY_ARCH_X86 && HWY_TARGET <= HWY_AVX3)
1049- Mask<D> o = Lt (d, Set (df, FloatMin)); // o marks lanes where d < FloatMin
1050- d = IfThenElse (RebindMask (df, o), Mul (d, Set (df, (float )(INT64_C (1 ) << 32 ) * (float )(INT64_C (1 ) << 32 ))), d); // those lanes are multiplied by (float)2^32 * (float)2^32
1051- Vec<RebindToSigned<D>> e = ILogB2 (df, Mul (d, Set (df, 1.0 /0.75 ))); // NOTE(review): double literals here, whereas Log and LogFast in this file use 1.0f/0.75f - confirm intent
1052- m = LoadExp3 (df, d, Neg (e));
1053- e = IfThenElse (RebindMask (di, o), Sub (e, Set (di, 64 )), e); // 64 is subtracted from e on the lanes scaled above (presumably undoing that scaling)
1054- #else
1055- Vec<D> e = _mm512_getexp_ps (f.raw ); // NOTE(review): no `f` is declared in this function - confirm the intended operand
1056- e = IfThenElse (RebindMask (df, Eq (e, Inf (df))), Set (df, 128 .0f ), e); // lanes where e equals Inf(df) are replaced by 128.0f
1057- m = _mm512_getmant_ps (f.raw , _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan);
1058- #endif
1059-
1060- x = DivDF (df, AddDF (df, Set (df, -1 ), m), AddDF (df, Set (df, 1 ), m)); // x = (-1 + m) / (1 + m) through the DF helpers
1061- x2 = Mul (Get2<0 >(x), Get2<0 >(x)); // square of the first component of x only
1062-
1063- t = Set (df, +0 .4374550283e+0f ); // polynomial in x2, built by chained MulAdd
1064- t = MulAdd (t, x2, Set (df, +0 .5764790177e+0f ));
1065- t = MulAdd (t, x2, Set (df, +0 .9618012905120f ));
1066-
1067- #if !(HWY_ARCH_X86 && HWY_TARGET <= HWY_AVX3)
1068- Vec2<D> s = AddDF (df, ConvertTo (df, e),
1069- MulDF (df, x, Create2 (df, Set (df, 2.8853900432586669922 ), Set (df, 3.2734474483568488616e-08 )))); // s = e + x * (two-part constant); the constant parts are written as double literals
1070- #else
1071- Vec2<D> s = AddDF (df, e,
1072- MulDF (df, x, Create2 (df, Set (df, 2.8853900432586669922 ), Set (df, 3.2734474483568488616e-08 ))));
1073- #endif
1074-
1075- s = AddDF (df, s, Mul (Mul (x2, Get2<0 >(x)), t)); // adds (x2 * first component of x) * t
1076-
1077- Vec<D> r = Add (Get2<0 >(s), Get2<1 >(s)); // the two components of s are summed into the single-vector result
1078-
1079- #if !(HWY_ARCH_X86 && HWY_TARGET <= HWY_AVX3)
1080- r = IfThenElse (RebindMask (df, Eq (d, Inf (df))), Set (df, InfDouble), r); // d == Inf(df) -> InfDouble; NOTE(review): Log in this file uses InfFloat/NanFloat for the same cases - confirm the Double constants are intended
1081- r = IfThenElse (RebindMask (df, Or (Lt (d, Set (df, 0 )), IsNaN (d))), Set (df, NanDouble), r); // d < 0 or d is NaN -> NanDouble
1082- r = IfThenElse (RebindMask (df, Eq (d, Set (df, 0 ))), Set (df, -InfDouble), r); // d == 0 -> -InfDouble
1083- #else
1084- r = vfixup_vf_vf_vf_vi2_i (r, d, Set (di, (4 << (2 *4 )) | (3 << (4 *4 )) | (5 << (5 *4 )) | (2 << (6 *4 ))), 0 ); // presumably applies the same special-case fix-ups via a packed table - TODO confirm
1085- #endif
1086-
1087- return r;
1088- }
1089-
1090873// Computes sin(x) with 1.0 ULP accuracy
1091874// Translated from libm/sleefsimdsp.c:969 xsinf_u1
1092875template <class D >
0 commit comments