@@ -2599,38 +2599,14 @@ HWY_API VFromD<DN> ReorderDemote2To(DN dn, V a, V b) {
25992599
26002600template <class D , HWY_IF_F32_D(D)>
26012601HWY_API VFromD<D> PromoteTo (D df, Vec<Rebind<uint16_t , D>> v) {
2602- const RebindToUnsigned<decltype (df)> du32;
2603-
2604- // Floats have 23 bits of mantissa.
2605- // We want least significant 8 bits to be shifted to [ 0 .. 255 ], therefore need to add 2^23
2606- // See this page for details: https://www.h-schmidt.net/FloatConverter/IEEE754.html
2607- // If you want output floats in [ 0 .. 255.0 / 256.0 ] interval, change into 2^15 = 0x47000000
2608- constexpr uint32_t offsetValue = 0x4b000000 ;
2609- // Check disassembly & verify your compiler has moved this initialization outside the loop
2610- const auto offsetInt = Set (du32, offsetValue);
2611- // Bitwise is probably slightly faster than addition, delivers same results for our input
2612- auto u32 = PromoteTo (du32, v);
2613- u32 = Or (u32 , offsetInt);
2614- // The only FP operation required is subtraction, hopefully faster than UCVTF
2615- return Sub (BitCast (df, u32 ), BitCast (df, offsetInt));
2602+ const RebindToUnsigned<decltype (df)> du32;
2603+ return ConvertTo (df, PromoteTo (du32, v));
26162604}
26172605
26182606template <class D , HWY_IF_F32_D(D)>
26192607HWY_API VFromD<D> PromoteTo (D df, Vec<Rebind<uint8_t , D>> v) {
2620- const RebindToUnsigned<decltype (df)> du32;
2621-
2622- // Floats have 23 bits of mantissa.
2623- // We want least significant 8 bits to be shifted to [ 0 .. 255 ], therefore need to add 2^23
2624- // See this page for details: https://www.h-schmidt.net/FloatConverter/IEEE754.html
2625- // If you want output floats in [ 0 .. 255.0 / 256.0 ] interval, change into 2^15 = 0x47000000
2626- constexpr uint32_t offsetValue = 0x4b000000 ;
2627- // Check disassembly & verify your compiler has moved this initialization outside the loop
2628- const auto offsetInt = Set (du32, offsetValue);
2629- // Bitwise is probably slightly faster than addition, delivers same results for our input
2630- auto u32 = PromoteTo (du32, v);
2631- u32 = Or (u32 , offsetInt);
2632- // The only FP operation required is subtraction, hopefully faster than UCVTF
2633- return Sub (BitCast (df, u32 ), BitCast (df, offsetInt));
2608+ const RebindToUnsigned<decltype (df)> du32;
2609+ return ConvertTo (df, PromoteTo (du32, v));
26342610}
26352611
26362612template <class D , HWY_IF_U8_D(D)>
0 commit comments